; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

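; These tests check that a vp.mul feeding a vp.add, with the addend also used
; as the false operand of a vp.merge (tail undisturbed) or vp.select (the _ta
; variants, tail agnostic), is selected as a single vmacc.vv/vmacc.vx whose
; vsetvli carries the corresponding tu/ta policy.
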
7declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
8declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
9declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
10declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
11
12define <vscale x 1 x i8> @vmacc_vv_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
13; CHECK-LABEL: vmacc_vv_nxv1i8:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
16; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
17; CHECK-NEXT:    vmv1r.v v8, v10
18; CHECK-NEXT:    ret
19  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
20  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
21  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
22  ret <vscale x 1 x i8> %u
23}
24
25define <vscale x 1 x i8> @vmacc_vv_nxv1i8_unmasked(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
26; CHECK-LABEL: vmacc_vv_nxv1i8_unmasked:
27; CHECK:       # %bb.0:
28; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, ma
29; CHECK-NEXT:    vmacc.vv v10, v8, v9
30; CHECK-NEXT:    vmv1r.v v8, v10
31; CHECK-NEXT:    ret
32  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
33  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
34  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
35  ret <vscale x 1 x i8> %u
36}
37
38define <vscale x 1 x i8> @vmacc_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
39; CHECK-LABEL: vmacc_vx_nxv1i8:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
42; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
43; CHECK-NEXT:    vmv1r.v v8, v9
44; CHECK-NEXT:    ret
45  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
46  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
47  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
48  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
49  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
50  ret <vscale x 1 x i8> %u
51}
52
53define <vscale x 1 x i8> @vmacc_vx_nxv1i8_unmasked(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
54; CHECK-LABEL: vmacc_vx_nxv1i8_unmasked:
55; CHECK:       # %bb.0:
56; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
57; CHECK-NEXT:    vmacc.vx v9, a0, v8
58; CHECK-NEXT:    vmv1r.v v8, v9
59; CHECK-NEXT:    ret
60  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
61  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
62  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
63  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
64  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
65  ret <vscale x 1 x i8> %u
66}
67
68define <vscale x 1 x i8> @vmacc_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
69; CHECK-LABEL: vmacc_vv_nxv1i8_ta:
70; CHECK:       # %bb.0:
71; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
72; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
73; CHECK-NEXT:    vmv1r.v v8, v10
74; CHECK-NEXT:    ret
75  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
76  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
77  %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
78  ret <vscale x 1 x i8> %u
79}
80
81define <vscale x 1 x i8> @vmacc_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
82; CHECK-LABEL: vmacc_vx_nxv1i8_ta:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
85; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
86; CHECK-NEXT:    vmv1r.v v8, v9
87; CHECK-NEXT:    ret
88  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
89  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
90  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
91  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
92  %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
93  ret <vscale x 1 x i8> %u
94}
95
96declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
97declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
98declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
99declare <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
100
101define <vscale x 2 x i8> @vmacc_vv_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
102; CHECK-LABEL: vmacc_vv_nxv2i8:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
105; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
106; CHECK-NEXT:    vmv1r.v v8, v10
107; CHECK-NEXT:    ret
108  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
109  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
110  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
111  ret <vscale x 2 x i8> %u
112}
113
114define <vscale x 2 x i8> @vmacc_vv_nxv2i8_unmasked(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
115; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked:
116; CHECK:       # %bb.0:
117; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, ma
118; CHECK-NEXT:    vmacc.vv v10, v8, v9
119; CHECK-NEXT:    vmv1r.v v8, v10
120; CHECK-NEXT:    ret
121  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
122  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
123  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
124  ret <vscale x 2 x i8> %u
125}
126
127define <vscale x 2 x i8> @vmacc_vx_nxv2i8(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
128; CHECK-LABEL: vmacc_vx_nxv2i8:
129; CHECK:       # %bb.0:
130; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
131; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
132; CHECK-NEXT:    vmv1r.v v8, v9
133; CHECK-NEXT:    ret
134  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
135  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
136  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
137  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
138  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
139  ret <vscale x 2 x i8> %u
140}
141
142define <vscale x 2 x i8> @vmacc_vx_nxv2i8_unmasked(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
143; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, ma
146; CHECK-NEXT:    vmacc.vx v9, a0, v8
147; CHECK-NEXT:    vmv1r.v v8, v9
148; CHECK-NEXT:    ret
149  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
150  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
151  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
152  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
153  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
154  ret <vscale x 2 x i8> %u
155}
156
157define <vscale x 2 x i8> @vmacc_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
158; CHECK-LABEL: vmacc_vv_nxv2i8_ta:
159; CHECK:       # %bb.0:
160; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
161; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
162; CHECK-NEXT:    vmv1r.v v8, v10
163; CHECK-NEXT:    ret
164  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
165  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
166  %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
167  ret <vscale x 2 x i8> %u
168}
169
170define <vscale x 2 x i8> @vmacc_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
171; CHECK-LABEL: vmacc_vx_nxv2i8_ta:
172; CHECK:       # %bb.0:
173; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
174; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
175; CHECK-NEXT:    vmv1r.v v8, v9
176; CHECK-NEXT:    ret
177  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
178  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
179  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
180  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
181  %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
182  ret <vscale x 2 x i8> %u
183}
184
185declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
186declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
187declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
188declare <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
189
190define <vscale x 4 x i8> @vmacc_vv_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
191; CHECK-LABEL: vmacc_vv_nxv4i8:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
194; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
195; CHECK-NEXT:    vmv1r.v v8, v10
196; CHECK-NEXT:    ret
197  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
198  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
199  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
200  ret <vscale x 4 x i8> %u
201}
202
203define <vscale x 4 x i8> @vmacc_vv_nxv4i8_unmasked(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
204; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked:
205; CHECK:       # %bb.0:
206; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, ma
207; CHECK-NEXT:    vmacc.vv v10, v8, v9
208; CHECK-NEXT:    vmv1r.v v8, v10
209; CHECK-NEXT:    ret
210  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
211  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
212  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
213  ret <vscale x 4 x i8> %u
214}
215
216define <vscale x 4 x i8> @vmacc_vx_nxv4i8(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
217; CHECK-LABEL: vmacc_vx_nxv4i8:
218; CHECK:       # %bb.0:
219; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
220; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
221; CHECK-NEXT:    vmv1r.v v8, v9
222; CHECK-NEXT:    ret
223  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
224  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
225  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
226  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
227  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
228  ret <vscale x 4 x i8> %u
229}
230
231define <vscale x 4 x i8> @vmacc_vx_nxv4i8_unmasked(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
232; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked:
233; CHECK:       # %bb.0:
234; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
235; CHECK-NEXT:    vmacc.vx v9, a0, v8
236; CHECK-NEXT:    vmv1r.v v8, v9
237; CHECK-NEXT:    ret
238  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
239  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
240  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
241  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
242  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
243  ret <vscale x 4 x i8> %u
244}
245
246define <vscale x 4 x i8> @vmacc_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
247; CHECK-LABEL: vmacc_vv_nxv4i8_ta:
248; CHECK:       # %bb.0:
249; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
250; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
251; CHECK-NEXT:    vmv1r.v v8, v10
252; CHECK-NEXT:    ret
253  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
254  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
255  %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
256  ret <vscale x 4 x i8> %u
257}
258
259define <vscale x 4 x i8> @vmacc_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
260; CHECK-LABEL: vmacc_vx_nxv4i8_ta:
261; CHECK:       # %bb.0:
262; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
263; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
264; CHECK-NEXT:    vmv1r.v v8, v9
265; CHECK-NEXT:    ret
266  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
267  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
268  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
269  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
270  %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
271  ret <vscale x 4 x i8> %u
272}
273
274declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
275declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
276declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
277declare <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
278
279define <vscale x 8 x i8> @vmacc_vv_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
280; CHECK-LABEL: vmacc_vv_nxv8i8:
281; CHECK:       # %bb.0:
282; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
283; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
284; CHECK-NEXT:    vmv1r.v v8, v10
285; CHECK-NEXT:    ret
286  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
287  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
288  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
289  ret <vscale x 8 x i8> %u
290}
291
292define <vscale x 8 x i8> @vmacc_vv_nxv8i8_unmasked(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
293; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked:
294; CHECK:       # %bb.0:
295; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, ma
296; CHECK-NEXT:    vmacc.vv v10, v8, v9
297; CHECK-NEXT:    vmv1r.v v8, v10
298; CHECK-NEXT:    ret
299  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
300  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
301  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
302  ret <vscale x 8 x i8> %u
303}
304
305define <vscale x 8 x i8> @vmacc_vx_nxv8i8(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
306; CHECK-LABEL: vmacc_vx_nxv8i8:
307; CHECK:       # %bb.0:
308; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
309; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
310; CHECK-NEXT:    vmv1r.v v8, v9
311; CHECK-NEXT:    ret
312  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
313  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
314  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
315  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
316  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
317  ret <vscale x 8 x i8> %u
318}
319
320define <vscale x 8 x i8> @vmacc_vx_nxv8i8_unmasked(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
321; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked:
322; CHECK:       # %bb.0:
323; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
324; CHECK-NEXT:    vmacc.vx v9, a0, v8
325; CHECK-NEXT:    vmv1r.v v8, v9
326; CHECK-NEXT:    ret
327  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
328  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
329  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
330  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
331  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
332  ret <vscale x 8 x i8> %u
333}
334
335define <vscale x 8 x i8> @vmacc_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
336; CHECK-LABEL: vmacc_vv_nxv8i8_ta:
337; CHECK:       # %bb.0:
338; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
339; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
340; CHECK-NEXT:    vmv.v.v v8, v10
341; CHECK-NEXT:    ret
342  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
343  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
344  %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
345  ret <vscale x 8 x i8> %u
346}
347
348define <vscale x 8 x i8> @vmacc_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
349; CHECK-LABEL: vmacc_vx_nxv8i8_ta:
350; CHECK:       # %bb.0:
351; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
352; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
353; CHECK-NEXT:    vmv.v.v v8, v9
354; CHECK-NEXT:    ret
355  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
356  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
357  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
358  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
359  %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
360  ret <vscale x 8 x i8> %u
361}
362
363declare <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
364declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
365declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
366declare <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
367
368define <vscale x 16 x i8> @vmacc_vv_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
369; CHECK-LABEL: vmacc_vv_nxv16i8:
370; CHECK:       # %bb.0:
371; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
372; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
373; CHECK-NEXT:    vmv2r.v v8, v12
374; CHECK-NEXT:    ret
375  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
376  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
377  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
378  ret <vscale x 16 x i8> %u
379}
380
381define <vscale x 16 x i8> @vmacc_vv_nxv16i8_unmasked(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
382; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked:
383; CHECK:       # %bb.0:
384; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, ma
385; CHECK-NEXT:    vmacc.vv v12, v8, v10
386; CHECK-NEXT:    vmv2r.v v8, v12
387; CHECK-NEXT:    ret
388  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
389  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
390  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
391  ret <vscale x 16 x i8> %u
392}
393
394define <vscale x 16 x i8> @vmacc_vx_nxv16i8(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
395; CHECK-LABEL: vmacc_vx_nxv16i8:
396; CHECK:       # %bb.0:
397; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
398; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
399; CHECK-NEXT:    vmv2r.v v8, v10
400; CHECK-NEXT:    ret
401  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
402  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
403  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
404  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
405  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
406  ret <vscale x 16 x i8> %u
407}
408
409define <vscale x 16 x i8> @vmacc_vx_nxv16i8_unmasked(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
410; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked:
411; CHECK:       # %bb.0:
412; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, ma
413; CHECK-NEXT:    vmacc.vx v10, a0, v8
414; CHECK-NEXT:    vmv2r.v v8, v10
415; CHECK-NEXT:    ret
416  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
417  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
418  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
419  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
420  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
421  ret <vscale x 16 x i8> %u
422}
423
424define <vscale x 16 x i8> @vmacc_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
425; CHECK-LABEL: vmacc_vv_nxv16i8_ta:
426; CHECK:       # %bb.0:
427; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
428; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
429; CHECK-NEXT:    vmv.v.v v8, v12
430; CHECK-NEXT:    ret
431  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
432  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
433  %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
434  ret <vscale x 16 x i8> %u
435}
436
437define <vscale x 16 x i8> @vmacc_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
438; CHECK-LABEL: vmacc_vx_nxv16i8_ta:
439; CHECK:       # %bb.0:
440; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
441; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
442; CHECK-NEXT:    vmv.v.v v8, v10
443; CHECK-NEXT:    ret
444  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
445  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
446  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
447  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
448  %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
449  ret <vscale x 16 x i8> %u
450}
451
452declare <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
453declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
454declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
455declare <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
456
457define <vscale x 32 x i8> @vmacc_vv_nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
458; CHECK-LABEL: vmacc_vv_nxv32i8:
459; CHECK:       # %bb.0:
460; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
461; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
462; CHECK-NEXT:    vmv4r.v v8, v16
463; CHECK-NEXT:    ret
464  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
465  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
466  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
467  ret <vscale x 32 x i8> %u
468}
469
470define <vscale x 32 x i8> @vmacc_vv_nxv32i8_unmasked(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
471; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked:
472; CHECK:       # %bb.0:
473; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, ma
474; CHECK-NEXT:    vmacc.vv v16, v8, v12
475; CHECK-NEXT:    vmv4r.v v8, v16
476; CHECK-NEXT:    ret
477  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
478  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
479  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
480  ret <vscale x 32 x i8> %u
481}
482
483define <vscale x 32 x i8> @vmacc_vx_nxv32i8(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
484; CHECK-LABEL: vmacc_vx_nxv32i8:
485; CHECK:       # %bb.0:
486; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
487; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
488; CHECK-NEXT:    vmv4r.v v8, v12
489; CHECK-NEXT:    ret
490  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
491  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
492  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
493  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
494  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
495  ret <vscale x 32 x i8> %u
496}
497
498define <vscale x 32 x i8> @vmacc_vx_nxv32i8_unmasked(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
499; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, ma
502; CHECK-NEXT:    vmacc.vx v12, a0, v8
503; CHECK-NEXT:    vmv4r.v v8, v12
504; CHECK-NEXT:    ret
505  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
506  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
507  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
508  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
509  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
510  ret <vscale x 32 x i8> %u
511}
512
513define <vscale x 32 x i8> @vmacc_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
514; CHECK-LABEL: vmacc_vv_nxv32i8_ta:
515; CHECK:       # %bb.0:
516; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
517; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
518; CHECK-NEXT:    vmv.v.v v8, v16
519; CHECK-NEXT:    ret
520  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
521  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
522  %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
523  ret <vscale x 32 x i8> %u
524}
525
526define <vscale x 32 x i8> @vmacc_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
527; CHECK-LABEL: vmacc_vx_nxv32i8_ta:
528; CHECK:       # %bb.0:
529; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
530; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
531; CHECK-NEXT:    vmv.v.v v8, v12
532; CHECK-NEXT:    ret
533  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
534  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
535  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
536  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
537  %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
538  ret <vscale x 32 x i8> %u
539}
540
541declare <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
542declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
543declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
544declare <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
545
546define <vscale x 64 x i8> @vmacc_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
547; CHECK-LABEL: vmacc_vv_nxv64i8:
548; CHECK:       # %bb.0:
549; CHECK-NEXT:    vl8r.v v24, (a0)
550; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
551; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
552; CHECK-NEXT:    vmv8r.v v8, v24
553; CHECK-NEXT:    ret
554  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
555  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
556  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
557  ret <vscale x 64 x i8> %u
558}
559
560define <vscale x 64 x i8> @vmacc_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
561; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked:
562; CHECK:       # %bb.0:
563; CHECK-NEXT:    vl8r.v v24, (a0)
564; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
565; CHECK-NEXT:    vmacc.vv v24, v8, v16
566; CHECK-NEXT:    vmv8r.v v8, v24
567; CHECK-NEXT:    ret
568  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
569  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
570  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
571  ret <vscale x 64 x i8> %u
572}
573
574define <vscale x 64 x i8> @vmacc_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
575; CHECK-LABEL: vmacc_vx_nxv64i8:
576; CHECK:       # %bb.0:
577; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
578; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
579; CHECK-NEXT:    vmv8r.v v8, v16
580; CHECK-NEXT:    ret
581  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
582  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
583  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
584  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
585  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
586  ret <vscale x 64 x i8> %u
587}
588
589define <vscale x 64 x i8> @vmacc_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
590; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked:
591; CHECK:       # %bb.0:
592; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
593; CHECK-NEXT:    vmacc.vx v16, a0, v8
594; CHECK-NEXT:    vmv8r.v v8, v16
595; CHECK-NEXT:    ret
596  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
597  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
598  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
599  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
600  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
601  ret <vscale x 64 x i8> %u
602}
603
604define <vscale x 64 x i8> @vmacc_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
605; CHECK-LABEL: vmacc_vv_nxv64i8_ta:
606; CHECK:       # %bb.0:
607; CHECK-NEXT:    vl8r.v v24, (a0)
608; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
609; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
610; CHECK-NEXT:    vmv.v.v v8, v24
611; CHECK-NEXT:    ret
612  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
613  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
614  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
615  ret <vscale x 64 x i8> %u
616}
617
618define <vscale x 64 x i8> @vmacc_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
619; CHECK-LABEL: vmacc_vx_nxv64i8_ta:
620; CHECK:       # %bb.0:
621; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
622; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
623; CHECK-NEXT:    vmv.v.v v8, v16
624; CHECK-NEXT:    ret
625  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
626  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
627  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
628  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
629  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
630  ret <vscale x 64 x i8> %u
631}
632
633declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
634declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
635declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
636declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
637
638define <vscale x 1 x i16> @vmacc_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
639; CHECK-LABEL: vmacc_vv_nxv1i16:
640; CHECK:       # %bb.0:
641; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
642; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
643; CHECK-NEXT:    vmv1r.v v8, v10
644; CHECK-NEXT:    ret
645  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
646  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
647  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
648  ret <vscale x 1 x i16> %u
649}
650
651define <vscale x 1 x i16> @vmacc_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
652; CHECK-LABEL: vmacc_vv_nxv1i16_unmasked:
653; CHECK:       # %bb.0:
654; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
655; CHECK-NEXT:    vmacc.vv v10, v8, v9
656; CHECK-NEXT:    vmv1r.v v8, v10
657; CHECK-NEXT:    ret
658  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
659  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
660  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
661  ret <vscale x 1 x i16> %u
662}
663
664define <vscale x 1 x i16> @vmacc_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
665; CHECK-LABEL: vmacc_vx_nxv1i16:
666; CHECK:       # %bb.0:
667; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
668; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
669; CHECK-NEXT:    vmv1r.v v8, v9
670; CHECK-NEXT:    ret
671  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
672  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
673  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
674  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
675  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
676  ret <vscale x 1 x i16> %u
677}
678
679define <vscale x 1 x i16> @vmacc_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
680; CHECK-LABEL: vmacc_vx_nxv1i16_unmasked:
681; CHECK:       # %bb.0:
682; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
683; CHECK-NEXT:    vmacc.vx v9, a0, v8
684; CHECK-NEXT:    vmv1r.v v8, v9
685; CHECK-NEXT:    ret
686  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
687  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
688  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
689  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
690  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
691  ret <vscale x 1 x i16> %u
692}
693
694define <vscale x 1 x i16> @vmacc_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
695; CHECK-LABEL: vmacc_vv_nxv1i16_ta:
696; CHECK:       # %bb.0:
697; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
698; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
699; CHECK-NEXT:    vmv1r.v v8, v10
700; CHECK-NEXT:    ret
701  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
702  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
703  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
704  ret <vscale x 1 x i16> %u
705}
706
707define <vscale x 1 x i16> @vmacc_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
708; CHECK-LABEL: vmacc_vx_nxv1i16_ta:
709; CHECK:       # %bb.0:
710; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
711; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
712; CHECK-NEXT:    vmv1r.v v8, v9
713; CHECK-NEXT:    ret
714  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
715  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
716  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
717  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
718  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
719  ret <vscale x 1 x i16> %u
720}
721
722declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
723declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
724declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
725declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
726
727define <vscale x 2 x i16> @vmacc_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
728; CHECK-LABEL: vmacc_vv_nxv2i16:
729; CHECK:       # %bb.0:
730; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
731; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
732; CHECK-NEXT:    vmv1r.v v8, v10
733; CHECK-NEXT:    ret
734  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
735  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
736  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
737  ret <vscale x 2 x i16> %u
738}
739
740define <vscale x 2 x i16> @vmacc_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
741; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked:
742; CHECK:       # %bb.0:
743; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
744; CHECK-NEXT:    vmacc.vv v10, v8, v9
745; CHECK-NEXT:    vmv1r.v v8, v10
746; CHECK-NEXT:    ret
747  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
748  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
749  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
750  ret <vscale x 2 x i16> %u
751}
752
753define <vscale x 2 x i16> @vmacc_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
754; CHECK-LABEL: vmacc_vx_nxv2i16:
755; CHECK:       # %bb.0:
756; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
757; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
758; CHECK-NEXT:    vmv1r.v v8, v9
759; CHECK-NEXT:    ret
760  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
761  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
762  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
763  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
764  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
765  ret <vscale x 2 x i16> %u
766}
767
768define <vscale x 2 x i16> @vmacc_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
769; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked:
770; CHECK:       # %bb.0:
771; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
772; CHECK-NEXT:    vmacc.vx v9, a0, v8
773; CHECK-NEXT:    vmv1r.v v8, v9
774; CHECK-NEXT:    ret
775  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
776  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
777  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
778  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
779  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
780  ret <vscale x 2 x i16> %u
781}
782
783define <vscale x 2 x i16> @vmacc_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
784; CHECK-LABEL: vmacc_vv_nxv2i16_ta:
785; CHECK:       # %bb.0:
786; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
787; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
788; CHECK-NEXT:    vmv1r.v v8, v10
789; CHECK-NEXT:    ret
790  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
791  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
792  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
793  ret <vscale x 2 x i16> %u
794}
795
796define <vscale x 2 x i16> @vmacc_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
797; CHECK-LABEL: vmacc_vx_nxv2i16_ta:
798; CHECK:       # %bb.0:
799; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
800; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
801; CHECK-NEXT:    vmv1r.v v8, v9
802; CHECK-NEXT:    ret
803  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
804  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
805  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
806  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
807  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
808  ret <vscale x 2 x i16> %u
809}
810
811declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
812declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
813declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
814declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
815
define <vscale x 4 x i16> @vmacc_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmacc_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmacc_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmacc_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

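; With the tail-agnostic policy only the first vl elements of the result need
; to be preserved, so at integral LMUL the final copy below is the vl-bounded
; vmv.v.v rather than a whole-register vmv1r.v/vmv2r.v move.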
define <vscale x 4 x i16> @vmacc_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmacc_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
  ret <vscale x 4 x i16> %u
}

declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)

define <vscale x 8 x i16> @vmacc_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmacc_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmacc_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmacc_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
; CHECK-NEXT:    vmacc.vx v10, a0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmacc_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmacc_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
  ret <vscale x 8 x i16> %u
}

declare <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
declare <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)

define <vscale x 16 x i16> @vmacc_vv_nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmacc_vv_nxv16i16_unmasked(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmacc_vx_nxv16i16(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmacc_vx_nxv16i16_unmasked(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
; CHECK-NEXT:    vmacc.vx v12, a0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmacc_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmacc_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
  ret <vscale x 16 x i16> %u
}

declare <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
declare <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)

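; At LMUL=8 three m8 vector operands exceed the v8-v23 argument registers, so
; %c is passed indirectly: its address arrives in a0 (evl shifts to a1) and it
; is reloaded with vl8re16.v before being folded into the vmacc.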
define <vscale x 32 x i16> @vmacc_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmacc_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmacc_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmacc_vx_nxv32i16_unmasked(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT:    vmacc.vx v16, a0, v8
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmacc_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmacc_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
  ret <vscale x 32 x i16> %u
}

declare <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
declare <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)

define <vscale x 1 x i32> @vmacc_vv_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmacc_vv_nxv1i32_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmacc_vx_nxv1i32(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmacc_vx_nxv1i32_unmasked(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, ma
; CHECK-NEXT:    vmacc.vx v9, a0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
  ret <vscale x 1 x i32> %u
}

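; Note that at fractional LMUL the tail-agnostic result is still copied with a
; whole-register vmv1r.v; compare the vmv.v.v emitted at m1 and above.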
1231define <vscale x 1 x i32> @vmacc_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1232; CHECK-LABEL: vmacc_vv_nxv1i32_ta:
1233; CHECK:       # %bb.0:
1234; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
1235; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
1236; CHECK-NEXT:    vmv1r.v v8, v10
1237; CHECK-NEXT:    ret
1238  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1239  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1240  %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1241  ret <vscale x 1 x i32> %u
1242}
1243
1244define <vscale x 1 x i32> @vmacc_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1245; CHECK-LABEL: vmacc_vx_nxv1i32_ta:
1246; CHECK:       # %bb.0:
1247; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
1248; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
1249; CHECK-NEXT:    vmv1r.v v8, v9
1250; CHECK-NEXT:    ret
1251  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1252  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1253  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1254  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1255  %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1256  ret <vscale x 1 x i32> %u
1257}
1258
1259declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1260declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1261declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
1262declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
1263
1264define <vscale x 2 x i32> @vmacc_vv_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1265; CHECK-LABEL: vmacc_vv_nxv2i32:
1266; CHECK:       # %bb.0:
1267; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
1268; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
1269; CHECK-NEXT:    vmv1r.v v8, v10
1270; CHECK-NEXT:    ret
1271  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1272  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1273  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1274  ret <vscale x 2 x i32> %u
1275}
1276
1277define <vscale x 2 x i32> @vmacc_vv_nxv2i32_unmasked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1278; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked:
1279; CHECK:       # %bb.0:
1280; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
1281; CHECK-NEXT:    vmacc.vv v10, v8, v9
1282; CHECK-NEXT:    vmv1r.v v8, v10
1283; CHECK-NEXT:    ret
1284  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1285  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1286  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1287  ret <vscale x 2 x i32> %u
1288}
1289
1290define <vscale x 2 x i32> @vmacc_vx_nxv2i32(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1291; CHECK-LABEL: vmacc_vx_nxv2i32:
1292; CHECK:       # %bb.0:
1293; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
1294; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
1295; CHECK-NEXT:    vmv1r.v v8, v9
1296; CHECK-NEXT:    ret
1297  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1298  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1299  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1300  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1301  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1302  ret <vscale x 2 x i32> %u
1303}
1304
1305define <vscale x 2 x i32> @vmacc_vx_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1306; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked:
1307; CHECK:       # %bb.0:
1308; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
1309; CHECK-NEXT:    vmacc.vx v9, a0, v8
1310; CHECK-NEXT:    vmv1r.v v8, v9
1311; CHECK-NEXT:    ret
1312  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1313  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1314  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1315  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1316  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1317  ret <vscale x 2 x i32> %u
1318}
1319
1320define <vscale x 2 x i32> @vmacc_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1321; CHECK-LABEL: vmacc_vv_nxv2i32_ta:
1322; CHECK:       # %bb.0:
1323; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
1324; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
1325; CHECK-NEXT:    vmv.v.v v8, v10
1326; CHECK-NEXT:    ret
1327  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1328  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1329  %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1330  ret <vscale x 2 x i32> %u
1331}
1332
1333define <vscale x 2 x i32> @vmacc_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1334; CHECK-LABEL: vmacc_vx_nxv2i32_ta:
1335; CHECK:       # %bb.0:
1336; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
1337; CHECK-NEXT:    vmacc.vx v9, a0, v8, v0.t
1338; CHECK-NEXT:    vmv.v.v v8, v9
1339; CHECK-NEXT:    ret
1340  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1341  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1342  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1343  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1344  %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1345  ret <vscale x 2 x i32> %u
1346}
1347
1348declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1349declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1350declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1351declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1352
1353define <vscale x 4 x i32> @vmacc_vv_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1354; CHECK-LABEL: vmacc_vv_nxv4i32:
1355; CHECK:       # %bb.0:
1356; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
1357; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
1358; CHECK-NEXT:    vmv2r.v v8, v12
1359; CHECK-NEXT:    ret
1360  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1361  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1362  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1363  ret <vscale x 4 x i32> %u
1364}
1365
1366define <vscale x 4 x i32> @vmacc_vv_nxv4i32_unmasked(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1367; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked:
1368; CHECK:       # %bb.0:
1369; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
1370; CHECK-NEXT:    vmacc.vv v12, v8, v10
1371; CHECK-NEXT:    vmv2r.v v8, v12
1372; CHECK-NEXT:    ret
1373  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1374  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1375  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1376  ret <vscale x 4 x i32> %u
1377}
1378
1379define <vscale x 4 x i32> @vmacc_vx_nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1380; CHECK-LABEL: vmacc_vx_nxv4i32:
1381; CHECK:       # %bb.0:
1382; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
1383; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
1384; CHECK-NEXT:    vmv2r.v v8, v10
1385; CHECK-NEXT:    ret
1386  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1387  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1388  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1389  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1390  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1391  ret <vscale x 4 x i32> %u
1392}
1393
1394define <vscale x 4 x i32> @vmacc_vx_nxv4i32_unmasked(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1395; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked:
1396; CHECK:       # %bb.0:
1397; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
1398; CHECK-NEXT:    vmacc.vx v10, a0, v8
1399; CHECK-NEXT:    vmv2r.v v8, v10
1400; CHECK-NEXT:    ret
1401  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1402  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1403  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1404  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1405  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1406  ret <vscale x 4 x i32> %u
1407}
1408
1409define <vscale x 4 x i32> @vmacc_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1410; CHECK-LABEL: vmacc_vv_nxv4i32_ta:
1411; CHECK:       # %bb.0:
1412; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
1413; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
1414; CHECK-NEXT:    vmv.v.v v8, v12
1415; CHECK-NEXT:    ret
1416  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1417  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1418  %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1419  ret <vscale x 4 x i32> %u
1420}
1421
1422define <vscale x 4 x i32> @vmacc_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1423; CHECK-LABEL: vmacc_vx_nxv4i32_ta:
1424; CHECK:       # %bb.0:
1425; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
1426; CHECK-NEXT:    vmacc.vx v10, a0, v8, v0.t
1427; CHECK-NEXT:    vmv.v.v v8, v10
1428; CHECK-NEXT:    ret
1429  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1430  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1431  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1432  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1433  %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1434  ret <vscale x 4 x i32> %u
1435}
1436
1437declare <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1438declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1439declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
1440declare <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
1441
1442define <vscale x 8 x i32> @vmacc_vv_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1443; CHECK-LABEL: vmacc_vv_nxv8i32:
1444; CHECK:       # %bb.0:
1445; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
1446; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
1447; CHECK-NEXT:    vmv4r.v v8, v16
1448; CHECK-NEXT:    ret
1449  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1450  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1451  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1452  ret <vscale x 8 x i32> %u
1453}
1454
1455define <vscale x 8 x i32> @vmacc_vv_nxv8i32_unmasked(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1456; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked:
1457; CHECK:       # %bb.0:
1458; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
1459; CHECK-NEXT:    vmacc.vv v16, v8, v12
1460; CHECK-NEXT:    vmv4r.v v8, v16
1461; CHECK-NEXT:    ret
1462  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1463  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1464  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1465  ret <vscale x 8 x i32> %u
1466}
1467
1468define <vscale x 8 x i32> @vmacc_vx_nxv8i32(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1469; CHECK-LABEL: vmacc_vx_nxv8i32:
1470; CHECK:       # %bb.0:
1471; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
1472; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
1473; CHECK-NEXT:    vmv4r.v v8, v12
1474; CHECK-NEXT:    ret
1475  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1476  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1477  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1478  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1479  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1480  ret <vscale x 8 x i32> %u
1481}
1482
1483define <vscale x 8 x i32> @vmacc_vx_nxv8i32_unmasked(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1484; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked:
1485; CHECK:       # %bb.0:
1486; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, ma
1487; CHECK-NEXT:    vmacc.vx v12, a0, v8
1488; CHECK-NEXT:    vmv4r.v v8, v12
1489; CHECK-NEXT:    ret
1490  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1491  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1492  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1493  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1494  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1495  ret <vscale x 8 x i32> %u
1496}
1497
1498define <vscale x 8 x i32> @vmacc_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1499; CHECK-LABEL: vmacc_vv_nxv8i32_ta:
1500; CHECK:       # %bb.0:
1501; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
1502; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
1503; CHECK-NEXT:    vmv.v.v v8, v16
1504; CHECK-NEXT:    ret
1505  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1506  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1507  %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1508  ret <vscale x 8 x i32> %u
1509}
1510
1511define <vscale x 8 x i32> @vmacc_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1512; CHECK-LABEL: vmacc_vx_nxv8i32_ta:
1513; CHECK:       # %bb.0:
1514; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
1515; CHECK-NEXT:    vmacc.vx v12, a0, v8, v0.t
1516; CHECK-NEXT:    vmv.v.v v8, v12
1517; CHECK-NEXT:    ret
1518  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1519  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1520  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1521  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1522  %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1523  ret <vscale x 8 x i32> %u
1524}
1525
1526declare <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1527declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1528declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
1529declare <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
1530
1531define <vscale x 16 x i32> @vmacc_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1532; CHECK-LABEL: vmacc_vv_nxv16i32:
1533; CHECK:       # %bb.0:
1534; CHECK-NEXT:    vl8re32.v v24, (a0)
1535; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
1536; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
1537; CHECK-NEXT:    vmv8r.v v8, v24
1538; CHECK-NEXT:    ret
1539  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1540  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1541  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1542  ret <vscale x 16 x i32> %u
1543}
1544
1545define <vscale x 16 x i32> @vmacc_vv_nxv16i32_unmasked(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1546; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked:
1547; CHECK:       # %bb.0:
1548; CHECK-NEXT:    vl8re32.v v24, (a0)
1549; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
1550; CHECK-NEXT:    vmacc.vv v24, v8, v16
1551; CHECK-NEXT:    vmv8r.v v8, v24
1552; CHECK-NEXT:    ret
1553  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1554  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1555  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1556  ret <vscale x 16 x i32> %u
1557}
1558
1559define <vscale x 16 x i32> @vmacc_vx_nxv16i32(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1560; CHECK-LABEL: vmacc_vx_nxv16i32:
1561; CHECK:       # %bb.0:
1562; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
1563; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
1564; CHECK-NEXT:    vmv8r.v v8, v16
1565; CHECK-NEXT:    ret
1566  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1567  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1568  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1569  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1570  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1571  ret <vscale x 16 x i32> %u
1572}
1573
1574define <vscale x 16 x i32> @vmacc_vx_nxv16i32_unmasked(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1575; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked:
1576; CHECK:       # %bb.0:
1577; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
1578; CHECK-NEXT:    vmacc.vx v16, a0, v8
1579; CHECK-NEXT:    vmv8r.v v8, v16
1580; CHECK-NEXT:    ret
1581  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1582  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1583  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1584  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1585  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1586  ret <vscale x 16 x i32> %u
1587}
1588
1589define <vscale x 16 x i32> @vmacc_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1590; CHECK-LABEL: vmacc_vv_nxv16i32_ta:
1591; CHECK:       # %bb.0:
1592; CHECK-NEXT:    vl8re32.v v24, (a0)
1593; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
1594; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
1595; CHECK-NEXT:    vmv.v.v v8, v24
1596; CHECK-NEXT:    ret
1597  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1598  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1599  %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1600  ret <vscale x 16 x i32> %u
1601}
1602
define <vscale x 16 x i32> @vmacc_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vx_nxv16i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vmacc.vx v16, a0, v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
  ret <vscale x 16 x i32> %u
}

declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)

define <vscale x 1 x i64> @vmacc_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmacc_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv1i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
; CHECK-NEXT:    vmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

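; NOTE: For the i64 vmacc_vx tests the RV32 and RV64 lowerings diverge: a
; 64-bit scalar does not fit in a single RV32 GPR, so the splat is
; materialized by storing the register pair to the stack and reloading it
; with a zero-stride vlse64.v, while RV64 can fold the scalar straight into
; vmacc.vx.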
define <vscale x 1 x i64> @vmacc_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
; RV32-NEXT:    vmacc.vv v9, v8, v10, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
; RV64-NEXT:    vmacc.vx v9, a0, v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmacc_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv1i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV32-NEXT:    vmacc.vv v9, v8, v10
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv1i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
; RV64-NEXT:    vmacc.vx v9, a0, v8
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmacc_vv_nxv1i64_ta(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv1i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmacc_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv1i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vmacc.vv v9, v8, v10, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv1i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; RV64-NEXT:    vmacc.vx v9, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
  ret <vscale x 1 x i64> %u
}

declare <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

define <vscale x 2 x i64> @vmacc_vv_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmacc_vv_nxv2i64_unmasked(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
; CHECK-NEXT:    vmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv2r.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmacc_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
; RV32-NEXT:    vmacc.vv v10, v8, v12, v0.t
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
; RV64-NEXT:    vmacc.vx v10, a0, v8, v0.t
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmacc_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
; RV32-NEXT:    vmacc.vv v10, v8, v12
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
; RV64-NEXT:    vmacc.vx v10, a0, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmacc_vv_nxv2i64_ta(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv2i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmacc_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv2i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vmacc.vv v10, v8, v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv2i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
; RV64-NEXT:    vmacc.vx v10, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
  ret <vscale x 2 x i64> %u
}

declare <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)

define <vscale x 4 x i64> @vmacc_vv_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmacc_vv_nxv4i64_unmasked(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
; CHECK-NEXT:    vmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv4r.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmacc_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, mu
; RV32-NEXT:    vmacc.vv v12, v8, v16, v0.t
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
; RV64-NEXT:    vmacc.vx v12, a0, v8, v0.t
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmacc_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; RV32-NEXT:    vmacc.vv v12, v8, v16
; RV32-NEXT:    vmv4r.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vmacc.vx v12, a0, v8
; RV64-NEXT:    vmv4r.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmacc_vv_nxv4i64_ta(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv4i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmacc_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv4i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vmacc.vv v12, v8, v16, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv4i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
; RV64-NEXT:    vmacc.vx v12, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
  ret <vscale x 4 x i64> %u
}

declare <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
declare <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)

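; NOTE: At LMUL=8 only the v8 and v16 register groups are available for
; vector arguments, so the accumulator %c is passed indirectly and the vv
; tests below expect it to be reloaded with vl8re64.v before the vmacc.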
define <vscale x 8 x i64> @vmacc_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmacc_vv_nxv8i64_unmasked(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vmv8r.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmacc_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, mu
; RV32-NEXT:    vmacc.vv v16, v8, v24, v0.t
; RV32-NEXT:    vmv8r.v v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
; RV64-NEXT:    vmacc.vx v16, a0, v8, v0.t
; RV64-NEXT:    vmv8r.v v8, v16
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmacc_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vmacc.vv v16, v8, v24
; RV32-NEXT:    vmv8r.v v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
; RV64-NEXT:    vmacc.vx v16, a0, v8
; RV64-NEXT:    vmv8r.v v8, v16
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmacc_vv_nxv8i64_ta(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmacc_vv_nxv8i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT:    vmacc.vv v24, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmacc_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmacc_vx_nxv8i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vmacc.vv v16, v8, v24, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmacc_vx_nxv8i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vmacc.vx v16, a0, v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
  ret <vscale x 8 x i64> %u
}
