; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

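; These tests check that a vp.mul feeding a vp.add, consumed by a vp.merge or
; vp.select, is combined into a single multiply-add (vmadd/vmacc), with the
; vsetvli tail/mask policies chosen to match the merge or select semantics.
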
declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)

define <vscale x 1 x i8> @vmadd_vv_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

define <vscale x 1 x i8> @vmadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

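; With a scalar operand and a mask, the merge folds into the multiply-add
; itself: vmadd.vx runs under a tu,mu policy so inactive and tail lanes keep
; %a in the destination register, which is exactly the merge's false operand.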
define <vscale x 1 x i8> @vmadd_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

define <vscale x 1 x i8> @vmadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

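; The _ta variants use vp.select, whose lanes at or past %evl are poison, so
; the result can stay tail-agnostic and no second vsetvli toggle to tu is
; needed.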
define <vscale x 1 x i8> @vmadd_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

define <vscale x 1 x i8> @vmadd_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  ret <vscale x 1 x i8> %u
}

declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
declare <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)

define <vscale x 2 x i8> @vmadd_vv_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

define <vscale x 2 x i8> @vmadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

define <vscale x 2 x i8> @vmadd_vx_nxv2i8(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

define <vscale x 2 x i8> @vmadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

define <vscale x 2 x i8> @vmadd_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

define <vscale x 2 x i8> @vmadd_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
  ret <vscale x 2 x i8> %u
}

declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
declare <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)

define <vscale x 4 x i8> @vmadd_vv_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

define <vscale x 4 x i8> @vmadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

define <vscale x 4 x i8> @vmadd_vx_nxv4i8(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

define <vscale x 4 x i8> @vmadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

define <vscale x 4 x i8> @vmadd_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

define <vscale x 4 x i8> @vmadd_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
  ret <vscale x 4 x i8> %u
}

declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
declare <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)

define <vscale x 8 x i8> @vmadd_vv_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

define <vscale x 8 x i8> @vmadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

define <vscale x 8 x i8> @vmadd_vx_nxv8i8(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

define <vscale x 8 x i8> @vmadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

define <vscale x 8 x i8> @vmadd_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

define <vscale x 8 x i8> @vmadd_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
  ret <vscale x 8 x i8> %u
}

declare <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)

define <vscale x 16 x i8> @vmadd_vv_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

define <vscale x 16 x i8> @vmadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e8, m2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

define <vscale x 16 x i8> @vmadd_vx_nxv16i8(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

define <vscale x 16 x i8> @vmadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

define <vscale x 16 x i8> @vmadd_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

define <vscale x 16 x i8> @vmadd_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
  ret <vscale x 16 x i8> %u
}

declare <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
declare <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)

define <vscale x 32 x i8> @vmadd_vv_nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

define <vscale x 32 x i8> @vmadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e8, m4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

define <vscale x 32 x i8> @vmadd_vx_nxv32i8(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

define <vscale x 32 x i8> @vmadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

define <vscale x 32 x i8> @vmadd_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

define <vscale x 32 x i8> @vmadd_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i8_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
  ret <vscale x 32 x i8> %u
}

declare <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
declare <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)

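; At LMUL=8 the three vector operands do not all fit in the v8-v23 argument
; registers, so %c is passed indirectly and reloaded with vl8r.v (hence the
; EVL arriving in a1), and the fold uses vmacc rather than vmadd.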
540define <vscale x 64 x i8> @vmadd_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
541; CHECK-LABEL: vmadd_vv_nxv64i8:
542; CHECK:       # %bb.0:
543; CHECK-NEXT:    vl8r.v v24, (a0)
544; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
545; CHECK-NEXT:    vmacc.vv v24, v8, v16
546; CHECK-NEXT:    vsetvli zero, zero, e8, m8, tu, ma
547; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
548; CHECK-NEXT:    ret
549  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
550  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
551  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
552  ret <vscale x 64 x i8> %u
553}
554
555define <vscale x 64 x i8> @vmadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
556; CHECK-LABEL: vmadd_vv_nxv64i8_unmasked:
557; CHECK:       # %bb.0:
558; CHECK-NEXT:    vl8r.v v24, (a0)
559; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
560; CHECK-NEXT:    vmacc.vv v24, v8, v16
561; CHECK-NEXT:    vsetvli zero, zero, e8, m8, tu, ma
562; CHECK-NEXT:    vmv.v.v v8, v24
563; CHECK-NEXT:    ret
564  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
565  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
566  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
567  ret <vscale x 64 x i8> %u
568}
569
570define <vscale x 64 x i8> @vmadd_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
571; CHECK-LABEL: vmadd_vx_nxv64i8:
572; CHECK:       # %bb.0:
573; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
574; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
575; CHECK-NEXT:    ret
576  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
577  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
578  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
579  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
580  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
581  ret <vscale x 64 x i8> %u
582}
583
584define <vscale x 64 x i8> @vmadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
585; CHECK-LABEL: vmadd_vx_nxv64i8_unmasked:
586; CHECK:       # %bb.0:
587; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
588; CHECK-NEXT:    vmadd.vx v8, a0, v16
589; CHECK-NEXT:    ret
590  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
591  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
592  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
593  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
594  %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
595  ret <vscale x 64 x i8> %u
596}
597
598define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
599; CHECK-LABEL: vmadd_vv_nxv64i8_ta:
600; CHECK:       # %bb.0:
601; CHECK-NEXT:    vl8r.v v24, (a0)
602; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
603; CHECK-NEXT:    vmacc.vv v24, v8, v16
604; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
605; CHECK-NEXT:    ret
606  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
607  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
608  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
609  ret <vscale x 64 x i8> %u
610}
611
612define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
613; CHECK-LABEL: vmadd_vx_nxv64i8_ta:
614; CHECK:       # %bb.0:
615; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
616; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
617; CHECK-NEXT:    ret
618  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
619  %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
620  %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
621  %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
622  %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
623  ret <vscale x 64 x i8> %u
624}
625
626declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
627declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
628declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
629declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
630
631define <vscale x 1 x i16> @vmadd_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
632; CHECK-LABEL: vmadd_vv_nxv1i16:
633; CHECK:       # %bb.0:
634; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
635; CHECK-NEXT:    vmadd.vv v9, v8, v10
636; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
637; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
638; CHECK-NEXT:    ret
639  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
640  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
641  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
642  ret <vscale x 1 x i16> %u
643}
644
645define <vscale x 1 x i16> @vmadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
646; CHECK-LABEL: vmadd_vv_nxv1i16_unmasked:
647; CHECK:       # %bb.0:
648; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
649; CHECK-NEXT:    vmadd.vv v9, v8, v10
650; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
651; CHECK-NEXT:    vmv.v.v v8, v9
652; CHECK-NEXT:    ret
653  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
654  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
655  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
656  ret <vscale x 1 x i16> %u
657}
658
659define <vscale x 1 x i16> @vmadd_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
660; CHECK-LABEL: vmadd_vx_nxv1i16:
661; CHECK:       # %bb.0:
662; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
663; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
664; CHECK-NEXT:    ret
665  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
666  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
667  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
668  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
669  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
670  ret <vscale x 1 x i16> %u
671}
672
673define <vscale x 1 x i16> @vmadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
674; CHECK-LABEL: vmadd_vx_nxv1i16_unmasked:
675; CHECK:       # %bb.0:
676; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
677; CHECK-NEXT:    vmadd.vx v8, a0, v9
678; CHECK-NEXT:    ret
679  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
680  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
681  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
682  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
683  %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
684  ret <vscale x 1 x i16> %u
685}
686
687define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
688; CHECK-LABEL: vmadd_vv_nxv1i16_ta:
689; CHECK:       # %bb.0:
690; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
691; CHECK-NEXT:    vmadd.vv v9, v8, v10
692; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
693; CHECK-NEXT:    ret
694  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
695  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
696  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
697  ret <vscale x 1 x i16> %u
698}
699
700define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
701; CHECK-LABEL: vmadd_vx_nxv1i16_ta:
702; CHECK:       # %bb.0:
703; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
704; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
705; CHECK-NEXT:    ret
706  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
707  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
708  %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
709  %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
710  %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
711  ret <vscale x 1 x i16> %u
712}
713
714declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
715declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
716declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
717declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
718
719define <vscale x 2 x i16> @vmadd_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
720; CHECK-LABEL: vmadd_vv_nxv2i16:
721; CHECK:       # %bb.0:
722; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
723; CHECK-NEXT:    vmadd.vv v9, v8, v10
724; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
725; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
726; CHECK-NEXT:    ret
727  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
728  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
729  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
730  ret <vscale x 2 x i16> %u
731}
732
733define <vscale x 2 x i16> @vmadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
734; CHECK-LABEL: vmadd_vv_nxv2i16_unmasked:
735; CHECK:       # %bb.0:
736; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
737; CHECK-NEXT:    vmadd.vv v9, v8, v10
738; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
739; CHECK-NEXT:    vmv.v.v v8, v9
740; CHECK-NEXT:    ret
741  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
742  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
743  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
744  ret <vscale x 2 x i16> %u
745}
746
747define <vscale x 2 x i16> @vmadd_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
748; CHECK-LABEL: vmadd_vx_nxv2i16:
749; CHECK:       # %bb.0:
750; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
751; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
752; CHECK-NEXT:    ret
753  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
754  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
755  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
756  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
757  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
758  ret <vscale x 2 x i16> %u
759}
760
761define <vscale x 2 x i16> @vmadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
762; CHECK-LABEL: vmadd_vx_nxv2i16_unmasked:
763; CHECK:       # %bb.0:
764; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
765; CHECK-NEXT:    vmadd.vx v8, a0, v9
766; CHECK-NEXT:    ret
767  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
768  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
769  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
770  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
771  %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
772  ret <vscale x 2 x i16> %u
773}
774
775define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
776; CHECK-LABEL: vmadd_vv_nxv2i16_ta:
777; CHECK:       # %bb.0:
778; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
779; CHECK-NEXT:    vmadd.vv v9, v8, v10
780; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
781; CHECK-NEXT:    ret
782  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
783  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
784  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
785  ret <vscale x 2 x i16> %u
786}
787
788define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
789; CHECK-LABEL: vmadd_vx_nxv2i16_ta:
790; CHECK:       # %bb.0:
791; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
792; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
793; CHECK-NEXT:    ret
794  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
795  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
796  %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
797  %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
798  %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
799  ret <vscale x 2 x i16> %u
800}
801
declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)

define <vscale x 4 x i16> @vmadd_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmadd_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
  ret <vscale x 4 x i16> %u
}

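; Same pattern at e16, m2 (nxv8i16).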
declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)

define <vscale x 8 x i16> @vmadd_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmadd_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmadd_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

define <vscale x 8 x i16> @vmadd_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
  ret <vscale x 8 x i16> %u
}

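; Same pattern at e16, m4 (nxv16i16).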
declare <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
declare <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)

define <vscale x 16 x i16> @vmadd_vv_nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmadd_vx_nxv16i16(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmadd_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

define <vscale x 16 x i16> @vmadd_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
  ret <vscale x 16 x i16> %u
}

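; At e16, m8 (nxv32i16) all vector argument registers are taken, so %c is
; passed indirectly and reloaded with vl8re16.v; the vv forms then fold to
; vmacc.vv instead of vmadd.vv.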
declare <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
declare <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)

define <vscale x 32 x i16> @vmadd_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmadd_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmadd_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re16.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

define <vscale x 32 x i16> @vmadd_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv32i16_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
  ret <vscale x 32 x i16> %u
}

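; nxv1i32 cases (e32, mf2).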
declare <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
declare <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)

define <vscale x 1 x i32> @vmadd_vv_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmadd_vx_nxv1i32(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmadd_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

define <vscale x 1 x i32> @vmadd_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv1i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
  ret <vscale x 1 x i32> %u
}

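; nxv2i32 cases (e32, m1).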
declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)

define <vscale x 2 x i32> @vmadd_vv_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

define <vscale x 2 x i32> @vmadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

define <vscale x 2 x i32> @vmadd_vx_nxv2i32(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

define <vscale x 2 x i32> @vmadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

define <vscale x 2 x i32> @vmadd_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

define <vscale x 2 x i32> @vmadd_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv2i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
  ret <vscale x 2 x i32> %u
}

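; nxv4i32 cases (e32, m2).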
declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)

define <vscale x 4 x i32> @vmadd_vv_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

define <vscale x 4 x i32> @vmadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

define <vscale x 4 x i32> @vmadd_vx_nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

define <vscale x 4 x i32> @vmadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

define <vscale x 4 x i32> @vmadd_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

define <vscale x 4 x i32> @vmadd_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv4i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
  ret <vscale x 4 x i32> %u
}

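; nxv8i32 cases (e32, m4).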
declare <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
declare <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)

define <vscale x 8 x i32> @vmadd_vv_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

define <vscale x 8 x i32> @vmadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

define <vscale x 8 x i32> @vmadd_vx_nxv8i32(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

define <vscale x 8 x i32> @vmadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

define <vscale x 8 x i32> @vmadd_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

define <vscale x 8 x i32> @vmadd_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv8i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
  ret <vscale x 8 x i32> %u
}

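; nxv16i32 cases (e32, m8): as with nxv32i16, %c is reloaded with vl8re32.v
; and the vv forms use vmacc.vv.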
declare <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
declare <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)

define <vscale x 16 x i32> @vmadd_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

define <vscale x 16 x i32> @vmadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

define <vscale x 16 x i32> @vmadd_vx_nxv16i32(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

define <vscale x 16 x i32> @vmadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
; CHECK-NEXT:    vmadd.vx v8, a0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

define <vscale x 16 x i32> @vmadd_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv16i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

define <vscale x 16 x i32> @vmadd_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vx_nxv16i32_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
  ret <vscale x 16 x i32> %u
}

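; nxv1i64 cases (e64, m1). On RV32 the i64 scalar does not fit in one GPR, so
; it is splat through a stack slot with vlse64.v and the vx forms fall back to
; vmadd.vv; RV64 uses vmadd.vx directly.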
declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)

define <vscale x 1 x i64> @vmadd_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

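; On RV32 an i64 scalar does not fit in a single GPR, so the .vx tests below
; store its two halves to a stack slot and broadcast it with a zero-stride
; vlse64.v; RV64 can feed the scalar to vmadd.vx directly.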
define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT:    vmadd.vv v10, v8, v9
; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
; RV64-NEXT:    vmadd.vx v8, a0, v9, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

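; With an all-ones mask, the final vp.merge only has to preserve the lanes
; past EVL, so it lowers to a tail-undisturbed vmv.v.v (RV32) or folds into
; the vmadd.vx itself under a tu policy (RV64).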
define <vscale x 1 x i64> @vmadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv1i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT:    vmadd.vv v10, v8, v9
; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv1i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
; RV64-NEXT:    vmadd.vx v8, a0, v9
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

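; The _ta tests use vp.select rather than vp.merge: lanes at or past EVL are
; poison, so the vmerge.vvm can stay tail-agnostic and no extra tu vsetvli is
; emitted.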
define <vscale x 1 x i64> @vmadd_vv_nxv1i64_ta(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv1i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vmadd.vv v9, v8, v10
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv1i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; RV32-NEXT:    vmadd.vv v10, v8, v9
; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv1i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
; RV64-NEXT:    vmadd.vx v8, a0, v9, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
  ret <vscale x 1 x i64> %u
}

declare <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

define <vscale x 2 x i64> @vmadd_vv_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT:    vmadd.vv v12, v8, v10
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
; RV64-NEXT:    vmadd.vx v8, a0, v10, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT:    vmadd.vv v12, v8, v10
; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
; RV64-NEXT:    vmadd.vx v8, a0, v10
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmadd_vv_nxv2i64_ta(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv2i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vmadd.vv v10, v8, v12
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv2i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; RV32-NEXT:    vmadd.vv v12, v8, v10
; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv2i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
; RV64-NEXT:    vmadd.vx v8, a0, v10, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
  ret <vscale x 2 x i64> %u
}

declare <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)

define <vscale x 4 x i64> @vmadd_vv_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT:    vmadd.vv v16, v8, v12
; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
; RV64-NEXT:    vmadd.vx v8, a0, v12, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT:    vmadd.vv v16, v8, v12
; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
; RV64-NEXT:    vmadd.vx v8, a0, v12
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmadd_vv_nxv4i64_ta(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv4i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vmadd.vv v12, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv4i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; RV32-NEXT:    vmadd.vv v16, v8, v12
; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv4i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
; RV64-NEXT:    vmadd.vx v8, a0, v12, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
  ret <vscale x 4 x i64> %u
}

declare <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
declare <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)

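; At LMUL=8 the %c operand no longer fits in vector argument registers and is
; passed by pointer, so it is first reloaded with vl8re64.v; the combine then
; picks vmacc.vv, clobbering the reloaded addend rather than %a or %b.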
define <vscale x 8 x i64> @vmadd_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; CHECK-NEXT:    vmv.v.v v8, v24
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vmadd.vv v24, v8, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
; RV64-NEXT:    vmadd.vx v8, a0, v16, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vmadd.vv v24, v8, v16
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vmv.v.v v8, v24
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
; RV64-NEXT:    vmadd.vx v8, a0, v16
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmadd_vv_nxv8i64_ta(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vmadd_vv_nxv8i64_ta:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vmacc.vv v24, v8, v16
; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
; CHECK-NEXT:    ret
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}

define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vmadd_vx_nxv8i64_ta:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vmadd.vv v24, v8, v16
; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vmadd_vx_nxv8i64_ta:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
; RV64-NEXT:    vmadd.vx v8, a0, v16, v0.t
; RV64-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
  ret <vscale x 8 x i64> %u
}
