; xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll (revision 74f985b793bf4005e49736f8c2cef8b5cbf7c1ab)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s

; vwmaccu.vv, <2 x i8> -> <2 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/mf8.
define <2 x i16> @vwmaccu_v2i16(ptr %x, ptr %y, <2 x i16> %z) {
; CHECK-LABEL: vwmaccu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  %f = add <2 x i16> %e, %z
  ret <2 x i16> %f
}

; vwmaccu.vv, <4 x i8> -> <4 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/mf4.
define <4 x i16> @vwmaccu_v4i16(ptr %x, ptr %y, <4 x i16> %z) {
; CHECK-LABEL: vwmaccu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = zext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  %f = add <4 x i16> %e, %z
  ret <4 x i16> %f
}

; vwmaccu.vv, <2 x i16> -> <2 x i32>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e16/mf4.
define <2 x i32> @vwmaccu_v2i32(ptr %x, ptr %y, <2 x i32> %z) {
; CHECK-LABEL: vwmaccu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = zext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  %f = add <2 x i32> %e, %z
  ret <2 x i32> %f
}

; vwmaccu.vv, <8 x i8> -> <8 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/mf2.
define <8 x i16> @vwmaccu_v8i16(ptr %x, ptr %y, <8 x i16> %z) {
; CHECK-LABEL: vwmaccu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = zext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  %f = add <8 x i16> %e, %z
  ret <8 x i16> %f
}

; vwmaccu.vv, <4 x i16> -> <4 x i32>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e16/mf2.
define <4 x i32> @vwmaccu_v4i32(ptr %x, ptr %y, <4 x i32> %z) {
; CHECK-LABEL: vwmaccu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  %f = add <4 x i32> %e, %z
  ret <4 x i32> %f
}

; vwmaccu.vv, <2 x i32> -> <2 x i64>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e32/mf2.
define <2 x i64> @vwmaccu_v2i64(ptr %x, ptr %y, <2 x i64> %z) {
; CHECK-LABEL: vwmaccu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = zext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  %f = add <2 x i64> %e, %z
  ret <2 x i64> %f
}

; vwmaccu.vv, <16 x i8> -> <16 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/m1, with the sources loaded into v10/v11.
define <16 x i16> @vwmaccu_v16i16(ptr %x, ptr %y, <16 x i16> %z) {
; CHECK-LABEL: vwmaccu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = zext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  %f = add <16 x i16> %e, %z
  ret <16 x i16> %f
}

; vwmaccu.vv, <8 x i16> -> <8 x i32>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e16/m1, with the sources loaded into v10/v11.
define <8 x i32> @vwmaccu_v8i32(ptr %x, ptr %y, <8 x i32> %z) {
; CHECK-LABEL: vwmaccu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = zext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  %f = add <8 x i32> %e, %z
  ret <8 x i32> %f
}

; vwmaccu.vv, <4 x i32> -> <4 x i64>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e32/m1, with the sources loaded into v10/v11.
define <4 x i64> @vwmaccu_v4i64(ptr %x, ptr %y, <4 x i64> %z) {
; CHECK-LABEL: vwmaccu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  %f = add <4 x i64> %e, %z
  ret <4 x i64> %f
}

; vwmaccu.vv, <32 x i8> -> <32 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/m2. An element count of 32 does not fit
; the 5-bit immediate of vsetivli, so VL is materialized with li and applied
; through vsetvli.
define <32 x i16> @vwmaccu_v32i16(ptr %x, ptr %y, <32 x i16> %z) {
; CHECK-LABEL: vwmaccu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = zext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  %f = add <32 x i16> %e, %z
  ret <32 x i16> %f
}

; vwmaccu.vv, <16 x i16> -> <16 x i32>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e16/m2, with the sources loaded into v12/v14.
define <16 x i32> @vwmaccu_v16i32(ptr %x, ptr %y, <16 x i32> %z) {
; CHECK-LABEL: vwmaccu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = zext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  %f = add <16 x i32> %e, %z
  ret <16 x i32> %f
}

; vwmaccu.vv, <8 x i32> -> <8 x i64>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e32/m2, with the sources loaded into v12/v14.
define <8 x i64> @vwmaccu_v8i64(ptr %x, ptr %y, <8 x i64> %z) {
; CHECK-LABEL: vwmaccu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = zext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  %f = add <8 x i64> %e, %z
  ret <8 x i64> %f
}

; vwmaccu.vv, <64 x i8> -> <64 x i16>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e8/m4. An element count of 64 does not fit
; the 5-bit immediate of vsetivli, so VL is materialized with li and applied
; through vsetvli.
define <64 x i16> @vwmaccu_v64i16(ptr %x, ptr %y, <64 x i16> %z) {
; CHECK-LABEL: vwmaccu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = zext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  %f = add <64 x i16> %e, %z
  ret <64 x i16> %f
}

; vwmaccu.vv, <32 x i16> -> <32 x i32>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e16/m4. An element count of 32 does not fit
; the 5-bit immediate of vsetivli, so VL is materialized with li and applied
; through vsetvli.
define <32 x i32> @vwmaccu_v32i32(ptr %x, ptr %y, <32 x i32> %z) {
; CHECK-LABEL: vwmaccu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = zext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  %f = add <32 x i32> %e, %z
  ret <32 x i32> %f
}

; vwmaccu.vv, <16 x i32> -> <16 x i64>: both loaded operands are zero-extended,
; multiplied, and added to %z. The expected assembly is a single widening
; multiply-accumulate into v8 at e32/m4, with the sources loaded into v16/v20.
define <16 x i64> @vwmaccu_v16i64(ptr %x, ptr %y, <16 x i64> %z) {
; CHECK-LABEL: vwmaccu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmaccu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = zext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  %f = add <16 x i64> %e, %z
  ret <16 x i64> %f
}

; vwmaccu.vx, <2 x i8> -> <2 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/mf8, with no separate splat.
define <2 x i16> @vwmaccu_vx_v2i16(ptr %x, i8 %y, <2 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  %g = add <2 x i16> %f, %z
  ret <2 x i16> %g
}

; vwmaccu.vx, <4 x i8> -> <4 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/mf4, with no separate splat.
define <4 x i16> @vwmaccu_vx_v4i16(ptr %x, i8 %y, <4 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  %g = add <4 x i16> %f, %z
  ret <4 x i16> %g
}

; vwmaccu.vx, <2 x i16> -> <2 x i32>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e16/mf4, with no separate splat.
define <2 x i32> @vwmaccu_vx_v2i32(ptr %x, i16 %y, <2 x i32> %z) {
; CHECK-LABEL: vwmaccu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  %g = add <2 x i32> %f, %z
  ret <2 x i32> %g
}

; vwmaccu.vx, <8 x i8> -> <8 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/mf2, with no separate splat.
define <8 x i16> @vwmaccu_vx_v8i16(ptr %x, i8 %y, <8 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  %g = add <8 x i16> %f, %z
  ret <8 x i16> %g
}

; vwmaccu.vx, <4 x i16> -> <4 x i32>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e16/mf2, with no separate splat.
define <4 x i32> @vwmaccu_vx_v4i32(ptr %x, i16 %y, <4 x i32> %z) {
; CHECK-LABEL: vwmaccu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  %g = add <4 x i32> %f, %z
  ret <4 x i32> %g
}

; vwmaccu.vx, <2 x i32> -> <2 x i64>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e32/mf2, with no separate splat.
define <2 x i64> @vwmaccu_vx_v2i64(ptr %x, i32 %y, <2 x i64> %z) {
; CHECK-LABEL: vwmaccu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v9
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  %g = add <2 x i64> %f, %z
  ret <2 x i64> %g
}

; vwmaccu.vx, <16 x i8> -> <16 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/m1, with no separate splat.
define <16 x i16> @vwmaccu_vx_v16i16(ptr %x, i8 %y, <16 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  %g = add <16 x i16> %f, %z
  ret <16 x i16> %g
}

; vwmaccu.vx, <8 x i16> -> <8 x i32>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e16/m1, with no separate splat.
define <8 x i32> @vwmaccu_vx_v8i32(ptr %x, i16 %y, <8 x i32> %z) {
; CHECK-LABEL: vwmaccu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  %g = add <8 x i32> %f, %z
  ret <8 x i32> %g
}

; vwmaccu.vx, <4 x i32> -> <4 x i64>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e32/m1, with no separate splat.
define <4 x i64> @vwmaccu_vx_v4i64(ptr %x, i32 %y, <4 x i64> %z) {
; CHECK-LABEL: vwmaccu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  %g = add <4 x i64> %f, %z
  ret <4 x i64> %g
}

; vwmaccu.vx, <32 x i8> -> <32 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/m2. An element count of 32
; does not fit the 5-bit immediate of vsetivli, so VL is materialized with li
; and applied through vsetvli.
define <32 x i16> @vwmaccu_vx_v32i16(ptr %x, i8 %y, <32 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  %g = add <32 x i16> %f, %z
  ret <32 x i16> %g
}

; vwmaccu.vx, <16 x i16> -> <16 x i32>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e16/m2, with no separate splat.
define <16 x i32> @vwmaccu_vx_v16i32(ptr %x, i16 %y, <16 x i32> %z) {
; CHECK-LABEL: vwmaccu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  %g = add <16 x i32> %f, %z
  ret <16 x i32> %g
}

; vwmaccu.vx, <8 x i32> -> <8 x i64>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e32/m2, with no separate splat.
define <8 x i64> @vwmaccu_vx_v8i64(ptr %x, i32 %y, <8 x i64> %z) {
; CHECK-LABEL: vwmaccu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v12
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  %g = add <8 x i64> %f, %z
  ret <8 x i64> %g
}

; vwmaccu.vx, <64 x i8> -> <64 x i16>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e8/m4. An element count of 64
; does not fit the 5-bit immediate of vsetivli, so VL is materialized with li
; and applied through vsetvli.
define <64 x i16> @vwmaccu_vx_v64i16(ptr %x, i8 %y, <64 x i16> %z) {
; CHECK-LABEL: vwmaccu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = zext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  %g = add <64 x i16> %f, %z
  ret <64 x i16> %g
}

; vwmaccu.vx, <32 x i16> -> <32 x i32>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e16/m4. An element count of 32
; does not fit the 5-bit immediate of vsetivli, so VL is materialized with li
; and applied through vsetvli.
define <32 x i32> @vwmaccu_vx_v32i32(ptr %x, i16 %y, <32 x i32> %z) {
; CHECK-LABEL: vwmaccu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  %g = add <32 x i32> %f, %z
  ret <32 x i32> %g
}

; vwmaccu.vx, <16 x i32> -> <16 x i64>: the scalar %y is splatted
; (insertelement + all-zero shuffle mask), zero-extended, multiplied by the
; zero-extended load from %x, and added to %z. The expected assembly feeds the
; scalar register a1 straight into vwmaccu.vx at e32/m4, with no separate splat.
define <16 x i64> @vwmaccu_vx_v16i64(ptr %x, i32 %y, <16 x i64> %z) {
; CHECK-LABEL: vwmaccu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmaccu.vx v8, a1, v16
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  %g = add <16 x i64> %f, %z
  ret <16 x i64> %g
}
