; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

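; These tests check that a multiply of zero-extended fixed-length vectors is
; selected as a single widening unsigned multiply: vwmulu.vv for two vector
; operands, or vwmulu.vx when one operand is a splatted scalar.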
define <2 x i16> @vwmulu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = zext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = zext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = zext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = zext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

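; From here on the widened result needs a larger register group than the
; sources: the destination LMUL is twice the source LMUL, which is why the
; source operands below sit in group-aligned registers (v10/v11 at m1,
; v12/v14 at m2, v16/v20 at m4).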
define <16 x i16> @vwmulu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = zext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = zext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = zext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = zext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = zext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = zext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = zext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulu.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = zext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

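; The next three cases start from full m8 register groups, so the widening
; multiply has to be split: each operand is halved with vslidedown and two
; vwmulu.vv instructions produce the two result halves, with vs8r.v/vl8r.v
; spilling one half through the stack to relieve register pressure.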
define <128 x i16> @vwmulu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = zext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = zext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmulu.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulu.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = zext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

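; The following tests widen by more than one step or mix operand widths.
; When both operands are the same narrow type, the vwmulu.vv product is
; zero-extended the rest of the way with vzext; when the operands differ,
; the narrower one is first raised to the wider operand's width with
; vzext.vf2/vf4 so a single vwmulu.vv still applies.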
define <2 x i32> @vwmulu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vwmulu.vv v10, v8, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = zext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = zext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vzext.vf4 v11, v8
; CHECK-NEXT:    vwmulu.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = zext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

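; vwmulu.vx tests: the splat of a scalar argument should fold into the
; vector-scalar form of the widening multiply rather than being
; materialized as a vector.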
define <2 x i16> @vwmulu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = zext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = zext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = zext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = zext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = zext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

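; Below, the scalar operand is loaded from memory. A scalar narrower than or
; equal to the source element type still folds into vwmulu.vx; once the
; scalar is already the wide result type (the i16-on-v8i8 and i32-on-v4i16
; cases), no widening multiply applies and a vzext.vf2 plus vmul.vx is
; emitted instead.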
define <8 x i16> @vwmulu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulu_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulu_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulu_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vzext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = zext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

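; For <2 x i64> results the RV32 and RV64 expectations diverge: RV32 has no
; 64-bit scalar registers, so the splat value is assembled on the stack and
; broadcast with vlse64.v, while RV64 can keep the scalar in a register and
; use vmul.vx directly.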
define <2 x i64> @vwmulu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lb a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lb a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lh a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lh a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    srai a0, a1, 31
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulu_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmulu_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV32-NEXT:    lw a2, 4(a1)
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vle32.v v25, (a0)
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v26, (a0), zero
; RV32-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV32-NEXT:    vzext.vf2 v27, v25
; RV32-NEXT:    vmul.vv v8, v26, v27
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulu_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
; RV64-NEXT:    vle32.v v25, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; RV64-NEXT:    vzext.vf2 v26, v25
; RV64-NEXT:    vmul.vx v8, v26, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = zext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}