; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
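; vwmulsu.vv/vwmulsu.vx compute a widening multiply of a signed operand (vs2)
; by an unsigned operand (vs1/rs1). These tests check that a mul of a
; sign-extended value by a zero-extended value selects the vwmulsu forms,
; with the sign-extended operand kept in the vs2 position.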

define <2 x i16> @vwmulsu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

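; Same multiply with the extends swapped between the two loads; the
; vwmulsu.vv operands swap with them so the sign-extended value stays in vs2.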
define <2 x i16> @vwmulsu_v2i16_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = zext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <4 x i16> @vwmulsu_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmulsu_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = zext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmulsu_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = zext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmulsu_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmulsu_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = zext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmulsu_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = zext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmulsu_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = zext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmulsu_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = zext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmulsu_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = zext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmulsu_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v14, v12
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = zext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmulsu_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = zext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmulsu_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = zext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmulsu_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmulsu.vv v8, v20, v16
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = zext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

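; From here the sources already occupy a full LMUL=8 register group, so the
; multiply is split in half with vslidedown and two vwmulsu.vv ops, spilling
; one m8 group to the stack around the second multiply.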
define <128 x i16> @vwmulsu_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = zext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmulsu_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = zext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmulsu_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmulsu.vv v24, v8, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmulsu.vv v8, v0, v16
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = zext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

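; Extends wider than one step (e.g. i8 -> i32): the inputs are first brought
; within one step of the result type with vzext.vf2/vsext.vf2 (or vsext.vf4),
; and only the final doubling uses vwmulsu.vv.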
define <2 x i32> @vwmulsu_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = zext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmulsu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vwmulsu.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = zext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmulsu.vv v8, v11, v10
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = zext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

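; Splat of a zero-extended scalar: the splat folds into the .vx form, which
; takes the unsigned operand in a GPR.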
define <2 x i16> @vwmulsu_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = zext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

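; Here the splatted scalar is the sign-extended operand. vwmulsu.vx only
; accepts the unsigned operand as a scalar, so the splat is materialized with
; vmv.v.x and the .vv form is used instead.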
define <2 x i16> @vwmulsu_vx_v2i16_swap(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i16_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = zext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmulsu_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = zext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmulsu_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = zext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmulsu_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = zext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = zext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmulsu_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = zext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmulsu_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = zext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmulsu_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = zext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmulsu_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = zext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmulsu_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = zext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmulsu_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = zext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmulsu_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = zext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmulsu_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmulsu_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = zext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmulsu_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = zext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmulsu_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = zext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

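; The scalar comes from memory: the zero extend folds into the lbu load and
; vwmulsu.vx still matches. In the swapped case below, the scalar is the
; sign-extended operand, so it must be splatted with vmv.v.x for vwmulsu.vv.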
define <8 x i16> @vwmulsu_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lb a1, 0(a1)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vmv.v.x v10, a1
; CHECK-NEXT:    vwmulsu.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = zext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

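; An i8 zero-extended to i32 is non-negative, so the plain signed widening
; multiply (vwmul.vx) is equally correct and is what gets selected here.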
define <4 x i32> @vwmulsu_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lbu a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmulsu_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lhu a0, 0(a1)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

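; i64 elements: RV32 has no 64-bit GPRs, so the zero-extended scalar is built
; on the stack and splatted with a zero-strided vlse64 followed by a plain
; vmul, while RV64 can use a widening multiply from the GPR directly.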
define <2 x i64> @vwmulsu_vx_v2i64_i8(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lbu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lbu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = zext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i16(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lhu a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lhu a0, 0(a1)
; RV64-NEXT:    vwmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = zext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmulsu_vx_v2i64_i32(ptr %x, ptr %y) {
; RV32-LABEL: vwmulsu_vx_v2i64_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmulsu_vx_v2i64_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v9, (a0)
; RV64-NEXT:    lwu a0, 0(a1)
; RV64-NEXT:    vwmulsu.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = zext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

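; Masks that make the scalar fit in the unsigned narrow type also allow
; vwmulsu.vx: an 'and' with 255 (or a zext of the full i16) needs no extra
; instruction, while the 254 mask must still be performed with andi.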
define <8 x i16> @vwmulsu_vx_v8i16_i8_and(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 255
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <8 x i16> @vwmulsu_vx_v8i16_i8_and1(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v8i16_i8_and1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    andi a0, a1, 254
; CHECK-NEXT:    vwmulsu.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = and i16 %y, 254
  %c = insertelement <8 x i16> poison, i16 %b, i32 0
  %d = shufflevector <8 x i16> %c, <8 x i16> poison, <8 x i32> zeroinitializer
  %e = sext <8 x i8> %a to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_and(ptr %x, i32 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = and i32 %y, 65535
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <4 x i32> @vwmulsu_vx_v4i32_i16_zext(ptr %x, i16 %y) {
; CHECK-LABEL: vwmulsu_vx_v4i32_i16_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmulsu.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = zext i16 %y to i32
  %c = insertelement <4 x i32> poison, i32 %b, i32 0
  %d = shufflevector <4 x i32> %c, <4 x i32> poison, <4 x i32> zeroinitializer
  %e = sext <4 x i16> %a to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}