; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

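; When both operands are sign-extended loads of the narrow element type, the
; multiply should select vwmul.vv, with the vsetvli operating at the source
; SEW (the destination elements are 2*SEW).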
define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %e = mul <2 x i16> %c, %d
  ret <2 x i16> %e
}

define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: vwmul_v2i16_multiple_users:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vle8.v v10, (a2)
; CHECK-NEXT:    vwmul.vv v11, v8, v9
; CHECK-NEXT:    vwmul.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vor.vv v8, v11, v9
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %b2 = load <2 x i8>, ptr %z
  %c = sext <2 x i8> %a to <2 x i16>
  %d = sext <2 x i8> %b to <2 x i16>
  %d2 = sext <2 x i8> %b2 to <2 x i16>
  %e = mul <2 x i16> %c, %d
  %f = mul <2 x i16> %c, %d2
  %g = or <2 x i16> %e, %f
  ret <2 x i16> %g
}

define <4 x i16> @vwmul_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i8> %a to <4 x i16>
  %d = sext <4 x i8> %b to <4 x i16>
  %e = mul <4 x i16> %c, %d
  ret <4 x i16> %e
}

define <2 x i32> @vwmul_v2i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = load <2 x i16>, ptr %y
  %c = sext <2 x i16> %a to <2 x i32>
  %d = sext <2 x i16> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <8 x i16> @vwmul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vle8.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load <8 x i8>, ptr %y
  %c = sext <8 x i8> %a to <8 x i16>
  %d = sext <8 x i8> %b to <8 x i16>
  %e = mul <8 x i16> %c, %d
  ret <8 x i16> %e
}

define <4 x i32> @vwmul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i16> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <2 x i64> @vwmul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load <2 x i32>, ptr %y
  %c = sext <2 x i32> %a to <2 x i64>
  %d = sext <2 x i32> %b to <2 x i64>
  %e = mul <2 x i64> %c, %d
  ret <2 x i64> %e
}

define <16 x i16> @vwmul_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vle8.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sext <16 x i8> %a to <16 x i16>
  %d = sext <16 x i8> %b to <16 x i16>
  %e = mul <16 x i16> %c, %d
  ret <16 x i16> %e
}

define <8 x i32> @vwmul_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sext <8 x i16> %a to <8 x i32>
  %d = sext <8 x i16> %b to <8 x i32>
  %e = mul <8 x i32> %c, %d
  ret <8 x i32> %e
}

define <4 x i64> @vwmul_v4i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i32> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

define <32 x i16> @vwmul_v32i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vle8.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sext <32 x i8> %a to <32 x i16>
  %d = sext <32 x i8> %b to <32 x i16>
  %e = mul <32 x i16> %c, %d
  ret <32 x i16> %e
}

define <16 x i32> @vwmul_v16i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sext <16 x i16> %a to <16 x i32>
  %d = sext <16 x i16> %b to <16 x i32>
  %e = mul <16 x i32> %c, %d
  ret <16 x i32> %e
}

define <8 x i64> @vwmul_v8i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sext <8 x i32> %a to <8 x i64>
  %d = sext <8 x i32> %b to <8 x i64>
  %e = mul <8 x i64> %c, %d
  ret <8 x i64> %e
}

define <64 x i16> @vwmul_v64i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vle8.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = load <64 x i8>, ptr %y
  %c = sext <64 x i8> %a to <64 x i16>
  %d = sext <64 x i8> %b to <64 x i16>
  %e = mul <64 x i16> %c, %d
  ret <64 x i16> %e
}

define <32 x i32> @vwmul_v32i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = load <32 x i16>, ptr %y
  %c = sext <32 x i16> %a to <32 x i32>
  %d = sext <32 x i16> %b to <32 x i32>
  %e = mul <32 x i32> %c, %d
  ret <32 x i32> %e
}

define <16 x i64> @vwmul_v16i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = load <16 x i32>, ptr %y
  %c = sext <16 x i32> %a to <16 x i64>
  %d = sext <16 x i32> %b to <16 x i64>
  %e = mul <16 x i64> %c, %d
  ret <16 x i64> %e
}

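; Results too wide to be legal are split in half: the high halves are
; extracted with vslidedown and two vwmul.vv instructions are emitted, at the
; cost of spilling and reloading register groups on the stack.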
define <128 x i16> @vwmul_v128i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v128i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 128
; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle8.v v0, (a1)
; CHECK-NEXT:    li a0, 64
; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <128 x i8>, ptr %x
  %b = load <128 x i8>, ptr %y
  %c = sext <128 x i8> %a to <128 x i16>
  %d = sext <128 x i8> %b to <128 x i16>
  %e = mul <128 x i16> %c, %d
  ret <128 x i16> %e
}

define <64 x i32> @vwmul_v64i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v64i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x i16>, ptr %x
  %b = load <64 x i16>, ptr %y
  %c = sext <64 x i16> %a to <64 x i32>
  %d = sext <64 x i16> %b to <64 x i32>
  %e = mul <64 x i32> %c, %d
  ret <64 x i32> %e
}

define <32 x i64> @vwmul_v32i64(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v32i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = load <32 x i32>, ptr %y
  %c = sext <32 x i32> %a to <32 x i64>
  %d = sext <32 x i32> %b to <32 x i64>
  %e = mul <32 x i64> %c, %d
  ret <32 x i64> %e
}

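; vwmul only doubles the element width, so when a source is narrower than
; half of the destination element width it is first widened with vsext.vf2 or
; vsext.vf4 and the final doubling still uses vwmul.vv.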
define <2 x i32> @vwmul_v2i32_v2i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v2i32_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vsext.vf2 v11, v9
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = sext <2 x i8> %a to <2 x i32>
  %d = sext <2 x i8> %b to <2 x i32>
  %e = mul <2 x i32> %c, %d
  ret <2 x i32> %e
}

define <4 x i32> @vwmul_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i32_v4i8_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsext.vf2 v10, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v9
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = load <4 x i16>, ptr %y
  %c = sext <4 x i8> %a to <4 x i32>
  %d = sext <4 x i16> %b to <4 x i32>
  %e = mul <4 x i32> %c, %d
  ret <4 x i32> %e
}

define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a1)
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vsext.vf4 v11, v8
; CHECK-NEXT:    vwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i8>, ptr %y
  %c = sext <4 x i32> %a to <4 x i64>
  %d = sext <4 x i8> %b to <4 x i64>
  %e = mul <4 x i64> %c, %d
  ret <4 x i64> %e
}

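; A sign-extended splat of a scalar of the narrow element type folds into the
; scalar operand of vwmul.vx.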
define <2 x i16> @vwmul_vx_v2i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = insertelement <2 x i8> poison, i8 %y, i32 0
  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i8> %a to <2 x i16>
  %e = sext <2 x i8> %c to <2 x i16>
  %f = mul <2 x i16> %d, %e
  ret <2 x i16> %f
}

define <4 x i16> @vwmul_vx_v4i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i8>, ptr %x
  %b = insertelement <4 x i8> poison, i8 %y, i32 0
  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i8> %a to <4 x i16>
  %e = sext <4 x i8> %c to <4 x i16>
  %f = mul <4 x i16> %d, %e
  ret <4 x i16> %f
}

define <2 x i32> @vwmul_vx_v2i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i16>, ptr %x
  %b = insertelement <2 x i16> poison, i16 %y, i32 0
  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i16> %a to <2 x i32>
  %e = sext <2 x i16> %c to <2 x i32>
  %f = mul <2 x i32> %d, %e
  ret <2 x i32> %f
}

define <8 x i16> @vwmul_vx_v8i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = insertelement <8 x i8> poison, i8 %y, i32 0
  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i8> %a to <8 x i16>
  %e = sext <8 x i8> %c to <8 x i16>
  %f = mul <8 x i16> %d, %e
  ret <8 x i16> %f
}

define <4 x i32> @vwmul_vx_v4i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = insertelement <4 x i16> poison, i16 %y, i32 0
  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i16> %a to <4 x i32>
  %e = sext <4 x i16> %c to <4 x i32>
  %f = mul <4 x i32> %d, %e
  ret <4 x i32> %f
}

define <2 x i64> @vwmul_vx_v2i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vwmul.vx v8, v9, a1
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = insertelement <2 x i32> poison, i32 %y, i64 0
  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
  %d = sext <2 x i32> %a to <2 x i64>
  %e = sext <2 x i32> %c to <2 x i64>
  %f = mul <2 x i64> %d, %e
  ret <2 x i64> %f
}

define <16 x i16> @vwmul_vx_v16i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i8> %a to <16 x i16>
  %e = sext <16 x i8> %c to <16 x i16>
  %f = mul <16 x i16> %d, %e
  ret <16 x i16> %f
}

define <8 x i32> @vwmul_vx_v8i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i16> %a to <8 x i32>
  %e = sext <8 x i16> %c to <8 x i32>
  %f = mul <8 x i32> %d, %e
  ret <8 x i32> %f
}

define <4 x i64> @vwmul_vx_v4i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vwmul.vx v8, v10, a1
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i64 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sext <4 x i32> %a to <4 x i64>
  %e = sext <4 x i32> %c to <4 x i64>
  %f = mul <4 x i64> %d, %e
  ret <4 x i64> %f
}

define <32 x i16> @vwmul_vx_v32i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = insertelement <32 x i8> poison, i8 %y, i32 0
  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i8> %a to <32 x i16>
  %e = sext <32 x i8> %c to <32 x i16>
  %f = mul <32 x i16> %d, %e
  ret <32 x i16> %f
}

define <16 x i32> @vwmul_vx_v16i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = insertelement <16 x i16> poison, i16 %y, i32 0
  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i16> %a to <16 x i32>
  %e = sext <16 x i16> %c to <16 x i32>
  %f = mul <16 x i32> %d, %e
  ret <16 x i32> %f
}

define <8 x i64> @vwmul_vx_v8i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vwmul.vx v8, v12, a1
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = insertelement <8 x i32> poison, i32 %y, i64 0
  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
  %d = sext <8 x i32> %a to <8 x i64>
  %e = sext <8 x i32> %c to <8 x i64>
  %f = mul <8 x i64> %d, %e
  ret <8 x i64> %f
}

define <64 x i16> @vwmul_vx_v64i16(ptr %x, i8 %y) {
; CHECK-LABEL: vwmul_vx_v64i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <64 x i8>, ptr %x
  %b = insertelement <64 x i8> poison, i8 %y, i32 0
  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
  %d = sext <64 x i8> %a to <64 x i16>
  %e = sext <64 x i8> %c to <64 x i16>
  %f = mul <64 x i16> %d, %e
  ret <64 x i16> %f
}

define <32 x i32> @vwmul_vx_v32i32(ptr %x, i16 %y) {
; CHECK-LABEL: vwmul_vx_v32i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %x
  %b = insertelement <32 x i16> poison, i16 %y, i32 0
  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
  %d = sext <32 x i16> %a to <32 x i32>
  %e = sext <32 x i16> %c to <32 x i32>
  %f = mul <32 x i32> %d, %e
  ret <32 x i32> %f
}

define <16 x i64> @vwmul_vx_v16i64(ptr %x, i32 %y) {
; CHECK-LABEL: vwmul_vx_v16i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vwmul.vx v8, v16, a1
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = insertelement <16 x i32> poison, i32 %y, i64 0
  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
  %d = sext <16 x i32> %a to <16 x i64>
  %e = sext <16 x i32> %c to <16 x i64>
  %f = mul <16 x i64> %d, %e
  ret <16 x i64> %f
}

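; Scalar operands loaded from memory: a scalar no wider than the source
; element type can be loaded with a sign-extending scalar load and fed to
; vwmul.vx, while a scalar that is already the wide result type forces a
; vsext of the vector operand followed by vmul.vx.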
define <8 x i16> @vwmul_vx_v8i16_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i16
  %d = insertelement <8 x i16> poison, i16 %c, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <8 x i16> @vwmul_vx_v8i16_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v8i16_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <8 x i8>, ptr %x
  %b = load i16, ptr %y
  %d = insertelement <8 x i16> poison, i16 %b, i32 0
  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
  %f = sext <8 x i8> %a to <8 x i16>
  %g = mul <8 x i16> %e, %f
  ret <8 x i16> %g
}

define <4 x i32> @vwmul_vx_v4i32_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i32
  %d = insertelement <4 x i32> poison, i32 %c, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <4 x i32> @vwmul_vx_v4i32_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v4i32_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vsext.vf2 v9, v8
; CHECK-NEXT:    vmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %x
  %b = load i32, ptr %y
  %d = insertelement <4 x i32> poison, i32 %b, i32 0
  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
  %f = sext <4 x i16> %a to <4 x i32>
  %g = mul <4 x i32> %e, %f
  ret <4 x i32> %g
}

define <2 x i64> @vwmul_vx_v2i64_i8(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lb a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i8, ptr %y
  %c = sext i8 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lh a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i16, ptr %y
  %c = sext i16 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

define <2 x i64> @vwmul_vx_v2i64_i32(ptr %x, ptr %y) {
; CHECK-LABEL: vwmul_vx_v2i64_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    lw a0, 0(a1)
; CHECK-NEXT:    vwmul.vx v8, v9, a0
; CHECK-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i32, ptr %y
  %c = sext i32 %b to i64
  %d = insertelement <2 x i64> poison, i64 %c, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

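; An i64 scalar cannot use vwmul.vx. On RV32 the splat is materialized through
; the stack with a stride-zero vlse64.v; on RV64 a plain ld feeds vsext.vf2
; plus vmul.vx.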
define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) {
; RV32-LABEL: vwmul_vx_v2i64_i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a2, 0(a1)
; RV32-NEXT:    lw a1, 4(a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vmul.vv v8, v9, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vwmul_vx_v2i64_i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    ld a0, 0(a1)
; RV64-NEXT:    vsext.vf2 v9, v8
; RV64-NEXT:    vmul.vx v8, v9, a0
; RV64-NEXT:    ret
  %a = load <2 x i32>, ptr %x
  %b = load i64, ptr %y
  %d = insertelement <2 x i64> poison, i64 %b, i64 0
  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
  %f = sext <2 x i32> %a to <2 x i64>
  %g = mul <2 x i64> %e, %f
  ret <2 x i64> %g
}

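; Here the extended values have several users, including a udiv that cannot be
; widened, so codegen keeps explicit vsext.vf2s and uses vmul.vv and vdivu.vv
; on the wide type instead of vwmul.vv.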
define <2 x i16> @vwmul_v2i16_multiuse(ptr %x, ptr %y, ptr %z, ptr %w) {
; CHECK-LABEL: vwmul_v2i16_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vle8.v v10, (a2)
; CHECK-NEXT:    vle8.v v11, (a3)
; CHECK-NEXT:    vsext.vf2 v12, v8
; CHECK-NEXT:    vsext.vf2 v8, v9
; CHECK-NEXT:    vsext.vf2 v9, v10
; CHECK-NEXT:    vsext.vf2 v10, v11
; CHECK-NEXT:    vmul.vv v11, v12, v10
; CHECK-NEXT:    vmul.vv v10, v8, v10
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vor.vv v9, v11, v10
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = load <2 x i8>, ptr %x
  %b = load <2 x i8>, ptr %y
  %c = load <2 x i8>, ptr %z
  %d = load <2 x i8>, ptr %w

  %as = sext <2 x i8> %a to <2 x i16>
  %bs = sext <2 x i8> %b to <2 x i16>
  %cs = sext <2 x i8> %c to <2 x i16>
  %ds = sext <2 x i8> %d to <2 x i16>

  %e = mul <2 x i16> %as, %ds
  %f = mul <2 x i16> %bs, %ds ; shares 1 use with %e
  %g = udiv <2 x i16> %bs, %cs ; shares 1 use with %f, and no uses with %e

  %h = or <2 x i16> %e, %f
  %i = or <2 x i16> %h, %g
  ret <2 x i16> %i
}