xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll (revision 2967e5f8007d873a3e9d97870d2461d0827a3976)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4
; Loads two <2 x i8> vectors, zero-extends both to <2 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e8/mf8.
5define <2 x i16> @vwsubu_v2i16(ptr %x, ptr %y) {
6; CHECK-LABEL: vwsubu_v2i16:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
9; CHECK-NEXT:    vle8.v v9, (a0)
10; CHECK-NEXT:    vle8.v v10, (a1)
11; CHECK-NEXT:    vwsubu.vv v8, v9, v10
12; CHECK-NEXT:    ret
13  %a = load <2 x i8>, ptr %x
14  %b = load <2 x i8>, ptr %y
15  %c = zext <2 x i8> %a to <2 x i16>
16  %d = zext <2 x i8> %b to <2 x i16>
17  %e = sub <2 x i16> %c, %d
18  ret <2 x i16> %e
19}
20
; Loads two <4 x i8> vectors, zero-extends both to <4 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e8/mf4.
21define <4 x i16> @vwsubu_v4i16(ptr %x, ptr %y) {
22; CHECK-LABEL: vwsubu_v4i16:
23; CHECK:       # %bb.0:
24; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
25; CHECK-NEXT:    vle8.v v9, (a0)
26; CHECK-NEXT:    vle8.v v10, (a1)
27; CHECK-NEXT:    vwsubu.vv v8, v9, v10
28; CHECK-NEXT:    ret
29  %a = load <4 x i8>, ptr %x
30  %b = load <4 x i8>, ptr %y
31  %c = zext <4 x i8> %a to <4 x i16>
32  %d = zext <4 x i8> %b to <4 x i16>
33  %e = sub <4 x i16> %c, %d
34  ret <4 x i16> %e
35}
36
; Loads two <2 x i16> vectors, zero-extends both to <2 x i32> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e16/mf4.
37define <2 x i32> @vwsubu_v2i32(ptr %x, ptr %y) {
38; CHECK-LABEL: vwsubu_v2i32:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
41; CHECK-NEXT:    vle16.v v9, (a0)
42; CHECK-NEXT:    vle16.v v10, (a1)
43; CHECK-NEXT:    vwsubu.vv v8, v9, v10
44; CHECK-NEXT:    ret
45  %a = load <2 x i16>, ptr %x
46  %b = load <2 x i16>, ptr %y
47  %c = zext <2 x i16> %a to <2 x i32>
48  %d = zext <2 x i16> %b to <2 x i32>
49  %e = sub <2 x i32> %c, %d
50  ret <2 x i32> %e
51}
52
; Loads two <8 x i8> vectors, zero-extends both to <8 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e8/mf2.
53define <8 x i16> @vwsubu_v8i16(ptr %x, ptr %y) {
54; CHECK-LABEL: vwsubu_v8i16:
55; CHECK:       # %bb.0:
56; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
57; CHECK-NEXT:    vle8.v v9, (a0)
58; CHECK-NEXT:    vle8.v v10, (a1)
59; CHECK-NEXT:    vwsubu.vv v8, v9, v10
60; CHECK-NEXT:    ret
61  %a = load <8 x i8>, ptr %x
62  %b = load <8 x i8>, ptr %y
63  %c = zext <8 x i8> %a to <8 x i16>
64  %d = zext <8 x i8> %b to <8 x i16>
65  %e = sub <8 x i16> %c, %d
66  ret <8 x i16> %e
67}
68
; Loads two <4 x i16> vectors, zero-extends both to <4 x i32> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e16/mf2.
69define <4 x i32> @vwsubu_v4i32(ptr %x, ptr %y) {
70; CHECK-LABEL: vwsubu_v4i32:
71; CHECK:       # %bb.0:
72; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
73; CHECK-NEXT:    vle16.v v9, (a0)
74; CHECK-NEXT:    vle16.v v10, (a1)
75; CHECK-NEXT:    vwsubu.vv v8, v9, v10
76; CHECK-NEXT:    ret
77  %a = load <4 x i16>, ptr %x
78  %b = load <4 x i16>, ptr %y
79  %c = zext <4 x i16> %a to <4 x i32>
80  %d = zext <4 x i16> %b to <4 x i32>
81  %e = sub <4 x i32> %c, %d
82  ret <4 x i32> %e
83}
84
; Loads two <2 x i32> vectors, zero-extends both to <2 x i64> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e32/mf2.
85define <2 x i64> @vwsubu_v2i64(ptr %x, ptr %y) {
86; CHECK-LABEL: vwsubu_v2i64:
87; CHECK:       # %bb.0:
88; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
89; CHECK-NEXT:    vle32.v v9, (a0)
90; CHECK-NEXT:    vle32.v v10, (a1)
91; CHECK-NEXT:    vwsubu.vv v8, v9, v10
92; CHECK-NEXT:    ret
93  %a = load <2 x i32>, ptr %x
94  %b = load <2 x i32>, ptr %y
95  %c = zext <2 x i32> %a to <2 x i64>
96  %d = zext <2 x i32> %b to <2 x i64>
97  %e = sub <2 x i64> %c, %d
98  ret <2 x i64> %e
99}
100
; Loads two <16 x i8> vectors, zero-extends both to <16 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e8/m1; the sources
; land in v10/v11 so they do not overlap the v8 destination group.
101define <16 x i16> @vwsubu_v16i16(ptr %x, ptr %y) {
102; CHECK-LABEL: vwsubu_v16i16:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
105; CHECK-NEXT:    vle8.v v10, (a0)
106; CHECK-NEXT:    vle8.v v11, (a1)
107; CHECK-NEXT:    vwsubu.vv v8, v10, v11
108; CHECK-NEXT:    ret
109  %a = load <16 x i8>, ptr %x
110  %b = load <16 x i8>, ptr %y
111  %c = zext <16 x i8> %a to <16 x i16>
112  %d = zext <16 x i8> %b to <16 x i16>
113  %e = sub <16 x i16> %c, %d
114  ret <16 x i16> %e
115}
116
; Loads two <8 x i16> vectors, zero-extends both to <8 x i32> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e16/m1.
117define <8 x i32> @vwsubu_v8i32(ptr %x, ptr %y) {
118; CHECK-LABEL: vwsubu_v8i32:
119; CHECK:       # %bb.0:
120; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
121; CHECK-NEXT:    vle16.v v10, (a0)
122; CHECK-NEXT:    vle16.v v11, (a1)
123; CHECK-NEXT:    vwsubu.vv v8, v10, v11
124; CHECK-NEXT:    ret
125  %a = load <8 x i16>, ptr %x
126  %b = load <8 x i16>, ptr %y
127  %c = zext <8 x i16> %a to <8 x i32>
128  %d = zext <8 x i16> %b to <8 x i32>
129  %e = sub <8 x i32> %c, %d
130  ret <8 x i32> %e
131}
132
; Loads two <4 x i32> vectors, zero-extends both to <4 x i64> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e32/m1.
133define <4 x i64> @vwsubu_v4i64(ptr %x, ptr %y) {
134; CHECK-LABEL: vwsubu_v4i64:
135; CHECK:       # %bb.0:
136; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
137; CHECK-NEXT:    vle32.v v10, (a0)
138; CHECK-NEXT:    vle32.v v11, (a1)
139; CHECK-NEXT:    vwsubu.vv v8, v10, v11
140; CHECK-NEXT:    ret
141  %a = load <4 x i32>, ptr %x
142  %b = load <4 x i32>, ptr %y
143  %c = zext <4 x i32> %a to <4 x i64>
144  %d = zext <4 x i32> %b to <4 x i64>
145  %e = sub <4 x i64> %c, %d
146  ret <4 x i64> %e
147}
148
; Loads two <32 x i8> vectors, zero-extends both to <32 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv at e8/m2. The element count 32
; is materialised in a2 with li and passed to vsetvli (register AVL form)
; instead of the immediate vsetivli form used by the smaller tests.
149define <32 x i16> @vwsubu_v32i16(ptr %x, ptr %y) {
150; CHECK-LABEL: vwsubu_v32i16:
151; CHECK:       # %bb.0:
152; CHECK-NEXT:    li a2, 32
153; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
154; CHECK-NEXT:    vle8.v v12, (a0)
155; CHECK-NEXT:    vle8.v v14, (a1)
156; CHECK-NEXT:    vwsubu.vv v8, v12, v14
157; CHECK-NEXT:    ret
158  %a = load <32 x i8>, ptr %x
159  %b = load <32 x i8>, ptr %y
160  %c = zext <32 x i8> %a to <32 x i16>
161  %d = zext <32 x i8> %b to <32 x i16>
162  %e = sub <32 x i16> %c, %d
163  ret <32 x i16> %e
164}
165
; Loads two <16 x i16> vectors, zero-extends both to <16 x i32> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e16/m2.
166define <16 x i32> @vwsubu_v16i32(ptr %x, ptr %y) {
167; CHECK-LABEL: vwsubu_v16i32:
168; CHECK:       # %bb.0:
169; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
170; CHECK-NEXT:    vle16.v v12, (a0)
171; CHECK-NEXT:    vle16.v v14, (a1)
172; CHECK-NEXT:    vwsubu.vv v8, v12, v14
173; CHECK-NEXT:    ret
174  %a = load <16 x i16>, ptr %x
175  %b = load <16 x i16>, ptr %y
176  %c = zext <16 x i16> %a to <16 x i32>
177  %d = zext <16 x i16> %b to <16 x i32>
178  %e = sub <16 x i32> %c, %d
179  ret <16 x i32> %e
180}
181
; Loads two <8 x i32> vectors, zero-extends both to <8 x i64> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e32/m2.
182define <8 x  i64> @vwsubu_v8i64(ptr %x, ptr %y) {
183; CHECK-LABEL: vwsubu_v8i64:
184; CHECK:       # %bb.0:
185; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
186; CHECK-NEXT:    vle32.v v12, (a0)
187; CHECK-NEXT:    vle32.v v14, (a1)
188; CHECK-NEXT:    vwsubu.vv v8, v12, v14
189; CHECK-NEXT:    ret
190  %a = load <8 x  i32>, ptr %x
191  %b = load <8 x  i32>, ptr %y
192  %c = zext <8 x  i32> %a to <8 x  i64>
193  %d = zext <8 x  i32> %b to <8 x  i64>
194  %e = sub <8 x  i64> %c, %d
195  ret <8 x  i64> %e
196}
197
; Loads two <64 x i8> vectors, zero-extends both to <64 x i16> and subtracts.
; The expected assembly is a single vwsubu.vv at e8/m4, with the element
; count 64 supplied to vsetvli through a2.
198define <64 x i16> @vwsubu_v64i16(ptr %x, ptr %y) {
199; CHECK-LABEL: vwsubu_v64i16:
200; CHECK:       # %bb.0:
201; CHECK-NEXT:    li a2, 64
202; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
203; CHECK-NEXT:    vle8.v v16, (a0)
204; CHECK-NEXT:    vle8.v v20, (a1)
205; CHECK-NEXT:    vwsubu.vv v8, v16, v20
206; CHECK-NEXT:    ret
207  %a = load <64 x i8>, ptr %x
208  %b = load <64 x i8>, ptr %y
209  %c = zext <64 x i8> %a to <64 x i16>
210  %d = zext <64 x i8> %b to <64 x i16>
211  %e = sub <64 x i16> %c, %d
212  ret <64 x i16> %e
213}
214
; Loads two <32 x i16> vectors, zero-extends both to <32 x i32> and subtracts.
; The expected assembly is a single vwsubu.vv at e16/m4, with the element
; count 32 supplied to vsetvli through a2.
215define <32 x i32> @vwsubu_v32i32(ptr %x, ptr %y) {
216; CHECK-LABEL: vwsubu_v32i32:
217; CHECK:       # %bb.0:
218; CHECK-NEXT:    li a2, 32
219; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
220; CHECK-NEXT:    vle16.v v16, (a0)
221; CHECK-NEXT:    vle16.v v20, (a1)
222; CHECK-NEXT:    vwsubu.vv v8, v16, v20
223; CHECK-NEXT:    ret
224  %a = load <32 x i16>, ptr %x
225  %b = load <32 x i16>, ptr %y
226  %c = zext <32 x i16> %a to <32 x i32>
227  %d = zext <32 x i16> %b to <32 x i32>
228  %e = sub <32 x i32> %c, %d
229  ret <32 x i32> %e
230}
231
; Loads two <16 x i32> vectors, zero-extends both to <16 x i64> and subtracts.
; The expected assembly is a single vwsubu.vv issued at e32/m4.
232define <16 x i64> @vwsubu_v16i64(ptr %x, ptr %y) {
233; CHECK-LABEL: vwsubu_v16i64:
234; CHECK:       # %bb.0:
235; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
236; CHECK-NEXT:    vle32.v v16, (a0)
237; CHECK-NEXT:    vle32.v v20, (a1)
238; CHECK-NEXT:    vwsubu.vv v8, v16, v20
239; CHECK-NEXT:    ret
240  %a = load <16 x i32>, ptr %x
241  %b = load <16 x i32>, ptr %y
242  %c = zext <16 x i32> %a to <16 x i64>
243  %d = zext <16 x i32> %b to <16 x i64>
244  %e = sub <16 x i64> %c, %d
245  ret <16 x i64> %e
246}
247
; Loads two <128 x i8> vectors, zero-extends both to <128 x i16> and subtracts.
; The widened result does not fit one register group, so the expected
; assembly handles it as two 64-element halves: the upper halves are obtained
; with vslidedown.vx by 64, each half is subtracted with its own vwsubu.vv at
; e8/m4, and the results are returned in v8 (low half) and v16 (high half).
; One source vector and the first result are spilled to and reloaded from a
; stack area of 16 * vlenb bytes (vs8r.v / vl8r.v).
248define <128 x i16> @vwsubu_v128i16(ptr %x, ptr %y) nounwind {
249; CHECK-LABEL: vwsubu_v128i16:
250; CHECK:       # %bb.0:
251; CHECK-NEXT:    addi sp, sp, -16
252; CHECK-NEXT:    csrr a2, vlenb
253; CHECK-NEXT:    slli a2, a2, 4
254; CHECK-NEXT:    sub sp, sp, a2
255; CHECK-NEXT:    li a2, 128
256; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, ma
257; CHECK-NEXT:    vle8.v v8, (a0)
258; CHECK-NEXT:    addi a0, sp, 16
259; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
260; CHECK-NEXT:    vle8.v v0, (a1)
261; CHECK-NEXT:    li a0, 64
262; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
263; CHECK-NEXT:    vslidedown.vx v16, v8, a0
264; CHECK-NEXT:    vslidedown.vx v8, v0, a0
265; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
266; CHECK-NEXT:    vwsubu.vv v24, v16, v8
267; CHECK-NEXT:    csrr a0, vlenb
268; CHECK-NEXT:    slli a0, a0, 3
269; CHECK-NEXT:    add a0, sp, a0
270; CHECK-NEXT:    addi a0, a0, 16
271; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
272; CHECK-NEXT:    addi a0, sp, 16
273; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
274; CHECK-NEXT:    vwsubu.vv v8, v16, v0
275; CHECK-NEXT:    csrr a0, vlenb
276; CHECK-NEXT:    slli a0, a0, 3
277; CHECK-NEXT:    add a0, sp, a0
278; CHECK-NEXT:    addi a0, a0, 16
279; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
280; CHECK-NEXT:    csrr a0, vlenb
281; CHECK-NEXT:    slli a0, a0, 4
282; CHECK-NEXT:    add sp, sp, a0
283; CHECK-NEXT:    addi sp, sp, 16
284; CHECK-NEXT:    ret
285  %a = load <128 x i8>, ptr %x
286  %b = load <128 x i8>, ptr %y
287  %c = zext <128 x i8> %a to <128 x i16>
288  %d = zext <128 x i8> %b to <128 x i16>
289  %e = sub <128 x i16> %c, %d
290  ret <128 x i16> %e
291}
292
; Loads two <64 x i16> vectors, zero-extends both to <64 x i32> and subtracts.
; The expected assembly splits the work into two 32-element halves: the upper
; halves come from vslidedown.vx by 32, each half gets its own vwsubu.vv at
; e16/m4, and the results are returned in v8 (low half) and v16 (high half).
; One source vector and the first result are spilled to and reloaded from a
; stack area of 16 * vlenb bytes.
293define <64 x i32> @vwsubu_v64i32(ptr %x, ptr %y) nounwind {
294; CHECK-LABEL: vwsubu_v64i32:
295; CHECK:       # %bb.0:
296; CHECK-NEXT:    addi sp, sp, -16
297; CHECK-NEXT:    csrr a2, vlenb
298; CHECK-NEXT:    slli a2, a2, 4
299; CHECK-NEXT:    sub sp, sp, a2
300; CHECK-NEXT:    li a2, 64
301; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
302; CHECK-NEXT:    vle16.v v8, (a0)
303; CHECK-NEXT:    addi a0, sp, 16
304; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
305; CHECK-NEXT:    vle16.v v0, (a1)
306; CHECK-NEXT:    li a0, 32
307; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
308; CHECK-NEXT:    vslidedown.vx v16, v8, a0
309; CHECK-NEXT:    vslidedown.vx v8, v0, a0
310; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
311; CHECK-NEXT:    vwsubu.vv v24, v16, v8
312; CHECK-NEXT:    csrr a0, vlenb
313; CHECK-NEXT:    slli a0, a0, 3
314; CHECK-NEXT:    add a0, sp, a0
315; CHECK-NEXT:    addi a0, a0, 16
316; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
317; CHECK-NEXT:    addi a0, sp, 16
318; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
319; CHECK-NEXT:    vwsubu.vv v8, v16, v0
320; CHECK-NEXT:    csrr a0, vlenb
321; CHECK-NEXT:    slli a0, a0, 3
322; CHECK-NEXT:    add a0, sp, a0
323; CHECK-NEXT:    addi a0, a0, 16
324; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
325; CHECK-NEXT:    csrr a0, vlenb
326; CHECK-NEXT:    slli a0, a0, 4
327; CHECK-NEXT:    add sp, sp, a0
328; CHECK-NEXT:    addi sp, sp, 16
329; CHECK-NEXT:    ret
330  %a = load <64 x i16>, ptr %x
331  %b = load <64 x i16>, ptr %y
332  %c = zext <64 x i16> %a to <64 x i32>
333  %d = zext <64 x i16> %b to <64 x i32>
334  %e = sub <64 x i32> %c, %d
335  ret <64 x i32> %e
336}
337
; Loads two <32 x i32> vectors, zero-extends both to <32 x i64> and subtracts.
; The expected assembly splits the work into two 16-element halves. Unlike the
; i16/i32 result variants, the half size 16 is encoded as an immediate
; (vsetivli and vslidedown.vi) rather than held in a register. Each half gets
; its own vwsubu.vv at e32/m4, with results in v8 (low) and v16 (high) and
; spills to a stack area of 16 * vlenb bytes.
338define <32 x i64> @vwsubu_v32i64(ptr %x, ptr %y) nounwind {
339; CHECK-LABEL: vwsubu_v32i64:
340; CHECK:       # %bb.0:
341; CHECK-NEXT:    addi sp, sp, -16
342; CHECK-NEXT:    csrr a2, vlenb
343; CHECK-NEXT:    slli a2, a2, 4
344; CHECK-NEXT:    sub sp, sp, a2
345; CHECK-NEXT:    li a2, 32
346; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
347; CHECK-NEXT:    vle32.v v8, (a0)
348; CHECK-NEXT:    addi a0, sp, 16
349; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
350; CHECK-NEXT:    vle32.v v0, (a1)
351; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
352; CHECK-NEXT:    vslidedown.vi v16, v8, 16
353; CHECK-NEXT:    vslidedown.vi v8, v0, 16
354; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
355; CHECK-NEXT:    vwsubu.vv v24, v16, v8
356; CHECK-NEXT:    csrr a0, vlenb
357; CHECK-NEXT:    slli a0, a0, 3
358; CHECK-NEXT:    add a0, sp, a0
359; CHECK-NEXT:    addi a0, a0, 16
360; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
361; CHECK-NEXT:    addi a0, sp, 16
362; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
363; CHECK-NEXT:    vwsubu.vv v8, v16, v0
364; CHECK-NEXT:    csrr a0, vlenb
365; CHECK-NEXT:    slli a0, a0, 3
366; CHECK-NEXT:    add a0, sp, a0
367; CHECK-NEXT:    addi a0, a0, 16
368; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
369; CHECK-NEXT:    csrr a0, vlenb
370; CHECK-NEXT:    slli a0, a0, 4
371; CHECK-NEXT:    add sp, sp, a0
372; CHECK-NEXT:    addi sp, sp, 16
373; CHECK-NEXT:    ret
374  %a = load <32 x i32>, ptr %x
375  %b = load <32 x i32>, ptr %y
376  %c = zext <32 x i32> %a to <32 x i64>
377  %d = zext <32 x i32> %b to <32 x i64>
378  %e = sub <32 x i64> %c, %d
379  ret <32 x i64> %e
380}
381
; Both <2 x i8> operands are zero-extended by 4x straight to <2 x i32>.
; The expected assembly narrows the work: vwsubu.vv produces an i16
; difference at e8/mf8, which is then sign-extended to e32 with vsext.vf2.
; NOTE(review): the IR body is the same as @vwsubu_v2i32_of_v2i8 later in
; this file - confirm whether both copies are intentional.
382define <2 x i32> @vwsubu_v2i32_v2i8(ptr %x, ptr %y) {
383; CHECK-LABEL: vwsubu_v2i32_v2i8:
384; CHECK:       # %bb.0:
385; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
386; CHECK-NEXT:    vle8.v v8, (a0)
387; CHECK-NEXT:    vle8.v v9, (a1)
388; CHECK-NEXT:    vwsubu.vv v10, v8, v9
389; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
390; CHECK-NEXT:    vsext.vf2 v8, v10
391; CHECK-NEXT:    ret
392  %a = load <2 x i8>, ptr %x
393  %b = load <2 x i8>, ptr %y
394  %c = zext <2 x i8> %a to <2 x i32>
395  %d = zext <2 x i8> %b to <2 x i32>
396  %e = sub <2 x i32> %c, %d
397  ret <2 x i32> %e
398}
399
; Mixed source widths: %x is <4 x i8> and %y is <4 x i16>, both zero-extended
; to <4 x i32>. The expected assembly first brings the i8 operand up to i16
; with vzext.vf2, then performs one vwsubu.vv at e16/mf2.
400define <4 x i32> @vwsubu_v4i32_v4i8_v4i16(ptr %x, ptr %y) {
401; CHECK-LABEL: vwsubu_v4i32_v4i8_v4i16:
402; CHECK:       # %bb.0:
403; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
404; CHECK-NEXT:    vle8.v v8, (a0)
405; CHECK-NEXT:    vle16.v v9, (a1)
406; CHECK-NEXT:    vzext.vf2 v10, v8
407; CHECK-NEXT:    vwsubu.vv v8, v10, v9
408; CHECK-NEXT:    ret
409  %a = load <4 x i8>, ptr %x
410  %b = load <4 x i16>, ptr %y
411  %c = zext <4 x i8> %a to <4 x i32>
412  %d = zext <4 x i16> %b to <4 x i32>
413  %e = sub <4 x i32> %c, %d
414  ret <4 x i32> %e
415}
416
; Mixed source widths: %x is <4 x i32> and %y is <4 x i8>, both zero-extended
; to <4 x i64>. The expected assembly first brings the i8 operand up to i32
; with vzext.vf4, then performs one vwsubu.vv at e32/m1.
417define <4 x i64> @vwsubu_v4i64_v4i32_v4i8(ptr %x, ptr %y) {
418; CHECK-LABEL: vwsubu_v4i64_v4i32_v4i8:
419; CHECK:       # %bb.0:
420; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
421; CHECK-NEXT:    vle8.v v8, (a1)
422; CHECK-NEXT:    vle32.v v10, (a0)
423; CHECK-NEXT:    vzext.vf4 v11, v8
424; CHECK-NEXT:    vwsubu.vv v8, v10, v11
425; CHECK-NEXT:    ret
426  %a = load <4 x i32>, ptr %x
427  %b = load <4 x i8>, ptr %y
428  %c = zext <4 x i32> %a to <4 x i64>
429  %d = zext <4 x i8> %b to <4 x i64>
430  %e = sub <4 x i64> %c, %d
431  ret <4 x i64> %e
432}
433
; Vector-minus-scalar form: the i8 argument %y is splatted across <2 x i8>
; (insertelement + zero-mask shufflevector), both operands are zero-extended
; to <2 x i16>, and the splat is subtracted from the loaded vector.
; The expected assembly is a single vwsubu.vx taking the scalar from a1.
434define <2 x i16> @vwsubu_vx_v2i16(ptr %x, i8 %y) {
435; CHECK-LABEL: vwsubu_vx_v2i16:
436; CHECK:       # %bb.0:
437; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
438; CHECK-NEXT:    vle8.v v9, (a0)
439; CHECK-NEXT:    vwsubu.vx v8, v9, a1
440; CHECK-NEXT:    ret
441  %a = load <2 x i8>, ptr %x
442  %b = insertelement <2 x i8> poison, i8 %y, i32 0
443  %c = shufflevector <2 x i8> %b, <2 x i8> poison, <2 x i32> zeroinitializer
444  %d = zext <2 x i8> %a to <2 x i16>
445  %e = zext <2 x i8> %c to <2 x i16>
446  %f = sub <2 x i16> %d, %e
447  ret <2 x i16> %f
448}
449
; Vector-minus-scalar form for <4 x i8> -> <4 x i16>: the i8 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e8/mf4.
450define <4 x i16> @vwsubu_vx_v4i16(ptr %x, i8 %y) {
451; CHECK-LABEL: vwsubu_vx_v4i16:
452; CHECK:       # %bb.0:
453; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
454; CHECK-NEXT:    vle8.v v9, (a0)
455; CHECK-NEXT:    vwsubu.vx v8, v9, a1
456; CHECK-NEXT:    ret
457  %a = load <4 x i8>, ptr %x
458  %b = insertelement <4 x i8> poison, i8 %y, i32 0
459  %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
460  %d = zext <4 x i8> %a to <4 x i16>
461  %e = zext <4 x i8> %c to <4 x i16>
462  %f = sub <4 x i16> %d, %e
463  ret <4 x i16> %f
464}
465
; Vector-minus-scalar form for <2 x i16> -> <2 x i32>: the i16 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e16/mf4.
466define <2 x i32> @vwsubu_vx_v2i32(ptr %x, i16 %y) {
467; CHECK-LABEL: vwsubu_vx_v2i32:
468; CHECK:       # %bb.0:
469; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
470; CHECK-NEXT:    vle16.v v9, (a0)
471; CHECK-NEXT:    vwsubu.vx v8, v9, a1
472; CHECK-NEXT:    ret
473  %a = load <2 x i16>, ptr %x
474  %b = insertelement <2 x i16> poison, i16 %y, i32 0
475  %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
476  %d = zext <2 x i16> %a to <2 x i32>
477  %e = zext <2 x i16> %c to <2 x i32>
478  %f = sub <2 x i32> %d, %e
479  ret <2 x i32> %f
480}
481
; Vector-minus-scalar form for <8 x i8> -> <8 x i16>: the i8 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e8/mf2.
482define <8 x i16> @vwsubu_vx_v8i16(ptr %x, i8 %y) {
483; CHECK-LABEL: vwsubu_vx_v8i16:
484; CHECK:       # %bb.0:
485; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
486; CHECK-NEXT:    vle8.v v9, (a0)
487; CHECK-NEXT:    vwsubu.vx v8, v9, a1
488; CHECK-NEXT:    ret
489  %a = load <8 x i8>, ptr %x
490  %b = insertelement <8 x i8> poison, i8 %y, i32 0
491  %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer
492  %d = zext <8 x i8> %a to <8 x i16>
493  %e = zext <8 x i8> %c to <8 x i16>
494  %f = sub <8 x i16> %d, %e
495  ret <8 x i16> %f
496}
497
; Vector-minus-scalar form for <4 x i16> -> <4 x i32>: the i16 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e16/mf2.
498define <4 x i32> @vwsubu_vx_v4i32(ptr %x, i16 %y) {
499; CHECK-LABEL: vwsubu_vx_v4i32:
500; CHECK:       # %bb.0:
501; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
502; CHECK-NEXT:    vle16.v v9, (a0)
503; CHECK-NEXT:    vwsubu.vx v8, v9, a1
504; CHECK-NEXT:    ret
505  %a = load <4 x i16>, ptr %x
506  %b = insertelement <4 x i16> poison, i16 %y, i32 0
507  %c = shufflevector <4 x i16> %b, <4 x i16> poison, <4 x i32> zeroinitializer
508  %d = zext <4 x i16> %a to <4 x i32>
509  %e = zext <4 x i16> %c to <4 x i32>
510  %f = sub <4 x i32> %d, %e
511  ret <4 x i32> %f
512}
513
; Vector-minus-scalar form for <2 x i32> -> <2 x i64>: the i32 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e32/mf2, shared by both
; riscv32 and riscv64 runs.
514define <2 x i64> @vwsubu_vx_v2i64(ptr %x, i32 %y) {
515; CHECK-LABEL: vwsubu_vx_v2i64:
516; CHECK:       # %bb.0:
517; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
518; CHECK-NEXT:    vle32.v v9, (a0)
519; CHECK-NEXT:    vwsubu.vx v8, v9, a1
520; CHECK-NEXT:    ret
521  %a = load <2 x i32>, ptr %x
522  %b = insertelement <2 x i32> poison, i32 %y, i64 0
523  %c = shufflevector <2 x i32> %b, <2 x i32> poison, <2 x i32> zeroinitializer
524  %d = zext <2 x i32> %a to <2 x i64>
525  %e = zext <2 x i32> %c to <2 x i64>
526  %f = sub <2 x i64> %d, %e
527  ret <2 x i64> %f
528}
529
; Vector-minus-scalar form for <16 x i8> -> <16 x i16>: the i8 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e8/m1.
530define <16 x i16> @vwsubu_vx_v16i16(ptr %x, i8 %y) {
531; CHECK-LABEL: vwsubu_vx_v16i16:
532; CHECK:       # %bb.0:
533; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
534; CHECK-NEXT:    vle8.v v10, (a0)
535; CHECK-NEXT:    vwsubu.vx v8, v10, a1
536; CHECK-NEXT:    ret
537  %a = load <16 x i8>, ptr %x
538  %b = insertelement <16 x i8> poison, i8 %y, i32 0
539  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
540  %d = zext <16 x i8> %a to <16 x i16>
541  %e = zext <16 x i8> %c to <16 x i16>
542  %f = sub <16 x i16> %d, %e
543  ret <16 x i16> %f
544}
545
; Vector-minus-scalar form for <8 x i16> -> <8 x i32>: the i16 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e16/m1.
546define <8 x i32> @vwsubu_vx_v8i32(ptr %x, i16 %y) {
547; CHECK-LABEL: vwsubu_vx_v8i32:
548; CHECK:       # %bb.0:
549; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
550; CHECK-NEXT:    vle16.v v10, (a0)
551; CHECK-NEXT:    vwsubu.vx v8, v10, a1
552; CHECK-NEXT:    ret
553  %a = load <8 x i16>, ptr %x
554  %b = insertelement <8 x i16> poison, i16 %y, i32 0
555  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
556  %d = zext <8 x i16> %a to <8 x i32>
557  %e = zext <8 x i16> %c to <8 x i32>
558  %f = sub <8 x i32> %d, %e
559  ret <8 x i32> %f
560}
561
; Vector-minus-scalar form for <4 x i32> -> <4 x i64>: the i32 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e32/m1.
562define <4 x i64> @vwsubu_vx_v4i64(ptr %x, i32 %y) {
563; CHECK-LABEL: vwsubu_vx_v4i64:
564; CHECK:       # %bb.0:
565; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
566; CHECK-NEXT:    vle32.v v10, (a0)
567; CHECK-NEXT:    vwsubu.vx v8, v10, a1
568; CHECK-NEXT:    ret
569  %a = load <4 x i32>, ptr %x
570  %b = insertelement <4 x i32> poison, i32 %y, i64 0
571  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
572  %d = zext <4 x i32> %a to <4 x i64>
573  %e = zext <4 x i32> %c to <4 x i64>
574  %f = sub <4 x i64> %d, %e
575  ret <4 x i64> %f
576}
577
; Vector-minus-scalar form for <32 x i8> -> <32 x i16>: the i8 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e8/m2, with the element
; count 32 supplied to vsetvli through a2.
578define <32 x i16> @vwsubu_vx_v32i16(ptr %x, i8 %y) {
579; CHECK-LABEL: vwsubu_vx_v32i16:
580; CHECK:       # %bb.0:
581; CHECK-NEXT:    li a2, 32
582; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
583; CHECK-NEXT:    vle8.v v12, (a0)
584; CHECK-NEXT:    vwsubu.vx v8, v12, a1
585; CHECK-NEXT:    ret
586  %a = load <32 x i8>, ptr %x
587  %b = insertelement <32 x i8> poison, i8 %y, i32 0
588  %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer
589  %d = zext <32 x i8> %a to <32 x i16>
590  %e = zext <32 x i8> %c to <32 x i16>
591  %f = sub <32 x i16> %d, %e
592  ret <32 x i16> %f
593}
594
; Vector-minus-scalar form for <16 x i16> -> <16 x i32>: the i16 argument %y
; is splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e16/m2.
595define <16 x i32> @vwsubu_vx_v16i32(ptr %x, i16 %y) {
596; CHECK-LABEL: vwsubu_vx_v16i32:
597; CHECK:       # %bb.0:
598; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
599; CHECK-NEXT:    vle16.v v12, (a0)
600; CHECK-NEXT:    vwsubu.vx v8, v12, a1
601; CHECK-NEXT:    ret
602  %a = load <16 x i16>, ptr %x
603  %b = insertelement <16 x i16> poison, i16 %y, i32 0
604  %c = shufflevector <16 x i16> %b, <16 x i16> poison, <16 x i32> zeroinitializer
605  %d = zext <16 x i16> %a to <16 x i32>
606  %e = zext <16 x i16> %c to <16 x i32>
607  %f = sub <16 x i32> %d, %e
608  ret <16 x i32> %f
609}
610
; Vector-minus-scalar form for <8 x i32> -> <8 x i64>: the i32 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e32/m2.
611define <8 x i64> @vwsubu_vx_v8i64(ptr %x, i32 %y) {
612; CHECK-LABEL: vwsubu_vx_v8i64:
613; CHECK:       # %bb.0:
614; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
615; CHECK-NEXT:    vle32.v v12, (a0)
616; CHECK-NEXT:    vwsubu.vx v8, v12, a1
617; CHECK-NEXT:    ret
618  %a = load <8 x i32>, ptr %x
619  %b = insertelement <8 x i32> poison, i32 %y, i64 0
620  %c = shufflevector <8 x i32> %b, <8 x i32> poison, <8 x i32> zeroinitializer
621  %d = zext <8 x i32> %a to <8 x i64>
622  %e = zext <8 x i32> %c to <8 x i64>
623  %f = sub <8 x i64> %d, %e
624  ret <8 x i64> %f
625}
626
; Vector-minus-scalar form for <64 x i8> -> <64 x i16>: the i8 argument %y is
; splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e8/m4, with the element
; count 64 supplied to vsetvli through a2.
627define <64 x i16> @vwsubu_vx_v64i16(ptr %x, i8 %y) {
628; CHECK-LABEL: vwsubu_vx_v64i16:
629; CHECK:       # %bb.0:
630; CHECK-NEXT:    li a2, 64
631; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, ma
632; CHECK-NEXT:    vle8.v v16, (a0)
633; CHECK-NEXT:    vwsubu.vx v8, v16, a1
634; CHECK-NEXT:    ret
635  %a = load <64 x i8>, ptr %x
636  %b = insertelement <64 x i8> poison, i8 %y, i32 0
637  %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer
638  %d = zext <64 x i8> %a to <64 x i16>
639  %e = zext <64 x i8> %c to <64 x i16>
640  %f = sub <64 x i16> %d, %e
641  ret <64 x i16> %f
642}
643
; Vector-minus-scalar form for <32 x i16> -> <32 x i32>: the i16 argument %y
; is splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e16/m4, with the element
; count 32 supplied to vsetvli through a2.
644define <32 x i32> @vwsubu_vx_v32i32(ptr %x, i16 %y) {
645; CHECK-LABEL: vwsubu_vx_v32i32:
646; CHECK:       # %bb.0:
647; CHECK-NEXT:    li a2, 32
648; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
649; CHECK-NEXT:    vle16.v v16, (a0)
650; CHECK-NEXT:    vwsubu.vx v8, v16, a1
651; CHECK-NEXT:    ret
652  %a = load <32 x i16>, ptr %x
653  %b = insertelement <32 x i16> poison, i16 %y, i32 0
654  %c = shufflevector <32 x i16> %b, <32 x i16> poison, <32 x i32> zeroinitializer
655  %d = zext <32 x i16> %a to <32 x i32>
656  %e = zext <32 x i16> %c to <32 x i32>
657  %f = sub <32 x i32> %d, %e
658  ret <32 x i32> %f
659}
660
; Vector-minus-scalar form for <16 x i32> -> <16 x i64>: the i32 argument %y
; is splatted, both operands are zero-extended, and the splat is subtracted.
; The expected assembly is a single vwsubu.vx at e32/m4.
661define <16 x i64> @vwsubu_vx_v16i64(ptr %x, i32 %y) {
662; CHECK-LABEL: vwsubu_vx_v16i64:
663; CHECK:       # %bb.0:
664; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
665; CHECK-NEXT:    vle32.v v16, (a0)
666; CHECK-NEXT:    vwsubu.vx v8, v16, a1
667; CHECK-NEXT:    ret
668  %a = load <16 x i32>, ptr %x
669  %b = insertelement <16 x i32> poison, i32 %y, i64 0
670  %c = shufflevector <16 x i32> %b, <16 x i32> poison, <16 x i32> zeroinitializer
671  %d = zext <16 x i32> %a to <16 x i64>
672  %e = zext <16 x i32> %c to <16 x i64>
673  %f = sub <16 x i64> %d, %e
674  ret <16 x i64> %f
675}
676
; Scalar-minus-vector form: an i8 loaded from %y is zero-extended to i16
; before being splatted, and the zero-extended <8 x i8> vector is subtracted
; from that splat (the splat is the minuend). The expected assembly loads the
; scalar with lbu, broadcasts it at e8 with vmv.v.x and uses vwsubu.vv with
; the broadcast as the first source operand.
677define <8 x i16> @vwsubu_vx_v8i16_i8(ptr %x, ptr %y) {
678; CHECK-LABEL: vwsubu_vx_v8i16_i8:
679; CHECK:       # %bb.0:
680; CHECK-NEXT:    lbu a1, 0(a1)
681; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
682; CHECK-NEXT:    vle8.v v9, (a0)
683; CHECK-NEXT:    vmv.v.x v10, a1
684; CHECK-NEXT:    vwsubu.vv v8, v10, v9
685; CHECK-NEXT:    ret
686  %a = load <8 x i8>, ptr %x
687  %b = load i8, ptr %y
688  %c = zext i8 %b to i16
689  %d = insertelement <8 x i16> poison, i16 %c, i32 0
690  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
691  %f = zext <8 x i8> %a to <8 x i16>
692  %g = sub <8 x i16> %e, %f
693  ret <8 x i16> %g
694}
695
; Scalar-minus-vector form where the scalar is already i16 (no zext on it):
; it is splatted at full result width and the zero-extended <8 x i8> vector
; is subtracted from it. The expected assembly broadcasts the scalar at e16
; with vmv.v.x, then switches to e8/mf2 and uses vwsubu.wv (wide minuend,
; narrow subtrahend).
696define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) {
697; CHECK-LABEL: vwsubu_vx_v8i16_i16:
698; CHECK:       # %bb.0:
699; CHECK-NEXT:    lh a1, 0(a1)
700; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
701; CHECK-NEXT:    vle8.v v9, (a0)
702; CHECK-NEXT:    vmv.v.x v8, a1
703; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
704; CHECK-NEXT:    vwsubu.wv v8, v8, v9
705; CHECK-NEXT:    ret
706  %a = load <8 x i8>, ptr %x
707  %b = load i16, ptr %y
708  %d = insertelement <8 x i16> poison, i16 %b, i32 0
709  %e = shufflevector <8 x i16> %d, <8 x i16> poison, <8 x i32> zeroinitializer
710  %f = zext <8 x i8> %a to <8 x i16>
711  %g = sub <8 x i16> %e, %f
712  ret <8 x i16> %g
713}
714
; Scalar-minus-vector form: an i8 loaded from %y is zero-extended to i32 and
; splatted; the zero-extended <4 x i16> vector is subtracted from the splat.
; The expected assembly loads the scalar with lbu, broadcasts it at e16 with
; vmv.v.x and uses one vwsubu.vv with the broadcast as the first source.
715define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) {
716; CHECK-LABEL: vwsubu_vx_v4i32_i8:
717; CHECK:       # %bb.0:
718; CHECK-NEXT:    lbu a1, 0(a1)
719; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
720; CHECK-NEXT:    vle16.v v9, (a0)
721; CHECK-NEXT:    vmv.v.x v10, a1
722; CHECK-NEXT:    vwsubu.vv v8, v10, v9
723; CHECK-NEXT:    ret
724  %a = load <4 x i16>, ptr %x
725  %b = load i8, ptr %y
726  %c = zext i8 %b to i32
727  %d = insertelement <4 x i32> poison, i32 %c, i32 0
728  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
729  %f = zext <4 x i16> %a to <4 x i32>
730  %g = sub <4 x i32> %e, %f
731  ret <4 x i32> %g
732}
733
; Scalar-minus-vector form: an i16 loaded from %y is zero-extended to i32 and
; splatted; the zero-extended <4 x i16> vector is subtracted from the splat.
; The expected assembly loads the scalar with lhu, broadcasts it at e16 with
; vmv.v.x and uses one vwsubu.vv with the broadcast as the first source.
734define <4 x i32> @vwsubu_vx_v4i32_i16(ptr %x, ptr %y) {
735; CHECK-LABEL: vwsubu_vx_v4i32_i16:
736; CHECK:       # %bb.0:
737; CHECK-NEXT:    lhu a1, 0(a1)
738; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
739; CHECK-NEXT:    vle16.v v9, (a0)
740; CHECK-NEXT:    vmv.v.x v10, a1
741; CHECK-NEXT:    vwsubu.vv v8, v10, v9
742; CHECK-NEXT:    ret
743  %a = load <4 x i16>, ptr %x
744  %b = load i16, ptr %y
745  %c = zext i16 %b to i32
746  %d = insertelement <4 x i32> poison, i32 %c, i32 0
747  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
748  %f = zext <4 x i16> %a to <4 x i32>
749  %g = sub <4 x i32> %e, %f
750  ret <4 x i32> %g
751}
752
; Scalar-minus-vector form where the scalar is already i32 (no zext on it):
; it is splatted at full result width and the zero-extended <4 x i16> vector
; is subtracted from it. The expected assembly broadcasts the scalar at e32
; with vmv.v.x, then switches to e16/mf2 and uses vwsubu.wv.
753define <4 x i32> @vwsubu_vx_v4i32_i32(ptr %x, ptr %y) {
754; CHECK-LABEL: vwsubu_vx_v4i32_i32:
755; CHECK:       # %bb.0:
756; CHECK-NEXT:    lw a1, 0(a1)
757; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
758; CHECK-NEXT:    vle16.v v9, (a0)
759; CHECK-NEXT:    vmv.v.x v8, a1
760; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
761; CHECK-NEXT:    vwsubu.wv v8, v8, v9
762; CHECK-NEXT:    ret
763  %a = load <4 x i16>, ptr %x
764  %b = load i32, ptr %y
765  %d = insertelement <4 x i32> poison, i32 %b, i32 0
766  %e = shufflevector <4 x i32> %d, <4 x i32> poison, <4 x i32> zeroinitializer
767  %f = zext <4 x i16> %a to <4 x i32>
768  %g = sub <4 x i32> %e, %f
769  ret <4 x i32> %g
770}
771
; Scalar-minus-vector form with an i64 result: an i8 loaded from %y is
; zero-extended to i64 and splatted; the zero-extended <2 x i32> vector is
; subtracted from the splat. The two targets have separate expectations.
; On riscv32 the 64-bit splat is built through the stack: the loaded byte and
; a zero high word are stored at 8(sp)/12(sp) and broadcast with a zero-stride
; vlse64.v, followed by vwsubu.wv.
; On riscv64 the scalar is broadcast at e32 with vmv.v.x and a single
; vwsubu.vv is used.
772define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind {
773; RV32-LABEL: vwsubu_vx_v2i64_i8:
774; RV32:       # %bb.0:
775; RV32-NEXT:    addi sp, sp, -16
776; RV32-NEXT:    lbu a1, 0(a1)
777; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
778; RV32-NEXT:    vle32.v v9, (a0)
779; RV32-NEXT:    sw a1, 8(sp)
780; RV32-NEXT:    sw zero, 12(sp)
781; RV32-NEXT:    addi a0, sp, 8
782; RV32-NEXT:    vlse64.v v8, (a0), zero
783; RV32-NEXT:    vwsubu.wv v8, v8, v9
784; RV32-NEXT:    addi sp, sp, 16
785; RV32-NEXT:    ret
786;
787; RV64-LABEL: vwsubu_vx_v2i64_i8:
788; RV64:       # %bb.0:
789; RV64-NEXT:    lbu a1, 0(a1)
790; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
791; RV64-NEXT:    vle32.v v9, (a0)
792; RV64-NEXT:    vmv.v.x v10, a1
793; RV64-NEXT:    vwsubu.vv v8, v10, v9
794; RV64-NEXT:    ret
795  %a = load <2 x i32>, ptr %x
796  %b = load i8, ptr %y
797  %c = zext i8 %b to i64
798  %d = insertelement <2 x i64> poison, i64 %c, i64 0
799  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
800  %f = zext <2 x i32> %a to <2 x i64>
801  %g = sub <2 x i64> %e, %f
802  ret <2 x i64> %g
803}
804
; Scalar-minus-vector form with an i64 result: an i16 loaded from %y is
; zero-extended to i64 and splatted; the zero-extended <2 x i32> vector is
; subtracted from the splat. The two targets have separate expectations.
; On riscv32 the 64-bit splat is built through the stack (lhu value plus a
; zero high word, broadcast by a zero-stride vlse64.v), then vwsubu.wv.
; On riscv64 the scalar is broadcast at e32 with vmv.v.x and a single
; vwsubu.vv is used.
805define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind {
806; RV32-LABEL: vwsubu_vx_v2i64_i16:
807; RV32:       # %bb.0:
808; RV32-NEXT:    addi sp, sp, -16
809; RV32-NEXT:    lhu a1, 0(a1)
810; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
811; RV32-NEXT:    vle32.v v9, (a0)
812; RV32-NEXT:    sw a1, 8(sp)
813; RV32-NEXT:    sw zero, 12(sp)
814; RV32-NEXT:    addi a0, sp, 8
815; RV32-NEXT:    vlse64.v v8, (a0), zero
816; RV32-NEXT:    vwsubu.wv v8, v8, v9
817; RV32-NEXT:    addi sp, sp, 16
818; RV32-NEXT:    ret
819;
820; RV64-LABEL: vwsubu_vx_v2i64_i16:
821; RV64:       # %bb.0:
822; RV64-NEXT:    lhu a1, 0(a1)
823; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
824; RV64-NEXT:    vle32.v v9, (a0)
825; RV64-NEXT:    vmv.v.x v10, a1
826; RV64-NEXT:    vwsubu.vv v8, v10, v9
827; RV64-NEXT:    ret
828  %a = load <2 x i32>, ptr %x
829  %b = load i16, ptr %y
830  %c = zext i16 %b to i64
831  %d = insertelement <2 x i64> poison, i64 %c, i64 0
832  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
833  %f = zext <2 x i32> %a to <2 x i64>
834  %g = sub <2 x i64> %e, %f
835  ret <2 x i64> %g
836}
837
; Scalar-minus-vector form with an i64 result: an i32 loaded from %y is
; zero-extended to i64 and splatted; the zero-extended <2 x i32> vector is
; subtracted from the splat. The two targets have separate expectations.
; On riscv32 the 64-bit splat is built through the stack (lw value plus a
; zero high word, broadcast by a zero-stride vlse64.v), then vwsubu.wv.
; On riscv64 the scalar is loaded with lwu, broadcast at e32 with vmv.v.x,
; and a single vwsubu.vv is used.
838define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind {
839; RV32-LABEL: vwsubu_vx_v2i64_i32:
840; RV32:       # %bb.0:
841; RV32-NEXT:    addi sp, sp, -16
842; RV32-NEXT:    lw a1, 0(a1)
843; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
844; RV32-NEXT:    vle32.v v9, (a0)
845; RV32-NEXT:    sw a1, 8(sp)
846; RV32-NEXT:    sw zero, 12(sp)
847; RV32-NEXT:    addi a0, sp, 8
848; RV32-NEXT:    vlse64.v v8, (a0), zero
849; RV32-NEXT:    vwsubu.wv v8, v8, v9
850; RV32-NEXT:    addi sp, sp, 16
851; RV32-NEXT:    ret
852;
853; RV64-LABEL: vwsubu_vx_v2i64_i32:
854; RV64:       # %bb.0:
855; RV64-NEXT:    lwu a1, 0(a1)
856; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
857; RV64-NEXT:    vle32.v v9, (a0)
858; RV64-NEXT:    vmv.v.x v10, a1
859; RV64-NEXT:    vwsubu.vv v8, v10, v9
860; RV64-NEXT:    ret
861  %a = load <2 x i32>, ptr %x
862  %b = load i32, ptr %y
863  %c = zext i32 %b to i64
864  %d = insertelement <2 x i64> poison, i64 %c, i64 0
865  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
866  %f = zext <2 x i32> %a to <2 x i64>
867  %g = sub <2 x i64> %e, %f
868  ret <2 x i64> %g
869}
870
; Scalar-minus-vector form where the scalar is already i64 (no zext on it):
; it is splatted at full result width and the zero-extended <2 x i32> vector
; is subtracted from it. The two targets have separate expectations.
; On riscv32 the i64 is read as two words (lw 0 and lw 4), stored to
; 8(sp)/12(sp) and broadcast with a zero-stride vlse64.v, then vwsubu.wv.
; On riscv64 the scalar is loaded with ld, broadcast at e64 with vmv.v.x,
; and vwsubu.wv is used after switching to e32/mf2.
871define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind {
872; RV32-LABEL: vwsubu_vx_v2i64_i64:
873; RV32:       # %bb.0:
874; RV32-NEXT:    addi sp, sp, -16
875; RV32-NEXT:    lw a2, 0(a1)
876; RV32-NEXT:    lw a1, 4(a1)
877; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
878; RV32-NEXT:    vle32.v v9, (a0)
879; RV32-NEXT:    sw a2, 8(sp)
880; RV32-NEXT:    sw a1, 12(sp)
881; RV32-NEXT:    addi a0, sp, 8
882; RV32-NEXT:    vlse64.v v8, (a0), zero
883; RV32-NEXT:    vwsubu.wv v8, v8, v9
884; RV32-NEXT:    addi sp, sp, 16
885; RV32-NEXT:    ret
886;
887; RV64-LABEL: vwsubu_vx_v2i64_i64:
888; RV64:       # %bb.0:
889; RV64-NEXT:    ld a1, 0(a1)
890; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
891; RV64-NEXT:    vle32.v v9, (a0)
892; RV64-NEXT:    vmv.v.x v8, a1
893; RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
894; RV64-NEXT:    vwsubu.wv v8, v8, v9
895; RV64-NEXT:    ret
896  %a = load <2 x i32>, ptr %x
897  %b = load i64, ptr %y
898  %d = insertelement <2 x i64> poison, i64 %b, i64 0
899  %e = shufflevector <2 x i64> %d, <2 x i64> poison, <2 x i32> zeroinitializer
900  %f = zext <2 x i32> %a to <2 x i64>
901  %g = sub <2 x i64> %e, %f
902  ret <2 x i64> %g
903}
904
; Both <2 x i8> operands are zero-extended by 4x straight to <2 x i32>.
; The expected assembly does the subtraction one step narrower: vwsubu.vv
; yields an i16 difference at e8/mf8, which vsext.vf2 then sign-extends to
; e32.
; NOTE(review): the IR body is the same as @vwsubu_v2i32_v2i8 earlier in
; this file - confirm whether both copies are intentional.
905define <2 x i32> @vwsubu_v2i32_of_v2i8(ptr %x, ptr %y) {
906; CHECK-LABEL: vwsubu_v2i32_of_v2i8:
907; CHECK:       # %bb.0:
908; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
909; CHECK-NEXT:    vle8.v v8, (a0)
910; CHECK-NEXT:    vle8.v v9, (a1)
911; CHECK-NEXT:    vwsubu.vv v10, v8, v9
912; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
913; CHECK-NEXT:    vsext.vf2 v8, v10
914; CHECK-NEXT:    ret
915  %a = load <2 x i8>, ptr %x
916  %b = load <2 x i8>, ptr %y
917  %c = zext <2 x i8> %a to <2 x i32>
918  %d = zext <2 x i8> %b to <2 x i32>
919  %e = sub <2 x i32> %c, %d
920  ret <2 x i32> %e
921}
922
; Both <2 x i8> operands are zero-extended by 8x straight to <2 x i64>.
; The expected assembly does the subtraction at the narrowest width:
; vwsubu.vv yields an i16 difference at e8/mf8, which vsext.vf4 then
; sign-extends to e64.
923define <2 x i64> @vwsubu_v2i64_of_v2i8(ptr %x, ptr %y) {
924; CHECK-LABEL: vwsubu_v2i64_of_v2i8:
925; CHECK:       # %bb.0:
926; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
927; CHECK-NEXT:    vle8.v v8, (a0)
928; CHECK-NEXT:    vle8.v v9, (a1)
929; CHECK-NEXT:    vwsubu.vv v10, v8, v9
930; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
931; CHECK-NEXT:    vsext.vf4 v8, v10
932; CHECK-NEXT:    ret
933  %a = load <2 x i8>, ptr %x
934  %b = load <2 x i8>, ptr %y
935  %c = zext <2 x i8> %a to <2 x i64>
936  %d = zext <2 x i8> %b to <2 x i64>
937  %e = sub <2 x i64> %c, %d
938  ret <2 x i64> %e
939}
940
; Both <2 x i16> operands are zero-extended by 4x straight to <2 x i64>.
; The expected assembly does the subtraction one step narrower: vwsubu.vv
; yields an i32 difference at e16/mf4, which vsext.vf2 then sign-extends to
; e64.
941define <2 x i64> @vwsubu_v2i64_of_v2i16(ptr %x, ptr %y) {
942; CHECK-LABEL: vwsubu_v2i64_of_v2i16:
943; CHECK:       # %bb.0:
944; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
945; CHECK-NEXT:    vle16.v v8, (a0)
946; CHECK-NEXT:    vle16.v v9, (a1)
947; CHECK-NEXT:    vwsubu.vv v10, v8, v9
948; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
949; CHECK-NEXT:    vsext.vf2 v8, v10
950; CHECK-NEXT:    ret
951  %a = load <2 x i16>, ptr %x
952  %b = load <2 x i16>, ptr %y
953  %c = zext <2 x i16> %a to <2 x i64>
954  %d = zext <2 x i16> %b to <2 x i64>
955  %e = sub <2 x i64> %c, %d
956  ret <2 x i64> %e
957}
958