; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X

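; This file tests SDNode (non-intrinsic) lowering of scalable-vector urem:
; vector-vector operands select vremu.vv, splatted scalar operands select
; vremu.vx, constant divisors are strength-reduced to a multiply-by-reciprocal
; sequence where the target allows it, and power-of-2 divisors fold to masks.
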
define <vscale x 1 x i8> @vremu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i8> %va, %vb
  ret <vscale x 1 x i8> %vc
}

define <vscale x 1 x i8> @vremu_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i8> %va, %splat
  ret <vscale x 1 x i8> %vc
}

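; Unsigned remainder by the non-power-of-2 constant -7 (249 when the i8 bits
; are read unsigned) is strength-reduced rather than lowered to vremu: the
; quotient uses the round-up reciprocal ceil(2^13 / 249) = 33, so
; q = (x * 33) >> 13 (vmulhu keeps the high 8 bits of the product, vsrl.vi
; shifts by the remaining 5), and vnmsac.vx then computes x - (-7) * q, which
; equals x - 249 * q modulo 2^8. For example, x = 255 gives q = 1 and
; remainder 255 - 249 = 6.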
define <vscale x 1 x i8> @vremu_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv1i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i8> %va, splat (i8 -7)
  ret <vscale x 1 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i8> %va, %vb
  ret <vscale x 2 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vx_nxv2i8(<vscale x 2 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i8> %va, %splat
  ret <vscale x 2 x i8> %vc
}

define <vscale x 2 x i8> @vremu_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv2i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i8> %va, splat (i8 -7)
  ret <vscale x 2 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i8> %va, %vb
  ret <vscale x 4 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vx_nxv4i8(<vscale x 4 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i8> %va, %splat
  ret <vscale x 4 x i8> %vc
}

define <vscale x 4 x i8> @vremu_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv4i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i8> %va, splat (i8 -7)
  ret <vscale x 4 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i8> %va, %vb
  ret <vscale x 8 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vx_nxv8i8(<vscale x 8 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i8> %va, %splat
  ret <vscale x 8 x i8> %vc
}

define <vscale x 8 x i8> @vremu_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv8i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i8> %va, splat (i8 -7)
  ret <vscale x 8 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i8> %va, %vb
  ret <vscale x 16 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vx_nxv16i8(<vscale x 16 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i8> %va, %splat
  ret <vscale x 16 x i8> %vc
}

define <vscale x 16 x i8> @vremu_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv16i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i8> %va, splat (i8 -7)
  ret <vscale x 16 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i8> %va, %vb
  ret <vscale x 32 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vx_nxv32i8(<vscale x 32 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
  %vc = urem <vscale x 32 x i8> %va, %splat
  ret <vscale x 32 x i8> %vc
}

define <vscale x 32 x i8> @vremu_vi_nxv32i8_0(<vscale x 32 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv32i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i8> %va, splat (i8 -7)
  ret <vscale x 32 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb) {
; CHECK-LABEL: vremu_vv_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 64 x i8> %va, %vb
  ret <vscale x 64 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vx_nxv64i8(<vscale x 64 x i8> %va, i8 signext %b) {
; CHECK-LABEL: vremu_vx_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
  %vc = urem <vscale x 64 x i8> %va, %splat
  ret <vscale x 64 x i8> %vc
}

define <vscale x 64 x i8> @vremu_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
; CHECK-LABEL: vremu_vi_nxv64i8_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 33
; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 5
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 64 x i8> %va, splat (i8 -7)
  ret <vscale x 64 x i8> %vc
}

define <vscale x 1 x i16> @vremu_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i16> %va, %vb
  ret <vscale x 1 x i16> %vc
}

define <vscale x 1 x i16> @vremu_vx_nxv1i16(<vscale x 1 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i16> %va, %splat
  ret <vscale x 1 x i16> %vc
}

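; The same expansion at SEW=16: the divisor is 65529 (i16 -7), the magic
; constant lui 2 + addi 1 = 0x2001 = 8193 = ceil(2^29 / 65529), and the
; post-shift after vmulhu is 13, so q = (x * 8193) >> 29.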
define <vscale x 1 x i16> @vremu_vi_nxv1i16_0(<vscale x 1 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv1i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i16> %va, splat (i16 -7)
  ret <vscale x 1 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i16> %va, %vb
  ret <vscale x 2 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vx_nxv2i16(<vscale x 2 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i16> %va, %splat
  ret <vscale x 2 x i16> %vc
}

define <vscale x 2 x i16> @vremu_vi_nxv2i16_0(<vscale x 2 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv2i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i16> %va, splat (i16 -7)
  ret <vscale x 2 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i16> %va, %vb
  ret <vscale x 4 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vx_nxv4i16(<vscale x 4 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i16> %va, %splat
  ret <vscale x 4 x i16> %vc
}

define <vscale x 4 x i16> @vremu_vi_nxv4i16_0(<vscale x 4 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv4i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i16> %va, splat (i16 -7)
  ret <vscale x 4 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i16> %va, %vb
  ret <vscale x 8 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vx_nxv8i16(<vscale x 8 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i16> %va, %splat
  ret <vscale x 8 x i16> %vc
}

define <vscale x 8 x i16> @vremu_vi_nxv8i16_0(<vscale x 8 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv8i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i16> %va, splat (i16 -7)
  ret <vscale x 8 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i16> %va, %vb
  ret <vscale x 16 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vx_nxv16i16(<vscale x 16 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i16> %va, %splat
  ret <vscale x 16 x i16> %vc
}

define <vscale x 16 x i16> @vremu_vi_nxv16i16_0(<vscale x 16 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv16i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i16> %va, splat (i16 -7)
  ret <vscale x 16 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb) {
; CHECK-LABEL: vremu_vv_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i16> %va, %vb
  ret <vscale x 32 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vx_nxv32i16(<vscale x 32 x i16> %va, i16 signext %b) {
; CHECK-LABEL: vremu_vx_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
  %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
  %vc = urem <vscale x 32 x i16> %va, %splat
  ret <vscale x 32 x i16> %vc
}

define <vscale x 32 x i16> @vremu_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
; CHECK-LABEL: vremu_vi_nxv32i16_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 13
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 32 x i16> %va, splat (i16 -7)
  ret <vscale x 32 x i16> %vc
}

define <vscale x 1 x i32> @vremu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i32> %va, %vb
  ret <vscale x 1 x i32> %vc
}

define <vscale x 1 x i32> @vremu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i32> %va, %splat
  ret <vscale x 1 x i32> %vc
}

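; At SEW=32 the divisor is 4294967289 (i32 -7); lui 131072 + addi 1
; materializes 0x20000001 = ceil(2^61 / 4294967289) and the post-shift is 29,
; so q = (x * 0x20000001) >> 61.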
define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv1i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i32> %va, splat (i32 -7)
  ret <vscale x 1 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i32> %va, %vb
  ret <vscale x 2 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i32> %va, %splat
  ret <vscale x 2 x i32> %vc
}

define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv2i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; CHECK-NEXT:    vmulhu.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v9, v9, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i32> %va, splat (i32 -7)
  ret <vscale x 2 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i32> %va, %vb
  ret <vscale x 4 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i32> %va, %splat
  ret <vscale x 4 x i32> %vc
}

define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv4i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmulhu.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v10, v10, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i32> %va, splat (i32 -7)
  ret <vscale x 4 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i32> %va, %vb
  ret <vscale x 8 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i32> %va, %splat
  ret <vscale x 8 x i32> %vc
}

define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv8i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmulhu.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v12, v12, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i32> %va, splat (i32 -7)
  ret <vscale x 8 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb) {
; CHECK-LABEL: vremu_vv_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i32> %va, %vb
  ret <vscale x 16 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vx_nxv16i32(<vscale x 16 x i32> %va, i32 signext %b) {
; CHECK-LABEL: vremu_vx_nxv16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT:    vremu.vx v8, v8, a0
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
  %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
  %vc = urem <vscale x 16 x i32> %va, %splat
  ret <vscale x 16 x i32> %vc
}

define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
; CHECK-LABEL: vremu_vi_nxv16i32_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, 131072
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; CHECK-NEXT:    vmulhu.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v16, v16, 29
; CHECK-NEXT:    li a0, -7
; CHECK-NEXT:    vnmsac.vx v8, a0, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 16 x i32> %va, splat (i32 -7)
  ret <vscale x 16 x i32> %vc
}

define <vscale x 1 x i64> @vremu_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, %vb
  ret <vscale x 1 x i64> %vc
}

define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v9, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vc = urem <vscale x 1 x i64> %va, %splat
  ret <vscale x 1 x i64> %vc
}

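; At SEW=64 the three configurations diverge. RV32-V has no 64-bit GPRs, so
; the magic constant (1 << 61) + 1 is assembled on the stack and broadcast
; with a zero-strided vlse64.v to feed vmulhu.vv; RV64-V materializes it in a
; scalar register for vmulhu.vx. Zve64x leaves out the high-half multiplies
; (vmulh*) at EEW=64, so the multiply-based expansion is unavailable and a
; plain vremu.vx with the scalar -7 is emitted instead.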
define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv1i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV32-V-NEXT:    vlse64.v v9, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v9, v8, v9
; RV32-V-NEXT:    vsrl.vx v9, v9, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv1i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv1i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; RV64-V-NEXT:    vmulhu.vx v9, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v9, v9, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, splat (i64 -7)
  ret <vscale x 1 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 1 x i64> @vremu_vi_nxv1i64_1(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv1i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 1 x i64> %va, splat (i64 16)
  ret <vscale x 1 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 1 x i64> @vremu_vi_nxv1i64_2(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv1i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
; CHECK-NEXT:    vmv.v.x v10, a0
; CHECK-NEXT:    vsll.vv v9, v10, v9
; CHECK-NEXT:    vadd.vi v9, v9, -1
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    ret
  %vc = shl <vscale x 1 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 1 x i64> %va, %vc
  ret <vscale x 1 x i64> %vd
}

define <vscale x 2 x i64> @vremu_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, %vb
  ret <vscale x 2 x i64> %vc
}

define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
  %vc = urem <vscale x 2 x i64> %va, %splat
  ret <vscale x 2 x i64> %vc
}

define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv2i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV32-V-NEXT:    vlse64.v v10, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v10, v8, v10
; RV32-V-NEXT:    vsrl.vx v10, v10, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv2i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv2i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; RV64-V-NEXT:    vmulhu.vx v10, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v10, v10, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, splat (i64 -7)
  ret <vscale x 2 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 2 x i64> @vremu_vi_nxv2i64_1(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv2i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 2 x i64> %va, splat (i64 16)
  ret <vscale x 2 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 2 x i64> @vremu_vi_nxv2i64_2(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv2i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
; CHECK-NEXT:    vmv.v.x v12, a0
; CHECK-NEXT:    vsll.vv v10, v12, v10
; CHECK-NEXT:    vadd.vi v10, v10, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    ret
  %vc = shl <vscale x 2 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 2 x i64> %va, %vc
  ret <vscale x 2 x i64> %vd
}

define <vscale x 4 x i64> @vremu_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, %vb
  ret <vscale x 4 x i64> %vc
}

define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
  %vc = urem <vscale x 4 x i64> %va, %splat
  ret <vscale x 4 x i64> %vc
}

define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv4i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV32-V-NEXT:    vlse64.v v12, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v12, v8, v12
; RV32-V-NEXT:    vsrl.vx v12, v12, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv4i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv4i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; RV64-V-NEXT:    vmulhu.vx v12, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v12, v12, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, splat (i64 -7)
  ret <vscale x 4 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 4 x i64> @vremu_vi_nxv4i64_1(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv4i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 4 x i64> %va, splat (i64 16)
  ret <vscale x 4 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 4 x i64> @vremu_vi_nxv4i64_2(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv4i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
; CHECK-NEXT:    vmv.v.x v16, a0
; CHECK-NEXT:    vsll.vv v12, v16, v12
; CHECK-NEXT:    vadd.vi v12, v12, -1
; CHECK-NEXT:    vand.vv v8, v8, v12
; CHECK-NEXT:    ret
  %vc = shl <vscale x 4 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 4 x i64> %va, %vc
  ret <vscale x 4 x i64> %vd
}

define <vscale x 8 x i64> @vremu_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) {
; CHECK-LABEL: vremu_vv_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vremu.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, %vb
  ret <vscale x 8 x i64> %vc
}

define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; RV32-LABEL: vremu_vx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vremu.vv v8, v8, v16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vremu_vx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vremu.vx v8, v8, a0
; RV64-NEXT:    ret
  %head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
  %vc = urem <vscale x 8 x i64> %va, %splat
  ret <vscale x 8 x i64> %vc
}

define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; RV32-V-LABEL: vremu_vi_nxv8i64_0:
; RV32-V:       # %bb.0:
; RV32-V-NEXT:    addi sp, sp, -16
; RV32-V-NEXT:    .cfi_def_cfa_offset 16
; RV32-V-NEXT:    lui a0, 131072
; RV32-V-NEXT:    li a1, 1
; RV32-V-NEXT:    sw a1, 8(sp)
; RV32-V-NEXT:    sw a0, 12(sp)
; RV32-V-NEXT:    addi a0, sp, 8
; RV32-V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-V-NEXT:    vlse64.v v16, (a0), zero
; RV32-V-NEXT:    li a0, 61
; RV32-V-NEXT:    vmulhu.vv v16, v8, v16
; RV32-V-NEXT:    vsrl.vx v16, v16, a0
; RV32-V-NEXT:    li a0, -7
; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
; RV32-V-NEXT:    addi sp, sp, 16
; RV32-V-NEXT:    .cfi_def_cfa_offset 0
; RV32-V-NEXT:    ret
;
; ZVE64X-LABEL: vremu_vi_nxv8i64_0:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    li a0, -7
; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; ZVE64X-NEXT:    vremu.vx v8, v8, a0
; ZVE64X-NEXT:    ret
;
; RV64-V-LABEL: vremu_vi_nxv8i64_0:
; RV64-V:       # %bb.0:
; RV64-V-NEXT:    li a0, 1
; RV64-V-NEXT:    slli a0, a0, 61
; RV64-V-NEXT:    addi a0, a0, 1
; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-V-NEXT:    vmulhu.vx v16, v8, a0
; RV64-V-NEXT:    li a0, 61
; RV64-V-NEXT:    vsrl.vx v16, v16, a0
; RV64-V-NEXT:    li a0, -7
; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
; RV64-V-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, splat (i64 -7)
  ret <vscale x 8 x i64> %vc
}

; fold (urem x, pow2) -> (and x, pow2-1)
define <vscale x 8 x i64> @vremu_vi_nxv8i64_1(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv8i64_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %vc = urem <vscale x 8 x i64> %va, splat (i64 16)
  ret <vscale x 8 x i64> %vc
}

; fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
define <vscale x 8 x i64> @vremu_vi_nxv8i64_2(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) {
; CHECK-LABEL: vremu_vi_nxv8i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; CHECK-NEXT:    vmv.v.x v24, a0
; CHECK-NEXT:    vsll.vv v16, v24, v16
; CHECK-NEXT:    vadd.vi v16, v16, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    ret
  %vc = shl <vscale x 8 x i64> splat (i64 16), %vb
  %vd = urem <vscale x 8 x i64> %va, %vc
  ret <vscale x 8 x i64> %vd
}