; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s


define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_1:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu s1, 8(a1)
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 124
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 98
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 1003
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh s4, 0(s3)
; RV32I-NEXT:    sh s0, 2(s3)
; RV32I-NEXT:    sh s1, 4(s3)
; RV32I-NEXT:    sh a0, 6(s3)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_1:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 8456
; RV32IM-NEXT:    lui a6, 11038
; RV32IM-NEXT:    li a7, 95
; RV32IM-NEXT:    lui t0, 10700
; RV32IM-NEXT:    li t1, 98
; RV32IM-NEXT:    addi a6, a6, -1465
; RV32IM-NEXT:    mulhu a6, a2, a6
; RV32IM-NEXT:    mul a6, a6, a7
; RV32IM-NEXT:    lui a7, 1045
; RV32IM-NEXT:    addi t0, t0, -1003
; RV32IM-NEXT:    mulhu t0, a4, t0
; RV32IM-NEXT:    mul t0, t0, t1
; RV32IM-NEXT:    li t1, 1003
; RV32IM-NEXT:    addi a5, a5, 1058
; RV32IM-NEXT:    addi a7, a7, 1801
; RV32IM-NEXT:    mulhu a5, a3, a5
; RV32IM-NEXT:    mulhu a7, a1, a7
; RV32IM-NEXT:    mul a7, a7, t1
; RV32IM-NEXT:    slli t1, a5, 7
; RV32IM-NEXT:    slli a5, a5, 2
; RV32IM-NEXT:    sub a5, a5, t1
; RV32IM-NEXT:    sub a2, a2, a6
; RV32IM-NEXT:    sub a4, a4, t0
; RV32IM-NEXT:    sub a1, a1, a7
; RV32IM-NEXT:    add a3, a3, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu s1, 16(a1)
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 124
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 98
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 1003
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh s4, 0(s3)
; RV64I-NEXT:    sh s0, 2(s3)
; RV64I-NEXT:    sh s1, 4(s3)
; RV64I-NEXT:    sh a0, 6(s3)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_1:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 0(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a4, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, %hi(.LCPI0_0)
; RV64IM-NEXT:    lui a6, %hi(.LCPI0_1)
; RV64IM-NEXT:    li a7, 95
; RV64IM-NEXT:    ld a6, %lo(.LCPI0_1)(a6)
; RV64IM-NEXT:    lui t0, %hi(.LCPI0_2)
; RV64IM-NEXT:    li t1, 98
; RV64IM-NEXT:    ld t0, %lo(.LCPI0_2)(t0)
; RV64IM-NEXT:    mulhu a6, a2, a6
; RV64IM-NEXT:    mul a6, a6, a7
; RV64IM-NEXT:    lui a7, %hi(.LCPI0_3)
; RV64IM-NEXT:    ld a5, %lo(.LCPI0_0)(a5)
; RV64IM-NEXT:    ld a7, %lo(.LCPI0_3)(a7)
; RV64IM-NEXT:    mulhu t0, a4, t0
; RV64IM-NEXT:    mul t0, t0, t1
; RV64IM-NEXT:    li t1, 1003
; RV64IM-NEXT:    mulhu a5, a3, a5
; RV64IM-NEXT:    mulhu a7, a1, a7
; RV64IM-NEXT:    mul a7, a7, t1
; RV64IM-NEXT:    slli t1, a5, 7
; RV64IM-NEXT:    slli a5, a5, 2
; RV64IM-NEXT:    subw a5, a5, t1
; RV64IM-NEXT:    subw a2, a2, a6
; RV64IM-NEXT:    subw a4, a4, t0
; RV64IM-NEXT:    subw a1, a1, a7
; RV64IM-NEXT:    add a3, a3, a5
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a4, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}

define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu s1, 8(a1)
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh s4, 0(s3)
; RV32I-NEXT:    sh s0, 2(s3)
; RV32I-NEXT:    sh s1, 4(s3)
; RV32I-NEXT:    sh a0, 6(s3)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a7, a2, a5
; RV32IM-NEXT:    mulhu t0, a3, a5
; RV32IM-NEXT:    mulhu t1, a4, a5
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    mul a7, a7, a6
; RV32IM-NEXT:    mul t0, t0, a6
; RV32IM-NEXT:    mul t1, t1, a6
; RV32IM-NEXT:    mul a5, a5, a6
; RV32IM-NEXT:    sub a2, a2, a7
; RV32IM-NEXT:    sub a3, a3, t0
; RV32IM-NEXT:    sub a4, a4, t1
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu s1, 16(a1)
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh s4, 0(s3)
; RV64I-NEXT:    sh s0, 2(s3)
; RV64I-NEXT:    sh s1, 4(s3)
; RV64I-NEXT:    sh a0, 6(s3)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lui a2, %hi(.LCPI1_0)
; RV64IM-NEXT:    ld a2, %lo(.LCPI1_0)(a2)
; RV64IM-NEXT:    lhu a3, 0(a1)
; RV64IM-NEXT:    lhu a4, 8(a1)
; RV64IM-NEXT:    lhu a5, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    mulhu a7, a3, a2
; RV64IM-NEXT:    mulhu t0, a4, a2
; RV64IM-NEXT:    mulhu t1, a5, a2
; RV64IM-NEXT:    mulhu a2, a1, a2
; RV64IM-NEXT:    mul a7, a7, a6
; RV64IM-NEXT:    mul t0, t0, a6
; RV64IM-NEXT:    mul t1, t1, a6
; RV64IM-NEXT:    mul a2, a2, a6
; RV64IM-NEXT:    subw a3, a3, a7
; RV64IM-NEXT:    subw a4, a4, t0
; RV64IM-NEXT:    subw a5, a5, t1
; RV64IM-NEXT:    subw a1, a1, a2
; RV64IM-NEXT:    sh a3, 0(a0)
; RV64IM-NEXT:    sh a4, 2(a0)
; RV64IM-NEXT:    sh a5, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}


; Don't fold if we can combine urem with udiv.
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu s1, 0(a1)
; RV32I-NEXT:    lhu s2, 4(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu s4, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s6, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s8, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s8, a0
; RV32I-NEXT:    add s2, s7, s2
; RV32I-NEXT:    add s3, s6, s3
; RV32I-NEXT:    add s4, s5, s4
; RV32I-NEXT:    sh a0, 0(s0)
; RV32I-NEXT:    sh s2, 2(s0)
; RV32I-NEXT:    sh s3, 4(s0)
; RV32I-NEXT:    sh s4, 6(s0)
; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a7, a1, a5
; RV32IM-NEXT:    mulhu t0, a4, a5
; RV32IM-NEXT:    mulhu t1, a3, a5
; RV32IM-NEXT:    mulhu a5, a2, a5
; RV32IM-NEXT:    mul t2, a7, a6
; RV32IM-NEXT:    mul t3, t0, a6
; RV32IM-NEXT:    mul t4, t1, a6
; RV32IM-NEXT:    mul a6, a5, a6
; RV32IM-NEXT:    add a2, a2, a5
; RV32IM-NEXT:    add a3, a3, t1
; RV32IM-NEXT:    add a4, a4, t0
; RV32IM-NEXT:    add a1, a1, a7
; RV32IM-NEXT:    sub a2, a2, a6
; RV32IM-NEXT:    sub a3, a3, t4
; RV32IM-NEXT:    sub a4, a4, t3
; RV32IM-NEXT:    sub a1, a1, t2
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -80
; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu s1, 0(a1)
; RV64I-NEXT:    lhu s2, 8(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu s4, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s5, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s6, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s7, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s8, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s8, a0
; RV64I-NEXT:    add s2, s7, s2
; RV64I-NEXT:    add s3, s6, s3
; RV64I-NEXT:    add s4, s5, s4
; RV64I-NEXT:    sh a0, 0(s0)
; RV64I-NEXT:    sh s2, 2(s0)
; RV64I-NEXT:    sh s3, 4(s0)
; RV64I-NEXT:    sh s4, 6(s0)
; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 80
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 16(a1)
; RV64IM-NEXT:    lhu a3, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI2_0)
; RV64IM-NEXT:    ld a4, %lo(.LCPI2_0)(a4)
; RV64IM-NEXT:    lhu a5, 0(a1)
; RV64IM-NEXT:    lhu a1, 8(a1)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    mulhu a7, a3, a4
; RV64IM-NEXT:    mulhu t0, a2, a4
; RV64IM-NEXT:    mulhu t1, a1, a4
; RV64IM-NEXT:    mulhu a4, a5, a4
; RV64IM-NEXT:    mul t2, a7, a6
; RV64IM-NEXT:    mul t3, t0, a6
; RV64IM-NEXT:    mul t4, t1, a6
; RV64IM-NEXT:    mul a6, a4, a6
; RV64IM-NEXT:    add a4, a5, a4
; RV64IM-NEXT:    add a1, a1, t1
; RV64IM-NEXT:    add a2, a2, t0
; RV64IM-NEXT:    add a3, a3, a7
; RV64IM-NEXT:    subw a4, a4, a6
; RV64IM-NEXT:    subw a1, a1, t4
; RV64IM-NEXT:    subw a2, a2, t3
; RV64IM-NEXT:    subw a3, a3, t2
; RV64IM-NEXT:    sh a4, 0(a0)
; RV64IM-NEXT:    sh a1, 2(a0)
; RV64IM-NEXT:    sh a2, 4(a0)
; RV64IM-NEXT:    sh a3, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_power_of_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu s1, 0(a1)
; RV32I-NEXT:    lhu s2, 4(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu a2, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    andi a1, s1, 63
; RV32I-NEXT:    andi a2, s2, 31
; RV32I-NEXT:    andi a3, s3, 7
; RV32I-NEXT:    sh a1, 0(s0)
; RV32I-NEXT:    sh a2, 2(s0)
; RV32I-NEXT:    sh a3, 4(s0)
; RV32I-NEXT:    sh a0, 6(s0)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_power_of_two:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    andi a2, a2, 63
; RV32IM-NEXT:    andi a3, a3, 31
; RV32IM-NEXT:    andi a4, a4, 7
; RV32IM-NEXT:    mul a5, a5, a6
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_power_of_two:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu s1, 0(a1)
; RV64I-NEXT:    lhu s2, 8(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu a2, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    andi a1, s1, 63
; RV64I-NEXT:    andi a2, s2, 31
; RV64I-NEXT:    andi a3, s3, 7
; RV64I-NEXT:    sh a1, 0(s0)
; RV64I-NEXT:    sh a2, 2(s0)
; RV64I-NEXT:    sh a3, 4(s0)
; RV64I-NEXT:    sh a0, 6(s0)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_power_of_two:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 0(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a4, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, %hi(.LCPI3_0)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    ld a5, %lo(.LCPI3_0)(a5)
; RV64IM-NEXT:    andi a2, a2, 63
; RV64IM-NEXT:    andi a3, a3, 31
; RV64IM-NEXT:    andi a4, a4, 7
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    mul a5, a5, a6
; RV64IM-NEXT:    subw a1, a1, a5
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a4, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 4(a1)
; RV32I-NEXT:    lhu s0, 8(a1)
; RV32I-NEXT:    lhu s1, 12(a1)
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    li a1, 654
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 23
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a1, a0, 1327
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh zero, 0(s2)
; RV32I-NEXT:    sh s3, 2(s2)
; RV32I-NEXT:    sh s0, 4(s2)
; RV32I-NEXT:    sh a0, 6(s2)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_one:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 4(a1)
; RV32IM-NEXT:    lhu a3, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a4, 1603
; RV32IM-NEXT:    li a5, 654
; RV32IM-NEXT:    lui a6, 45590
; RV32IM-NEXT:    li a7, 23
; RV32IM-NEXT:    addi a4, a4, 1341
; RV32IM-NEXT:    mulhu a4, a2, a4
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    lui a5, 193
; RV32IM-NEXT:    addi a6, a6, 1069
; RV32IM-NEXT:    mulhu a6, a3, a6
; RV32IM-NEXT:    mul a6, a6, a7
; RV32IM-NEXT:    lui a7, 1
; RV32IM-NEXT:    addi a5, a5, 1464
; RV32IM-NEXT:    addi a7, a7, 1327
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    mul a5, a5, a7
; RV32IM-NEXT:    sub a2, a2, a4
; RV32IM-NEXT:    sub a3, a3, a6
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh zero, 0(a0)
; RV32IM-NEXT:    sh a2, 2(a0)
; RV32IM-NEXT:    sh a3, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_one:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 8(a1)
; RV64I-NEXT:    lhu s0, 16(a1)
; RV64I-NEXT:    lhu s1, 24(a1)
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 23
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh zero, 0(s2)
; RV64I-NEXT:    sh s3, 2(s2)
; RV64I-NEXT:    sh s0, 4(s2)
; RV64I-NEXT:    sh a0, 6(s2)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_one:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 8(a1)
; RV64IM-NEXT:    lhu a3, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI4_0)
; RV64IM-NEXT:    li a5, 654
; RV64IM-NEXT:    ld a4, %lo(.LCPI4_0)(a4)
; RV64IM-NEXT:    lui a6, %hi(.LCPI4_1)
; RV64IM-NEXT:    li a7, 23
; RV64IM-NEXT:    ld a6, %lo(.LCPI4_1)(a6)
; RV64IM-NEXT:    mulhu a4, a2, a4
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    lui a5, %hi(.LCPI4_2)
; RV64IM-NEXT:    ld a5, %lo(.LCPI4_2)(a5)
; RV64IM-NEXT:    mulhu a6, a3, a6
; RV64IM-NEXT:    mul a6, a6, a7
; RV64IM-NEXT:    lui a7, 1
; RV64IM-NEXT:    addi a7, a7, 1327
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    mul a5, a5, a7
; RV64IM-NEXT:    subw a2, a2, a4
; RV64IM-NEXT:    subw a3, a3, a6
; RV64IM-NEXT:    subw a1, a1, a5
; RV64IM-NEXT:    sh zero, 0(a0)
; RV64IM-NEXT:    sh a2, 2(a0)
; RV64IM-NEXT:    sh a3, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold if the divisor is 2^16.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold i64 urem.
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lw s1, 16(a1)
; RV32I-NEXT:    lw s2, 20(a1)
; RV32I-NEXT:    lw s3, 24(a1)
; RV32I-NEXT:    lw s4, 28(a1)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a4, 4(a1)
; RV32I-NEXT:    lw s5, 8(a1)
; RV32I-NEXT:    lw s6, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a2, 1
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    mv a1, a4
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    mv s8, a1
; RV32I-NEXT:    li a2, 654
; RV32I-NEXT:    mv a0, s5
; RV32I-NEXT:    mv a1, s6
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    mv s6, a1
; RV32I-NEXT:    li a2, 23
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a2, a0, 1327
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    mv a1, s4
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    sw s1, 16(s0)
; RV32I-NEXT:    sw s2, 20(s0)
; RV32I-NEXT:    sw a0, 24(s0)
; RV32I-NEXT:    sw a1, 28(s0)
; RV32I-NEXT:    sw s7, 0(s0)
; RV32I-NEXT:    sw s8, 4(s0)
; RV32I-NEXT:    sw s5, 8(s0)
; RV32I-NEXT:    sw s6, 12(s0)
; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -48
; RV32IM-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    lw s1, 16(a1)
; RV32IM-NEXT:    lw s2, 20(a1)
; RV32IM-NEXT:    lw s3, 24(a1)
; RV32IM-NEXT:    lw s4, 28(a1)
; RV32IM-NEXT:    lw a3, 0(a1)
; RV32IM-NEXT:    lw a4, 4(a1)
; RV32IM-NEXT:    lw s5, 8(a1)
; RV32IM-NEXT:    lw s6, 12(a1)
; RV32IM-NEXT:    mv s0, a0
; RV32IM-NEXT:    li a2, 1
; RV32IM-NEXT:    mv a0, a3
; RV32IM-NEXT:    mv a1, a4
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s7, a0
; RV32IM-NEXT:    mv s8, a1
; RV32IM-NEXT:    li a2, 654
; RV32IM-NEXT:    mv a0, s5
; RV32IM-NEXT:    mv a1, s6
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s5, a0
; RV32IM-NEXT:    mv s6, a1
; RV32IM-NEXT:    li a2, 23
; RV32IM-NEXT:    mv a0, s1
; RV32IM-NEXT:    mv a1, s2
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s1, a0
; RV32IM-NEXT:    mv s2, a1
; RV32IM-NEXT:    lui a0, 1
; RV32IM-NEXT:    addi a2, a0, 1327
; RV32IM-NEXT:    mv a0, s3
; RV32IM-NEXT:    mv a1, s4
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    sw s1, 16(s0)
; RV32IM-NEXT:    sw s2, 20(s0)
; RV32IM-NEXT:    sw a0, 24(s0)
; RV32IM-NEXT:    sw a1, 28(s0)
; RV32IM-NEXT:    sw s7, 0(s0)
; RV32IM-NEXT:    sw s8, 4(s0)
; RV32IM-NEXT:    sw s5, 8(s0)
; RV32IM-NEXT:    sw s6, 12(s0)
; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    addi sp, sp, 48
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    ld a2, 8(a1)
; RV64I-NEXT:    ld s0, 16(a1)
; RV64I-NEXT:    ld s1, 24(a1)
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 23
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sd zero, 0(s2)
; RV64I-NEXT:    sd s3, 8(s2)
; RV64I-NEXT:    sd s0, 16(s2)
; RV64I-NEXT:    sd a0, 24(s2)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    ld a2, 8(a1)
; RV64IM-NEXT:    ld a3, 16(a1)
; RV64IM-NEXT:    ld a1, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI6_1)
; RV64IM-NEXT:    ld a4, %lo(.LCPI6_1)(a4)
; RV64IM-NEXT:    lui a5, %hi(.LCPI6_0)
; RV64IM-NEXT:    li a6, 654
; RV64IM-NEXT:    srli a7, a2, 1
; RV64IM-NEXT:    mulhu a4, a7, a4
; RV64IM-NEXT:    lui a7, %hi(.LCPI6_2)
; RV64IM-NEXT:    ld a5, %lo(.LCPI6_0)(a5)
; RV64IM-NEXT:    ld a7, %lo(.LCPI6_2)(a7)
; RV64IM-NEXT:    srli a4, a4, 7
; RV64IM-NEXT:    mul a4, a4, a6
; RV64IM-NEXT:    lui a6, 1
; RV64IM-NEXT:    addiw a6, a6, 1327
; RV64IM-NEXT:    mulhu a5, a3, a5
; RV64IM-NEXT:    mulhu a7, a1, a7
; RV64IM-NEXT:    srli a7, a7, 12
; RV64IM-NEXT:    mul a6, a7, a6
; RV64IM-NEXT:    sub a7, a3, a5
; RV64IM-NEXT:    srli a7, a7, 1
; RV64IM-NEXT:    add a5, a7, a5
; RV64IM-NEXT:    sub a2, a2, a4
; RV64IM-NEXT:    sub a1, a1, a6
; RV64IM-NEXT:    li a4, 23
; RV64IM-NEXT:    srli a5, a5, 4
; RV64IM-NEXT:    mul a4, a5, a4
; RV64IM-NEXT:    sub a3, a3, a4
; RV64IM-NEXT:    sd zero, 0(a0)
; RV64IM-NEXT:    sd a2, 8(a0)
; RV64IM-NEXT:    sd a3, 16(a0)
; RV64IM-NEXT:    sd a1, 24(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}