; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s

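; These tests check that srem by a constant is lowered to a multiply by a
; "magic" constant plus shifts (Granlund-Montgomery / Hacker's Delight style)
; rather than a divide libcall whenever the M extension is available. As a
; rough sketch for the divisor 95, the quotient is computed as
;   q = (mulh(x, 0xAC769185) + x) >> 6;  q += (q >>u 31)
; and the remainder as x - q * 95, where 0xAC769185 = (706409 << 12) + 389 is
; the signed magic constant materialized by the lui/addi pair below.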
define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 706409
; RV32IM-NEXT:    addi a1, a1, 389
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    add a1, a1, a0
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 6
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 706409
; RV64IM-NEXT:    addiw a2, a2, 389
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    add a1, a1, a0
; RV64IM-NEXT:    srliw a2, a1, 31
; RV64IM-NEXT:    sraiw a1, a1, 6
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 95
  ret i32 %1
}


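; Even divisor: 1060 = 4 * 265. The magic constant here (0x3DD38FF1) is
; positive, so no dividend add is needed, but the post-shift grows to 8.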
define i32 @fold_srem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 1060
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 253241
; RV32IM-NEXT:    addi a1, a1, -15
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, 1060
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 253241
; RV64IM-NEXT:    addiw a2, a2, -15
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 1060
  ret i32 %1
}


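; Negative odd divisor: the magic constant is the negation of the one for
; 723, and the remainder (as for srem generally) keeps the dividend's sign.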
define i32 @fold_srem_negative_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, -723
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 677296
; RV32IM-NEXT:    addi a1, a1, -91
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, -723
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_negative_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, -723
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_negative_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 677296
; RV64IM-NEXT:    addiw a2, a2, -91
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, -723
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, -723
  ret i32 %1
}


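; Negative even divisor: -22981 does not fit in a simm12, so materializing it
; for the final multiply takes a lui/addi pair of its own.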
define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a1, 1048570
; RV32I-NEXT:    addi a1, a1, 1595
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1036895
; RV32IM-NEXT:    addi a1, a1, 999
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    lui a2, 1048570
; RV32IM-NEXT:    addi a2, a2, 1595
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_negative_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    lui a1, 1048570
; RV64I-NEXT:    addiw a1, a1, 1595
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_negative_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 1036895
; RV64IM-NEXT:    addiw a2, a2, 999
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    lui a2, 1048570
; RV64IM-NEXT:    addi a2, a2, 1595
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, -22981
  ret i32 %1
}


; Don't fold if we can combine srem with sdiv.
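; The magic-multiply quotient q is computed once and reused: the result is
; formed as (x + q) - q * 95, i.e. the remainder plus the quotient.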
define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV32I-LABEL: combine_srem_sdiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    call __modsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __divsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_srem_sdiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 706409
; RV32IM-NEXT:    addi a1, a1, 389
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    add a1, a1, a0
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 6
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_srem_sdiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __divdi3
; RV64I-NEXT:    addw a0, s1, a0
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_srem_sdiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 706409
; RV64IM-NEXT:    addiw a2, a2, 389
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    add a1, a1, a0
; RV64IM-NEXT:    srliw a2, a1, 31
; RV64IM-NEXT:    sraiw a1, a1, 6
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    subw a0, a0, a2
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 95
  %2 = sdiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
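; srem by 64 becomes a bias-and-mask sequence instead: the shifted sign bit
; adds 63 to negative inputs, andi clears the low six bits, and the subtract
; recovers the remainder.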
define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_power_of_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srai a1, a0, 31
; RV32I-NEXT:    srli a1, a1, 26
; RV32I-NEXT:    add a1, a0, a1
; RV32I-NEXT:    andi a1, a1, -64
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    srai a1, a0, 31
; RV32IM-NEXT:    srli a1, a1, 26
; RV32IM-NEXT:    add a1, a0, a1
; RV32IM-NEXT:    andi a1, a1, -64
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sraiw a1, a0, 31
; RV64I-NEXT:    srliw a1, a1, 26
; RV64I-NEXT:    add a1, a0, a1
; RV64I-NEXT:    andi a1, a1, -64
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sraiw a1, a0, 31
; RV64IM-NEXT:    srliw a1, a1, 26
; RV64IM-NEXT:    add a1, a0, a1
; RV64IM-NEXT:    andi a1, a1, -64
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 64
  ret i32 %1
}

; Don't fold if the divisor is one.
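; srem x, 1 is always 0, so every configuration just returns zero.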
define i32 @dont_fold_srem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_srem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 0
; CHECK-NEXT:    ret
  %1 = srem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^31.
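; The i32 constant 2147483648 wraps to -2^31 (INT_MIN), so the result is x
; itself unless x is INT_MIN, in which case bit 31 of the biased value is set
; and adding it back to x yields 0.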
define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i32_smax:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srai a1, a0, 31
; RV32I-NEXT:    srli a1, a1, 1
; RV32I-NEXT:    add a1, a0, a1
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_i32_smax:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    srai a1, a0, 31
; RV32IM-NEXT:    srli a1, a1, 1
; RV32IM-NEXT:    add a1, a0, a1
; RV32IM-NEXT:    lui a2, 524288
; RV32IM-NEXT:    and a1, a1, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sraiw a1, a0, 31
; RV64I-NEXT:    srliw a1, a1, 1
; RV64I-NEXT:    add a1, a0, a1
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sraiw a1, a0, 31
; RV64IM-NEXT:    srliw a1, a1, 1
; RV64IM-NEXT:    add a1, a0, a1
; RV64IM-NEXT:    lui a2, 524288
; RV64IM-NEXT:    and a1, a1, a2
; RV64IM-NEXT:    addw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 2147483648
  ret i32 %1
}

; Don't fold i64 srem
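; RV32 has no instruction for the high half of a 64-bit multiply, so even
; RV32IM must call __moddi3; RV64IM instead loads a 64-bit magic constant
; from the constant pool (.LCPI8_0).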
define i64 @dont_fold_srem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    li a2, 98
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __moddi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    li a2, 98
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __moddi3
; RV32IM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a1, 98
; RV64I-NEXT:    tail __moddi3
;
; RV64IM-LABEL: dont_fold_srem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lui a1, %hi(.LCPI8_0)
; RV64IM-NEXT:    ld a1, %lo(.LCPI8_0)(a1)
; RV64IM-NEXT:    mulh a1, a0, a1
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 5
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i64 %x, 98
  ret i64 %1
}