; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV32,RV32IM %s
; RUN: llc -mtriple=riscv32 -mattr=+m,+zba,+zbb \
; RUN:    -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV32,RV32IMZB %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV64,RV64IM %s
; RUN: llc -mtriple=riscv64 -mattr=+m,+zba,+zbb \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV64,RV64IMZB %s

; Test that there is a single shift after the mul and no addition.
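; A reference sketch of the expansion (standard Granlund-Montgomery unsigned
; magic): 0xCCCCCCCD = ceil(2^34 / 5), materialized below as
; lui 838861 + addi -819, so for any 32-bit x:
;   x / 5 == mulhu(x, 0xCCCCCCCD) >> 2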
define i32 @udiv_constant_no_add(i32 %a) nounwind {
; RV32-LABEL: udiv_constant_no_add:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 838861
; RV32-NEXT:    addi a1, a1, -819
; RV32-NEXT:    mulhu a0, a0, a1
; RV32-NEXT:    srli a0, a0, 2
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv_constant_no_add:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a0, a0, 32
; RV64-NEXT:    lui a1, 838861
; RV64-NEXT:    addi a1, a1, -819
; RV64-NEXT:    slli a1, a1, 32
; RV64-NEXT:    mulhu a0, a0, a1
; RV64-NEXT:    srli a0, a0, 34
; RV64-NEXT:    ret
  %1 = udiv i32 %a, 5
  ret i32 %1
}

; This constant requires a sub, srli, add sequence after the mul.
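; Reference: 0x24924925 (lui 149797 + addi -1755) is floor(2^35 / 7) + 1,
; whose top (33rd) bit does not fit in 32 bits, so a fixup sequence recovers
; the lost precision:
;   q = mulhu(x, 0x24924925);  x / 7 == (((x - q) >> 1) + q) >> 2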
define i32 @udiv_constant_add(i32 %a) nounwind {
; RV32-LABEL: udiv_constant_add:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 149797
; RV32-NEXT:    addi a1, a1, -1755
; RV32-NEXT:    mulhu a1, a0, a1
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    srli a0, a0, 2
; RV32-NEXT:    ret
;
; RV64IM-LABEL: udiv_constant_add:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 32
; RV64IM-NEXT:    lui a2, 149797
; RV64IM-NEXT:    addi a2, a2, -1755
; RV64IM-NEXT:    slli a2, a2, 32
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    srliw a0, a0, 1
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    srli a0, a0, 2
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: udiv_constant_add:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    zext.w a1, a0
; RV64IMZB-NEXT:    lui a2, 149797
; RV64IMZB-NEXT:    addiw a2, a2, -1755
; RV64IMZB-NEXT:    mul a1, a1, a2
; RV64IMZB-NEXT:    srli a1, a1, 32
; RV64IMZB-NEXT:    subw a0, a0, a1
; RV64IMZB-NEXT:    srliw a0, a0, 1
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    srli a0, a0, 2
; RV64IMZB-NEXT:    ret
  %1 = udiv i32 %a, 7
  ret i32 %1
}

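; Reference for the RV32 expansion below: since 2^32 % 5 == 1, the remainder
; of the full 64-bit value equals (lo + hi) % 5 (computed with an end-around
; carry), and once the remainder is subtracted the quotient is recovered by
; multiplying by the inverse of 5 mod 2^64, 0xCCCCCCCCCCCCCCCD.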
define i64 @udiv64_constant_no_add(i64 %a) nounwind {
; RV32-LABEL: udiv64_constant_no_add:
; RV32:       # %bb.0:
; RV32-NEXT:    add a2, a0, a1
; RV32-NEXT:    sltu a3, a2, a0
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    lui a3, 838861
; RV32-NEXT:    addi a4, a3, -819
; RV32-NEXT:    mulhu a5, a2, a4
; RV32-NEXT:    srli a6, a5, 2
; RV32-NEXT:    andi a5, a5, -4
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    sub a2, a2, a5
; RV32-NEXT:    sub a5, a0, a2
; RV32-NEXT:    addi a3, a3, -820
; RV32-NEXT:    mul a3, a5, a3
; RV32-NEXT:    mulhu a6, a5, a4
; RV32-NEXT:    add a3, a6, a3
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    sub a1, a1, a0
; RV32-NEXT:    mul a1, a1, a4
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    mul a0, a5, a4
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv64_constant_no_add:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 838861
; RV64-NEXT:    addiw a1, a1, -819
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    mulhu a0, a0, a1
; RV64-NEXT:    srli a0, a0, 2
; RV64-NEXT:    ret
  %1 = udiv i64 %a, 5
  ret i64 %1
}

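; 2^32 % 7 != 1, so the remainder trick above does not apply and RV32 falls
; back to the __udivdi3 libcall; RV64 still uses a magic multiply, loading
; the 64-bit constant from the constant pool.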
define i64 @udiv64_constant_add(i64 %a) nounwind {
; RV32-LABEL: udiv64_constant_add:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, 7
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __udivdi3
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv64_constant_add:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, %hi(.LCPI3_0)
; RV64-NEXT:    ld a1, %lo(.LCPI3_0)(a1)
; RV64-NEXT:    mulhu a1, a0, a1
; RV64-NEXT:    sub a0, a0, a1
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    srli a0, a0, 2
; RV64-NEXT:    ret
  %1 = udiv i64 %a, 7
  ret i64 %1
}

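; Reference: for i8 the input is zero-extended and multiplied by
; 205 = ceil(2^10 / 5), so x / 5 == ((x & 255) * 205) >> 10 for all 8-bit x;
; a plain mul suffices since the product fits in a register.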
define i8 @udiv8_constant_no_add(i8 %a) nounwind {
; RV32-LABEL: udiv8_constant_no_add:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a0, a0, 255
; RV32-NEXT:    li a1, 205
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    srli a0, a0, 10
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv8_constant_no_add:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a0, a0, 255
; RV64-NEXT:    li a1, 205
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    srli a0, a0, 10
; RV64-NEXT:    ret
  %1 = udiv i8 %a, 5
  ret i8 %1
}

define i8 @udiv8_constant_add(i8 %a) nounwind {
; RV32IM-LABEL: udiv8_constant_add:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    andi a1, a0, 255
; RV32IM-NEXT:    li a2, 37
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    srli a1, a1, 8
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    slli a0, a0, 24
; RV32IM-NEXT:    srli a0, a0, 25
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    srli a0, a0, 2
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: udiv8_constant_add:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    andi a1, a0, 255
; RV32IMZB-NEXT:    sh3add a2, a1, a1
; RV32IMZB-NEXT:    sh2add a1, a2, a1
; RV32IMZB-NEXT:    srli a1, a1, 8
; RV32IMZB-NEXT:    sub a0, a0, a1
; RV32IMZB-NEXT:    slli a0, a0, 24
; RV32IMZB-NEXT:    srli a0, a0, 25
; RV32IMZB-NEXT:    add a0, a0, a1
; RV32IMZB-NEXT:    srli a0, a0, 2
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: udiv8_constant_add:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    andi a1, a0, 255
; RV64IM-NEXT:    li a2, 37
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 8
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    slli a0, a0, 56
; RV64IM-NEXT:    srli a0, a0, 57
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    srli a0, a0, 2
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: udiv8_constant_add:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    andi a1, a0, 255
; RV64IMZB-NEXT:    sh3add a2, a1, a1
; RV64IMZB-NEXT:    sh2add a1, a2, a1
; RV64IMZB-NEXT:    srli a1, a1, 8
; RV64IMZB-NEXT:    subw a0, a0, a1
; RV64IMZB-NEXT:    slli a0, a0, 56
; RV64IMZB-NEXT:    srli a0, a0, 57
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    srli a0, a0, 2
; RV64IMZB-NEXT:    ret
  %1 = udiv i8 %a, 7
  ret i8 %1
}

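; Reference: 52429 = ceil(2^18 / 5), so x / 5 == (x * 52429) >> 18 for
; 16-bit x. RV32 shifts the input to the top half and multiplies by
; 0xCCCD0000 with mulhu, which stands in for a 16x16 widening multiply.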
define i16 @udiv16_constant_no_add(i16 %a) nounwind {
; RV32-LABEL: udiv16_constant_no_add:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a0, a0, 16
; RV32-NEXT:    lui a1, 838864
; RV32-NEXT:    mulhu a0, a0, a1
; RV32-NEXT:    srli a0, a0, 18
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv16_constant_no_add:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 52429
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    slli a0, a0, 48
; RV64-NEXT:    mulhu a0, a0, a1
; RV64-NEXT:    srli a0, a0, 18
; RV64-NEXT:    ret
  %1 = udiv i16 %a, 5
  ret i16 %1
}

define i16 @udiv16_constant_add(i16 %a) nounwind {
; RV32-LABEL: udiv16_constant_add:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a1, a0, 16
; RV32-NEXT:    lui a2, 149808
; RV32-NEXT:    mulhu a1, a1, a2
; RV32-NEXT:    srli a1, a1, 16
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    slli a0, a0, 16
; RV32-NEXT:    srli a0, a0, 17
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    srli a0, a0, 2
; RV32-NEXT:    ret
;
; RV64-LABEL: udiv16_constant_add:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a1, a0, 48
; RV64-NEXT:    lui a2, 149808
; RV64-NEXT:    mulhu a1, a1, a2
; RV64-NEXT:    srli a1, a1, 16
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    slli a0, a0, 48
; RV64-NEXT:    srli a0, a0, 49
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    srli a0, a0, 2
; RV64-NEXT:    ret
  %1 = udiv i16 %a, 7
  ret i16 %1
}

; Test the simplest case: an srli and an add after the mul. No srai.
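; Reference: 0x55555556 = (2^32 + 2) / 3, and mulh(x, 0x55555556) already
; yields x/3 rounded toward -infinity, so only the sign bit (q >>u 31) is
; added to get the round-toward-zero result sdiv requires; no srai needed.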
define i32 @sdiv_constant_no_srai(i32 %a) nounwind {
; RV32-LABEL: sdiv_constant_no_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1366
; RV32-NEXT:    mulh a0, a0, a1
; RV32-NEXT:    srli a1, a0, 31
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv_constant_no_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a0, a0
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a1, a1, 1366
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    srli a1, a0, 63
; RV64-NEXT:    srli a0, a0, 32
; RV64-NEXT:    addw a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i32 %a, 3
  ret i32 %1
}

; This constant requires an srai between the mul and the add.
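; Reference: for 5 the magic is 0x66666667 = floor(2^33 / 5) + 1 with shift
; amount 1: srai the mulh result by 1, then add the sign bit.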
define i32 @sdiv_constant_srai(i32 %a) nounwind {
; RV32-LABEL: sdiv_constant_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 419430
; RV32-NEXT:    addi a1, a1, 1639
; RV32-NEXT:    mulh a0, a0, a1
; RV32-NEXT:    srli a1, a0, 31
; RV32-NEXT:    srai a0, a0, 1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv_constant_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a0, a0
; RV64-NEXT:    lui a1, 419430
; RV64-NEXT:    addiw a1, a1, 1639
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    srli a1, a0, 63
; RV64-NEXT:    srai a0, a0, 33
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i32 %a, 5
  ret i32 %1
}

; This constant requires an add and an srai after the mul.
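; Reference: floor(2^34 / 7) + 1 = 0x92492493 is >= 2^31, so as a signed
; multiplier it is negative; the dividend is added back after the mulh, then
; srai by 2 and the sign-bit add.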
define i32 @sdiv_constant_add_srai(i32 %a) nounwind {
; RV32-LABEL: sdiv_constant_add_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 599186
; RV32-NEXT:    addi a1, a1, 1171
; RV32-NEXT:    mulh a1, a0, a1
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    srli a1, a0, 31
; RV32-NEXT:    srai a0, a0, 2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv_constant_add_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a1, a0
; RV64-NEXT:    lui a2, 599186
; RV64-NEXT:    addiw a2, a2, 1171
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    srliw a1, a0, 31
; RV64-NEXT:    sraiw a0, a0, 2
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i32 %a, 7
  ret i32 %1
}

; This constant requires a sub and an srai after the mul.
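; Reference: the mirror case for a negative divisor; the multiplier
; 0x6DB6DB6D is positive, so the dividend is subtracted after the mulh
; instead of added, followed by the same srai and sign-bit add.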
define i32 @sdiv_constant_sub_srai(i32 %a) nounwind {
; RV32-LABEL: sdiv_constant_sub_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a1, 449390
; RV32-NEXT:    addi a1, a1, -1171
; RV32-NEXT:    mulh a1, a0, a1
; RV32-NEXT:    sub a1, a1, a0
; RV32-NEXT:    srli a0, a1, 31
; RV32-NEXT:    srai a1, a1, 2
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv_constant_sub_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a1, a0
; RV64-NEXT:    lui a2, 449390
; RV64-NEXT:    addiw a2, a2, -1171
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    srli a1, a1, 32
; RV64-NEXT:    subw a1, a1, a0
; RV64-NEXT:    srliw a0, a1, 31
; RV64-NEXT:    sraiw a1, a1, 2
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    ret
  %1 = sdiv i32 %a, -7
  ret i32 %1
}

define i64 @sdiv64_constant_no_srai(i64 %a) nounwind {
; RV32-LABEL: sdiv64_constant_no_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, 3
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __divdi3
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv64_constant_no_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, %hi(.LCPI12_0)
; RV64-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
; RV64-NEXT:    mulh a0, a0, a1
; RV64-NEXT:    srli a1, a0, 63
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i64 %a, 3
  ret i64 %1
}

define i64 @sdiv64_constant_srai(i64 %a) nounwind {
; RV32-LABEL: sdiv64_constant_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, 5
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __divdi3
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv64_constant_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, %hi(.LCPI13_0)
; RV64-NEXT:    ld a1, %lo(.LCPI13_0)(a1)
; RV64-NEXT:    mulh a0, a0, a1
; RV64-NEXT:    srli a1, a0, 63
; RV64-NEXT:    srai a0, a0, 1
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i64 %a, 5
  ret i64 %1
}

define i64 @sdiv64_constant_add_srai(i64 %a) nounwind {
; RV32-LABEL: sdiv64_constant_add_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, 15
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __divdi3
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv64_constant_add_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 559241
; RV64-NEXT:    addiw a1, a1, -1911
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    mulh a1, a0, a1
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    srli a1, a0, 63
; RV64-NEXT:    srai a0, a0, 3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %1 = sdiv i64 %a, 15
  ret i64 %1
}

define i64 @sdiv64_constant_sub_srai(i64 %a) nounwind {
; RV32-LABEL: sdiv64_constant_sub_srai:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, -3
; RV32-NEXT:    li a3, -1
; RV32-NEXT:    call __divdi3
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: sdiv64_constant_sub_srai:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    mulh a1, a0, a1
; RV64-NEXT:    sub a1, a1, a0
; RV64-NEXT:    srli a0, a1, 63
; RV64-NEXT:    srai a1, a1, 1
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    ret
  %1 = sdiv i64 %a, -3
  ret i64 %1
}

define i8 @sdiv8_constant_no_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_no_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a0, a0, 24
; RV32IM-NEXT:    srai a0, a0, 24
; RV32IM-NEXT:    li a1, 86
; RV32IM-NEXT:    mul a0, a0, a1
; RV32IM-NEXT:    srli a1, a0, 8
; RV32IM-NEXT:    slli a0, a0, 16
; RV32IM-NEXT:    srli a0, a0, 31
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv8_constant_no_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.b a0, a0
; RV32IMZB-NEXT:    li a1, 86
; RV32IMZB-NEXT:    mul a0, a0, a1
; RV32IMZB-NEXT:    srli a1, a0, 8
; RV32IMZB-NEXT:    slli a0, a0, 16
; RV32IMZB-NEXT:    srli a0, a0, 31
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv8_constant_no_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 56
; RV64IM-NEXT:    srai a0, a0, 56
; RV64IM-NEXT:    li a1, 86
; RV64IM-NEXT:    mul a0, a0, a1
; RV64IM-NEXT:    srli a1, a0, 8
; RV64IM-NEXT:    slli a0, a0, 48
; RV64IM-NEXT:    srli a0, a0, 63
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv8_constant_no_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.b a0, a0
; RV64IMZB-NEXT:    li a1, 86
; RV64IMZB-NEXT:    mul a0, a0, a1
; RV64IMZB-NEXT:    srli a1, a0, 8
; RV64IMZB-NEXT:    slli a0, a0, 48
; RV64IMZB-NEXT:    srli a0, a0, 63
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    ret
  %1 = sdiv i8 %a, 3
  ret i8 %1
}

define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a0, a0, 24
; RV32IM-NEXT:    srai a0, a0, 24
; RV32IM-NEXT:    li a1, 103
; RV32IM-NEXT:    mul a0, a0, a1
; RV32IM-NEXT:    srai a1, a0, 9
; RV32IM-NEXT:    slli a0, a0, 16
; RV32IM-NEXT:    srli a0, a0, 31
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv8_constant_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.b a0, a0
; RV32IMZB-NEXT:    li a1, 103
; RV32IMZB-NEXT:    mul a0, a0, a1
; RV32IMZB-NEXT:    srai a1, a0, 9
; RV32IMZB-NEXT:    slli a0, a0, 16
; RV32IMZB-NEXT:    srli a0, a0, 31
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv8_constant_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 56
; RV64IM-NEXT:    srai a0, a0, 56
; RV64IM-NEXT:    li a1, 103
; RV64IM-NEXT:    mul a0, a0, a1
; RV64IM-NEXT:    srai a1, a0, 9
; RV64IM-NEXT:    slli a0, a0, 48
; RV64IM-NEXT:    srli a0, a0, 63
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv8_constant_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.b a0, a0
; RV64IMZB-NEXT:    li a1, 103
; RV64IMZB-NEXT:    mul a0, a0, a1
; RV64IMZB-NEXT:    srai a1, a0, 9
; RV64IMZB-NEXT:    slli a0, a0, 48
; RV64IMZB-NEXT:    srli a0, a0, 63
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    ret
  %1 = sdiv i8 %a, 5
  ret i8 %1
}

define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_add_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a1, a0, 24
; RV32IM-NEXT:    srai a1, a1, 24
; RV32IM-NEXT:    li a2, -109
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    srli a1, a1, 8
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    slli a0, a0, 24
; RV32IM-NEXT:    srli a1, a0, 31
; RV32IM-NEXT:    srai a0, a0, 26
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv8_constant_add_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.b a1, a0
; RV32IMZB-NEXT:    li a2, -109
; RV32IMZB-NEXT:    mul a1, a1, a2
; RV32IMZB-NEXT:    srli a1, a1, 8
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    slli a0, a0, 24
; RV32IMZB-NEXT:    srli a1, a0, 31
; RV32IMZB-NEXT:    srai a0, a0, 26
; RV32IMZB-NEXT:    add a0, a0, a1
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv8_constant_add_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 56
; RV64IM-NEXT:    srai a1, a1, 56
; RV64IM-NEXT:    li a2, -109
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 8
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    slli a0, a0, 56
; RV64IM-NEXT:    srli a1, a0, 63
; RV64IM-NEXT:    srai a0, a0, 58
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv8_constant_add_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.b a1, a0
; RV64IMZB-NEXT:    li a2, -109
; RV64IMZB-NEXT:    mul a1, a1, a2
; RV64IMZB-NEXT:    srli a1, a1, 8
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    slli a0, a0, 56
; RV64IMZB-NEXT:    srli a1, a0, 63
; RV64IMZB-NEXT:    srai a0, a0, 58
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    ret
  %1 = sdiv i8 %a, 7
  ret i8 %1
}

define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IM-LABEL: sdiv8_constant_sub_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a1, a0, 24
; RV32IM-NEXT:    srai a1, a1, 24
; RV32IM-NEXT:    li a2, 109
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    srli a1, a1, 8
; RV32IM-NEXT:    sub a1, a1, a0
; RV32IM-NEXT:    slli a1, a1, 24
; RV32IM-NEXT:    srli a0, a1, 31
; RV32IM-NEXT:    srai a1, a1, 26
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv8_constant_sub_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.b a1, a0
; RV32IMZB-NEXT:    li a2, 109
; RV32IMZB-NEXT:    mul a1, a1, a2
; RV32IMZB-NEXT:    srli a1, a1, 8
; RV32IMZB-NEXT:    sub a1, a1, a0
; RV32IMZB-NEXT:    slli a1, a1, 24
; RV32IMZB-NEXT:    srli a0, a1, 31
; RV32IMZB-NEXT:    srai a1, a1, 26
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv8_constant_sub_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 56
; RV64IM-NEXT:    srai a1, a1, 56
; RV64IM-NEXT:    li a2, 109
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 8
; RV64IM-NEXT:    subw a1, a1, a0
; RV64IM-NEXT:    slli a1, a1, 56
; RV64IM-NEXT:    srli a0, a1, 63
; RV64IM-NEXT:    srai a1, a1, 58
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv8_constant_sub_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.b a1, a0
; RV64IMZB-NEXT:    li a2, 109
; RV64IMZB-NEXT:    mul a1, a1, a2
; RV64IMZB-NEXT:    srli a1, a1, 8
; RV64IMZB-NEXT:    subw a1, a1, a0
; RV64IMZB-NEXT:    slli a1, a1, 56
; RV64IMZB-NEXT:    srli a0, a1, 63
; RV64IMZB-NEXT:    srai a1, a1, 58
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    ret
  %1 = sdiv i8 %a, -7
  ret i8 %1
}

define i16 @sdiv16_constant_no_srai(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_constant_no_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a0, a0, 16
; RV32IM-NEXT:    srai a0, a0, 16
; RV32IM-NEXT:    lui a1, 5
; RV32IM-NEXT:    addi a1, a1, 1366
; RV32IM-NEXT:    mul a0, a0, a1
; RV32IM-NEXT:    srli a1, a0, 31
; RV32IM-NEXT:    srli a0, a0, 16
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv16_constant_no_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.h a0, a0
; RV32IMZB-NEXT:    lui a1, 5
; RV32IMZB-NEXT:    addi a1, a1, 1366
; RV32IMZB-NEXT:    mul a0, a0, a1
; RV32IMZB-NEXT:    srli a1, a0, 31
; RV32IMZB-NEXT:    srli a0, a0, 16
; RV32IMZB-NEXT:    add a0, a0, a1
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv16_constant_no_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 48
; RV64IM-NEXT:    srai a0, a0, 48
; RV64IM-NEXT:    lui a1, 5
; RV64IM-NEXT:    addiw a1, a1, 1366
; RV64IM-NEXT:    mul a0, a0, a1
; RV64IM-NEXT:    srliw a1, a0, 31
; RV64IM-NEXT:    srli a0, a0, 16
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv16_constant_no_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.h a0, a0
; RV64IMZB-NEXT:    lui a1, 5
; RV64IMZB-NEXT:    addiw a1, a1, 1366
; RV64IMZB-NEXT:    mul a0, a0, a1
; RV64IMZB-NEXT:    srliw a1, a0, 31
; RV64IMZB-NEXT:    srli a0, a0, 16
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    ret
  %1 = sdiv i16 %a, 3
  ret i16 %1
}

define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_constant_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a0, a0, 16
; RV32IM-NEXT:    srai a0, a0, 16
; RV32IM-NEXT:    lui a1, 6
; RV32IM-NEXT:    addi a1, a1, 1639
; RV32IM-NEXT:    mul a0, a0, a1
; RV32IM-NEXT:    srli a1, a0, 31
; RV32IM-NEXT:    srai a0, a0, 17
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv16_constant_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.h a0, a0
; RV32IMZB-NEXT:    lui a1, 6
; RV32IMZB-NEXT:    addi a1, a1, 1639
; RV32IMZB-NEXT:    mul a0, a0, a1
; RV32IMZB-NEXT:    srli a1, a0, 31
; RV32IMZB-NEXT:    srai a0, a0, 17
; RV32IMZB-NEXT:    add a0, a0, a1
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv16_constant_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a0, a0, 48
; RV64IM-NEXT:    srai a0, a0, 48
; RV64IM-NEXT:    lui a1, 6
; RV64IM-NEXT:    addiw a1, a1, 1639
; RV64IM-NEXT:    mul a0, a0, a1
; RV64IM-NEXT:    srliw a1, a0, 31
; RV64IM-NEXT:    srai a0, a0, 17
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv16_constant_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.h a0, a0
; RV64IMZB-NEXT:    lui a1, 6
; RV64IMZB-NEXT:    addiw a1, a1, 1639
; RV64IMZB-NEXT:    mul a0, a0, a1
; RV64IMZB-NEXT:    srliw a1, a0, 31
; RV64IMZB-NEXT:    srai a0, a0, 17
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    ret
  %1 = sdiv i16 %a, 5
  ret i16 %1
}

define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_constant_add_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a1, a0, 16
; RV32IM-NEXT:    srai a1, a1, 16
; RV32IM-NEXT:    lui a2, 1048569
; RV32IM-NEXT:    addi a2, a2, -1911
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    srli a1, a1, 16
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    slli a0, a0, 16
; RV32IM-NEXT:    srli a1, a0, 31
; RV32IM-NEXT:    srai a0, a0, 19
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv16_constant_add_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.h a1, a0
; RV32IMZB-NEXT:    lui a2, 1048569
; RV32IMZB-NEXT:    addi a2, a2, -1911
; RV32IMZB-NEXT:    mul a1, a1, a2
; RV32IMZB-NEXT:    srli a1, a1, 16
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    slli a0, a0, 16
; RV32IMZB-NEXT:    srli a1, a0, 31
; RV32IMZB-NEXT:    srai a0, a0, 19
; RV32IMZB-NEXT:    add a0, a0, a1
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv16_constant_add_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 48
; RV64IM-NEXT:    srai a1, a1, 48
; RV64IM-NEXT:    lui a2, 1048569
; RV64IM-NEXT:    addiw a2, a2, -1911
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 16
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    slli a0, a0, 48
; RV64IM-NEXT:    srli a1, a0, 63
; RV64IM-NEXT:    srai a0, a0, 51
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv16_constant_add_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.h a1, a0
; RV64IMZB-NEXT:    lui a2, 1048569
; RV64IMZB-NEXT:    addiw a2, a2, -1911
; RV64IMZB-NEXT:    mul a1, a1, a2
; RV64IMZB-NEXT:    srli a1, a1, 16
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    slli a0, a0, 48
; RV64IMZB-NEXT:    srli a1, a0, 63
; RV64IMZB-NEXT:    srai a0, a0, 51
; RV64IMZB-NEXT:    add a0, a0, a1
; RV64IMZB-NEXT:    ret
  %1 = sdiv i16 %a, 15
  ret i16 %1
}

define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV32IM-LABEL: sdiv16_constant_sub_srai:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    slli a1, a0, 16
; RV32IM-NEXT:    srai a1, a1, 16
; RV32IM-NEXT:    lui a2, 7
; RV32IM-NEXT:    addi a2, a2, 1911
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    srli a1, a1, 16
; RV32IM-NEXT:    sub a1, a1, a0
; RV32IM-NEXT:    slli a1, a1, 16
; RV32IM-NEXT:    srli a0, a1, 31
; RV32IM-NEXT:    srai a1, a1, 19
; RV32IM-NEXT:    add a0, a1, a0
; RV32IM-NEXT:    ret
;
; RV32IMZB-LABEL: sdiv16_constant_sub_srai:
; RV32IMZB:       # %bb.0:
; RV32IMZB-NEXT:    sext.h a1, a0
; RV32IMZB-NEXT:    lui a2, 7
; RV32IMZB-NEXT:    addi a2, a2, 1911
; RV32IMZB-NEXT:    mul a1, a1, a2
; RV32IMZB-NEXT:    srli a1, a1, 16
; RV32IMZB-NEXT:    sub a1, a1, a0
; RV32IMZB-NEXT:    slli a1, a1, 16
; RV32IMZB-NEXT:    srli a0, a1, 31
; RV32IMZB-NEXT:    srai a1, a1, 19
; RV32IMZB-NEXT:    add a0, a1, a0
; RV32IMZB-NEXT:    ret
;
; RV64IM-LABEL: sdiv16_constant_sub_srai:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 48
; RV64IM-NEXT:    srai a1, a1, 48
; RV64IM-NEXT:    lui a2, 7
; RV64IM-NEXT:    addiw a2, a2, 1911
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 16
; RV64IM-NEXT:    subw a1, a1, a0
; RV64IM-NEXT:    slli a1, a1, 48
; RV64IM-NEXT:    srli a0, a1, 63
; RV64IM-NEXT:    srai a1, a1, 51
; RV64IM-NEXT:    add a0, a1, a0
; RV64IM-NEXT:    ret
;
; RV64IMZB-LABEL: sdiv16_constant_sub_srai:
; RV64IMZB:       # %bb.0:
; RV64IMZB-NEXT:    sext.h a1, a0
; RV64IMZB-NEXT:    lui a2, 7
; RV64IMZB-NEXT:    addiw a2, a2, 1911
; RV64IMZB-NEXT:    mul a1, a1, a2
; RV64IMZB-NEXT:    srli a1, a1, 16
; RV64IMZB-NEXT:    subw a1, a1, a0
; RV64IMZB-NEXT:    slli a1, a1, 48
; RV64IMZB-NEXT:    srli a0, a1, 63
; RV64IMZB-NEXT:    srai a1, a1, 51
; RV64IMZB-NEXT:    add a0, a1, a0
; RV64IMZB-NEXT:    ret
  %1 = sdiv i16 %a, -15
  ret i16 %1
}