xref: /llvm-project/llvm/test/CodeGen/X86/bypass-slow-division-tune.ll (revision e89b4bcf32b8f6ddce9d7e95659e9f092a55c021)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; Check that a division is bypassed only when appropriate.
3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom       < %s | FileCheck -check-prefixes=CHECK,ATOM %s
4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64     < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont   < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=gracemont  < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
9; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake    < %s | FileCheck -check-prefixes=HUGEWS %s
10
11; Verify that div32 is bypassed only for -mcpu=atom.
; Under the ATOM prefix the 32-bit idivl is guarded by a test of both operands
; against $-256, and an 8-bit divb is used when no bit above the low byte is
; set. Every other configuration in this file (REST and HUGEWS prefixes)
; expects a plain idivl with no guard.
12define i32 @div32(i32 %a, i32 %b) {
13; ATOM-LABEL: div32:
14; ATOM:       # %bb.0: # %entry
15; ATOM-NEXT:    movl %edi, %eax
16; ATOM-NEXT:    orl %esi, %eax
17; ATOM-NEXT:    testl $-256, %eax
18; ATOM-NEXT:    je .LBB0_1
19; ATOM-NEXT:  # %bb.2:
20; ATOM-NEXT:    movl %edi, %eax
21; ATOM-NEXT:    cltd
22; ATOM-NEXT:    idivl %esi
23; ATOM-NEXT:    retq
24; ATOM-NEXT:  .LBB0_1:
25; ATOM-NEXT:    movzbl %dil, %eax
26; ATOM-NEXT:    divb %sil
27; ATOM-NEXT:    movzbl %al, %eax
28; ATOM-NEXT:    retq
29;
30; REST-LABEL: div32:
31; REST:       # %bb.0: # %entry
32; REST-NEXT:    movl %edi, %eax
33; REST-NEXT:    cltd
34; REST-NEXT:    idivl %esi
35; REST-NEXT:    retq
36;
37; HUGEWS-LABEL: div32:
38; HUGEWS:       # %bb.0: # %entry
39; HUGEWS-NEXT:    movl %edi, %eax
40; HUGEWS-NEXT:    cltd
41; HUGEWS-NEXT:    idivl %esi
42; HUGEWS-NEXT:    retq
43entry:
44  %div = sdiv i32 %a, %b
45  ret i32 %div
46}
47
48; Verify that div64 is bypassed except on goldmont/gracemont and in the
; huge-working-set run.
; The ATOM, X64, SLM and SKL prefixes expect the 64-bit idivq to be guarded by
; an orq/shrq $32 test of both operands, with a 32-bit divl used when the upper
; halves are zero. The GMT prefix (goldmont, gracemont) and the HUGEWS prefix
; (skylake with -profile-summary-huge-working-set-size-threshold=1) expect a
; plain idivq with no guard.
49define i64 @div64(i64 %a, i64 %b) {
50; ATOM-LABEL: div64:
51; ATOM:       # %bb.0: # %entry
52; ATOM-NEXT:    movq %rdi, %rcx
53; ATOM-NEXT:    movq %rdi, %rax
54; ATOM-NEXT:    orq %rsi, %rcx
55; ATOM-NEXT:    shrq $32, %rcx
56; ATOM-NEXT:    je .LBB1_1
57; ATOM-NEXT:  # %bb.2:
58; ATOM-NEXT:    cqto
59; ATOM-NEXT:    idivq %rsi
60; ATOM-NEXT:    retq
61; ATOM-NEXT:  .LBB1_1:
62; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
63; ATOM-NEXT:    xorl %edx, %edx
64; ATOM-NEXT:    divl %esi
65; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
66; ATOM-NEXT:    retq
67;
68; X64-LABEL: div64:
69; X64:       # %bb.0: # %entry
70; X64-NEXT:    movq %rdi, %rax
71; X64-NEXT:    movq %rdi, %rcx
72; X64-NEXT:    orq %rsi, %rcx
73; X64-NEXT:    shrq $32, %rcx
74; X64-NEXT:    je .LBB1_1
75; X64-NEXT:  # %bb.2:
76; X64-NEXT:    cqto
77; X64-NEXT:    idivq %rsi
78; X64-NEXT:    retq
79; X64-NEXT:  .LBB1_1:
80; X64-NEXT:    # kill: def $eax killed $eax killed $rax
81; X64-NEXT:    xorl %edx, %edx
82; X64-NEXT:    divl %esi
83; X64-NEXT:    # kill: def $eax killed $eax def $rax
84; X64-NEXT:    retq
85;
86; SLM-LABEL: div64:
87; SLM:       # %bb.0: # %entry
88; SLM-NEXT:    movq %rdi, %rcx
89; SLM-NEXT:    movq %rdi, %rax
90; SLM-NEXT:    orq %rsi, %rcx
91; SLM-NEXT:    shrq $32, %rcx
92; SLM-NEXT:    je .LBB1_1
93; SLM-NEXT:  # %bb.2:
94; SLM-NEXT:    cqto
95; SLM-NEXT:    idivq %rsi
96; SLM-NEXT:    retq
97; SLM-NEXT:  .LBB1_1:
98; SLM-NEXT:    xorl %edx, %edx
99; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
100; SLM-NEXT:    divl %esi
101; SLM-NEXT:    # kill: def $eax killed $eax def $rax
102; SLM-NEXT:    retq
103;
104; SKL-LABEL: div64:
105; SKL:       # %bb.0: # %entry
106; SKL-NEXT:    movq %rdi, %rax
107; SKL-NEXT:    movq %rdi, %rcx
108; SKL-NEXT:    orq %rsi, %rcx
109; SKL-NEXT:    shrq $32, %rcx
110; SKL-NEXT:    je .LBB1_1
111; SKL-NEXT:  # %bb.2:
112; SKL-NEXT:    cqto
113; SKL-NEXT:    idivq %rsi
114; SKL-NEXT:    retq
115; SKL-NEXT:  .LBB1_1:
116; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
117; SKL-NEXT:    xorl %edx, %edx
118; SKL-NEXT:    divl %esi
119; SKL-NEXT:    # kill: def $eax killed $eax def $rax
120; SKL-NEXT:    retq
121;
122; GMT-LABEL: div64:
123; GMT:       # %bb.0: # %entry
124; GMT-NEXT:    movq %rdi, %rax
125; GMT-NEXT:    cqto
126; GMT-NEXT:    idivq %rsi
127; GMT-NEXT:    retq
128;
129; HUGEWS-LABEL: div64:
130; HUGEWS:       # %bb.0: # %entry
131; HUGEWS-NEXT:    movq %rdi, %rax
132; HUGEWS-NEXT:    cqto
133; HUGEWS-NEXT:    idivq %rsi
134; HUGEWS-NEXT:    retq
135entry:
136  %div = sdiv i64 %a, %b
137  ret i64 %div
138}
139
140
141; Verify that no extra code is generated when optimizing for size.
142
; div64_optsize carries the optsize attribute. Every configuration in this
; file expects a plain idivq with no bypass branch, including the CPUs that
; bypass the same division in div64.
143define i64 @div64_optsize(i64 %a, i64 %b) optsize {
144; CHECK-LABEL: div64_optsize:
145; CHECK:       # %bb.0:
146; CHECK-NEXT:    movq %rdi, %rax
147; CHECK-NEXT:    cqto
148; CHECK-NEXT:    idivq %rsi
149; CHECK-NEXT:    retq
150;
151; HUGEWS-LABEL: div64_optsize:
152; HUGEWS:       # %bb.0:
153; HUGEWS-NEXT:    movq %rdi, %rax
154; HUGEWS-NEXT:    cqto
155; HUGEWS-NEXT:    idivq %rsi
156; HUGEWS-NEXT:    retq
157  %div = sdiv i64 %a, %b
158  ret i64 %div
159}
160
; div64_pgso is annotated with !prof !15, which this file defines as a
; function_entry_count of 0. Every configuration expects a plain idivq with
; no bypass branch.
; NOTE(review): the _pgso suffix presumably refers to profile-guided size
; optimization of cold functions - confirm against the bypass heuristic.
161define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
162; CHECK-LABEL: div64_pgso:
163; CHECK:       # %bb.0:
164; CHECK-NEXT:    movq %rdi, %rax
165; CHECK-NEXT:    cqto
166; CHECK-NEXT:    idivq %rsi
167; CHECK-NEXT:    retq
168;
169; HUGEWS-LABEL: div64_pgso:
170; HUGEWS:       # %bb.0:
171; HUGEWS-NEXT:    movq %rdi, %rax
172; HUGEWS-NEXT:    cqto
173; HUGEWS-NEXT:    idivq %rsi
174; HUGEWS-NEXT:    retq
175  %div = sdiv i64 %a, %b
176  ret i64 %div
177}
178
; div64_hugews has the same IR as div64 apart from the missing entry label.
; The per-CPU runs expect the same instruction sequences as for div64 (guarded
; idivq/divl for the ATOM, X64, SLM and SKL prefixes, plain idivq for GMT).
; The HUGEWS run, which passes
; -profile-summary-huge-working-set-size-threshold=1, expects a plain idivq
; with no bypass branch.
179define i64 @div64_hugews(i64 %a, i64 %b) {
180; ATOM-LABEL: div64_hugews:
181; ATOM:       # %bb.0:
182; ATOM-NEXT:    movq %rdi, %rcx
183; ATOM-NEXT:    movq %rdi, %rax
184; ATOM-NEXT:    orq %rsi, %rcx
185; ATOM-NEXT:    shrq $32, %rcx
186; ATOM-NEXT:    je .LBB4_1
187; ATOM-NEXT:  # %bb.2:
188; ATOM-NEXT:    cqto
189; ATOM-NEXT:    idivq %rsi
190; ATOM-NEXT:    retq
191; ATOM-NEXT:  .LBB4_1:
192; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
193; ATOM-NEXT:    xorl %edx, %edx
194; ATOM-NEXT:    divl %esi
195; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
196; ATOM-NEXT:    retq
197;
198; X64-LABEL: div64_hugews:
199; X64:       # %bb.0:
200; X64-NEXT:    movq %rdi, %rax
201; X64-NEXT:    movq %rdi, %rcx
202; X64-NEXT:    orq %rsi, %rcx
203; X64-NEXT:    shrq $32, %rcx
204; X64-NEXT:    je .LBB4_1
205; X64-NEXT:  # %bb.2:
206; X64-NEXT:    cqto
207; X64-NEXT:    idivq %rsi
208; X64-NEXT:    retq
209; X64-NEXT:  .LBB4_1:
210; X64-NEXT:    # kill: def $eax killed $eax killed $rax
211; X64-NEXT:    xorl %edx, %edx
212; X64-NEXT:    divl %esi
213; X64-NEXT:    # kill: def $eax killed $eax def $rax
214; X64-NEXT:    retq
215;
216; SLM-LABEL: div64_hugews:
217; SLM:       # %bb.0:
218; SLM-NEXT:    movq %rdi, %rcx
219; SLM-NEXT:    movq %rdi, %rax
220; SLM-NEXT:    orq %rsi, %rcx
221; SLM-NEXT:    shrq $32, %rcx
222; SLM-NEXT:    je .LBB4_1
223; SLM-NEXT:  # %bb.2:
224; SLM-NEXT:    cqto
225; SLM-NEXT:    idivq %rsi
226; SLM-NEXT:    retq
227; SLM-NEXT:  .LBB4_1:
228; SLM-NEXT:    xorl %edx, %edx
229; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
230; SLM-NEXT:    divl %esi
231; SLM-NEXT:    # kill: def $eax killed $eax def $rax
232; SLM-NEXT:    retq
233;
234; SKL-LABEL: div64_hugews:
235; SKL:       # %bb.0:
236; SKL-NEXT:    movq %rdi, %rax
237; SKL-NEXT:    movq %rdi, %rcx
238; SKL-NEXT:    orq %rsi, %rcx
239; SKL-NEXT:    shrq $32, %rcx
240; SKL-NEXT:    je .LBB4_1
241; SKL-NEXT:  # %bb.2:
242; SKL-NEXT:    cqto
243; SKL-NEXT:    idivq %rsi
244; SKL-NEXT:    retq
245; SKL-NEXT:  .LBB4_1:
246; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
247; SKL-NEXT:    xorl %edx, %edx
248; SKL-NEXT:    divl %esi
249; SKL-NEXT:    # kill: def $eax killed $eax def $rax
250; SKL-NEXT:    retq
251;
252; GMT-LABEL: div64_hugews:
253; GMT:       # %bb.0:
254; GMT-NEXT:    movq %rdi, %rax
255; GMT-NEXT:    cqto
256; GMT-NEXT:    idivq %rsi
257; GMT-NEXT:    retq
258;
259; HUGEWS-LABEL: div64_hugews:
260; HUGEWS:       # %bb.0:
261; HUGEWS-NEXT:    movq %rdi, %rax
262; HUGEWS-NEXT:    cqto
263; HUGEWS-NEXT:    idivq %rsi
264; HUGEWS-NEXT:    retq
265  %div = sdiv i64 %a, %b
266  ret i64 %div
267}
268
; div32_optsize carries the optsize attribute. Every configuration, including
; -mcpu=atom (which bypasses the same division in div32), expects a plain
; idivl with no 8-bit bypass branch.
269define i32 @div32_optsize(i32 %a, i32 %b) optsize {
270; CHECK-LABEL: div32_optsize:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    movl %edi, %eax
273; CHECK-NEXT:    cltd
274; CHECK-NEXT:    idivl %esi
275; CHECK-NEXT:    retq
276;
277; HUGEWS-LABEL: div32_optsize:
278; HUGEWS:       # %bb.0:
279; HUGEWS-NEXT:    movl %edi, %eax
280; HUGEWS-NEXT:    cltd
281; HUGEWS-NEXT:    idivl %esi
282; HUGEWS-NEXT:    retq
283  %div = sdiv i32 %a, %b
284  ret i32 %div
285}
286
; div32_pgso is annotated with !prof !15, which this file defines as a
; function_entry_count of 0. Every configuration, including -mcpu=atom,
; expects a plain idivl with no 8-bit bypass branch.
; NOTE(review): the _pgso suffix presumably refers to profile-guided size
; optimization of cold functions - confirm against the bypass heuristic.
287define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
288; CHECK-LABEL: div32_pgso:
289; CHECK:       # %bb.0:
290; CHECK-NEXT:    movl %edi, %eax
291; CHECK-NEXT:    cltd
292; CHECK-NEXT:    idivl %esi
293; CHECK-NEXT:    retq
294;
295; HUGEWS-LABEL: div32_pgso:
296; HUGEWS:       # %bb.0:
297; HUGEWS-NEXT:    movl %edi, %eax
298; HUGEWS-NEXT:    cltd
299; HUGEWS-NEXT:    idivl %esi
300; HUGEWS-NEXT:    retq
301  %div = sdiv i32 %a, %b
302  ret i32 %div
303}
304
; div32_minsize carries the minsize attribute. Every configuration, including
; -mcpu=atom, expects a plain idivl with no 8-bit bypass branch.
305define i32 @div32_minsize(i32 %a, i32 %b) minsize {
306; CHECK-LABEL: div32_minsize:
307; CHECK:       # %bb.0:
308; CHECK-NEXT:    movl %edi, %eax
309; CHECK-NEXT:    cltd
310; CHECK-NEXT:    idivl %esi
311; CHECK-NEXT:    retq
312;
313; HUGEWS-LABEL: div32_minsize:
314; HUGEWS:       # %bb.0:
315; HUGEWS-NEXT:    movl %edi, %eax
316; HUGEWS-NEXT:    cltd
317; HUGEWS-NEXT:    idivl %esi
318; HUGEWS-NEXT:    retq
319  %div = sdiv i32 %a, %b
320  ret i32 %div
321}
322
; Module-level metadata. !1 installs a "ProfileSummary" module flag whose
; fields are listed in !2 (!3 through !10); !11 holds the three
; "DetailedSummary" entries.
323!llvm.module.flags = !{!1}
324!1 = !{i32 1, !"ProfileSummary", !2}
325!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
326!3 = !{!"ProfileFormat", !"InstrProf"}
327!4 = !{!"TotalCount", i64 10000}
328!5 = !{!"MaxCount", i64 1000}
329!6 = !{!"MaxInternalCount", i64 1}
330!7 = !{!"MaxFunctionCount", i64 1000}
331!8 = !{!"NumCounts", i64 3}
332!9 = !{!"NumFunctions", i64 3}
333!10 = !{!"DetailedSummary", !11}
334!11 = !{!12, !13, !14}
335!12 = !{i32 10000, i64 1000, i32 1}
336!13 = !{i32 999000, i64 1000, i32 3}
337!14 = !{i32 999999, i64 5, i32 3}
; !15 is the function_entry_count of 0 attached via !prof to div64_pgso and
; div32_pgso.
338!15 = !{!"function_entry_count", i64 0}
339