xref: /llvm-project/llvm/test/CodeGen/X86/dup-cost.ll (revision e3cf80c5c1fe55efd8216575ccadea0ab087e79c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
3
4; Cold function, %dup should not be duplicated into predecessors.
5define i32 @cold(i32 %a, ptr %p, ptr %q) !prof !21 {
6; CHECK-LABEL: cold:
7; CHECK:       # %bb.0: # %entry
8; CHECK-NEXT:    cmpl $2, %edi
9; CHECK-NEXT:    jl .LBB0_2
10; CHECK-NEXT:  # %bb.1: # %true1
11; CHECK-NEXT:    movl (%rsi), %eax
12; CHECK-NEXT:    addl $2, %eax
13; CHECK-NEXT:  .LBB0_3: # %dup
14; CHECK-NEXT:    cmpl $5, %eax
15; CHECK-NEXT:    jl .LBB0_5
16; CHECK-NEXT:  # %bb.4: # %true2
17; CHECK-NEXT:    xorl %edi, %eax
18; CHECK-NEXT:    retq
19; CHECK-NEXT:  .LBB0_2: # %false1
20; CHECK-NEXT:    movl (%rdx), %eax
21; CHECK-NEXT:    addl $-3, %eax
22; CHECK-NEXT:    jmp .LBB0_3
23; CHECK-NEXT:  .LBB0_5: # %false2
24; CHECK-NEXT:    andl %edi, %eax
25; CHECK-NEXT:    retq
; IR under test: %entry branches on %a > 1 to %true1 / %false1. Each of those
; loads an i32 (from %p or %q), adjusts it (+2 or -3), and branches to the
; shared block %dup. The function is tagged with entry count !21 and both
; conditional branches use the branch weights in !30.
26entry:
27  %cond1 = icmp sgt i32 %a, 1
28  br i1 %cond1, label %true1, label %false1, !prof !30
29
30true1:
31  %v1 = load i32, ptr %p, align 4
32  %v2 = add i32 %v1, 2
33  br label %dup
34
35false1:
36  %v3 = load i32, ptr %q, align 4
37  %v4 = sub i32 %v3, 3
38  br label %dup
39
; %dup merges the two predecessors through a phi and branches on %v5 > 4.
; The expected assembly above keeps a single copy of this compare
; (`cmpl $5, %eax` under .LBB0_3); %false1 reaches it via `jmp .LBB0_3`.
40dup:
41  %v5 = phi i32 [%v2, %true1], [%v4, %false1]
42  %cond2 = icmp sgt i32 %v5, 4
43  br i1 %cond2, label %true2, label %false2, !prof !30
44
45true2:
46  %v6 = xor i32 %v5, %a
47  br label %exit
48
49false2:
50  %v7 = and i32 %v5, %a
51  br label %exit
52
; %exit returns whichever of %v6 (xor) / %v7 (and) was computed.
53exit:
54  %v8 = phi i32 [%v6, %true2], [%v7, %false2]
55  ret i32 %v8
56}
57
58; Same code as previous function, but with hot profile count.
59; So %dup should be duplicated into predecessors.
60define i32 @hot(i32 %a, ptr %p, ptr %q) !prof !22 {
61; CHECK-LABEL: hot:
62; CHECK:       # %bb.0: # %entry
63; CHECK-NEXT:    cmpl $2, %edi
64; CHECK-NEXT:    jl .LBB1_2
65; CHECK-NEXT:  # %bb.1: # %true1
66; CHECK-NEXT:    movl (%rsi), %eax
67; CHECK-NEXT:    addl $2, %eax
68; CHECK-NEXT:    cmpl $5, %eax
69; CHECK-NEXT:    jge .LBB1_4
70; CHECK-NEXT:  .LBB1_5: # %false2
71; CHECK-NEXT:    andl %edi, %eax
72; CHECK-NEXT:    retq
73; CHECK-NEXT:  .LBB1_2: # %false1
74; CHECK-NEXT:    movl (%rdx), %eax
75; CHECK-NEXT:    addl $-3, %eax
76; CHECK-NEXT:    cmpl $5, %eax
77; CHECK-NEXT:    jl .LBB1_5
78; CHECK-NEXT:  .LBB1_4: # %true2
79; CHECK-NEXT:    xorl %edi, %eax
80; CHECK-NEXT:    retq
; IR under test: the body below matches @cold instruction for instruction;
; the only difference is the entry count attached to the function (!22 here).
; Both conditional branches again use the branch weights in !30.
81entry:
82  %cond1 = icmp sgt i32 %a, 1
83  br i1 %cond1, label %true1, label %false1, !prof !30
84
85true1:
86  %v1 = load i32, ptr %p, align 4
87  %v2 = add i32 %v1, 2
88  br label %dup
89
90false1:
91  %v3 = load i32, ptr %q, align 4
92  %v4 = sub i32 %v3, 3
93  br label %dup
94
; %dup merges the two predecessors through a phi and branches on %v5 > 4.
; In the expected assembly above there is no separate %dup label: the
; `cmpl $5, %eax` compare appears once after %true1 and once after %false1.
95dup:
96  %v5 = phi i32 [%v2, %true1], [%v4, %false1]
97  %cond2 = icmp sgt i32 %v5, 4
98  br i1 %cond2, label %true2, label %false2, !prof !30
99
100true2:
101  %v6 = xor i32 %v5, %a
102  br label %exit
103
104false2:
105  %v7 = and i32 %v5, %a
106  br label %exit
107
; %exit returns whichever of %v6 (xor) / %v7 (and) was computed.
108exit:
109  %v8 = phi i32 [%v6, %true2], [%v7, %false2]
110  ret i32 %v8
111}
112
113
; Module-level profile data. !1 is registered as a module flag and carries the
; ProfileSummary; !21 and !22 are the function entry counts referenced by
; @cold (10) and @hot (400) respectively.
114!llvm.module.flags = !{!1}
115!21 = !{!"function_entry_count", i64 10}
116!22 = !{!"function_entry_count", i64 400}
117
; Even 1:1 weights, used by every conditional branch in both functions.
118!30 = !{!"branch_weights", i32 1, i32 1}
119
; Profile summary node and its fields (!3 .. !10).
; NOTE(review): each DetailedSummary entry (!12 .. !14) looks like a
; (cutoff, min count, num counts) triple, and these presumably set the
; thresholds that separate the two entry counts above - confirm against the
; LLVM ProfileSummary documentation before changing any value.
120!1 = !{i32 1, !"ProfileSummary", !2}
121!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
122!3 = !{!"ProfileFormat", !"InstrProf"}
123!4 = !{!"TotalCount", i64 10000}
124!5 = !{!"MaxCount", i64 10}
125!6 = !{!"MaxInternalCount", i64 1}
126!7 = !{!"MaxFunctionCount", i64 1000}
127!8 = !{!"NumCounts", i64 3}
128!9 = !{!"NumFunctions", i64 3}
129!10 = !{!"DetailedSummary", !11}
130!11 = !{!12, !13, !14}
131!12 = !{i32 10000, i64 100, i32 1}
132!13 = !{i32 999000, i64 100, i32 1}
133!14 = !{i32 999999, i64 1, i32 2}
134