xref: /llvm-project/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll (revision cbdccb30c23f71f20d05b19256232419e7c5e517)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -O2 -o - %s | FileCheck %s
3target datalayout = "e-m:e-i64:64-n32:64"
4target triple = "powerpc64le-grtev4-linux-gnu"
5
6; Expected layout (matches the block order asserted below):
7; The code for tail-duplication during layout will produce the layout:
8; test1
9; test2
10; exit
11; body1 (with copy of test2)
12; body2
13
14define void @tail_dup_break_cfg(i32 %tag) {
15; CHECK-LABEL: tail_dup_break_cfg:
16; CHECK:       # %bb.0: # %entry
17; CHECK-NEXT:    mflr 0
18; CHECK-NEXT:    .cfi_def_cfa_offset 48
19; CHECK-NEXT:    .cfi_offset lr, 16
20; CHECK-NEXT:    .cfi_offset r30, -16
21; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
22; CHECK-NEXT:    stdu 1, -48(1)
23; CHECK-NEXT:    mr 30, 3
24; CHECK-NEXT:    std 0, 64(1)
25; CHECK-NEXT:    andi. 3, 30, 1
26; CHECK-NEXT:    bc 12, 1, .LBB0_3
27; CHECK-NEXT:  # %bb.1: # %test2
28; CHECK-NEXT:    andi. 3, 30, 2
29; CHECK-NEXT:    bne 0, .LBB0_4
30; CHECK-NEXT:  .LBB0_2: # %exit
31; CHECK-NEXT:    addi 1, 1, 48
32; CHECK-NEXT:    ld 0, 16(1)
33; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
34; CHECK-NEXT:    mtlr 0
35; CHECK-NEXT:    blr
36; CHECK-NEXT:  .LBB0_3: # %body1
37; CHECK-NEXT:    bl a
38; CHECK-NEXT:    nop
39; CHECK-NEXT:    bl a
40; CHECK-NEXT:    nop
41; CHECK-NEXT:    bl a
42; CHECK-NEXT:    nop
43; CHECK-NEXT:    bl a
44; CHECK-NEXT:    nop
45; CHECK-NEXT:    andi. 3, 30, 2
46; CHECK-NEXT:    beq 0, .LBB0_2
47; CHECK-NEXT:  .LBB0_4: # %body2
48; CHECK-NEXT:    bl b
49; CHECK-NEXT:    nop
50; CHECK-NEXT:    bl b
51; CHECK-NEXT:    nop
52; CHECK-NEXT:    bl b
53; CHECK-NEXT:    nop
54; CHECK-NEXT:    bl b
55; CHECK-NEXT:    nop
56; CHECK-NEXT:    b .LBB0_2
57entry:
58  br label %test1
59test1:
60  %tagbit1 = and i32 %tag, 1
61  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
62  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
63body1:
64  call void @a()
65  call void @a()
66  call void @a()
67  call void @a()
68  br label %test2 ; the bit-2 test of %test2 is expected to be duplicated onto the end of %body1
69test2:
70  %tagbit2 = and i32 %tag, 2
71  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
72  br i1 %tagbit2eq0, label %exit, label %body2, !prof !1 ; %exit more likely
73body2:
74  call void @b()
75  call void @b()
76  call void @b()
77  call void @b()
78  br label %exit ; %exit is laid out ahead of %body1, so %body2 ends with an explicit branch back to it
79exit:
80  ret void
81}
82
83; The branch weights here hint that we shouldn't tail duplicate in this case: %body2 is the likely successor of %test2, so the blocks stay in source order.
84define void @tail_dup_dont_break_cfg(i32 %tag) {
85; CHECK-LABEL: tail_dup_dont_break_cfg:
86; CHECK:       # %bb.0: # %entry
87; CHECK-NEXT:    mflr 0
88; CHECK-NEXT:    .cfi_def_cfa_offset 48
89; CHECK-NEXT:    .cfi_offset lr, 16
90; CHECK-NEXT:    .cfi_offset r30, -16
91; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
92; CHECK-NEXT:    stdu 1, -48(1)
93; CHECK-NEXT:    mr 30, 3
94; CHECK-NEXT:    std 0, 64(1)
95; CHECK-NEXT:    andi. 3, 30, 1
96; CHECK-NEXT:    bc 4, 1, .LBB1_2
97; CHECK-NEXT:  # %bb.1: # %body1
98; CHECK-NEXT:    bl a
99; CHECK-NEXT:    nop
100; CHECK-NEXT:    bl a
101; CHECK-NEXT:    nop
102; CHECK-NEXT:    bl a
103; CHECK-NEXT:    nop
104; CHECK-NEXT:    bl a
105; CHECK-NEXT:    nop
106; CHECK-NEXT:  .LBB1_2: # %test2
107; CHECK-NEXT:    andi. 3, 30, 2
108; CHECK-NEXT:    beq 0, .LBB1_4
109; CHECK-NEXT:  # %bb.3: # %body2
110; CHECK-NEXT:    bl b
111; CHECK-NEXT:    nop
112; CHECK-NEXT:    bl b
113; CHECK-NEXT:    nop
114; CHECK-NEXT:    bl b
115; CHECK-NEXT:    nop
116; CHECK-NEXT:    bl b
117; CHECK-NEXT:    nop
118; CHECK-NEXT:  .LBB1_4: # %exit
119; CHECK-NEXT:    addi 1, 1, 48
120; CHECK-NEXT:    ld 0, 16(1)
121; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
122; CHECK-NEXT:    mtlr 0
123; CHECK-NEXT:    blr
124entry:
125  br label %test1
126test1:
127  %tagbit1 = and i32 %tag, 1
128  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
129  br i1 %tagbit1eq0, label %test2, label %body1, !prof !1 ; %test2 more likely
130body1:
131  call void @a()
132  call void @a()
133  call void @a()
134  call void @a()
135  br label %test2 ; expected to fall through: %test2 is not duplicated into %body1 here
136test2:
137  %tagbit2 = and i32 %tag, 2
138  %tagbit2ne0 = icmp ne i32 %tagbit2, 0 ; true when bit 1 (mask 2) of %tag is set
139  br i1 %tagbit2ne0, label %body2, label %exit, !prof !3 ; %body2 more likely
140body2:
141  call void @b()
142  call void @b()
143  call void @b()
144  call void @b()
145  br label %exit
146exit:
147  ret void
148}
149
150declare void @a() ; called four times from %body1 in both *_break_cfg tests
151declare void @b() ; called four times from %body2 in both *_break_cfg tests
152declare void @c() ; called twice from %c in @tail_dup_no_succ
153declare void @d() ; called twice from %v in @tail_dup_no_succ
154
155; This function arranges for the successors of %succ to have already been laid
156; out. When we consider whether to lay out succ after bb and to tail-duplicate
157; it, v and ret have already been placed, so we tail-duplicate as it removes a
158; branch and strictly increases fallthrough.
159define void @tail_dup_no_succ(i32 %tag) {
160; CHECK-LABEL: tail_dup_no_succ:
161; CHECK:       # %bb.0: # %entry
162; CHECK-NEXT:    mflr 0
163; CHECK-NEXT:    .cfi_def_cfa_offset 48
164; CHECK-NEXT:    .cfi_offset lr, 16
165; CHECK-NEXT:    .cfi_offset r30, -16
166; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
167; CHECK-NEXT:    stdu 1, -48(1)
168; CHECK-NEXT:    andi. 4, 3, 1
169; CHECK-NEXT:    std 0, 64(1)
170; CHECK-NEXT:    bc 12, 1, .LBB2_3
171; CHECK-NEXT:  .LBB2_1: # %v
172; CHECK-NEXT:    bl d
173; CHECK-NEXT:    nop
174; CHECK-NEXT:    bl d
175; CHECK-NEXT:    nop
176; CHECK-NEXT:  .LBB2_2: # %ret
177; CHECK-NEXT:    addi 1, 1, 48
178; CHECK-NEXT:    ld 0, 16(1)
179; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
180; CHECK-NEXT:    mtlr 0
181; CHECK-NEXT:    blr
182; CHECK-NEXT:  .LBB2_3: # %bb
183; CHECK-NEXT:    andi. 4, 3, 2
184; CHECK-NEXT:    bne 0, .LBB2_5
185; CHECK-NEXT:  # %bb.4: # %succ
186; CHECK-NEXT:    andi. 3, 3, 4
187; CHECK-NEXT:    beq 0, .LBB2_2
188; CHECK-NEXT:    b .LBB2_1
189; CHECK-NEXT:  .LBB2_5: # %c
190; CHECK-NEXT:    mr 30, 3
191; CHECK-NEXT:    bl c
192; CHECK-NEXT:    nop
193; CHECK-NEXT:    bl c
194; CHECK-NEXT:    nop
195; CHECK-NEXT:    mr 3, 30
196; CHECK-NEXT:    andi. 3, 3, 4
197; CHECK-NEXT:    beq 0, .LBB2_2
198; CHECK-NEXT:    b .LBB2_1
199entry:
200  %tagbit1 = and i32 %tag, 1
201  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
202  br i1 %tagbit1eq0, label %v, label %bb, !prof !2 ; %v very much more likely
203bb:
204  %tagbit2 = and i32 %tag, 2
205  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
206  br i1 %tagbit2eq0, label %succ, label %c, !prof !3 ; %succ more likely
207c:
208  call void @c()
209  call void @c()
210  br label %succ ; the bit-4 test of %succ is expected to be duplicated onto the end of %c
211succ:
212  %tagbit3 = and i32 %tag, 4
213  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
214  br i1 %tagbit3eq0, label %ret, label %v, !prof !1 ; %ret more likely
215v:
216  call void @d()
217  call void @d()
218  br label %ret
219ret:
220  ret void
221}
222
223!1 = !{!"branch_weights", i32 5, i32 3} ; first-listed successor favoured 5:3 (the "more likely" branches)
224!2 = !{!"branch_weights", i32 95, i32 5} ; first-listed successor favoured 95:5 (entry of @tail_dup_no_succ)
225!3 = !{!"branch_weights", i32 8, i32 3} ; first-listed successor favoured 8:3
226