xref: /llvm-project/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll (revision e3cf80c5c1fe55efd8216575ccadea0ab087e79c)
1; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
2; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O2 %s
3; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
4; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O3 %s
; Module-level target description shared by every function in this test.
5target datalayout = "e-m:e-i64:64-n32:64" ; little-endian, ELF mangling, i64 aligned to 64 bits, native integer widths 32 and 64
6target triple = "powerpc64le-grtev4-linux-gnu" ; 64-bit little-endian PowerPC on Linux
7
8; Intended layout:
9; The chain-based outlining produces the layout
10; test1
11; test2
12; test3
13; test4
14; optional1
15; optional2
16; optional3
17; optional4
18; exit
19; Tail duplication puts test n+1 at the end of optional n
20; so optional1 includes a copy of test2 at the end, and branches
21; to test3 (at the top) or falls through to optional2.
22; The CHECK statements check for the whole string of tests
23; and then check that the correct test has been duplicated into the end of
24; the optional blocks and that the optional blocks are in the correct order.
25;CHECK-LABEL: straight_test:
26; test1 may have been merged with entry
27;CHECK: mr [[TAGREG:[0-9]+]], 3
28;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1
29;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
30;CHECK-NEXT: # %test2
31;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
32;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
33;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
34;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
35;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
36;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
37;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 8
38;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
39;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
40;CHECK: blr
41;CHECK-NEXT: .[[OPT1LABEL]]:
42;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
43;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
44;CHECK-NEXT: .[[OPT2LABEL]]:
45;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
46;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
47;CHECK-NEXT: .[[OPT3LABEL]]:
48;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
49;CHECK-NEXT: beq 0, .[[EXITLABEL]]
50;CHECK-NEXT: .[[OPT4LABEL]]:
51;CHECK: b .[[EXITLABEL]]
52
; Chain of four triangles: each testN masks one bit of %tag and either jumps
; straight to the next test or first runs optionalN (four calls) and rejoins.
; All four conditional branches carry !prof !1.
; Block names and block order matter here: the assertions above match the
; "# %name" comments that llc emits for these blocks.
53define void @straight_test(i32 %tag) {
54entry:
55  br label %test1
56test1:
57  %tagbit1 = and i32 %tag, 1 ; mask 1: bit 0 of %tag
58  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
59  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1 ; bit clear -> skip optional1
60optional1:
61  call void @a()
62  call void @a()
63  call void @a()
64  call void @a()
65  br label %test2
66test2:
67  %tagbit2 = and i32 %tag, 2 ; mask 2: bit 1 of %tag
68  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
69  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1 ; bit clear -> skip optional2
70optional2:
71  call void @b()
72  call void @b()
73  call void @b()
74  call void @b()
75  br label %test3
76test3:
77  %tagbit3 = and i32 %tag, 4 ; mask 4: bit 2 of %tag
78  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
79  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1 ; bit clear -> skip optional3
80optional3:
81  call void @c()
82  call void @c()
83  call void @c()
84  call void @c()
85  br label %test4
86test4:
87  %tagbit4 = and i32 %tag, 8 ; mask 8: bit 3 of %tag
88  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
89  br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1 ; bit clear -> skip optional4
90optional4:
91  call void @d()
92  call void @d()
93  call void @d()
94  call void @d()
95  br label %exit
96exit:
97  ret void
98}
99
100; Intended layout:
101; The chain-of-triangles based duplicating produces the layout
102; test1
103; test2
104; test3
105; optional1
106; optional2
107; optional3
108; exit
109; even for 50/50 branches.
110; Tail duplication puts test n+1 at the end of optional n
111; so optional1 includes a copy of test2 at the end, and branches
112; to test3 (at the top) or falls through to optional2.
113; The CHECK statements check for the whole string of tests
114; and then check that the correct test has been duplicated into the end of
115; the optional blocks and that the optional blocks are in the correct order.
116;CHECK-LABEL: straight_test_50:
117; test1 may have been merged with entry
118;CHECK: mr [[TAGREG:[0-9]+]], 3
119;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
120;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
121;CHECK-NEXT: # %test2
122;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
123;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
124;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
125;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
126;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
127;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
128;CHECK: blr
129;CHECK-NEXT: .[[OPT1LABEL]]:
130;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
131;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
132;CHECK-NEXT: .[[OPT2LABEL]]:
133;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
134;CHECK-NEXT: beq 0, .[[EXITLABEL]]
135;CHECK-NEXT: .[[OPT3LABEL]]:
136;CHECK: b .[[EXITLABEL]]
137
; Chain of three triangles where each optional block is a single call.
; test1 and test2 branch with !prof !2 (equal weights); test3 branches with
; !prof !1 instead.
138define void @straight_test_50(i32 %tag) {
139entry:
140  br label %test1
141test1:
142  %tagbit1 = and i32 %tag, 1 ; mask 1: bit 0 of %tag
143  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
144  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2 ; bit clear -> skip optional1
145optional1:
146  call void @a()
147  br label %test2
148test2:
149  %tagbit2 = and i32 %tag, 2 ; mask 2: bit 1 of %tag
150  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
151  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2 ; bit clear -> skip optional2
152optional2:
153  call void @b()
154  br label %test3
155test3:
156  %tagbit3 = and i32 %tag, 4 ; mask 4: bit 2 of %tag
157  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
158  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1 ; note: !1 here, not the balanced !2
159optional3:
160  call void @c()
161  br label %exit
162exit:
163  ret void
164}
165
166; Intended layout:
167; The chain-of-triangles based duplicating produces the layout when 3
168; instructions are allowed for tail-duplication.
169; test1
170; test2
171; test3
172; optional1
173; optional2
174; optional3
175; exit
176;
177; Otherwise it produces the layout:
178; test1
179; optional1
180; test2
181; optional2
182; test3
183; optional3
184; exit
185
186;CHECK-LABEL: straight_test_3_instr_test:
187; test1 may have been merged with entry
188;CHECK: mr [[TAGREG:[0-9]+]], 3
189;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
190;CHECK-NEXT: cmplwi {{[0-9]+}}, 2
191
192;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
193;CHECK-O3-NEXT: # %test2
194;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
195;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
196;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
197;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
198;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
199;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
200;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
201;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
202;CHECK-O3: blr
203;CHECK-O3-NEXT: .[[OPT1LABEL]]:
204;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
205;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
206;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
207;CHECK-O3-NEXT: .[[OPT2LABEL]]:
208;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
209;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
210;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
211;CHECK-O3-NEXT: .[[OPT3LABEL]]:
212;CHECK-O3: b .[[EXITLABEL]]
213
214;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
215;CHECK-O2-NEXT: # %optional1
216;CHECK-O2: .[[TEST2LABEL]]: # %test2
217;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
218;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
219;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
220;CHECK-O2-NEXT: # %optional2
221;CHECK-O2: .[[TEST3LABEL]]: # %test3
222;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
223;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
224;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
225;CHECK-O2-NEXT: # %optional3
226;CHECK-O2: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
227;CHECK-O2: blr
228
229
; Same three-triangle shape as @straight_test_50, but each test masks two
; bits of %tag and compares the result against a non-zero constant.
; Despite their "...eq0" names, the compare results below test for equality
; with 2, 8 and 32 respectively, not with 0.
230define void @straight_test_3_instr_test(i32 %tag) {
231entry:
232  br label %test1
233test1:
234  %tagbit1 = and i32 %tag, 3 ; mask 3: bits 0-1 of %tag
235  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
236  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2 ; field == 2 -> skip optional1
237optional1:
238  call void @a()
239  br label %test2
240test2:
241  %tagbit2 = and i32 %tag, 12 ; mask 12: bits 2-3 of %tag
242  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
243  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2 ; field == 8 -> skip optional2
244optional2:
245  call void @b()
246  br label %test3
247test3:
248  %tagbit3 = and i32 %tag, 48 ; mask 48: bits 4-5 of %tag
249  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
250  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1 ; field == 32 -> skip optional3; uses !1, not !2
251optional3:
252  call void @c()
253  br label %exit
254exit:
255  ret void
256}
257
258; Intended layout:
259; The chain-based outlining produces the layout
260; entry
261; --- Begin loop ---
262; for.latch
263; for.check
264; test1
265; test2
266; test3
267; test4
268; optional1
269; optional2
270; optional3
271; optional4
272; --- End loop ---
273; exit
274; The CHECK statements check for the whole string of tests and exit block,
275; and then check that the correct test has been duplicated into the end of
276; the optional blocks and that the optional blocks are in the correct order.
277;CHECK-LABEL: loop_test:
278;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
279;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
280;CHECK: addi
281;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
282;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
283;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
284;CHECK: # %bb.{{[0-9]+}}: # %test1
285;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
286;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
287;CHECK-NEXT: # %test2
288;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
289;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
290;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
291;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
292;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
293;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
294;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
295;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
296;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
297;CHECK: [[OPT1LABEL]]
298;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
299;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
300;CHECK-NEXT: .[[OPT2LABEL]]
301;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
302;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
303;CHECK-NEXT: .[[OPT3LABEL]]
304;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
305;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
306;CHECK: [[OPT4LABEL]]:
307;CHECK: b .[[LATCHLABEL]]
; The four-triangle chain from @straight_test placed inside a loop.
; for.check loads a tag and decides whether to run the chain or leave through
; %exit; test4 and optional4 both continue to for.latch, which decrements the
; counter and branches back to for.check.
308define void @loop_test(ptr %tags, i32 %count) {
309entry:
310  br label %for.check
311for.check:
312  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch] ; %count on entry, decremented value from the latch afterwards
313  %done.count = icmp ugt i32 %count.loop, 0
; NOTE(review): the index below is %count, not %count.loop, so the computed
; address is the same on every iteration - confirm this is intentional.
314  %tag_ptr = getelementptr inbounds i32, ptr %tags, i32 %count
315  %tag = load i32, ptr %tag_ptr
316  %done.tag = icmp eq i32 %tag, 0
317  %done = and i1 %done.count, %done.tag
318  br i1 %done, label %test1, label %exit, !prof !1 ; %done true -> run the test chain, false -> leave the loop
319test1:
320  %tagbit1 = and i32 %tag, 1 ; mask 1: bit 0 of %tag
321  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
322  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
323optional1:
324  call void @a()
325  call void @a()
326  call void @a()
327  call void @a()
328  br label %test2
329test2:
330  %tagbit2 = and i32 %tag, 2 ; mask 2: bit 1 of %tag
331  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
332  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
333optional2:
334  call void @b()
335  call void @b()
336  call void @b()
337  call void @b()
338  br label %test3
339test3:
340  %tagbit3 = and i32 %tag, 4 ; mask 4: bit 2 of %tag
341  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
342  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
343optional3:
344  call void @c()
345  call void @c()
346  call void @c()
347  call void @c()
348  br label %test4
349test4:
350  %tagbit4 = and i32 %tag, 8 ; mask 8: bit 3 of %tag
351  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
352  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1 ; last test continues to the latch instead of an exit block
353optional4:
354  call void @d()
355  call void @d()
356  call void @d()
357  call void @d()
358  br label %for.latch
359for.latch:
360  %count.sub = sub i32 %count.loop, 1 ; feeds the phi in for.check
361  br label %for.check
362exit:
363  ret void
364}
365
366; The block then2 is not unavoidable, meaning it does not dominate the exit.
367; But since it can be tail-duplicated, it should be placed as a fallthrough from
368; test2 and copied. The purpose here is to make sure that the tail-duplication
369; code is independent of the outlining code, which works by choosing the
370; "unavoidable" blocks.
371; CHECK-LABEL: avoidable_test:
372; CHECK: # %bb.{{[0-9]+}}: # %entry
373; CHECK: andi.
374; CHECK: # %bb.{{[0-9]+}}: # %test2
375; Make sure else2 falls through from test2
376; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
377; CHECK: # %bb.{{[0-9]+}}: # %else2
378; CHECK: bl c
379; CHECK: # %else1
380; CHECK: bl a
381; CHECK: bl a
382; CHECK: # %then2
383; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4
384; CHECK: # %end1
385; CHECK: bl d
386; CHECK: # %end2
; Control flow: test1 goes to test2 or else1; else1 always continues to then2;
; test2 goes to then2 or else2; else2 bypasses then2 and jumps to end2.
; then2 therefore has two predecessors (else1 and test2) and is not on every
; path to a return. The function has two return blocks, end2 and end1.
387define void @avoidable_test(i32 %tag) {
388entry:
389  br label %test1
390test1:
391  %tagbit1 = and i32 %tag, 1 ; mask 1: bit 0 of %tag
392  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
393  br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
394else1:
395  call void @a()
396  call void @a()
397  br label %then2
398test2:
399  %tagbit2 = and i32 %tag, 2 ; mask 2: bit 1 of %tag
400  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
401  br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
402then2:
403  %tagbit3 = and i32 %tag, 4 ; mask 4: bit 2 of %tag
404  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
405  br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
406else2:
407  call void @c()
408  br label %end2
409end2:
410  ret void
411end1:
412  call void @d()
413  ret void
414}
415
416; CHECK-LABEL: trellis_test
417; The number in the block labels is the expected block frequency given the
418; probabilities annotated. There is a conflict in the b;c->d;e trellis that
419; should be resolved as c->e;b->d.
420; The d;e->f;g trellis should be resolved as e->g;d->f.
421; The f;g->h;i trellis should be resolved as f->i;g->h.
422; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
423; h->j->ret
424; CHECK: # %bb.{{[0-9]+}}: # %entry
425; CHECK: # %bb.{{[0-9]+}}: # %c10
426; CHECK: # %e9
427; CHECK: # %g10
428; CHECK: # %h10
429; CHECK: # %j8
430; CHECK: # %ret
431; CHECK: # %b6
432; CHECK: # %d7
433; CHECK: # %f6
434; CHECK: # %i6
; Stack of two-by-two trellises: each pair of blocks {b6,c10}, {d7,e9},
; {f6,g10}, {h10,i6} branches to both blocks of the next pair, and the last
; pair branches to j8 or ret, with j8 falling into ret.
; Every block makes two calls to the function matching its letter, then masks
; two bits of %tag to choose a successor.
435define void @trellis_test(i32 %tag) {
436entry:
437  br label %a16
438a16:
439  call void @a()
440  call void @a()
441  %tagbits.a = and i32 %tag, 3 ; mask 3: bits 0-1 of %tag
442  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
443  br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
444c10:
445  call void @c()
446  call void @c()
447  %tagbits.c = and i32 %tag, 12 ; mask 12: bits 2-3 of %tag
448  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
449  ; Both of these edges should be hotter than the other incoming edge
450  ; for e9 or d7
451  br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
452e9:
453  call void @e()
454  call void @e()
455  %tagbits.e = and i32 %tag, 48 ; mask 48: bits 4-5 of %tag
456  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
457  br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
458g10:
459  call void @g()
460  call void @g()
461  %tagbits.g = and i32 %tag, 192 ; mask 192: bits 6-7 of %tag
462  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
463  br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
464i6:
465  call void @i()
466  call void @i()
467  %tagbits.i = and i32 %tag, 768 ; mask 768: bits 8-9 of %tag
468  %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
469  br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
470b6:
471  call void @b()
472  call void @b()
473  %tagbits.b = and i32 %tag, 12 ; same bits as c10, compared against 8 instead of 0
474  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
475  br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
476d7:
477  call void @d()
478  call void @d()
479  %tagbits.d = and i32 %tag, 48 ; same bits as e9, compared against 32 instead of 0
480  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
481  br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
482f6:
483  call void @f()
484  call void @f()
485  %tagbits.f = and i32 %tag, 192 ; same bits as g10, compared against 128 instead of 0
486  %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
487  br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
488h10:
489  call void @h()
490  call void @h()
491  %tagbits.h = and i32 %tag, 768 ; same bits as i6, compared against 512 instead of 0
492  %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
493  br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
494j8:
495  call void @j()
496  call void @j()
497  br label %ret
498ret:
499  ret void
500}
501
502; Verify that we still consider tail-duplication opportunities if we find a
503; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
504; of both F and G. The basic trellis algorithm picks the F->G edge, but after
505; checking, it's profitable to duplicate G into F. The weights here are not
506; really important. They are there to help make the test stable.
507; CHECK-LABEL: trellis_then_dup_test
508; CHECK: # %bb.{{[0-9]+}}: # %entry
509; CHECK: # %bb.{{[0-9]+}}: # %b
510; CHECK: # %d
511; CHECK: # %g
512; CHECK: # %ret1
513; CHECK: # %c
514; CHECK: # %e
515; CHECK: # %f
516; CHECK: # %ret2
517; CHECK: # %ret
; Control flow: a -> {b, c}; b and c each -> {d, e}; d and e each -> {g, f};
; f -> g; g -> {ret1, ret2}; ret1 and ret2 -> ret.
; g contains no calls, only the mask, compare and branch.
518define void @trellis_then_dup_test(i32 %tag) {
519entry:
520  br label %a
521a:
522  call void @a()
523  call void @a()
524  %tagbits.a = and i32 %tag, 3 ; mask 3: bits 0-1 of %tag
525  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
526  br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
527b:
528  call void @b()
529  call void @b()
530  %tagbits.b = and i32 %tag, 12 ; mask 12: bits 2-3 of %tag
531  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
532  br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
533d:
534  call void @d()
535  call void @d()
536  %tagbits.d = and i32 %tag, 48 ; mask 48: bits 4-5 of %tag
537  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
538  br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
539f:
540  call void @f()
541  call void @f()
542  br label %g
543g:
544  %tagbits.g = and i32 %tag, 192 ; mask 192: bits 6-7 of %tag
545  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
546  br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
547c:
548  call void @c()
549  call void @c()
550  %tagbits.c = and i32 %tag, 12 ; same bits as b, compared against 0 instead of 8
551  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
552  br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
553e:
554  call void @e()
555  call void @e()
556  %tagbits.e = and i32 %tag, 48 ; same bits as d, compared against 0 instead of 32
557  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
558  br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
559ret1:
560  call void @a()
561  br label %ret
562ret2:
563  call void @b()
564  br label %ret
565ret:
566  ret void
567}
568
569; Verify that we do not mis-identify a trellis as a triangle trellis when it
570; is not really a triangle.
571; CHECK-LABEL: trellis_no_triangle
572; CHECK: # %bb.{{[0-9]+}}: # %entry
573; CHECK: # %bb.{{[0-9]+}}: # %b
574; CHECK: # %d
575; CHECK: # %ret
576; CHECK: # %c
577; CHECK: # %e
; Control flow: a -> {b, c}; b and c each -> {d, e}; d -> {ret, e}; e -> ret.
; The a -> b, b -> d and d -> ret edges carry heavily skewed weights
; (!8, !9, !10); only c branches with balanced weights (!2).
578define void @trellis_no_triangle(i32 %tag) {
579entry:
580  br label %a
581a:
582  call void @a()
583  call void @a()
584  %tagbits.a = and i32 %tag, 3 ; mask 3: bits 0-1 of %tag
585  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
586  br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
587b:
588  call void @b()
589  call void @b()
590  %tagbits.b = and i32 %tag, 12 ; mask 12: bits 2-3 of %tag
591  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
592  br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
593d:
594  call void @d()
595  call void @d()
596  %tagbits.d = and i32 %tag, 48 ; mask 48: bits 4-5 of %tag
597  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
598  br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
599c:
600  call void @c()
601  call void @c()
602  %tagbits.c = and i32 %tag, 12 ; same bits as b, compared against 0 instead of 8
603  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
604  br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
605e:
606  call void @e()
607  call void @e()
608  br label %ret
609ret:
610  call void @f()
611  ret void
612}
613
; External functions called from the test blocks above. Each takes no
; arguments and returns void; no definitions are provided in this file.
614declare void @a()
615declare void @b()
616declare void @c()
617declare void @d()
618declare void @e()
619declare void @f()
620declare void @g()
621declare void @h()
622declare void @i()
623declare void @j()
624
; Branch-weight profiles attached to conditional branches through !prof.
; For a two-way br, the first weight belongs to the first (true) successor
; and the second weight to the second (false) successor.
625!1 = !{!"branch_weights", i32 5, i32 3} ; 5 to 3; the default bias used throughout this file
626!2 = !{!"branch_weights", i32 50, i32 50} ; balanced
627!3 = !{!"branch_weights", i32 6, i32 4} ; used by c10 in @trellis_test
628!4 = !{!"branch_weights", i32 7, i32 2} ; used by e9 in @trellis_test
629!5 = !{!"branch_weights", i32 2, i32 8} ; used by g10 in @trellis_test
630!6 = !{!"branch_weights", i32 3, i32 4} ; used by d7 in @trellis_test
631!7 = !{!"branch_weights", i32 4, i32 2} ; used by f6 in @trellis_test
632!8 = !{!"branch_weights", i32 98, i32 2} ; used by a in @trellis_no_triangle
633!9 = !{!"branch_weights", i32 97, i32 1} ; used by b in @trellis_no_triangle
634!10 = !{!"branch_weights", i32 96, i32 2} ; used by d in @trellis_no_triangle
635