; RUN: llc -O2 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O2 %s
; RUN: llc -O3 -ppc-reduce-cr-logicals -o - %s | FileCheck \
; RUN:   --check-prefix=CHECK --check-prefix=CHECK-O3 %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-grtev4-linux-gnu"

; Block-layout tests: each function below is paired with directives that pin
; the order of its basic blocks (and, where noted, the tail-duplicated copies)
; in the assembly emitted for the triple above.

; Intended layout:
; The chain-based outlining produces the layout
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; exit
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
; TAGREG is already captured by the mr match above; it is used (not re-bound)
; here so the tested register must be the copied argument.
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[_0-9A-Za-z]+]]: # %test4
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: bne 0, .[[OPT4LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT4LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %exit, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-of-triangles based duplicating produces the layout
; test1
; test2
; test3
; optional1
; optional2
; optional3
; exit
; even for 50/50 branches.
; Tail duplication puts test n+1 at the end of optional n
; so optional1 includes a copy of test2 at the end, and branches
; to test3 (at the top) or falls through to optional2.
; The CHECK statements check for the whole string of tests
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: straight_test_50:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK: blr
;CHECK-NEXT: .[[OPT1LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]:
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]:
;CHECK: b .[[EXITLABEL]]

define void @straight_test_50(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-of-triangles based duplicating produces the layout when 3
; instructions are allowed for tail-duplication.
; test1
; test2
; test3
; optional1
; optional2
; optional3
; exit
;
; Otherwise it produces the layout:
; test1
; optional1
; test2
; optional2
; test3
; optional3
; exit

;CHECK-LABEL: straight_test_3_instr_test:
; test1 may have been merged with entry
;CHECK: mr [[TAGREG:[0-9]+]], 3
;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30
;CHECK-NEXT: cmplwi {{[0-9]+}}, 2

;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: # %test2
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[TEST3LABEL:[_0-9A-Za-z]+]]: # %test3
;CHECK-O3-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: bne 0, .[[OPT3LABEL:[_0-9A-Za-z]+]]
;CHECK-O3-NEXT: .[[EXITLABEL:[_0-9A-Za-z]+]]: # %exit
;CHECK-O3: blr
;CHECK-O3-NEXT: .[[OPT1LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O3-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-O3-NEXT: .[[OPT2LABEL]]:
;CHECK-O3: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O3-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O3-NEXT: beq 0, .[[EXITLABEL]]
;CHECK-O3-NEXT: .[[OPT3LABEL]]:
;CHECK-O3: b .[[EXITLABEL]]

;CHECK-O2-NEXT: beq 0, .[[TEST2LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional1
;CHECK-O2: .[[TEST2LABEL]]: # %test2
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 28, 29
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 8
;CHECK-O2-NEXT: beq 0, .[[TEST3LABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional2
;CHECK-O2: .[[TEST3LABEL]]: # %test3
;CHECK-O2-NEXT: rlwinm {{[0-9]+}}, [[TAGREG]], 0, 26, 27
;CHECK-O2-NEXT: cmplwi {{[0-9]+}}, 32
;CHECK-O2-NEXT: beq 0, .[[EXITLABEL:[_0-9A-Za-z]+]]
;CHECK-O2-NEXT: # %optional3
; EXITLABEL is captured by the branch two lines up; using it here (rather than
; re-binding it) requires the exit label to be that branch's target, matching
; how TEST2LABEL and TEST3LABEL are handled above.
;CHECK-O2: .[[EXITLABEL]]: # %exit
;CHECK-O2: blr


define void @straight_test_3_instr_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 3
  %tagbit1eq0 = icmp eq i32 %tagbit1, 2
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !2
optional1:
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 12
  %tagbit2eq0 = icmp eq i32 %tagbit2, 8
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !2
optional2:
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 48
  %tagbit3eq0 = icmp eq i32 %tagbit3, 32
  br i1 %tagbit3eq0, label %exit, label %optional3, !prof !1
optional3:
  call void @c()
  br label %exit
exit:
  ret void
}

; Intended layout:
; The chain-based outlining produces the layout
; entry
; --- Begin loop ---
; for.latch
; for.check
; test1
; test2
; test3
; test4
; optional1
; optional2
; optional3
; optional4
; --- End loop ---
; exit
; The CHECK statements check for the whole string of tests and exit block,
; and then check that the correct test has been duplicated into the end of
; the optional blocks and that the optional blocks are in the correct order.
;CHECK-LABEL: loop_test:
;CHECK: add [[TAGPTRREG:[0-9]+]], 3, 4
;CHECK: .[[LATCHLABEL:[._0-9A-Za-z]+]]: # %for.latch
;CHECK: addi
;CHECK-O2: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: lwz [[TAGREG:[0-9]+]], 0([[TAGPTRREG]])
;CHECK-O3: .[[CHECKLABEL:[._0-9A-Za-z]+]]: # %for.check
;CHECK: # %bb.{{[0-9]+}}: # %test1
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: # %test2
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: bne 0, .[[OPT2LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST3LABEL:[._0-9A-Za-z]+]]: # %test3
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: bne 0, .[[OPT3LABEL:[._0-9A-Za-z]+]]
;CHECK-NEXT: .[[TEST4LABEL:[._0-9A-Za-z]+]]: # %{{(test4|optional3)}}
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK-NEXT: b .[[OPT4LABEL:[._0-9A-Za-z]+]]
;CHECK: [[OPT1LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 2
;CHECK-NEXT: beq 0, .[[TEST3LABEL]]
;CHECK-NEXT: .[[OPT2LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 4
;CHECK-NEXT: beq 0, .[[TEST4LABEL]]
;CHECK-NEXT: .[[OPT3LABEL]]
;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 8
;CHECK-NEXT: beq 0, .[[LATCHLABEL]]
;CHECK: [[OPT4LABEL]]:
;CHECK: b .[[LATCHLABEL]]
define void @loop_test(ptr %tags, i32 %count) {
entry:
  br label %for.check
for.check:
  %count.loop = phi i32 [%count, %entry], [%count.sub, %for.latch]
  %done.count = icmp ugt i32 %count.loop, 0
  %tag_ptr = getelementptr inbounds i32, ptr %tags, i32 %count
  %tag = load i32, ptr %tag_ptr
  %done.tag = icmp eq i32 %tag, 0
  %done = and i1 %done.count, %done.tag
  br i1 %done, label %test1, label %exit, !prof !1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %optional1, !prof !1
optional1:
  call void @a()
  call void @a()
  call void @a()
  call void @a()
  br label %test2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %test3, label %optional2, !prof !1
optional2:
  call void @b()
  call void @b()
  call void @b()
  call void @b()
  br label %test3
test3:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %test4, label %optional3, !prof !1
optional3:
  call void @c()
  call void @c()
  call void @c()
  call void @c()
  br label %test4
test4:
  %tagbit4 = and i32 %tag, 8
  %tagbit4eq0 = icmp eq i32 %tagbit4, 0
  br i1 %tagbit4eq0, label %for.latch, label %optional4, !prof !1
optional4:
  call void @d()
  call void @d()
  call void @d()
  call void @d()
  br label %for.latch
for.latch:
  %count.sub = sub i32 %count.loop, 1
  br label %for.check
exit:
  ret void
}

; The block then2 is not unavoidable, meaning it does not dominate the exit.
; But since it can be tail-duplicated, it should be placed as a fallthrough from
; test2 and copied. The purpose here is to make sure that the tail-duplication
; code is independent of the outlining code, which works by choosing the
; "unavoidable" blocks.
; CHECK-LABEL: avoidable_test:
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: andi.
; CHECK: # %bb.{{[0-9]+}}: # %test2
; Make sure else2 falls through from test2
; CHECK-NOT: # %{{[-_a-zA-Z0-9]+}}
; CHECK: # %bb.{{[0-9]+}}: # %else2
; CHECK: bl c
; CHECK: # %else1
; CHECK: bl a
; CHECK: bl a
; CHECK: # %then2
; CHECK: andi. {{[0-9]+}}, {{[0-9]+}}, 4
; CHECK: # %end1
; CHECK: bl d
; CHECK: # %end2
define void @avoidable_test(i32 %tag) {
entry:
  br label %test1
test1:
  %tagbit1 = and i32 %tag, 1
  %tagbit1eq0 = icmp eq i32 %tagbit1, 0
  br i1 %tagbit1eq0, label %test2, label %else1, !prof !1 ; %test2 more likely
else1:
  call void @a()
  call void @a()
  br label %then2
test2:
  %tagbit2 = and i32 %tag, 2
  %tagbit2eq0 = icmp eq i32 %tagbit2, 0
  br i1 %tagbit2eq0, label %then2, label %else2, !prof !1 ; %then2 more likely
then2:
  %tagbit3 = and i32 %tag, 4
  %tagbit3eq0 = icmp eq i32 %tagbit3, 0
  br i1 %tagbit3eq0, label %end2, label %end1, !prof !1 ; %end2 more likely
else2:
  call void @c()
  br label %end2
end2:
  ret void
end1:
  call void @d()
  ret void
}

; CHECK-LABEL: trellis_test
; The number in the block labels is the expected block frequency given the
; probabilities annotated. There is a conflict in the b;c->d;e trellis that
; should be resolved as c->e;b->d.
; The d;e->f;g trellis should be resolved as e->g;d->f.
; The f;g->h;i trellis should be resolved as f->i;g->h.
; The h;i->j;ret trellis contains a triangle edge, and should be resolved as
; h->j->ret
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %c10
; CHECK: # %e9
; CHECK: # %g10
; CHECK: # %h10
; CHECK: # %j8
; CHECK: # %ret
; CHECK: # %b6
; CHECK: # %d7
; CHECK: # %f6
; CHECK: # %i6
define void @trellis_test(i32 %tag) {
entry:
  br label %a16
a16:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %c10, label %b6, !prof !1 ; 10 to 6
c10:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  ; Both of these edges should be hotter than the other incoming edge
  ; for e9 or d7
  br i1 %tagbits.c.eq0, label %e9, label %d7, !prof !3 ; 6 to 4
e9:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g10, label %f6, !prof !4 ; 7 to 2
g10:
  call void @g()
  call void @g()
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %i6, label %h10, !prof !5 ; 2 to 8
i6:
  call void @i()
  call void @i()
  %tagbits.i = and i32 %tag, 768
  %tagbits.i.eq0 = icmp eq i32 %tagbits.i, 0
  br i1 %tagbits.i.eq0, label %ret, label %j8, !prof !2 ; balanced (3 to 3)
b6:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %e9, label %d7, !prof !2 ; balanced (3 to 3)
d7:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g10, label %f6, !prof !6 ; 3 to 4
f6:
  call void @f()
  call void @f()
  %tagbits.f = and i32 %tag, 192
  %tagbits.f.eq1 = icmp eq i32 %tagbits.f, 128
  br i1 %tagbits.f.eq1, label %i6, label %h10, !prof !7 ; 4 to 2
h10:
  call void @h()
  call void @h()
  %tagbits.h = and i32 %tag, 768
  %tagbits.h.eq1 = icmp eq i32 %tagbits.h, 512
  br i1 %tagbits.h.eq1, label %ret, label %j8, !prof !2 ; balanced (5 to 5)
j8:
  call void @j()
  call void @j()
  br label %ret
ret:
  ret void
}

; Verify that we still consider tail-duplication opportunities if we find a
; triangle trellis. Here D->F->G is the triangle, and D;E are both predecessors
; of both F and G. The basic trellis algorithm picks the F->G edge, but after
; checking, it's profitable to duplicate G into F. The weights here are not
; really important. They are there to help make the test stable.
; CHECK-LABEL: trellis_then_dup_test
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %g
; CHECK: # %ret1
; CHECK: # %c
; CHECK: # %e
; CHECK: # %f
; CHECK: # %ret2
; CHECK: # %ret
define void @trellis_then_dup_test(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !1 ; 5 to 3
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !1 ; 5 to 3
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %g, label %f, !prof !1 ; 5 to 3
f:
  call void @f()
  call void @f()
  br label %g
g:
  %tagbits.g = and i32 %tag, 192
  %tagbits.g.eq0 = icmp eq i32 %tagbits.g, 0
  br i1 %tagbits.g.eq0, label %ret1, label %ret2, !prof !2 ; balanced
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !1 ; 5 to 3
e:
  call void @e()
  call void @e()
  %tagbits.e = and i32 %tag, 48
  %tagbits.e.eq0 = icmp eq i32 %tagbits.e, 0
  br i1 %tagbits.e.eq0, label %g, label %f, !prof !1 ; 5 to 3
ret1:
  call void @a()
  br label %ret
ret2:
  call void @b()
  br label %ret
ret:
  ret void
}

; Verify that we did not mis-identify triangle trellises if it is not
; really a triangle.
; CHECK-LABEL: trellis_no_triangle
; CHECK: # %bb.{{[0-9]+}}: # %entry
; CHECK: # %bb.{{[0-9]+}}: # %b
; CHECK: # %d
; CHECK: # %ret
; CHECK: # %c
; CHECK: # %e
define void @trellis_no_triangle(i32 %tag) {
entry:
  br label %a
a:
  call void @a()
  call void @a()
  %tagbits.a = and i32 %tag, 3
  %tagbits.a.eq0 = icmp eq i32 %tagbits.a, 0
  br i1 %tagbits.a.eq0, label %b, label %c, !prof !8 ; 98 to 2
b:
  call void @b()
  call void @b()
  %tagbits.b = and i32 %tag, 12
  %tagbits.b.eq1 = icmp eq i32 %tagbits.b, 8
  br i1 %tagbits.b.eq1, label %d, label %e, !prof !9 ; 97 to 1
d:
  call void @d()
  call void @d()
  %tagbits.d = and i32 %tag, 48
  %tagbits.d.eq1 = icmp eq i32 %tagbits.d, 32
  br i1 %tagbits.d.eq1, label %ret, label %e, !prof !10 ; 96 to 2
c:
  call void @c()
  call void @c()
  %tagbits.c = and i32 %tag, 12
  %tagbits.c.eq0 = icmp eq i32 %tagbits.c, 0
  br i1 %tagbits.c.eq0, label %d, label %e, !prof !2 ; 1 to 1
e:
  call void @e()
  call void @e()
  br label %ret
ret:
  call void @f()
  ret void
}

declare void @a()
declare void @b()
declare void @c()
declare void @d()
declare void @e()
declare void @f()
declare void @g()
declare void @h()
declare void @i()
declare void @j()

!1 = !{!"branch_weights", i32 5, i32 3}
!2 = !{!"branch_weights", i32 50, i32 50}
!3 = !{!"branch_weights", i32 6, i32 4}
!4 = !{!"branch_weights", i32 7, i32 2}
!5 = !{!"branch_weights", i32 2, i32 8}
!6 = !{!"branch_weights", i32 3, i32 4}
!7 = !{!"branch_weights", i32 4, i32 2}
!8 = !{!"branch_weights", i32 98, i32 2}
!9 = !{!"branch_weights", i32 97, i32 1}
!10 = !{!"branch_weights", i32 96, i32 2}