; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require<profile-summary>,function(select-optimize)' -S < %s | FileCheck %s

; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s --try-experimental-debuginfo-iterators | FileCheck %s
; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require<profile-summary>,function(select-optimize)' -S < %s --try-experimental-debuginfo-iterators | FileCheck %s

; The run lines above exercise the pass both through the -select-optimize flag
; and through an explicit -passes= pipeline, each with and without
; --try-experimental-debuginfo-iterators. All four share one set of assertions.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test base heuristic 1:
;; highly-biased selects assumed to be highly predictable, converted to branches
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; If a select is obviously predictable, turn it into a branch.
; The select carries branch weights 1:100 (!15).
define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select1(
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]]
; CHECK: select.false:
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; If a select is obviously predictable (reversed profile weights),
; turn it into a branch.
define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select2(
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]]
; CHECK: select.false:
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  ; Weights 100:1 (!16), the mirror image of the 1:100 case.
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
  ret i32 %sel
}

; Not obviously predictable select.
; Uses branch weights 1:99 (!17); the expected output keeps the select.
define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @weighted_select3(
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
  ret i32 %sel
}

; Unpredictable select should not form a branch.
; The select is tagged with !unpredictable metadata instead of branch weights.
define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) {
; CHECK-LABEL: @unpred_select(
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable [[META19:![0-9]+]]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20
  ret i32 %sel
}

; Predictable select in function with optsize attribute should not form branch.
define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize {
; CHECK-LABEL: @weighted_select_optsize(
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; Predictable select (weights 1:100) in a function whose profile entry count
; is 0 (!14) should not form a branch.
; NOTE(review): per the function name this presumably covers profile-guided
; size optimization for cold functions - confirm against the pass.
define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 {
; CHECK-LABEL: @weighted_select_pgso(
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  ret i32 %sel
}

; If two selects in a row are predictable, turn them into branches.
; The second select's condition depends on the result of the first one.
define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 {
; CHECK-LABEL: @weighted_selects(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
; CHECK: select.false:
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0
; CHECK-NEXT: [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]]
; CHECK-NEXT: br i1 [[CMP1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]]
; CHECK: select.false2:
; CHECK-NEXT: br label [[SELECT_END1]]
; CHECK: select.end1:
; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ]
; CHECK-NEXT: ret i32 [[SEL1]]
;
  %cmp = icmp ne i32 %a, 0
  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
  %cmp1 = icmp ne i32 %sel, 0
  %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15
  ret i32 %sel1
}

; If select group predictable, turn it into a branch.
define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 {
; CHECK-LABEL: @weighted_select_group(
; CHECK-NEXT: [[A1:%.*]] = add i32 [[A:%.*]], 1
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[C1:%.*]] = add i32 [[C:%.*]], 1
; CHECK-NEXT: br label [[SELECT_END:%.*]]
; CHECK: select.false.sink:
; CHECK-NEXT: [[B1:%.*]] = add i32 [[B:%.*]], 1
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ]
; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ]
; CHECK-NEXT: #dbg_value(i32 [[SEL1]], [[META22:![0-9]+]], !DIExpression(), [[META26:![0-9]+]])
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[ADD]]
;
  ; %a1 feeds both selects; %b1 and %c1 each feed exactly one select.
  %a1 = add i32 %a, 1
  %b1 = add i32 %b, 1
  %c1 = add i32 %c, 1
  %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15
  ; A debug-value intrinsic sits between the two selects that share %cmp.
  call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23)
  %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15
  %add = add i32 %sel1, %sel2
  ret i32 %add
}

; Predictable select group with intra-group dependence converted to branch.
; The dependence: %sel2 uses %sel1 as its false operand.
define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
; CHECK-LABEL: @select_group_intra_group(
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
; CHECK: select.false:
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT: ret i32 [[SUB]]
;
  %sel1 = select i1 %cmp, i32 %a, i32 %b,!prof !15
  %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15
  %sub = sub i32 %sel1, %sel2
  ret i32 %sub
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test base heuristic 2:
;; look for expensive instructions in the one-use slice of the cold path
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Select with cold one-use load value operand should form a branch and
; sink the load.
; With weights 1:99 (!17) the true operand (the load) is the cold one.
define i32 @expensive_val_operand1(ptr nocapture %a, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand1(
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17
  ret i32 %sel
}

; Expensive hot value operand and cheap cold value operand.
define i32 @expensive_val_operand2(ptr nocapture %a, i32 %x, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand2(
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]]
; CHECK-NEXT: ret i32 [[SEL]]
;
  ; Here the load is the false operand, which weights 1:99 (!17) make the hot
  ; one; the expected output keeps the select.
  %load = load i32, ptr %a, align 8
  %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17
  ret i32 %sel
}

; Cold value operand with a load in its one-use dependence slice should result
; in a branch with the dependence slice sunk into it.
define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand3(
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  ; Slice of the cold (true) operand: %load -> %x, each with a single use.
  %load = load i32, ptr %a, align 8
  %x = add i32 %load, %b
  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
  ret i32 %sel
}

; Expensive cold value operand with unsafe-to-sink (due to func call) load (partial slice sinking).
define i32 @expensive_val_operand4(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand4(
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: call void @free(ptr [[A]])
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  ; The call to @free sits between the load and the select. In the expected
  ; output the load stays above the call and only the add is sunk.
  call void @free(ptr %a)
  %x = add i32 %load, %b
  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
  ret i32 %sel
}

; Expensive cold value operand with unsafe-to-sink (due to lifetime-end marker) load (partial slice sinking).
define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand5(
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[A]])
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
  %load = load i32, ptr %a, align 8
  ; Same shape as the @free case, with a lifetime-end marker as the barrier.
  call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %a)
  %x = add i32 %load, %b
  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
  ret i32 %sel
}

; Expensive cold value operand with potentially-unsafe-to-sink load (located
; in a different basic block and thus unchecked for sinkability).
; In the expected output the load stays in the entry block and only the add
; is sunk into the new select.true.sink block.
define i32 @expensive_val_operand6(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand6(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[BB1]] ]
; CHECK-NEXT: ret i32 [[SEL]]
;
entry:
  %load = load i32, ptr %a, align 8
  br label %bb1
bb1:                                              ; preds = %entry
  %x = add i32 %load, %b
  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
  ret i32 %sel
}

; Multiple uses of the load value operand.
; The load feeds both the select and the add after it; the select carries no
; profile metadata and the expected output leaves it untouched.
define i32 @expensive_val_operand7(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
; CHECK-LABEL: @expensive_val_operand7(
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]]
; CHECK-NEXT: ret i32 [[ADD]]
;
  %load = load i32, ptr %b, align 4
  %sel = select i1 %cmp, i32 %x, i32 %load
  %add = add i32 %sel, %load
  ret i32 %add
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test loop heuristic: loop-level critical-path analysis
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Use of cmov in this test would put a load and a fsub on the critical path.
;; Loop-level analysis should decide to form a branch.
;;
;;double cmov_on_critical_path(int n, double x, double *a) {
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      // 50% of iterations
;;      x -= r;
;;  }
;;  return x;
;;}
define double @cmov_on_critical_path(i32 %n, double %x, ptr nocapture %a) {
; CHECK-LABEL: @cmov_on_critical_path(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret double [[X:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]]
; CHECK-NEXT: [[CMP2_FROZEN:%.*]] = freeze i1 [[CMP2]]
; CHECK-NEXT: br i1 [[CMP2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]]
; CHECK: select.true.sink:
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]]
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.exit:
; CHECK-NEXT: ret double [[X2]]
;
entry:
  %cmp1 = icmp sgt i32 %n, 0
  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %entry
  ret double %x

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; %x1 is the loop-carried value: it is fed by %x2, the select result below.
  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %r = load double, ptr %arrayidx, align 8
  %sub = fsub double %x1, %r
  %cmp2 = fcmp ogt double %x1, %r
  ; Branch weights 50:50 (!18), matching the "50% of iterations" note above.
  %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:                                         ; preds = %for.body
  ret double %x2
}

;; The common path includes expensive operations (load and fsub) making
;; branch similarly expensive to cmov, and thus the gain is small.
;; Loop-level analysis should decide on not forming a branch.
;;
;;double small_gain(int n, double x, double *a) {
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      // 99% of iterations
;;      x -= r;
;;  }
;;  return x;
;;}
define double @small_gain(i32 %n, double %x, ptr nocapture %a) {
; CHECK-LABEL: @small_gain(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret double [[X:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]]
; CHECK-NEXT: [[CMP2:%.*]] = fcmp ole double [[X1]], [[R]]
; CHECK-NEXT: [[X2]] = select i1 [[CMP2]], double [[X1]], double [[SUB]], !prof [[PROF18]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.exit:
; CHECK-NEXT: ret double [[X2]]
;
entry:
  %cmp1 = icmp sgt i32 %n, 0
  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %entry
  ret double %x

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %r = load double, ptr %arrayidx, align 8
  %sub = fsub double %x1, %r
  ; The condition is inverted relative to the pseudo-code (ole instead of ogt),
  ; so with weights 1:99 (!17) the false operand %sub is the common result.
  %cmp2 = fcmp ole double %x1, %r
  %x2 = select i1 %cmp2, double %x1, double %sub, !prof !17
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.exit, label %for.body

for.exit:                                         ; preds = %for.body
  ret double %x2
}

;; One select on the critical path and one off the critical path.
;; Loop-level analysis should decide to form a branch only for
;; the select on the critical path.
;;
;;double loop_select_groups(int n, double x, double *a, int k) {
;;  int c = 0;
;;  for (int i = 0; i < n; i++) {
;;    double r = a[i];
;;    if (x > r)
;;      x -= r;
;;    if (i == k)
;;      c += n;
;;  }
;;  return x + c;
;;}
;;
;; NOTE(review): the pseudo-code tests i == k, but the IR below compares %k
;; with %n and leaves the truncated induction variable %1 unused. Confirm
;; whether this is intentional before regenerating the assertions.
define double @loop_select_groups(i32 %n, double %x, ptr nocapture %a, i32 %k) {
; CHECK-LABEL: @loop_select_groups(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup.loopexit:
; CHECK-NEXT: [[PHI_CAST:%.*]] = sitofp i32 [[C_1:%.*]] to double
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT: [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY]] ], [ [[X_ADDR_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]]
; CHECK-NEXT: ret double [[ADD5]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
; CHECK-NEXT: [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ]
; CHECK-NEXT: [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]]
; CHECK-NEXT: [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]]
; CHECK-NEXT: br i1 [[CMP1_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
; CHECK: select.false:
; CHECK-NEXT: br label [[SELECT_END]]
; CHECK: select.end:
; CHECK-NEXT: [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ]
; CHECK-NEXT: [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]]
; CHECK-NEXT: [[ADD:%.*]] = select i1 [[CMP2]], i32 [[N]], i32 0
; CHECK-NEXT: [[C_1]] = add nsw i32 [[ADD]], [[C_020]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp19 = icmp sgt i32 %n, 0
  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %phi.cast = sitofp i32 %c.1 to double
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %c.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %phi.cast, %for.cond.cleanup.loopexit ]
  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.cond.cleanup.loopexit ]
  %add5 = fadd double %x.addr.0.lcssa, %c.0.lcssa
  ret double %add5

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %x.addr.022 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
  %c.020 = phi i32 [ 0, %for.body.preheader ], [ %c.1, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
  %0 = load double, ptr %arrayidx, align 8
  %cmp1 = fcmp ogt double %x.addr.022, %0
  ; First select: feeds the loop-carried %x.addr.1 chain. The expected output
  ; turns it into a branch.
  %sub = select i1 %cmp1, double %0, double 0.000000e+00
  %x.addr.1 = fsub double %x.addr.022, %sub
  %1 = trunc i64 %indvars.iv to i32
  %cmp2 = icmp eq i32 %k, %n
  ; Second select: feeds the integer accumulator %c.1. The expected output
  ; keeps it as a select.
  %add = select i1 %cmp2, i32 %n, i32 0
  %c.1 = add nsw i32 %add, %c.020
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}

; Function Attrs: nounwind readnone speculatable willreturn
declare void @llvm.dbg.value(metadata, metadata, metadata)

; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

declare void @free(ptr nocapture)

; Module flags: the profile summary (!0) plus the DWARF and debug-info
; version flags (!26, !27).
!llvm.module.flags = !{!0, !26, !27}
; Synthetic profile summary (!0 - !13).
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; Function entry counts and select branch weights referenced by the tests.
!14 = !{!"function_entry_count", i64 0}
!15 = !{!"branch_weights", i32 1, i32 100}
!16 = !{!"branch_weights", i32 100, i32 1}
!17 = !{!"branch_weights", i32 1, i32 99}
!18 = !{!"branch_weights", i32 50, i32 50}
!19 = !{!"function_entry_count", i64 100}
; Empty node used as the !unpredictable operand in @unpred_select.
!20 = !{}
; Minimal debug info backing the dbg.value call in @weighted_select_group.
!21 = !DIFile(filename: "test.c", directory: "/test")
!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None)
!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22)
!24 = !DILocalVariable(name: "x", scope: !23)
!25 = !{}
!26 = !{i32 2, !"Dwarf Version", i32 4}
!27 = !{i32 1, !"Debug Info Version", i32 3}
; NOTE(review): !28 is not referenced by any function shown in this file.
!28 = !{!"branch_weights", i32 30, i32 70}
