xref: /llvm-project/llvm/test/CodeGen/X86/select-optimize.ll (revision 094572701dce4aaf36f4521d6cf750420d39f206)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
3; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require<profile-summary>,function(select-optimize)' -S < %s | FileCheck %s
4
5; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s --try-experimental-debuginfo-iterators | FileCheck %s
6; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require<profile-summary>,function(select-optimize)' -S < %s --try-experimental-debuginfo-iterators | FileCheck %s
7
8;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9;; Test base heuristic 1:
10;; highly-biased selects assumed to be highly predictable, converted to branches
11;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12
13; If a select is obviously predictable, turn it into a branch.
; Input: one select annotated with !prof !15 (branch_weights 1, 100).
; Expected output: the condition is frozen and branched on, the branch keeps
; the profile annotation, select.false only jumps to select.end, and a phi in
; select.end takes the place of the select.
14define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) {
15; CHECK-LABEL: @weighted_select1(
16; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
17; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]]
18; CHECK:       select.false:
19; CHECK-NEXT:    br label [[SELECT_END]]
20; CHECK:       select.end:
21; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
22; CHECK-NEXT:    ret i32 [[SEL]]
23;
24  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
25  ret i32 %sel
26}
27
28; If a select is obviously predictable (reversed profile weights),
29; turn it into a branch.
; Input: the same select as weighted_select1, but annotated with !prof !16
; (branch_weights 100, 1), i.e. the weights are swapped.
; Expected output: the same freeze / branch / phi shape as weighted_select1,
; with the branch carrying the swapped profile annotation.
30define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) {
31; CHECK-LABEL: @weighted_select2(
32; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
33; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]]
34; CHECK:       select.false:
35; CHECK-NEXT:    br label [[SELECT_END]]
36; CHECK:       select.end:
37; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
38; CHECK-NEXT:    ret i32 [[SEL]]
39;
40  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
41  ret i32 %sel
42}
43
44; Not obviously predictable select.
; Input: the same select as weighted_select1, but annotated with !prof !17
; (branch_weights 1, 99), which is slightly less biased than !15.
; Expected output: the select is left untouched, profile annotation included.
45define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) {
46; CHECK-LABEL: @weighted_select3(
47; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]]
48; CHECK-NEXT:    ret i32 [[SEL]]
49;
50  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
51  ret i32 %sel
52}
53
54; Unpredictable select should not form a branch.
; Input: a select tagged with !unpredictable (!20 is an empty metadata node)
; and no !prof annotation.
; Expected output: the select is left untouched and keeps its !unpredictable
; annotation.
55define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) {
56; CHECK-LABEL: @unpred_select(
57; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable [[META19:![0-9]+]]
58; CHECK-NEXT:    ret i32 [[SEL]]
59;
60  %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20
61  ret i32 %sel
62}
63
64; Predictable select in function with optsize attribute should not form branch.
; Input: the same !prof !15 select that weighted_select1 converts, placed in a
; function marked optsize.
; Expected output: the select is left untouched, profile annotation included.
65define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize {
66; CHECK-LABEL: @weighted_select_optsize(
67; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
68; CHECK-NEXT:    ret i32 [[SEL]]
69;
70  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
71  ret i32 %sel
72}
73
; Predictable select in a function whose profile entry count is zero should
; not form a branch.
; Input: the same !prof !15 select that weighted_select1 converts, placed in a
; function annotated with !prof !14 (function_entry_count 0).
; Expected output: the select is left untouched, profile annotation included.
; NOTE(review): "pgso" presumably stands for profile-guided size optimization;
; confirm against the pass implementation.
74define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 {
75; CHECK-LABEL: @weighted_select_pgso(
76; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
77; CHECK-NEXT:    ret i32 [[SEL]]
78;
79  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
80  ret i32 %sel
81}
82
83; If two selects in a row are predictable, turn them into branches.
; Input: two !prof !15 selects with different conditions; the second condition
; (%cmp1) is computed from the result of the first select. The function is
; annotated with !prof !19 (function_entry_count 100).
; Expected output: two consecutive freeze / branch / phi sequences. The second
; one starts inside the first one's select.end block and uses the suffixed
; block names select.false2 and select.end1.
84define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 {
85; CHECK-LABEL: @weighted_selects(
86; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0
87; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP]]
88; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
89; CHECK:       select.false:
90; CHECK-NEXT:    br label [[SELECT_END]]
91; CHECK:       select.end:
92; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
93; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0
94; CHECK-NEXT:    [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]]
95; CHECK-NEXT:    br i1 [[CMP1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]]
96; CHECK:       select.false2:
97; CHECK-NEXT:    br label [[SELECT_END1]]
98; CHECK:       select.end1:
99; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ]
100; CHECK-NEXT:    ret i32 [[SEL1]]
101;
102  %cmp = icmp ne i32 %a, 0
103  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
104  %cmp1 = icmp ne i32 %sel, 0
105  %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15
106  ret i32 %sel1
107}
108
109; If select group predictable, turn it into a branch.
; Input: two !prof !15 selects on the same condition %cmp, separated only by an
; llvm.dbg.value call that describes the first select.
; Expected output: a single branch serves both selects.
;   - %c1 (used only as the true operand of %sel2) lands in select.true.sink.
;   - %b1 (used only as the false operand of %sel1) lands in select.false.sink.
;   - %a1 (an operand of both selects) stays ahead of the branch.
;   - The debug record follows the two phis in select.end.
110define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 {
111; CHECK-LABEL: @weighted_select_group(
112; CHECK-NEXT:    [[A1:%.*]] = add i32 [[A:%.*]], 1
113; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
114; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]]
115; CHECK:       select.true.sink:
116; CHECK-NEXT:    [[C1:%.*]] = add i32 [[C:%.*]], 1
117; CHECK-NEXT:    br label [[SELECT_END:%.*]]
118; CHECK:       select.false.sink:
119; CHECK-NEXT:    [[B1:%.*]] = add i32 [[B:%.*]], 1
120; CHECK-NEXT:    br label [[SELECT_END]]
121; CHECK:       select.end:
122; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ]
123; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ]
124; CHECK-NEXT:      #dbg_value(i32 [[SEL1]], [[META22:![0-9]+]], !DIExpression(), [[META26:![0-9]+]])
125; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
126; CHECK-NEXT:    ret i32 [[ADD]]
127;
128  %a1 = add i32 %a, 1
129  %b1 = add i32 %b, 1
130  %c1 = add i32 %c, 1
131  %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15
132  call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23)
133  %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15
134  %add = add i32 %sel1, %sel2
135  ret i32 %add
136}
137
138; Predictable select group with intra-group dependence converted to branch
; Input: two !prof !15 selects on the same condition %cmp, where the false
; operand of %sel2 is the result of %sel1.
; Expected output: a single branch serves both selects. The phi for %sel2
; receives %b on the select.false edge, i.e. the value %sel1 has on that edge,
; rather than a reference to the other phi.
139define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
140; CHECK-LABEL: @select_group_intra_group(
141; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
142; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
143; CHECK:       select.false:
144; CHECK-NEXT:    br label [[SELECT_END]]
145; CHECK:       select.end:
146; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
147; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
148; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
149; CHECK-NEXT:    ret i32 [[SUB]]
150;
151  %sel1 = select i1 %cmp, i32 %a, i32 %b,!prof !15
152  %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15
153  %sub = sub i32 %sel1, %sel2
154  ret i32 %sub
155}
156
157;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
158;; Test base heuristic 2:
159;; look for expensive instructions in the one-use slice of the cold path
160;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
161
162; Select with cold one-use load value operand should form branch and
163; sink load
; Input: the load's only use is the true operand of a select annotated with
; !prof !17 (branch_weights 1, 99).
; Expected output: the load moves into select.true.sink; the false edge goes
; straight from the entry block to select.end.
164define i32 @expensive_val_operand1(ptr nocapture %a, i32 %y, i1 %cmp) {
165; CHECK-LABEL: @expensive_val_operand1(
166; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
167; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
168; CHECK:       select.true.sink:
169; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
170; CHECK-NEXT:    br label [[SELECT_END]]
171; CHECK:       select.end:
172; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
173; CHECK-NEXT:    ret i32 [[SEL]]
174;
175  %load = load i32, ptr %a, align 8
176  %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17
177  ret i32 %sel
178}
179
180; Expensive hot value operand and cheap cold value operand.
; Input: mirror image of expensive_val_operand1. The load is the false operand
; and the plain argument %x is the true operand of a select annotated with
; !prof !17 (branch_weights 1, 99).
; Expected output: the load and the select are left untouched.
181define i32 @expensive_val_operand2(ptr nocapture %a, i32 %x, i1 %cmp) {
182; CHECK-LABEL: @expensive_val_operand2(
183; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
184; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]]
185; CHECK-NEXT:    ret i32 [[SEL]]
186;
187  %load = load i32, ptr %a, align 8
188  %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17
189  ret i32 %sel
190}
191
192; Cold value operand with load in its one-use dependence slice should result
193; in a branch with the dependence slice sunk into it.
; Input: the true operand of the !prof !17 select is an add whose only use is
; the select; the add in turn is the only user of the load.
; Expected output: both the load and the add move into select.true.sink.
194define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
195; CHECK-LABEL: @expensive_val_operand3(
196; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
197; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
198; CHECK:       select.true.sink:
199; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
200; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
201; CHECK-NEXT:    br label [[SELECT_END]]
202; CHECK:       select.end:
203; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
204; CHECK-NEXT:    ret i32 [[SEL]]
205;
206  %load = load i32, ptr %a, align 8
207  %x = add i32 %load, %b
208  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
209  ret i32 %sel
210}
211
212; Expensive cold value operand with unsafe-to-sink (due to func call) load (partial slice sinking).
; Input: same slice as expensive_val_operand3, but a call to @free on the
; loaded-from pointer sits between the load and the add.
; Expected output: the load and the call keep their positions ahead of the
; branch; only the add moves into select.true.sink.
213define i32 @expensive_val_operand4(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
214; CHECK-LABEL: @expensive_val_operand4(
215; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
216; CHECK-NEXT:    call void @free(ptr [[A]])
217; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
218; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
219; CHECK:       select.true.sink:
220; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
221; CHECK-NEXT:    br label [[SELECT_END]]
222; CHECK:       select.end:
223; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
224; CHECK-NEXT:    ret i32 [[SEL]]
225;
226  %load = load i32, ptr %a, align 8
227  call void @free(ptr %a)
228  %x = add i32 %load, %b
229  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
230  ret i32 %sel
231}
232
233; Expensive cold value operand with unsafe-to-sink (due to lifetime-end marker) load (partial slice sinking).
; Input: same slice as expensive_val_operand3, but an llvm.lifetime.end call on
; the loaded-from pointer sits between the load and the add.
; Expected output: the load and the lifetime marker keep their positions ahead
; of the branch; only the add moves into select.true.sink.
234define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
235; CHECK-LABEL: @expensive_val_operand5(
236; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
237; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[A]])
238; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
239; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
240; CHECK:       select.true.sink:
241; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
242; CHECK-NEXT:    br label [[SELECT_END]]
243; CHECK:       select.end:
244; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
245; CHECK-NEXT:    ret i32 [[SEL]]
246;
247  %load = load i32, ptr %a, align 8
248  call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %a)
249  %x = add i32 %load, %b
250  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
251  ret i32 %sel
252}
253
254; Expensive cold value operand with potentially-unsafe-to-sink load (located
255; in a different basic block and thus unchecked for sinkability).
; Input: the load lives in the entry block, while the add that consumes it and
; the !prof !17 select live in bb1.
; Expected output: the load stays in entry; only the add moves into
; select.true.sink, and the false edge into select.end comes from bb1.
256define i32 @expensive_val_operand6(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
257; CHECK-LABEL: @expensive_val_operand6(
258; CHECK-NEXT:  entry:
259; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
260; CHECK-NEXT:    br label [[BB1:%.*]]
261; CHECK:       bb1:
262; CHECK-NEXT:    [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
263; CHECK-NEXT:    br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
264; CHECK:       select.true.sink:
265; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
266; CHECK-NEXT:    br label [[SELECT_END]]
267; CHECK:       select.end:
268; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[BB1]] ]
269; CHECK-NEXT:    ret i32 [[SEL]]
270;
271entry:
272  %load = load i32, ptr %a, align 8
273  br label %bb1
274bb1:                                 ; preds = %entry
275  %x = add i32 %load, %b
276  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
277  ret i32 %sel
278}
279
280; Multiple uses of the load value operand.
; Input: the load feeds both the false operand of the select and the add that
; follows it; the select carries no !prof annotation.
; Expected output: load, select and add are all left untouched.
281define i32 @expensive_val_operand7(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
282; CHECK-LABEL: @expensive_val_operand7(
283; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
284; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]]
285; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]]
286; CHECK-NEXT:    ret i32 [[ADD]]
287;
288  %load = load i32, ptr %b, align 4
289  %sel = select i1 %cmp, i32 %x, i32 %load
290  %add = add i32 %sel, %load
291  ret i32 %add
292}
293
294;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
295;; Test loop heuristic: loop-level critical-path analysis
296;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
297
298;; Use of cmov in this test would put a load and a fsub on the critical path.
299;; Loop-level analysis should decide to form a branch.
300;;
301;;double cmov_on_critical_path(int n, double x, ptr a) {
302;;  for (int i = 0; i < n; i++) {
303;;    double r = a[i];
304;;    if (x > r)
305;;      // 50% of iterations
306;;      x -= r;
307;;  }
308;;  return x;
309;;}
;;
;; Input: inside for.body the loop-carried value %x1 feeds both the fsub and
;; the fcmp, and the select (annotated with !prof !18, branch_weights 50, 50)
;; produces the next loop-carried value %x2.
;; Expected output: the select becomes a branch, the fsub moves into
;; select.true.sink, and select.end takes over as the block that holds the
;; induction update and the back edge to for.body.
310define double @cmov_on_critical_path(i32 %n, double %x, ptr nocapture %a) {
311; CHECK-LABEL: @cmov_on_critical_path(
312; CHECK-NEXT:  entry:
313; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
314; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
315; CHECK:       for.cond.cleanup:
316; CHECK-NEXT:    ret double [[X:%.*]]
317; CHECK:       for.body.preheader:
318; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
319; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
320; CHECK:       for.body:
321; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
322; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
323; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
324; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
325; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]]
326; CHECK-NEXT:    [[CMP2_FROZEN:%.*]] = freeze i1 [[CMP2]]
327; CHECK-NEXT:    br i1 [[CMP2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]]
328; CHECK:       select.true.sink:
329; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
330; CHECK-NEXT:    br label [[SELECT_END]]
331; CHECK:       select.end:
332; CHECK-NEXT:    [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ]
333; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
334; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
335; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
336; CHECK:       for.exit:
337; CHECK-NEXT:    ret double [[X2]]
338;
339entry:
340  %cmp1 = icmp sgt i32 %n, 0
341  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup
342
343for.cond.cleanup:                                 ; preds = %entry
344  ret double %x
345
346for.body.preheader:                               ; preds = %entry
347  %wide.trip.count = zext i32 %n to i64
348  br label %for.body
349
350for.body:                                         ; preds = %for.body.preheader, %for.body
351  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
352  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
353  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
354  %r = load double, ptr %arrayidx, align 8
355  %sub = fsub double %x1, %r
356  %cmp2 = fcmp ogt double %x1, %r
357  %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18
358  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
359  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
360  br i1 %exitcond, label %for.exit, label %for.body
361
362for.exit:                                         ; preds = %for.body
363  ret double %x2
364}
365
366;; The common path includes expensive operations (load and fsub) making
367;; branch similarly expensive to cmov, and thus the gain is small.
368;; Loop-level analysis should decide on not forming a branch.
369;;
370;;double small_gain(int n, double x, ptr a) {
371;;  for (int i = 0; i < n; i++) {
372;;    double r = a[i];
373;;    if (x > r)
374;;      // 99% of iterations
375;;      x -= r;
376;;  }
377;;  return x;
378;;}
;;
;; Input: same loop shape as cmov_on_critical_path, except that the compare is
;; the inverted form (fcmp ole), the select operands are swapped to match
;; (%x1 when true, %sub when false), and the select is annotated with
;; !prof !17 (branch_weights 1, 99).
;; Expected output: the loop body, including the select and its profile
;; annotation, is left untouched.
379define double @small_gain(i32 %n, double %x, ptr nocapture %a) {
380; CHECK-LABEL: @small_gain(
381; CHECK-NEXT:  entry:
382; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
383; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
384; CHECK:       for.cond.cleanup:
385; CHECK-NEXT:    ret double [[X:%.*]]
386; CHECK:       for.body.preheader:
387; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
388; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
389; CHECK:       for.body:
390; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
391; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
392; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
393; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
394; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
395; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ole double [[X1]], [[R]]
396; CHECK-NEXT:    [[X2]] = select i1 [[CMP2]], double [[X1]], double [[SUB]], !prof [[PROF18]]
397; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
398; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
399; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
400; CHECK:       for.exit:
401; CHECK-NEXT:    ret double [[X2]]
402;
403entry:
404  %cmp1 = icmp sgt i32 %n, 0
405  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup
406
407for.cond.cleanup:                                 ; preds = %entry
408  ret double %x
409
410for.body.preheader:                               ; preds = %entry
411  %wide.trip.count = zext i32 %n to i64
412  br label %for.body
413
414for.body:                                         ; preds = %for.body.preheader, %for.body
415  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
416  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
417  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
418  %r = load double, ptr %arrayidx, align 8
419  %sub = fsub double %x1, %r
420  %cmp2 = fcmp ole double %x1, %r
421  %x2 = select i1 %cmp2, double %x1, double %sub, !prof !17
422  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
423  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
424  br i1 %exitcond, label %for.exit, label %for.body
425
426for.exit:                                         ; preds = %for.body
427  ret double %x2
428}
429
430;; One select on the critical path and one off the critical path.
431;; Loop-level analysis should decide to form a branch only for
432;; the select on the critical path.
433;;
434;;double loop_select_groups(int n, double x, ptr a, int k) {
435;;  int c = 0;
436;;  for (int i = 0; i < n; i++) {
437;;    double r = a[i];
438;;    if (x > r)
439;;      x -= r;
440;;    if (i == k)
441;;      c += n;
442;;  }
443;;  return x + c;
444;;}
;;
;; Input: for.body holds two selects, neither of which has a !prof annotation.
;;   - %sub picks between the loaded value and 0.0 and feeds the fsub that
;;     produces the loop-carried %x.addr.1.
;;   - %add picks between %n and 0 and feeds the loop-carried counter %c.1.
;; Expected output: only %sub becomes a branch/phi pair (the branch has no
;; profile annotation); %add stays a select inside select.end.
;; NOTE(review): the C sketch compares i with k, but the IR below compares %k
;; with %n, and the truncated induction variable %1 has no users. Presumably
;; the compare was meant to use %1 - confirm before changing, since the
;; expected output would have to be regenerated.
445define double @loop_select_groups(i32 %n, double %x, ptr nocapture %a, i32 %k) {
446; CHECK-LABEL: @loop_select_groups(
447; CHECK-NEXT:  entry:
448; CHECK-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[N:%.*]], 0
449; CHECK-NEXT:    br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
450; CHECK:       for.body.preheader:
451; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
452; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
453; CHECK:       for.cond.cleanup.loopexit:
454; CHECK-NEXT:    [[PHI_CAST:%.*]] = sitofp i32 [[C_1:%.*]] to double
455; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
456; CHECK:       for.cond.cleanup:
457; CHECK-NEXT:    [[C_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
458; CHECK-NEXT:    [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY]] ], [ [[X_ADDR_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
459; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]]
460; CHECK-NEXT:    ret double [[ADD5]]
461; CHECK:       for.body:
462; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
463; CHECK-NEXT:    [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ]
464; CHECK-NEXT:    [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ]
465; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
466; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
467; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]]
468; CHECK-NEXT:    [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]]
469; CHECK-NEXT:    br i1 [[CMP1_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
470; CHECK:       select.false:
471; CHECK-NEXT:    br label [[SELECT_END]]
472; CHECK:       select.end:
473; CHECK-NEXT:    [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ]
474; CHECK-NEXT:    [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]]
475; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
476; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]]
477; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[CMP2]], i32 [[N]], i32 0
478; CHECK-NEXT:    [[C_1]] = add nsw i32 [[ADD]], [[C_020]]
479; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
480; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
481; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
482;
483entry:
484  %cmp19 = icmp sgt i32 %n, 0
485  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
486
487for.body.preheader:                               ; preds = %entry
488  %wide.trip.count = zext i32 %n to i64
489  br label %for.body
490
491for.cond.cleanup.loopexit:                        ; preds = %for.body
492  %phi.cast = sitofp i32 %c.1 to double
493  br label %for.cond.cleanup
494
495for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
496  %c.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %phi.cast, %for.cond.cleanup.loopexit ]
497  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.cond.cleanup.loopexit ]
498  %add5 = fadd double %x.addr.0.lcssa, %c.0.lcssa
499  ret double %add5
500
501for.body:                                         ; preds = %for.body.preheader, %for.body
502  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
503  %x.addr.022 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
504  %c.020 = phi i32 [ 0, %for.body.preheader ], [ %c.1, %for.body ]
505  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
506  %0 = load double, ptr %arrayidx, align 8
507  %cmp1 = fcmp ogt double %x.addr.022, %0
508  %sub = select i1 %cmp1, double %0, double 0.000000e+00
509  %x.addr.1 = fsub double %x.addr.022, %sub
510  %1 = trunc i64 %indvars.iv to i32
511  %cmp2 = icmp eq i32 %k, %n
512  %add = select i1 %cmp2, i32 %n, i32 0
513  %c.1 = add nsw i32 %add, %c.020
514  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
515  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
516  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
517}
518
; External declarations used by the test functions in this file.

; Debug-value intrinsic; called between the two selects in
; weighted_select_group.
519; Function Attrs: nounwind readnone speculatable willreturn
520declare void @llvm.dbg.value(metadata, metadata, metadata)
521
; Lifetime-end marker; placed between the load and its user in
; expensive_val_operand5.
522; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
523declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
524
; Called between the load and its user in expensive_val_operand4.
525declare void @free(ptr nocapture)
526
; Module flags: the profile summary (!0) plus the DWARF and debug-info version
; flags (!26, !27).
527!llvm.module.flags = !{!0, !26, !27}
; !0 - !13: the "ProfileSummary" module flag and its detailed summary entries.
528!0 = !{i32 1, !"ProfileSummary", !1}
529!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
530!2 = !{!"ProfileFormat", !"InstrProf"}
531!3 = !{!"TotalCount", i64 10000}
532!4 = !{!"MaxCount", i64 10}
533!5 = !{!"MaxInternalCount", i64 1}
534!6 = !{!"MaxFunctionCount", i64 1000}
535!7 = !{!"NumCounts", i64 3}
536!8 = !{!"NumFunctions", i64 3}
537!9 = !{!"DetailedSummary", !10}
538!10 = !{!11, !12, !13}
539!11 = !{i32 10000, i64 100, i32 1}
540!12 = !{i32 999000, i64 100, i32 1}
541!13 = !{i32 999999, i64 1, i32 2}
; !14: entry count 0, attached to weighted_select_pgso.
542!14 = !{!"function_entry_count", i64 0}
; !15 - !18: branch weights attached to the selects under test.
543!15 = !{!"branch_weights", i32 1, i32 100}
544!16 = !{!"branch_weights", i32 100, i32 1}
545!17 = !{!"branch_weights", i32 1, i32 99}
546!18 = !{!"branch_weights", i32 50, i32 50}
; !19: entry count 100, attached to weighted_selects and weighted_select_group.
547!19 = !{!"function_entry_count", i64 100}
; !20: empty node used as the !unpredictable operand in unpred_select.
548!20 = !{}
; !21 - !25: debug-info scaffolding for the llvm.dbg.value call in
; weighted_select_group (variable !24, scope !23).
549!21 = !DIFile(filename: "test.c", directory: "/test")
550!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None)
551!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22)
552!24 = !DILocalVariable(name: "x", scope: !23)
553!25 = !{}
554!26 = !{i32 2, !"Dwarf Version", i32 4}
555!27 = !{i32 1, !"Debug Info Version", i32 3}
; !28 is not referenced by any function in the visible part of this file.
556!28 = !{!"branch_weights", i32 30, i32 70}
557
557