xref: /llvm-project/llvm/test/CodeGen/X86/fsafdo_test2.ll (revision 958a3d8e2dec95a878dfc9d823861b3a6c674534)
1; REQUIRES: asserts
2; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false < %s | FileCheck %s --check-prefixes=V0,V01
3; RUN: llvm-profdata merge --sample -profile-isfs -o %t0.afdo %S/Inputs/fsloader.afdo
4; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=false -fs-profile-file=%t0.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV0,LOADER
5; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true < %s | FileCheck %s --check-prefixes=V1,V01
6; RUN: llvm-profdata merge --sample -profile-isfs -o %t1.afdo %S/Inputs/fsloader_v1.afdo
7; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%t1.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV1,LOADER
8; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%S/Inputs/fsloader_v1.afdo -profile-isfs -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=LOADERV1,LOADER
9; RUN: llc -enable-fs-discriminator -improved-fs-discriminator=true -fs-profile-file=%S/Inputs/fsloader_v1.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefixes=NOLOAD
10;;
11;; C source code for the test (compiled at -O3):
12;; // A test case for loop unroll.
13;;
14;; __attribute__((noinline)) int bar(int i){
15;;   volatile int j;
16;;   j = i;
17;;   return j;
18;; }
19;;
20;; unsigned sum;
21;; __attribute__((noinline)) void work(int i){
22;;   if (sum % 7)
23;;     sum += i;
24;;   else
25;;     sum -= i;
26;; }
27;;
28;; __attribute__((noinline)) void foo(){
29;;   int i, j;
30;;   for (j = 0; j < 48; j++)
31;;     for (i = 0; i < 4; i++) {
32;;       int ii = bar(i+j*48);
33;;       if (ii % 2)
34;;         work(ii*2);
35;;       if (ii % 4)
36;;         work(ii*3);
37;;   }
38;; }
39;;
40;; int main() {
41;;   int i;
42;;   for (i = 0; i < 10000000; i++) {
43;;     foo();
44;;   }
45;; }
46;;
47;; Check that fs-afdo discriminators are generated.
48; V01: .loc    1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9
49; V0: .loc    1 23 9 is_stmt 0 discriminator 3585 # unroll.c:23:9
50; V0: .loc    1 23 9 is_stmt 0 discriminator 8705 # unroll.c:23:9
51; V0: .loc    1 23 9 is_stmt 0 discriminator 4097 # unroll.c:23:9
52; V1: .loc    1 23 9 is_stmt 0 discriminator 257 # unroll.c:23:9
53; V1: .loc    1 23 9 is_stmt 0 discriminator 513 # unroll.c:23:9
54; V1: .loc    1 23 9 is_stmt 0 discriminator 769 # unroll.c:23:9
55;;
56;; Check that variable __llvm_fs_discriminator__ is generated.
57; V01: .type   __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
58; V01: .section        .rodata,"a",@progbits
59; V01: .weak   __llvm_fs_discriminator__
60; V01: __llvm_fs_discriminator__:
61; V01: .byte   1
62; V01: .size   __llvm_fs_discriminator__, 1
63
64;; Check that new branch probs are generated.
65; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
66; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
67; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x7aca7894 / 0x80000000 = 95.93%
68; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x0535876c / 0x80000000 = 4.07%
69; LOADER: Set branch fs prob: MBB (5 -> 8): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x021c112e / 0x80000000 = 1.65%
70; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35%
71; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00%
72; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00%
73; LOADERV0: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
74; LOADERV1: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
75; LOADERV0: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
76; LOADERV1: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
77; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57%
78; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43%
79; LOADERV0: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
80; LOADERV1: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
81; LOADERV0: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
82; LOADERV1: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
83; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46%
84; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54%
85
86;; Check that the profile is not loaded: the last llc invocation above omits -profile-isfs, so the reader doesn't know it is an FS profile.
87; NOLOAD-NOT: Set branch fs prob
88
;; Target used by every llc invocation in this test.
89target triple = "x86_64-unknown-linux-gnu"
90
;; IR counterpart of `unsigned sum` in the C listing; no instruction visible
;; in this file reads or writes it.
91@sum = dso_local local_unnamed_addr global i32 0, align 4
92
;; bar() and work() are only declared here, so foo() is the sole function
;; body in this module.
93declare i32 @bar(i32 %i) #0
94declare void @work(i32 %i) #2
95
;; IR for foo() from the C listing at the top of this file, with the inner
;; `i` loop already fully unrolled: the loop body appears four times (blocks
;; with no suffix, then .1, .2 and .3) inside the single remaining `j` loop.
;; All four copies reuse the same !dbg nodes (!32, !33, !35-!44), so source
;; line/column alone cannot tell the copies apart.
;; NOTE(review): presumably this is why the discriminator checks above expect
;; several distinct discriminators for unroll.c:23:9 - confirm against the
;; fs-discriminator pass.
;; NOTE(review): the branch-probability checks above match MBB numbers, which
;; presumably depend on the block order below - re-check them before
;; reordering anything.
96define dso_local void @foo() #0 !dbg !29 {
97entry:
98  br label %for.cond1.preheader, !dbg !30
99
;; Outer-loop header and unrolled copy 0. %j.012 is the `j` counter (0 on
;; entry, %inc11 from the latch) and %mul = j * 48 is the base argument
;; shared by all four bar() calls.
100for.cond1.preheader:
101  %j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ]
102  %mul = mul nuw nsw i32 %j.012, 48
103  %call = tail call i32 @bar(i32 %mul), !dbg !32
104  %0 = and i32 %call, 1, !dbg !33
105  %tobool.not = icmp eq i32 %0, 0, !dbg !33
106  br i1 %tobool.not, label %if.end, label %if.then, !dbg !35
107
;; Low bit of the bar() result is set: work(result * 2), written as shl by 1.
108if.then:
109  %mul4 = shl nsw i32 %call, 1, !dbg !36
110  tail call void @work(i32 %mul4), !dbg !37
111  br label %if.end, !dbg !38
112
;; Second test of the same result: any of the low two bits set -> if.then7.
113if.end:
114  %1 = and i32 %call, 3, !dbg !39
115  %tobool6.not = icmp eq i32 %1, 0, !dbg !39
116  br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40
117
;; work(result * 3).
118if.then7:
119  %mul8 = mul nsw i32 %call, 3, !dbg !41
120  tail call void @work(i32 %mul8), !dbg !42
121  br label %if.end9, !dbg !43
122
;; Unrolled copy 1 starts here. The argument is formed with `or` rather than
;; `add`: %mul is a multiple of 48, so its low four bits are zero and
;; or-ing in 1, 2 or 3 gives the same value as adding it.
123if.end9:
124  %add.1 = or i32 %mul, 1, !dbg !44
125  %call.1 = tail call i32 @bar(i32 %add.1), !dbg !32
126  %2 = and i32 %call.1, 1, !dbg !33
127  %tobool.not.1 = icmp eq i32 %2, 0, !dbg !33
128  br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35
129
;; Function exit. Its only predecessor is the loop latch %if.end9.3 at the
;; bottom of the function, even though it is laid out here.
130for.end12:
131  ret void, !dbg !45
132
133if.then.1:
134  %mul4.1 = shl nsw i32 %call.1, 1, !dbg !36
135  tail call void @work(i32 %mul4.1), !dbg !37
136  br label %if.end.1, !dbg !38
137
138if.end.1:
139  %3 = and i32 %call.1, 3, !dbg !39
140  %tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39
141  br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40
142
143if.then7.1:
144  %mul8.1 = mul nsw i32 %call.1, 3, !dbg !41
145  tail call void @work(i32 %mul8.1), !dbg !42
146  br label %if.end9.1, !dbg !43
147
;; Unrolled copy 2: bar(%mul | 2), then the same two tests.
148if.end9.1:
149  %add.2 = or i32 %mul, 2, !dbg !44
150  %call.2 = tail call i32 @bar(i32 %add.2), !dbg !32
151  %4 = and i32 %call.2, 1, !dbg !33
152  %tobool.not.2 = icmp eq i32 %4, 0, !dbg !33
153  br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35
154
155if.then.2:
156  %mul4.2 = shl nsw i32 %call.2, 1, !dbg !36
157  tail call void @work(i32 %mul4.2), !dbg !37
158  br label %if.end.2, !dbg !38
159
160if.end.2:
161  %5 = and i32 %call.2, 3, !dbg !39
162  %tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39
163  br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40
164
165if.then7.2:
166  %mul8.2 = mul nsw i32 %call.2, 3, !dbg !41
167  tail call void @work(i32 %mul8.2), !dbg !42
168  br label %if.end9.2, !dbg !43
169
;; Unrolled copy 3: bar(%mul | 3), then the same two tests.
170if.end9.2:
171  %add.3 = or i32 %mul, 3, !dbg !44
172  %call.3 = tail call i32 @bar(i32 %add.3), !dbg !32
173  %6 = and i32 %call.3, 1, !dbg !33
174  %tobool.not.3 = icmp eq i32 %6, 0, !dbg !33
175  br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35
176
177if.then.3:
178  %mul4.3 = shl nsw i32 %call.3, 1, !dbg !36
179  tail call void @work(i32 %mul4.3), !dbg !37
180  br label %if.end.3, !dbg !38
181
182if.end.3:
183  %7 = and i32 %call.3, 3, !dbg !39
184  %tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39
185  br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40
186
187if.then7.3:
188  %mul8.3 = mul nsw i32 %call.3, 3, !dbg !41
189  tail call void @work(i32 %mul8.3), !dbg !42
190  br label %if.end9.3, !dbg !43
191
;; Loop latch: increment j and leave once it reaches 48; otherwise take the
;; back edge, which carries the loop ID !49 via !llvm.loop.
192if.end9.3:
193  %inc11 = add nuw nsw i32 %j.012, 1, !dbg !46
194  %exitcond.not = icmp eq i32 %inc11, 48, !dbg !48
195  br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49
196}
197
198
;; Attribute groups. In the visible IR, #0 is used by @foo and the @bar
;; declaration and #2 by the @work declaration; nothing visible references
;; #1 or #3.
;; NOTE(review): #1 and #3 look like leftovers from the full module - confirm
;; before removing.
199attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
200attributes #1 = { argmemonly nounwind willreturn }
201attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
202attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
203
;; Module-level debug-info roots: one compile unit (!0) and three module flags.
204!llvm.dbg.cu = !{!0}
205!llvm.module.flags = !{!3, !4, !5}
206
;; !0 asks for line tables only, with debugInfoForProfiling enabled; !1 names
;; the source file that the expectations above print as unroll.c.
207!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
208!1 = !DIFile(filename: "unroll.c", directory: "a/")
209!2 = !{}
;; Module flags: DWARF version 4, debug-info version 3, wchar_size 4.
210!3 = !{i32 7, !"Dwarf Version", i32 4}
211!4 = !{i32 2, !"Debug Info Version", i32 3}
212!5 = !{i32 1, !"wchar_size", i32 4}
;; Debug info for bar() (!7, line 3) and work() (!19, line 10), with TBAA
;; nodes !11-!14 in between. Line numbers count the C listing at the top of
;; this file with "// A test case for loop unroll." as line 1.
;; Both functions are only declared in this file and no visible instruction
;; references !7-!28.
;; NOTE(review): these look like leftovers from the full module - confirm
;; before removing.
213!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
214!8 = !DISubroutineType(types: !2)
215!9 = !DILocation(line: 4, column: 3, scope: !7)
216!10 = !DILocation(line: 5, column: 5, scope: !7)
217!11 = !{!12, !12, i64 0}
218!12 = !{!"int", !13, i64 0}
219!13 = !{!"omnipotent char", !14, i64 0}
220!14 = !{!"Simple C/C++ TBAA"}
221!15 = !DILocation(line: 6, column: 10, scope: !7)
222!16 = !DILocation(line: 7, column: 1, scope: !7)
223!17 = !DILocation(line: 6, column: 3, scope: !18)
224!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1)
225!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
226!20 = !DILocation(line: 11, column: 7, scope: !19)
227!21 = !DILocation(line: 11, column: 11, scope: !22)
228!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1)
229!23 = !DILocation(line: 11, column: 11, scope: !24)
230!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2)
231!25 = !DILocation(line: 11, column: 7, scope: !26)
232!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3)
233!27 = !DILocation(line: 0, scope: !22)
234!28 = !DILocation(line: 15, column: 1, scope: !19)
;; Debug info referenced by @foo. Line numbers count the C listing at the top
;; of this file with "// A test case for loop unroll." as line 1, which puts
;; foo() at line 17, the outer `for` at 19, the bar() call at 21 and the two
;; if/work() pairs at 22-25.
235!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
;; !31, !34 and !47 are DILexicalBlockFile scopes over !29 that carry
;; discriminators 2, 1 and 3 respectively.
236!30 = !DILocation(line: 19, column: 3, scope: !31)
237!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2)
238!32 = !DILocation(line: 21, column: 16, scope: !31)
239!33 = !DILocation(line: 22, column: 14, scope: !34)
240!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1)
241!35 = !DILocation(line: 22, column: 11, scope: !31)
242!36 = !DILocation(line: 23, column: 16, scope: !29)
243!37 = !DILocation(line: 23, column: 9, scope: !34)
244!38 = !DILocation(line: 23, column: 9, scope: !31)
245!39 = !DILocation(line: 24, column: 14, scope: !34)
246!40 = !DILocation(line: 24, column: 11, scope: !31)
247!41 = !DILocation(line: 25, column: 16, scope: !29)
248!42 = !DILocation(line: 25, column: 9, scope: !34)
249!43 = !DILocation(line: 25, column: 9, scope: !31)
250!44 = !DILocation(line: 21, column: 21, scope: !34)
251!45 = !DILocation(line: 27, column: 1, scope: !29)
252!46 = !DILocation(line: 19, column: 24, scope: !47)
253!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3)
254!48 = !DILocation(line: 19, column: 17, scope: !34)
;; !49 is the self-referencing loop ID attached through !llvm.loop to the
;; back-edge branch in @foo; its two location operands point at lines 19 and
;; 26 of the C listing (the outer `for` and the inner loop's closing brace).
255!49 = distinct !{!49, !50, !51}
256!50 = !DILocation(line: 19, column: 3, scope: !29)
257!51 = !DILocation(line: 26, column: 3, scope: !29)
;; Debug info for main() (!52, line 29 of the C listing) and its loop ID
;; (!60). main() is neither defined nor declared in the visible IR, and
;; nothing visible references !52-!63.
;; NOTE(review): these look like leftovers from the full module - confirm
;; before removing.
258!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
259!53 = !DILocation(line: 31, column: 3, scope: !54)
260!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2)
261!55 = !DILocation(line: 32, column: 5, scope: !52)
262!56 = !DILocation(line: 31, column: 30, scope: !57)
263!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3)
264!58 = !DILocation(line: 31, column: 17, scope: !59)
265!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1)
266!60 = distinct !{!60, !61, !62}
267!61 = !DILocation(line: 31, column: 3, scope: !52)
268!62 = !DILocation(line: 33, column: 3, scope: !52)
269!63 = !DILocation(line: 34, column: 1, scope: !52)
269!63 = !DILocation(line: 34, column: 1, scope: !52)
270