xref: /llvm-project/polly/lib/CodeGen/PerfMonitor.cpp (revision fa789dffb1e12c2aece0187aeacc48dfb1768340)
1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
10 
11 #include "polly/CodeGen/PerfMonitor.h"
12 #include "polly/CodeGen/RuntimeDebugBuilder.h"
13 #include "polly/ScopInfo.h"
14 #include "llvm/ADT/Twine.h"
15 #include "llvm/IR/IntrinsicsX86.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/TargetParser/Triple.h"
18 
19 using namespace llvm;
20 using namespace polly;
21 
22 Function *PerfMonitor::getAtExit() {
23   const char *Name = "atexit";
24   Function *F = M->getFunction(Name);
25 
26   if (!F) {
27     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
28     FunctionType *Ty =
29         FunctionType::get(Builder.getInt32Ty(), {Builder.getPtrTy()}, false);
30     F = Function::Create(Ty, Linkage, Name, M);
31   }
32 
33   return F;
34 }
35 
36 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
37   const char *Name = "llvm.global_ctors";
38   GlobalVariable *GV = M->getGlobalVariable(Name);
39   std::vector<Constant *> V;
40 
41   if (GV) {
42     Constant *Array = GV->getInitializer();
43     for (Value *X : Array->operand_values())
44       V.push_back(cast<Constant>(X));
45     GV->eraseFromParent();
46   }
47 
48   StructType *ST =
49       StructType::get(Builder.getInt32Ty(), Fn->getType(), Builder.getPtrTy());
50 
51   V.push_back(
52       ConstantStruct::get(ST, Builder.getInt32(10), Fn,
53                           ConstantPointerNull::get(Builder.getPtrTy())));
54   ArrayType *Ty = ArrayType::get(ST, V.size());
55 
56   GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
57                           ConstantArray::get(Ty, V), Name, nullptr,
58                           GlobalVariable::NotThreadLocal);
59 }
60 
61 Function *PerfMonitor::getRDTSCP() {
62   return Intrinsic::getOrInsertDeclaration(M, Intrinsic::x86_rdtscp);
63 }
64 
65 PerfMonitor::PerfMonitor(const Scop &S, Module *M)
66     : M(M), Builder(M->getContext()), S(S) {
67   if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
68     Supported = true;
69   else
70     Supported = false;
71 }
72 
73 static void TryRegisterGlobal(Module *M, const char *Name,
74                               Constant *InitialValue, Value **Location) {
75   *Location = M->getGlobalVariable(Name);
76 
77   if (!*Location)
78     *Location = new GlobalVariable(
79         *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
80         InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
81 }
82 
83 // Generate a unique name that is usable as a LLVM name for a scop to name its
84 // performance counter.
85 static std::string GetScopUniqueVarname(const Scop &S) {
86   std::string EntryString, ExitString;
87   std::tie(EntryString, ExitString) = S.getEntryExitStr();
88 
89   return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" +
90           EntryString + "__to__" + ExitString)
91       .str();
92 }
93 
94 void PerfMonitor::addScopCounter() {
95   const std::string varname = GetScopUniqueVarname(S);
96   TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
97                     &CyclesInCurrentScopPtr);
98 
99   TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
100                     &TripCountForCurrentScopPtr);
101 }
102 
103 void PerfMonitor::addGlobalVariables() {
104   TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
105                     &CyclesTotalStartPtr);
106 
107   TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(false),
108                     &AlreadyInitializedPtr);
109 
110   TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
111                     &CyclesInScopsPtr);
112 
113   TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
114                     &CyclesInScopStartPtr);
115 }
116 
117 static const char *InitFunctionName = "__polly_perf_init";
118 static const char *FinalReportingFunctionName = "__polly_perf_final";
119 
120 static BasicBlock *FinalStartBB = nullptr;
121 static ReturnInst *ReturnFromFinal = nullptr;
122 
123 Function *PerfMonitor::insertFinalReporting() {
124   // Create new function.
125   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
126   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
127   Function *ExitFn =
128       Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
129   FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
130   Builder.SetInsertPoint(FinalStartBB);
131 
132   if (!Supported) {
133     RuntimeDebugBuilder::createCPUPrinter(
134         Builder, "Polly runtime information generation not supported\n");
135     Builder.CreateRetVoid();
136     return ExitFn;
137   }
138 
139   // Measure current cycles and compute final timings.
140   Function *RDTSCPFn = getRDTSCP();
141 
142   Type *Int64Ty = Builder.getInt64Ty();
143   Value *CurrentCycles =
144       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
145   Value *CyclesStart = Builder.CreateLoad(Int64Ty, CyclesTotalStartPtr, true);
146   Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
147   Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
148 
149   // Print the runtime information.
150   RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
151   RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
152   RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
153   RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
154                                         "\n");
155 
156   // Print the preamble for per-scop information.
157   RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
158   RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
159   RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
160 
161   RuntimeDebugBuilder::createCPUPrinter(
162       Builder, "scop function, "
163                "entry block name, exit block name, total time, trip count\n");
164   ReturnFromFinal = Builder.CreateRetVoid();
165   return ExitFn;
166 }
167 
168 void PerfMonitor::AppendScopReporting() {
169   if (!Supported)
170     return;
171 
172   assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
173                          "PerfMonitor::insertFinalReporting.");
174   assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
175                             "PerfMonitor::insertFinalReporting.");
176 
177   Builder.SetInsertPoint(FinalStartBB);
178   ReturnFromFinal->eraseFromParent();
179 
180   Type *Int64Ty = Builder.getInt64Ty();
181   Value *CyclesInCurrentScop =
182       Builder.CreateLoad(Int64Ty, this->CyclesInCurrentScopPtr, true);
183 
184   Value *TripCountForCurrentScop =
185       Builder.CreateLoad(Int64Ty, this->TripCountForCurrentScopPtr, true);
186 
187   std::string EntryName, ExitName;
188   std::tie(EntryName, ExitName) = S.getEntryExitStr();
189 
190   // print in CSV for easy parsing with other tools.
191   RuntimeDebugBuilder::createCPUPrinter(
192       Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
193       CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
194 
195   ReturnFromFinal = Builder.CreateRetVoid();
196 }
197 
198 static Function *FinalReporting = nullptr;
199 
200 void PerfMonitor::initialize() {
201   addGlobalVariables();
202   addScopCounter();
203 
204   // Ensure that we only add the final reporting function once.
205   // On later invocations, append to the reporting function.
206   if (!FinalReporting) {
207     FinalReporting = insertFinalReporting();
208 
209     Function *InitFn = insertInitFunction(FinalReporting);
210     addToGlobalConstructors(InitFn);
211   }
212 
213   AppendScopReporting();
214 }
215 
216 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
217   // Insert function definition and BBs.
218   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
219   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
220   Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
221   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
222   BasicBlock *EarlyReturn =
223       BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
224   BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
225 
226   Builder.SetInsertPoint(Start);
227 
228   // Check if this function was already run. If yes, return.
229   //
230   // In case profiling has been enabled in multiple translation units, the
231   // initializer function will be added to the global constructors list of
232   // each translation unit. When merging translation units, the global
233   // constructor lists are just appended, such that the initializer will appear
234   // multiple times. To avoid initializations being run multiple times (and
235   // especially to avoid that atExitFn is called more than once), we bail
236   // out if the initializer is run more than once.
237   Value *HasRunBefore =
238       Builder.CreateLoad(Builder.getInt1Ty(), AlreadyInitializedPtr);
239   Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
240   Builder.SetInsertPoint(EarlyReturn);
241   Builder.CreateRetVoid();
242 
243   // Keep track that this function has been run once.
244   Builder.SetInsertPoint(InitBB);
245   Value *True = Builder.getInt1(true);
246   Builder.CreateStore(True, AlreadyInitializedPtr);
247 
248   // Register the final reporting function with atexit().
249   Value *FinalReportingPtr =
250       Builder.CreatePointerCast(FinalReporting, Builder.getPtrTy());
251   Function *AtExitFn = getAtExit();
252   Builder.CreateCall(AtExitFn, {FinalReportingPtr});
253 
254   if (Supported) {
255     // Read the currently cycle counter and store the result for later.
256     Function *RDTSCPFn = getRDTSCP();
257     Value *CurrentCycles =
258         Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
259     Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
260   }
261   Builder.CreateRetVoid();
262 
263   return InitFn;
264 }
265 
266 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
267   if (!Supported)
268     return;
269 
270   Builder.SetInsertPoint(InsertBefore);
271   Function *RDTSCPFn = getRDTSCP();
272   Value *CurrentCycles =
273       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
274   Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
275 }
276 
277 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
278   if (!Supported)
279     return;
280 
281   Builder.SetInsertPoint(InsertBefore);
282   Function *RDTSCPFn = getRDTSCP();
283   Type *Int64Ty = Builder.getInt64Ty();
284   LoadInst *CyclesStart =
285       Builder.CreateLoad(Int64Ty, CyclesInScopStartPtr, true);
286   Value *CurrentCycles =
287       Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
288   Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
289   Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true);
290   CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
291   Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
292 
293   Value *CyclesInCurrentScop =
294       Builder.CreateLoad(Int64Ty, CyclesInCurrentScopPtr, true);
295   CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
296   Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
297 
298   Value *TripCountForCurrentScop =
299       Builder.CreateLoad(Int64Ty, TripCountForCurrentScopPtr, true);
300   TripCountForCurrentScop =
301       Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
302   Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
303                       true);
304 }
305