1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "polly/CodeGen/PerfMonitor.h" 12 #include "polly/CodeGen/RuntimeDebugBuilder.h" 13 #include "polly/ScopInfo.h" 14 #include "llvm/ADT/Twine.h" 15 #include "llvm/IR/IntrinsicsX86.h" 16 #include "llvm/IR/Module.h" 17 #include "llvm/TargetParser/Triple.h" 18 19 using namespace llvm; 20 using namespace polly; 21 22 Function *PerfMonitor::getAtExit() { 23 const char *Name = "atexit"; 24 Function *F = M->getFunction(Name); 25 26 if (!F) { 27 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 28 FunctionType *Ty = 29 FunctionType::get(Builder.getInt32Ty(), {Builder.getPtrTy()}, false); 30 F = Function::Create(Ty, Linkage, Name, M); 31 } 32 33 return F; 34 } 35 36 void PerfMonitor::addToGlobalConstructors(Function *Fn) { 37 const char *Name = "llvm.global_ctors"; 38 GlobalVariable *GV = M->getGlobalVariable(Name); 39 std::vector<Constant *> V; 40 41 if (GV) { 42 Constant *Array = GV->getInitializer(); 43 for (Value *X : Array->operand_values()) 44 V.push_back(cast<Constant>(X)); 45 GV->eraseFromParent(); 46 } 47 48 StructType *ST = 49 StructType::get(Builder.getInt32Ty(), Fn->getType(), Builder.getPtrTy()); 50 51 V.push_back( 52 ConstantStruct::get(ST, Builder.getInt32(10), Fn, 53 ConstantPointerNull::get(Builder.getPtrTy()))); 54 ArrayType *Ty = ArrayType::get(ST, V.size()); 55 56 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, 57 ConstantArray::get(Ty, V), Name, nullptr, 58 GlobalVariable::NotThreadLocal); 59 } 60 61 Function *PerfMonitor::getRDTSCP() { 62 return Intrinsic::getOrInsertDeclaration(M, Intrinsic::x86_rdtscp); 63 } 64 65 PerfMonitor::PerfMonitor(const Scop &S, Module *M) 66 : M(M), Builder(M->getContext()), S(S) { 67 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64) 68 Supported = true; 69 else 70 Supported = false; 71 } 72 73 static void TryRegisterGlobal(Module *M, const char *Name, 74 Constant *InitialValue, Value **Location) { 75 *Location = M->getGlobalVariable(Name); 76 77 if (!*Location) 78 *Location = new GlobalVariable( 79 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, 80 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); 81 } 82 83 // Generate a unique name that is usable as a LLVM name for a scop to name its 84 // performance counter. 85 static std::string GetScopUniqueVarname(const Scop &S) { 86 std::string EntryString, ExitString; 87 std::tie(EntryString, ExitString) = S.getEntryExitStr(); 88 89 return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" + 90 EntryString + "__to__" + ExitString) 91 .str(); 92 } 93 94 void PerfMonitor::addScopCounter() { 95 const std::string varname = GetScopUniqueVarname(S); 96 TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0), 97 &CyclesInCurrentScopPtr); 98 99 TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0), 100 &TripCountForCurrentScopPtr); 101 } 102 103 void PerfMonitor::addGlobalVariables() { 104 TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0), 105 &CyclesTotalStartPtr); 106 107 TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(false), 108 &AlreadyInitializedPtr); 109 110 TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0), 111 &CyclesInScopsPtr); 112 113 TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0), 114 &CyclesInScopStartPtr); 115 } 116 117 static const char *InitFunctionName = "__polly_perf_init"; 118 static const char *FinalReportingFunctionName = "__polly_perf_final"; 119 120 static BasicBlock *FinalStartBB = nullptr; 121 static ReturnInst *ReturnFromFinal = nullptr; 122 123 Function *PerfMonitor::insertFinalReporting() { 124 // Create new function. 125 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 126 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 127 Function *ExitFn = 128 Function::Create(Ty, Linkage, FinalReportingFunctionName, M); 129 FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn); 130 Builder.SetInsertPoint(FinalStartBB); 131 132 if (!Supported) { 133 RuntimeDebugBuilder::createCPUPrinter( 134 Builder, "Polly runtime information generation not supported\n"); 135 Builder.CreateRetVoid(); 136 return ExitFn; 137 } 138 139 // Measure current cycles and compute final timings. 140 Function *RDTSCPFn = getRDTSCP(); 141 142 Type *Int64Ty = Builder.getInt64Ty(); 143 Value *CurrentCycles = 144 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 145 Value *CyclesStart = Builder.CreateLoad(Int64Ty, CyclesTotalStartPtr, true); 146 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart); 147 Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true); 148 149 // Print the runtime information. 150 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n"); 151 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n"); 152 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n"); 153 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops, 154 "\n"); 155 156 // Print the preamble for per-scop information. 157 RuntimeDebugBuilder::createCPUPrinter(Builder, "\n"); 158 RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n"); 159 RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n"); 160 161 RuntimeDebugBuilder::createCPUPrinter( 162 Builder, "scop function, " 163 "entry block name, exit block name, total time, trip count\n"); 164 ReturnFromFinal = Builder.CreateRetVoid(); 165 return ExitFn; 166 } 167 168 void PerfMonitor::AppendScopReporting() { 169 if (!Supported) 170 return; 171 172 assert(FinalStartBB && "Expected FinalStartBB to be initialized by " 173 "PerfMonitor::insertFinalReporting."); 174 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " 175 "PerfMonitor::insertFinalReporting."); 176 177 Builder.SetInsertPoint(FinalStartBB); 178 ReturnFromFinal->eraseFromParent(); 179 180 Type *Int64Ty = Builder.getInt64Ty(); 181 Value *CyclesInCurrentScop = 182 Builder.CreateLoad(Int64Ty, this->CyclesInCurrentScopPtr, true); 183 184 Value *TripCountForCurrentScop = 185 Builder.CreateLoad(Int64Ty, this->TripCountForCurrentScopPtr, true); 186 187 std::string EntryName, ExitName; 188 std::tie(EntryName, ExitName) = S.getEntryExitStr(); 189 190 // print in CSV for easy parsing with other tools. 191 RuntimeDebugBuilder::createCPUPrinter( 192 Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ", 193 CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n"); 194 195 ReturnFromFinal = Builder.CreateRetVoid(); 196 } 197 198 static Function *FinalReporting = nullptr; 199 200 void PerfMonitor::initialize() { 201 addGlobalVariables(); 202 addScopCounter(); 203 204 // Ensure that we only add the final reporting function once. 205 // On later invocations, append to the reporting function. 206 if (!FinalReporting) { 207 FinalReporting = insertFinalReporting(); 208 209 Function *InitFn = insertInitFunction(FinalReporting); 210 addToGlobalConstructors(InitFn); 211 } 212 213 AppendScopReporting(); 214 } 215 216 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { 217 // Insert function definition and BBs. 218 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 219 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 220 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M); 221 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn); 222 BasicBlock *EarlyReturn = 223 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn); 224 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn); 225 226 Builder.SetInsertPoint(Start); 227 228 // Check if this function was already run. If yes, return. 229 // 230 // In case profiling has been enabled in multiple translation units, the 231 // initializer function will be added to the global constructors list of 232 // each translation unit. When merging translation units, the global 233 // constructor lists are just appended, such that the initializer will appear 234 // multiple times. To avoid initializations being run multiple times (and 235 // especially to avoid that atExitFn is called more than once), we bail 236 // out if the initializer is run more than once. 237 Value *HasRunBefore = 238 Builder.CreateLoad(Builder.getInt1Ty(), AlreadyInitializedPtr); 239 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB); 240 Builder.SetInsertPoint(EarlyReturn); 241 Builder.CreateRetVoid(); 242 243 // Keep track that this function has been run once. 244 Builder.SetInsertPoint(InitBB); 245 Value *True = Builder.getInt1(true); 246 Builder.CreateStore(True, AlreadyInitializedPtr); 247 248 // Register the final reporting function with atexit(). 249 Value *FinalReportingPtr = 250 Builder.CreatePointerCast(FinalReporting, Builder.getPtrTy()); 251 Function *AtExitFn = getAtExit(); 252 Builder.CreateCall(AtExitFn, {FinalReportingPtr}); 253 254 if (Supported) { 255 // Read the currently cycle counter and store the result for later. 256 Function *RDTSCPFn = getRDTSCP(); 257 Value *CurrentCycles = 258 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 259 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true); 260 } 261 Builder.CreateRetVoid(); 262 263 return InitFn; 264 } 265 266 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { 267 if (!Supported) 268 return; 269 270 Builder.SetInsertPoint(InsertBefore); 271 Function *RDTSCPFn = getRDTSCP(); 272 Value *CurrentCycles = 273 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 274 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true); 275 } 276 277 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { 278 if (!Supported) 279 return; 280 281 Builder.SetInsertPoint(InsertBefore); 282 Function *RDTSCPFn = getRDTSCP(); 283 Type *Int64Ty = Builder.getInt64Ty(); 284 LoadInst *CyclesStart = 285 Builder.CreateLoad(Int64Ty, CyclesInScopStartPtr, true); 286 Value *CurrentCycles = 287 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 288 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart); 289 Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true); 290 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop); 291 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true); 292 293 Value *CyclesInCurrentScop = 294 Builder.CreateLoad(Int64Ty, CyclesInCurrentScopPtr, true); 295 CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop); 296 Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true); 297 298 Value *TripCountForCurrentScop = 299 Builder.CreateLoad(Int64Ty, TripCountForCurrentScopPtr, true); 300 TripCountForCurrentScop = 301 Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1)); 302 Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr, 303 true); 304 } 305