1 #include "llvm/ADT/DenseMap.h" 2 #include "llvm/ADT/StringExtras.h" 3 #include "llvm/ADT/StringSet.h" 4 #include "llvm/DebugInfo/DIContext.h" 5 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 6 #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" 7 #include "llvm/Object/ObjectFile.h" 8 9 #define DEBUG_TYPE "dwarfdump" 10 using namespace llvm; 11 using namespace object; 12 13 /// Holds statistics for one function (or other entity that has a PC range and 14 /// contains variables, such as a compile unit). 15 struct PerFunctionStats { 16 /// Number of inlined instances of this function. 17 unsigned NumFnInlined = 0; 18 /// Number of inlined instances that have abstract origins. 19 unsigned NumAbstractOrigins = 0; 20 /// Number of variables and parameters with location across all inlined 21 /// instances. 22 unsigned TotalVarWithLoc = 0; 23 /// Number of constants with location across all inlined instances. 24 unsigned ConstantMembers = 0; 25 /// List of all Variables and parameters in this function. 26 StringSet<> VarsInFunction; 27 /// Compile units also cover a PC range, but have this flag set to false. 28 bool IsFunction = false; 29 /// Verify function definition has PC addresses (for detecting when 30 /// a function has been inlined everywhere). 31 bool HasPCAddresses = false; 32 /// Function has source location information. 33 bool HasSourceLocation = false; 34 /// Number of function parameters. 35 unsigned NumParams = 0; 36 /// Number of function parameters with source location. 37 unsigned NumParamSourceLocations = 0; 38 /// Number of function parameters with type. 39 unsigned NumParamTypes = 0; 40 /// Number of function parameters with a DW_AT_location. 41 unsigned NumParamLocations = 0; 42 /// Number of variables. 43 unsigned NumVars = 0; 44 /// Number of variables with source location. 45 unsigned NumVarSourceLocations = 0; 46 /// Number of variables wtih type. 47 unsigned NumVarTypes = 0; 48 /// Number of variables wtih DW_AT_location. 49 unsigned NumVarLocations = 0; 50 }; 51 52 /// Holds accumulated global statistics about DIEs. 53 struct GlobalStats { 54 /// Total number of PC range bytes covered by DW_AT_locations. 55 unsigned ScopeBytesCovered = 0; 56 /// Total number of PC range bytes in each variable's enclosing scope, 57 /// starting from the first definition of the variable. 58 unsigned ScopeBytesFromFirstDefinition = 0; 59 /// Total number of call site entries (DW_TAG_call_site) or 60 /// (DW_AT_call_file & DW_AT_call_line). 61 unsigned CallSiteEntries = 0; 62 /// Total byte size of concrete functions. This byte size includes 63 /// inline functions contained in the concrete functions. 64 uint64_t FunctionSize = 0; 65 /// Total byte size of inlined functions. This is the total number of bytes 66 /// for the top inline functions within concrete functions. This can help 67 /// tune the inline settings when compiling to match user expectations. 68 uint64_t InlineFunctionSize = 0; 69 }; 70 71 /// Extract the low pc from a Die. 72 static uint64_t getLowPC(DWARFDie Die) { 73 auto RangesOrError = Die.getAddressRanges(); 74 DWARFAddressRangesVector Ranges; 75 if (RangesOrError) 76 Ranges = RangesOrError.get(); 77 else 78 llvm::consumeError(RangesOrError.takeError()); 79 if (Ranges.size()) 80 return Ranges[0].LowPC; 81 return dwarf::toAddress(Die.find(dwarf::DW_AT_low_pc), 0); 82 } 83 84 /// Collect debug info quality metrics for one DIE. 85 static void collectStatsForDie(DWARFDie Die, std::string FnPrefix, 86 std::string VarPrefix, uint64_t ScopeLowPC, 87 uint64_t BytesInScope, uint32_t InlineDepth, 88 StringMap<PerFunctionStats> &FnStatMap, 89 GlobalStats &GlobalStats) { 90 bool HasLoc = false; 91 bool HasSrcLoc = false; 92 bool HasType = false; 93 bool IsArtificial = false; 94 uint64_t BytesCovered = 0; 95 uint64_t OffsetToFirstDefinition = 0; 96 97 if (Die.getTag() == dwarf::DW_TAG_call_site) { 98 GlobalStats.CallSiteEntries++; 99 return; 100 } 101 102 if (Die.getTag() != dwarf::DW_TAG_formal_parameter && 103 Die.getTag() != dwarf::DW_TAG_variable && 104 Die.getTag() != dwarf::DW_TAG_member) { 105 // Not a variable or constant member. 106 return; 107 } 108 109 if (Die.findRecursively(dwarf::DW_AT_decl_file) && 110 Die.findRecursively(dwarf::DW_AT_decl_line)) 111 HasSrcLoc = true; 112 113 if (Die.findRecursively(dwarf::DW_AT_type)) 114 HasType = true; 115 116 if (Die.find(dwarf::DW_AT_artificial)) 117 IsArtificial = true; 118 119 if (Die.find(dwarf::DW_AT_const_value)) { 120 // This catches constant members *and* variables. 121 HasLoc = true; 122 BytesCovered = BytesInScope; 123 } else { 124 if (Die.getTag() == dwarf::DW_TAG_member) { 125 // Non-const member. 126 return; 127 } 128 // Handle variables and function arguments. 129 auto FormValue = Die.find(dwarf::DW_AT_location); 130 HasLoc = FormValue.hasValue(); 131 if (HasLoc) { 132 // Get PC coverage. 133 if (auto DebugLocOffset = FormValue->getAsSectionOffset()) { 134 auto *DebugLoc = Die.getDwarfUnit()->getContext().getDebugLoc(); 135 if (auto List = DebugLoc->getLocationListAtOffset(*DebugLocOffset)) { 136 for (auto Entry : List->Entries) 137 BytesCovered += Entry.End - Entry.Begin; 138 if (List->Entries.size()) { 139 uint64_t FirstDef = List->Entries[0].Begin; 140 uint64_t UnitOfs = getLowPC(Die.getDwarfUnit()->getUnitDIE()); 141 // Ranges sometimes start before the lexical scope. 142 if (UnitOfs + FirstDef >= ScopeLowPC) 143 OffsetToFirstDefinition = UnitOfs + FirstDef - ScopeLowPC; 144 // Or even after it. Count that as a failure. 145 if (OffsetToFirstDefinition > BytesInScope) 146 OffsetToFirstDefinition = 0; 147 } 148 } 149 assert(BytesInScope); 150 } else { 151 // Assume the entire range is covered by a single location. 152 BytesCovered = BytesInScope; 153 } 154 } 155 } 156 157 // Collect PC range coverage data. 158 auto &FnStats = FnStatMap[FnPrefix]; 159 if (DWARFDie D = 160 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) 161 Die = D; 162 // By using the variable name + the path through the lexical block tree, the 163 // keys are consistent across duplicate abstract origins in different CUs. 164 std::string VarName = StringRef(Die.getName(DINameKind::ShortName)); 165 FnStats.VarsInFunction.insert(VarPrefix + VarName); 166 if (BytesInScope) { 167 FnStats.TotalVarWithLoc += (unsigned)HasLoc; 168 // Adjust for the fact the variables often start their lifetime in the 169 // middle of the scope. 170 BytesInScope -= OffsetToFirstDefinition; 171 // Turns out we have a lot of ranges that extend past the lexical scope. 172 GlobalStats.ScopeBytesCovered += std::min(BytesInScope, BytesCovered); 173 GlobalStats.ScopeBytesFromFirstDefinition += BytesInScope; 174 assert(GlobalStats.ScopeBytesCovered <= 175 GlobalStats.ScopeBytesFromFirstDefinition); 176 } else if (Die.getTag() == dwarf::DW_TAG_member) { 177 FnStats.ConstantMembers++; 178 } else { 179 FnStats.TotalVarWithLoc += (unsigned)HasLoc; 180 } 181 if (!IsArtificial) { 182 if (Die.getTag() == dwarf::DW_TAG_formal_parameter) { 183 FnStats.NumParams++; 184 if (HasType) 185 FnStats.NumParamTypes++; 186 if (HasSrcLoc) 187 FnStats.NumParamSourceLocations++; 188 if (HasLoc) 189 FnStats.NumParamLocations++; 190 } else if (Die.getTag() == dwarf::DW_TAG_variable) { 191 FnStats.NumVars++; 192 if (HasType) 193 FnStats.NumVarTypes++; 194 if (HasSrcLoc) 195 FnStats.NumVarSourceLocations++; 196 if (HasLoc) 197 FnStats.NumVarLocations++; 198 } 199 } 200 } 201 202 /// Recursively collect debug info quality metrics. 203 static void collectStatsRecursive(DWARFDie Die, std::string FnPrefix, 204 std::string VarPrefix, uint64_t ScopeLowPC, 205 uint64_t BytesInScope, uint32_t InlineDepth, 206 StringMap<PerFunctionStats> &FnStatMap, 207 GlobalStats &GlobalStats) { 208 // Handle any kind of lexical scope. 209 const dwarf::Tag Tag = Die.getTag(); 210 const bool IsFunction = Tag == dwarf::DW_TAG_subprogram; 211 const bool IsBlock = Tag == dwarf::DW_TAG_lexical_block; 212 const bool IsInlinedFunction = Tag == dwarf::DW_TAG_inlined_subroutine; 213 if (IsFunction || IsInlinedFunction || IsBlock) { 214 215 // Reset VarPrefix when entering a new function. 216 if (Die.getTag() == dwarf::DW_TAG_subprogram || 217 Die.getTag() == dwarf::DW_TAG_inlined_subroutine) 218 VarPrefix = "v"; 219 220 // Ignore forward declarations. 221 if (Die.find(dwarf::DW_AT_declaration)) 222 return; 223 224 // Check for call sites. 225 if (Die.find(dwarf::DW_AT_call_file) && Die.find(dwarf::DW_AT_call_line)) 226 GlobalStats.CallSiteEntries++; 227 228 // PC Ranges. 229 auto RangesOrError = Die.getAddressRanges(); 230 if (!RangesOrError) { 231 llvm::consumeError(RangesOrError.takeError()); 232 return; 233 } 234 235 auto Ranges = RangesOrError.get(); 236 uint64_t BytesInThisScope = 0; 237 for (auto Range : Ranges) 238 BytesInThisScope += Range.HighPC - Range.LowPC; 239 ScopeLowPC = getLowPC(Die); 240 241 // Count the function. 242 if (!IsBlock) { 243 StringRef Name = Die.getName(DINameKind::LinkageName); 244 if (Name.empty()) 245 Name = Die.getName(DINameKind::ShortName); 246 FnPrefix = Name; 247 // Skip over abstract origins. 248 if (Die.find(dwarf::DW_AT_inline)) 249 return; 250 // We've seen an (inlined) instance of this function. 251 auto &FnStats = FnStatMap[Name]; 252 if (IsInlinedFunction) { 253 FnStats.NumFnInlined++; 254 if (Die.findRecursively(dwarf::DW_AT_abstract_origin)) 255 FnStats.NumAbstractOrigins++; 256 } 257 FnStats.IsFunction = true; 258 if (BytesInThisScope && !IsInlinedFunction) 259 FnStats.HasPCAddresses = true; 260 std::string FnName = StringRef(Die.getName(DINameKind::ShortName)); 261 if (Die.findRecursively(dwarf::DW_AT_decl_file) && 262 Die.findRecursively(dwarf::DW_AT_decl_line)) 263 FnStats.HasSourceLocation = true; 264 } 265 266 if (BytesInThisScope) { 267 BytesInScope = BytesInThisScope; 268 if (IsFunction) 269 GlobalStats.FunctionSize += BytesInThisScope; 270 else if (IsInlinedFunction && InlineDepth == 0) 271 GlobalStats.InlineFunctionSize += BytesInThisScope; 272 } 273 } else { 274 // Not a scope, visit the Die itself. It could be a variable. 275 collectStatsForDie(Die, FnPrefix, VarPrefix, ScopeLowPC, BytesInScope, 276 InlineDepth, FnStatMap, GlobalStats); 277 } 278 279 // Set InlineDepth correctly for child recursion 280 if (IsFunction) 281 InlineDepth = 0; 282 else if (IsInlinedFunction) 283 ++InlineDepth; 284 285 // Traverse children. 286 unsigned LexicalBlockIndex = 0; 287 DWARFDie Child = Die.getFirstChild(); 288 while (Child) { 289 std::string ChildVarPrefix = VarPrefix; 290 if (Child.getTag() == dwarf::DW_TAG_lexical_block) 291 ChildVarPrefix += toHex(LexicalBlockIndex++) + '.'; 292 293 collectStatsRecursive(Child, FnPrefix, ChildVarPrefix, ScopeLowPC, 294 BytesInScope, InlineDepth, FnStatMap, GlobalStats); 295 Child = Child.getSibling(); 296 } 297 } 298 299 /// Print machine-readable output. 300 /// The machine-readable format is single-line JSON output. 301 /// \{ 302 static void printDatum(raw_ostream &OS, const char *Key, StringRef Value) { 303 OS << ",\"" << Key << "\":\"" << Value << '"'; 304 LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); 305 } 306 static void printDatum(raw_ostream &OS, const char *Key, uint64_t Value) { 307 OS << ",\"" << Key << "\":" << Value; 308 LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n'); 309 } 310 /// \} 311 312 /// Collect debug info quality metrics for an entire DIContext. 313 /// 314 /// Do the impossible and reduce the quality of the debug info down to a few 315 /// numbers. The idea is to condense the data into numbers that can be tracked 316 /// over time to identify trends in newer compiler versions and gauge the effect 317 /// of particular optimizations. The raw numbers themselves are not particularly 318 /// useful, only the delta between compiling the same program with different 319 /// compilers is. 320 bool collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx, 321 Twine Filename, raw_ostream &OS) { 322 StringRef FormatName = Obj.getFileFormatName(); 323 GlobalStats GlobalStats; 324 StringMap<PerFunctionStats> Statistics; 325 for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) 326 if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) 327 collectStatsRecursive(CUDie, "/", "g", 0, 0, 0, Statistics, GlobalStats); 328 329 /// The version number should be increased every time the algorithm is changed 330 /// (including bug fixes). New metrics may be added without increasing the 331 /// version. 332 unsigned Version = 3; 333 unsigned VarParamTotal = 0; 334 unsigned VarParamUnique = 0; 335 unsigned VarParamWithLoc = 0; 336 unsigned NumFunctions = 0; 337 unsigned NumInlinedFunctions = 0; 338 unsigned NumFuncsWithSrcLoc = 0; 339 unsigned NumAbstractOrigins = 0; 340 unsigned ParamTotal = 0; 341 unsigned ParamWithType = 0; 342 unsigned ParamWithLoc = 0; 343 unsigned ParamWithSrcLoc = 0; 344 unsigned VarTotal = 0; 345 unsigned VarWithType = 0; 346 unsigned VarWithSrcLoc = 0; 347 unsigned VarWithLoc = 0; 348 for (auto &Entry : Statistics) { 349 PerFunctionStats &Stats = Entry.getValue(); 350 unsigned TotalVars = Stats.VarsInFunction.size() * Stats.NumFnInlined; 351 // Count variables in concrete out-of-line functions and in global scope. 352 if (Stats.HasPCAddresses || !Stats.IsFunction) 353 TotalVars += Stats.VarsInFunction.size(); 354 unsigned Constants = Stats.ConstantMembers; 355 VarParamWithLoc += Stats.TotalVarWithLoc + Constants; 356 VarParamTotal += TotalVars; 357 VarParamUnique += Stats.VarsInFunction.size(); 358 LLVM_DEBUG(for (auto &V 359 : Stats.VarsInFunction) llvm::dbgs() 360 << Entry.getKey() << ": " << V.getKey() << "\n"); 361 NumFunctions += Stats.IsFunction; 362 NumFuncsWithSrcLoc += Stats.HasSourceLocation; 363 NumInlinedFunctions += Stats.IsFunction * Stats.NumFnInlined; 364 NumAbstractOrigins += Stats.IsFunction * Stats.NumAbstractOrigins; 365 ParamTotal += Stats.NumParams; 366 ParamWithType += Stats.NumParamTypes; 367 ParamWithLoc += Stats.NumParamLocations; 368 ParamWithSrcLoc += Stats.NumParamSourceLocations; 369 VarTotal += Stats.NumVars; 370 VarWithType += Stats.NumVarTypes; 371 VarWithLoc += Stats.NumVarLocations; 372 VarWithSrcLoc += Stats.NumVarSourceLocations; 373 } 374 375 // Print summary. 376 OS.SetBufferSize(1024); 377 OS << "{\"version\":" << Version; 378 LLVM_DEBUG(llvm::dbgs() << "Variable location quality metrics\n"; 379 llvm::dbgs() << "---------------------------------\n"); 380 printDatum(OS, "file", Filename.str()); 381 printDatum(OS, "format", FormatName); 382 printDatum(OS, "source functions", NumFunctions); 383 printDatum(OS, "source functions with location", NumFuncsWithSrcLoc); 384 printDatum(OS, "inlined functions", NumInlinedFunctions); 385 printDatum(OS, "inlined funcs with abstract origins", NumAbstractOrigins); 386 printDatum(OS, "unique source variables", VarParamUnique); 387 printDatum(OS, "source variables", VarParamTotal); 388 printDatum(OS, "variables with location", VarParamWithLoc); 389 printDatum(OS, "call site entries", GlobalStats.CallSiteEntries); 390 printDatum(OS, "scope bytes total", 391 GlobalStats.ScopeBytesFromFirstDefinition); 392 printDatum(OS, "scope bytes covered", GlobalStats.ScopeBytesCovered); 393 printDatum(OS, "total function size", GlobalStats.FunctionSize); 394 printDatum(OS, "total inlined function size", GlobalStats.InlineFunctionSize); 395 printDatum(OS, "total formal params", ParamTotal); 396 printDatum(OS, "formal params with source location", ParamWithSrcLoc); 397 printDatum(OS, "formal params with type", ParamWithType); 398 printDatum(OS, "formal params with binary location", ParamWithLoc); 399 printDatum(OS, "total vars", VarTotal); 400 printDatum(OS, "vars with source location", VarWithSrcLoc); 401 printDatum(OS, "vars with type", VarWithType); 402 printDatum(OS, "vars with binary location", VarWithLoc); 403 OS << "}\n"; 404 LLVM_DEBUG( 405 llvm::dbgs() << "Total Availability: " 406 << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal) 407 << "%\n"; 408 llvm::dbgs() << "PC Ranges covered: " 409 << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) / 410 GlobalStats.ScopeBytesFromFirstDefinition) 411 << "%\n"); 412 return true; 413 } 414