//=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a checker that checks for padding that could be
// removed by re-ordering members.
//
//===----------------------------------------------------------------------===//

#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/AST/CharUnits.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace clang;
using namespace ento;

namespace {
/// AST-level checker that compares each record's actual layout padding
/// against the padding of an optimal field ordering, and reports records
/// where the difference exceeds the configurable AllowedPad threshold.
class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
private:
  mutable std::unique_ptr<BugType> PaddingBug;
  // Cached by checkASTDecl so the visit* helpers can emit reports and
  // query the SourceManager.
  mutable BugReporter *BR;

public:
  /// Padding (in bytes) that a record/array may waste before a report is
  /// generated. Set from the checker's "AllowedPad" option at registration.
  int64_t AllowedPad;

  /// Entry point: walks the whole translation unit, dispatching each
  /// RecordDecl to visitRecord() and each VarDecl to visitVariable().
  void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
                    BugReporter &BRArg) const {
    BR = &BRArg;

    // The calls to checkAST* from AnalysisConsumer don't
    // visit template instantiations or lambda classes. We
    // want to visit those, so we make our own RecursiveASTVisitor.
    struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
      const PaddingChecker *Checker;
      bool shouldVisitTemplateInstantiations() const { return true; }
      bool shouldVisitImplicitCode() const { return true; }
      explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
      bool VisitRecordDecl(const RecordDecl *RD) {
        Checker->visitRecord(RD);
        return true;
      }
      bool VisitVarDecl(const VarDecl *VD) {
        Checker->visitVariable(VD);
        return true;
      }
      // TODO: Visit array new and mallocs for arrays.
    };

    LocalVisitor visitor(this);
    // TraverseDecl takes a non-const Decl*; the visitor itself only calls
    // const member functions, so the const_cast is benign.
    visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
  }

  /// Look for records of overly padded types. If padding *
  /// PadMultiplier exceeds AllowedPad, then generate a report.
  /// PadMultiplier is used to share code with the array padding
  /// checker.
  void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
    if (shouldSkipDecl(RD))
      return;

    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return;

    // This is the simplest correct case: a class with no fields and one base
    // class. Other cases are more complicated because of how the base classes
    // & fields might interact, so we don't bother dealing with them.
    // TODO: Support other combinations of base classes and fields.
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
      if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
        return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
                           PadMultiplier);

    auto &ASTContext = RD->getASTContext();
    const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
    assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));

    CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
    if (BaselinePad.isZero())
      return;

    CharUnits OptimalPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    std::tie(OptimalPad, OptimalFieldsOrder) =
        calculateOptimalPad(RD, ASTContext, RL);

    CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
    if (DiffPad.getQuantity() <= AllowedPad) {
      // The optimal layout can never be worse than the actual one, so the
      // excess must be non-negative whenever we decide not to report.
      assert(!DiffPad.isNegative() && "DiffPad should not be negative");
      // There is not enough excess padding to trigger a warning.
      return;
    }
    reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
  }

  /// Look for arrays of overly padded types. If the padding of the
  /// array type exceeds AllowedPad, then generate a report.
  void visitVariable(const VarDecl *VD) const {
    const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
    if (ArrTy == nullptr)
      return;
    uint64_t Elts = 0;
    if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
      Elts = CArrTy->getSize().getZExtValue();
    if (Elts == 0)
      return;
    const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
    if (RT == nullptr)
      return;

    // TODO: Recurse into the fields to see if they have excess padding.
    // Scale the record's padding by the element count so the whole array's
    // waste is compared against AllowedPad.
    visitRecord(RT->getDecl(), Elts);
  }

  /// Returns true for records this checker cannot (or should not) analyze:
  /// declarations without a visible definition or source location, records
  /// from system headers, unions, dependent templates, records with virtual
  /// or mixed bases, and records containing bitfields or flexible arrays.
  bool shouldSkipDecl(const RecordDecl *RD) const {
    // TODO: Figure out why we are going through declarations and not only
    // definitions.
    if (!(RD = RD->getDefinition()))
      return true;
    auto Location = RD->getLocation();
    // If the construct doesn't have a source file, then it's not something
    // we want to diagnose.
    if (!Location.isValid())
      return true;
    SrcMgr::CharacteristicKind Kind =
        BR->getSourceManager().getFileCharacteristic(Location);
    // Throw out all records that come from system headers.
    if (Kind != SrcMgr::C_User)
      return true;

    // Not going to attempt to optimize unions.
    if (RD->isUnion())
      return true;
    if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
      // Tail padding with base classes ends up being very complicated.
      // We will skip objects with base classes for now, unless they do not
      // have fields.
      // TODO: Handle more base class scenarios.
      if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
        return true;
      if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
        return true;
      // Virtual bases are complicated, skipping those for now.
      if (CXXRD->getNumVBases() != 0)
        return true;
      // Can't layout a template, so skip it. We do still layout the
      // instantiations though.
      if (CXXRD->getTypeForDecl()->isDependentType())
        return true;
      if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
        return true;
    }
    // How do you reorder fields if you haven't got any?
    else if (RD->field_empty())
      return true;

    auto IsTrickyField = [](const FieldDecl *FD) -> bool {
      // Bitfield layout is hard.
      if (FD->isBitField())
        return true;

      // Variable length arrays are tricky too.
      QualType Ty = FD->getType();
      if (Ty->isIncompleteArrayType())
        return true;
      return false;
    };

    if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
      return true;
    return false;
  }

  /// Computes the padding (in CharUnits) present in the record's actual
  /// layout: the gaps between consecutive fields plus any tail padding
  /// after the last field, as reported by the given ASTRecordLayout.
  static CharUnits calculateBaselinePad(const RecordDecl *RD,
                                        const ASTContext &ASTContext,
                                        const ASTRecordLayout &RL) {
    CharUnits PaddingSum;
    // Start measuring at the first field's offset so that space occupied by
    // vptrs / non-virtual bases before the fields is not counted as padding.
    CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    for (const FieldDecl *FD : RD->fields()) {
      // This checker only cares about the padded size of the
      // field, and not the data size. If the field is a record
      // with tail padding, then we won't put that number in our
      // total because reordering fields won't fix that problem.
      CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
      auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
      CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
      PaddingSum += (FieldOffset - Offset);
      Offset = FieldOffset + FieldSize;
    }
    // Whatever remains between the end of the last field and the record's
    // total size is tail padding.
    PaddingSum += RL.getSize() - Offset;
    return PaddingSum;
  }

  /// Optimal padding overview:
  /// 1. Find a close approximation to where we can place our first field.
  ///    This will usually be at offset 0.
  /// 2. Try to find the best field that can legally be placed at the current
  ///    offset.
  ///   a. "Best" is the largest alignment that is legal, but smallest size.
  ///      This is to account for overly aligned types.
  /// 3. If no fields can fit, pad by rounding the current offset up to the
  ///    smallest alignment requirement of our fields. Measure and track the
  ///    amount of padding added. Go back to 2.
  /// 4. Increment the current offset by the size of the chosen field.
  /// 5. Remove the chosen field from the set of future possibilities.
  /// 6. Go back to 2 if there are still unplaced fields.
  /// 7. Add tail padding by rounding the current offset up to the structure
  ///    alignment. Track the amount of padding added.
  static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
  calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
                      const ASTRecordLayout &RL) {
    struct FieldInfo {
      CharUnits Align;
      CharUnits Size;
      const FieldDecl *Field;
      bool operator<(const FieldInfo &RHS) const {
        // Order from small alignments to large alignments,
        // then large sizes to small sizes.
        // then large field indices to small field indices
        // (negating Size / field index inverts their sort direction inside
        // the lexicographic tuple comparison).
        return std::make_tuple(Align, -Size,
                               Field ? -static_cast<int>(Field->getFieldIndex())
                                     : 0) <
               std::make_tuple(
                   RHS.Align, -RHS.Size,
                   RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
                             : 0);
      }
    };
    SmallVector<FieldInfo, 20> Fields;
    auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
      FieldInfo RetVal;
      RetVal.Field = FD;
      auto &Ctx = FD->getASTContext();
      std::tie(RetVal.Size, RetVal.Align) =
          Ctx.getTypeInfoInChars(FD->getType());
      assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
      // Respect an explicit alignment attribute if it is stricter than the
      // type's natural alignment.
      if (auto Max = FD->getMaxAlignment())
        RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
      return RetVal;
    };
    std::transform(RD->field_begin(), RD->field_end(),
                   std::back_inserter(Fields), GatherSizesAndAlignments);
    llvm::sort(Fields);
    // This lets us skip over vptrs and non-virtual bases,
    // so that we can just worry about the fields in our object.
    // Note that this does cause us to miss some cases where we
    // could pack more bytes in to a base class's tail padding.
    CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
    CharUnits NewPad;
    SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
    while (!Fields.empty()) {
      // The largest alignment a field placed at NewOffset may require is the
      // largest power of two dividing NewOffset.
      unsigned TrailingZeros =
          llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
      // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
      // 64 will overflow our unsigned long long. Shifting 63 will turn
      // our long long (and CharUnits internal type) negative. So shift 62.
      long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
      CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
      FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
      auto CurBegin = Fields.begin();
      auto CurEnd = Fields.end();

      // In the typical case, this will find the last element
      // of the vector. We won't find a middle element unless
      // we started on a poorly aligned address or have an overly
      // aligned field.
      // (Fields is sorted ascending by alignment, so the element just before
      // the upper_bound of {CurAlignment, 0, nullptr} is the best legal
      // field: the largest alignment <= CurAlignment, smallest size.)
      auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
      if (Iter != CurBegin) {
        // We found a field that we can layout with the current alignment.
        --Iter;
        NewOffset += Iter->Size;
        OptimalFieldsOrder.push_back(Iter->Field);
        Fields.erase(Iter);
      } else {
        // We are poorly aligned, and we need to pad in order to layout another
        // field. Round up to at least the smallest field alignment that we
        // currently have.
        CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
        NewPad += NextOffset - NewOffset;
        NewOffset = NextOffset;
      }
    }
    // Calculate tail padding.
    CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
    NewPad += NewSize - NewOffset;
    return {NewPad, std::move(OptimalFieldsOrder)};
  }

  /// Emits an "Excessive Padding" performance report for RD, describing the
  /// actual vs. optimal padding and listing the optimal field order.
  void reportRecord(
      const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
      const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
    // BugType is created lazily on the first report.
    if (!PaddingBug)
      PaddingBug =
          llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");

    SmallString<100> Buf;
    llvm::raw_svector_ostream Os(Buf);
    Os << "Excessive padding in '";
    Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
                                LangOptions())
       << "'";

    if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
      // TODO: make this show up better in the console output and in
      // the HTML. Maybe just make it show up in HTML like the path
      // diagnostics show.
      SourceLocation ILoc = TSD->getPointOfInstantiation();
      if (ILoc.isValid())
        Os << " instantiated here: "
           << ILoc.printToString(BR->getSourceManager());
    }

    Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
       << OptimalPad.getQuantity() << " is optimal). \n"
       << "Optimal fields order: \n";
    for (const auto *FD : OptimalFieldsOrder)
      Os << FD->getName() << ", \n";
    Os << "consider reordering the fields or adding explicit padding "
          "members.";

    PathDiagnosticLocation CELoc =
        PathDiagnosticLocation::create(RD, BR->getSourceManager());
    auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
    Report->setDeclWithIssue(RD);
    Report->addRange(RD->getSourceRange());
    BR->emitReport(std::move(Report));
  }
};
} // namespace

void ento::registerPaddingChecker(CheckerManager &Mgr) {
  auto *Checker = Mgr.registerChecker<PaddingChecker>();
  // "AllowedPad" defaults to 24 bytes of tolerated excess padding.
  Checker->AllowedPad = Mgr.getAnalyzerOptions()
      .getCheckerIntegerOption(Checker, "AllowedPad", 24);
  assert(Checker->AllowedPad >= 0 &&
         "AllowedPad option should be non-negative");
}

bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) {
  return true;
}