1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" 10 #include "clang/AST/ASTContext.h" 11 #include "clang/AST/Decl.h" 12 #include "clang/AST/DynamicRecursiveASTVisitor.h" 13 #include "clang/AST/Expr.h" 14 #include "clang/AST/FormatString.h" 15 #include "clang/AST/Stmt.h" 16 #include "clang/AST/StmtVisitor.h" 17 #include "clang/AST/Type.h" 18 #include "clang/ASTMatchers/ASTMatchFinder.h" 19 #include "clang/ASTMatchers/ASTMatchers.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Lex/Lexer.h" 22 #include "clang/Lex/Preprocessor.h" 23 #include "llvm/ADT/APSInt.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/Support/Casting.h" 27 #include <memory> 28 #include <optional> 29 #include <queue> 30 #include <sstream> 31 32 using namespace llvm; 33 using namespace clang; 34 using namespace ast_matchers; 35 36 #ifndef NDEBUG 37 namespace { 38 class StmtDebugPrinter 39 : public ConstStmtVisitor<StmtDebugPrinter, std::string> { 40 public: 41 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); } 42 43 std::string VisitBinaryOperator(const BinaryOperator *BO) { 44 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")"; 45 } 46 47 std::string VisitUnaryOperator(const UnaryOperator *UO) { 48 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")"; 49 } 50 51 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { 52 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")"; 53 } 54 }; 55 56 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form: 57 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...". 58 static std::string getDREAncestorString(const DeclRefExpr *DRE, 59 ASTContext &Ctx) { 60 std::stringstream SS; 61 const Stmt *St = DRE; 62 StmtDebugPrinter StmtPriner; 63 64 do { 65 SS << StmtPriner.Visit(St); 66 67 DynTypedNodeList StParents = Ctx.getParents(*St); 68 69 if (StParents.size() > 1) 70 return "unavailable due to multiple parents"; 71 if (StParents.size() == 0) 72 break; 73 St = StParents.begin()->get<Stmt>(); 74 if (St) 75 SS << " ==> "; 76 } while (St); 77 return SS.str(); 78 } 79 } // namespace 80 #endif /* NDEBUG */ 81 82 namespace clang::ast_matchers { 83 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" 84 // except for those belonging to a different callable of "n". 85 class MatchDescendantVisitor : public DynamicRecursiveASTVisitor { 86 public: 87 // Creates an AST visitor that matches `Matcher` on all 88 // descendants of a given node "n" except for the ones 89 // belonging to a different callable of "n". 90 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, 91 internal::ASTMatchFinder *Finder, 92 internal::BoundNodesTreeBuilder *Builder, 93 internal::ASTMatchFinder::BindKind Bind, 94 const bool ignoreUnevaluatedContext) 95 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), 96 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) { 97 ShouldVisitTemplateInstantiations = true; 98 ShouldVisitImplicitCode = false; // TODO: let's ignore implicit code for now 99 } 100 101 // Returns true if a match is found in a subtree of `DynNode`, which belongs 102 // to the same callable of `DynNode`. 103 bool findMatch(const DynTypedNode &DynNode) { 104 Matches = false; 105 if (const Stmt *StmtNode = DynNode.get<Stmt>()) { 106 TraverseStmt(const_cast<Stmt *>(StmtNode)); 107 *Builder = ResultBindings; 108 return Matches; 109 } 110 return false; 111 } 112 113 // The following are overriding methods from the base visitor class. 114 // They are public only to allow CRTP to work. They are *not *part 115 // of the public API of this class. 116 117 // For the matchers so far used in safe buffers, we only need to match 118 // `Stmt`s. To override more as needed. 119 120 bool TraverseDecl(Decl *Node) override { 121 if (!Node) 122 return true; 123 if (!match(*Node)) 124 return false; 125 // To skip callables: 126 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) 127 return true; 128 // Traverse descendants 129 return DynamicRecursiveASTVisitor::TraverseDecl(Node); 130 } 131 132 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) override { 133 // These are unevaluated, except the result expression. 134 if (ignoreUnevaluatedContext) 135 return TraverseStmt(Node->getResultExpr()); 136 return DynamicRecursiveASTVisitor::TraverseGenericSelectionExpr(Node); 137 } 138 139 bool 140 TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) override { 141 // Unevaluated context. 142 if (ignoreUnevaluatedContext) 143 return true; 144 return DynamicRecursiveASTVisitor::TraverseUnaryExprOrTypeTraitExpr(Node); 145 } 146 147 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) override { 148 // Unevaluated context. 149 if (ignoreUnevaluatedContext) 150 return true; 151 return DynamicRecursiveASTVisitor::TraverseTypeOfExprTypeLoc(Node); 152 } 153 154 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) override { 155 // Unevaluated context. 156 if (ignoreUnevaluatedContext) 157 return true; 158 return DynamicRecursiveASTVisitor::TraverseDecltypeTypeLoc(Node); 159 } 160 161 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) override { 162 // Unevaluated context. 163 if (ignoreUnevaluatedContext) 164 return true; 165 return DynamicRecursiveASTVisitor::TraverseCXXNoexceptExpr(Node); 166 } 167 168 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) override { 169 // Unevaluated context. 170 if (ignoreUnevaluatedContext) 171 return true; 172 return DynamicRecursiveASTVisitor::TraverseCXXTypeidExpr(Node); 173 } 174 175 bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) override { 176 if (!TraverseStmt(Node->getExpr())) 177 return false; 178 return DynamicRecursiveASTVisitor::TraverseCXXDefaultInitExpr(Node); 179 } 180 181 bool TraverseStmt(Stmt *Node) override { 182 if (!Node) 183 return true; 184 if (!match(*Node)) 185 return false; 186 return DynamicRecursiveASTVisitor::TraverseStmt(Node); 187 } 188 189 private: 190 // Sets 'Matched' to true if 'Matcher' matches 'Node' 191 // 192 // Returns 'true' if traversal should continue after this function 193 // returns, i.e. if no match is found or 'Bind' is 'BK_All'. 194 template <typename T> bool match(const T &Node) { 195 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); 196 197 if (Matcher->matches(DynTypedNode::create(Node), Finder, 198 &RecursiveBuilder)) { 199 ResultBindings.addMatch(RecursiveBuilder); 200 Matches = true; 201 if (Bind != internal::ASTMatchFinder::BK_All) 202 return false; // Abort as soon as a match is found. 203 } 204 return true; 205 } 206 207 const internal::DynTypedMatcher *const Matcher; 208 internal::ASTMatchFinder *const Finder; 209 internal::BoundNodesTreeBuilder *const Builder; 210 internal::BoundNodesTreeBuilder ResultBindings; 211 const internal::ASTMatchFinder::BindKind Bind; 212 bool Matches; 213 bool ignoreUnevaluatedContext; 214 }; 215 216 // Because we're dealing with raw pointers, let's define what we mean by that. 217 static auto hasPointerType() { 218 return hasType(hasCanonicalType(pointerType())); 219 } 220 221 static auto hasArrayType() { return hasType(hasCanonicalType(arrayType())); } 222 223 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, 224 innerMatcher) { 225 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 226 227 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, 228 true); 229 return Visitor.findMatch(DynTypedNode::create(Node)); 230 } 231 232 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, 233 innerMatcher) { 234 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 235 236 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, 237 false); 238 return Visitor.findMatch(DynTypedNode::create(Node)); 239 } 240 241 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region 242 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, 243 Handler) { 244 return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); 245 } 246 247 AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer, 248 const UnsafeBufferUsageHandler *, Handler) { 249 return Handler->ignoreUnsafeBufferInContainer(Node.getBeginLoc()); 250 } 251 252 AST_MATCHER_P(Stmt, ignoreUnsafeLibcCall, const UnsafeBufferUsageHandler *, 253 Handler) { 254 if (Finder->getASTContext().getLangOpts().CPlusPlus) 255 return Handler->ignoreUnsafeBufferInLibcCall(Node.getBeginLoc()); 256 return true; /* Only warn about libc calls for C++ */ 257 } 258 259 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { 260 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); 261 } 262 263 // Matches a `UnaryOperator` whose operator is pre-increment: 264 AST_MATCHER(UnaryOperator, isPreInc) { 265 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; 266 } 267 268 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 269 // matches 'e' and 'e' is in an Unspecified Lvalue Context. 270 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { 271 // clang-format off 272 return 273 expr(anyOf( 274 implicitCastExpr( 275 hasCastKind(CastKind::CK_LValueToRValue), 276 castSubExpr(innerMatcher)), 277 binaryOperator( 278 hasAnyOperatorName("="), 279 hasLHS(innerMatcher) 280 ) 281 )); 282 // clang-format on 283 } 284 285 // Returns a matcher that matches any expression `e` such that `InnerMatcher` 286 // matches `e` and `e` is in an Unspecified Pointer Context (UPC). 287 static internal::Matcher<Stmt> 288 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { 289 // A UPC can be 290 // 1. an argument of a function call (except the callee has [[unsafe_...]] 291 // attribute), or 292 // 2. the operand of a pointer-to-(integer or bool) cast operation; or 293 // 3. the operand of a comparator operation; or 294 // 4. the operand of a pointer subtraction operation 295 // (i.e., computing the distance between two pointers); or ... 296 297 // clang-format off 298 auto CallArgMatcher = callExpr( 299 forEachArgumentWithParamType( 300 InnerMatcher, 301 isAnyPointer() /* array also decays to pointer type*/), 302 unless(callee( 303 functionDecl(hasAttr(attr::UnsafeBufferUsage))))); 304 305 auto CastOperandMatcher = 306 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), 307 hasCastKind(CastKind::CK_PointerToBoolean)), 308 castSubExpr(allOf(hasPointerType(), InnerMatcher))); 309 310 auto CompOperandMatcher = 311 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), 312 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), 313 hasRHS(allOf(hasPointerType(), InnerMatcher)))); 314 315 // A matcher that matches pointer subtractions: 316 auto PtrSubtractionMatcher = 317 binaryOperator(hasOperatorName("-"), 318 // Note that here we need both LHS and RHS to be 319 // pointer. Then the inner matcher can match any of 320 // them: 321 allOf(hasLHS(hasPointerType()), 322 hasRHS(hasPointerType())), 323 eachOf(hasLHS(InnerMatcher), 324 hasRHS(InnerMatcher))); 325 // clang-format on 326 327 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, 328 PtrSubtractionMatcher)); 329 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we 330 // don't have to check that.) 331 } 332 333 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 334 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression 335 // 'e' isn't evaluated to an RValue). For example, consider the following code: 336 // int *p = new int[4]; 337 // int *q = new int[4]; 338 // if ((p = q)) {} 339 // p = q; 340 // The expression `p = q` in the conditional of the `if` statement 341 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` 342 // in the assignment statement is in an untyped context. 343 static internal::Matcher<Stmt> 344 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { 345 // An unspecified context can be 346 // 1. A compound statement, 347 // 2. The body of an if statement 348 // 3. Body of a loop 349 auto CompStmt = compoundStmt(forEach(InnerMatcher)); 350 auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); 351 auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); 352 // FIXME: Handle loop bodies. 353 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); 354 } 355 356 // Given a two-param std::span construct call, matches iff the call has the 357 // following forms: 358 // 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE 359 // 2. `std::span<T>{new T, 1}` 360 // 3. `std::span<T>{&var, 1}` 361 // 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size 362 // `n` 363 // 5. `std::span<T>{any, 0}` 364 // 6. `std::span<T>{std::addressof(...), 1}` 365 AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) { 366 assert(Node.getNumArgs() == 2 && 367 "expecting a two-parameter std::span constructor"); 368 const Expr *Arg0 = Node.getArg(0)->IgnoreImplicit(); 369 const Expr *Arg1 = Node.getArg(1)->IgnoreImplicit(); 370 auto HaveEqualConstantValues = [&Finder](const Expr *E0, const Expr *E1) { 371 if (auto E0CV = E0->getIntegerConstantExpr(Finder->getASTContext())) 372 if (auto E1CV = E1->getIntegerConstantExpr(Finder->getASTContext())) { 373 return APSInt::compareValues(*E0CV, *E1CV) == 0; 374 } 375 return false; 376 }; 377 auto AreSameDRE = [](const Expr *E0, const Expr *E1) { 378 if (auto *DRE0 = dyn_cast<DeclRefExpr>(E0)) 379 if (auto *DRE1 = dyn_cast<DeclRefExpr>(E1)) { 380 return DRE0->getDecl() == DRE1->getDecl(); 381 } 382 return false; 383 }; 384 std::optional<APSInt> Arg1CV = 385 Arg1->getIntegerConstantExpr(Finder->getASTContext()); 386 387 if (Arg1CV && Arg1CV->isZero()) 388 // Check form 5: 389 return true; 390 switch (Arg0->IgnoreImplicit()->getStmtClass()) { 391 case Stmt::CXXNewExprClass: 392 if (auto Size = cast<CXXNewExpr>(Arg0)->getArraySize()) { 393 // Check form 1: 394 return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) || 395 HaveEqualConstantValues(*Size, Arg1); 396 } 397 // TODO: what's placeholder type? avoid it for now. 398 if (!cast<CXXNewExpr>(Arg0)->hasPlaceholderType()) { 399 // Check form 2: 400 return Arg1CV && Arg1CV->isOne(); 401 } 402 break; 403 case Stmt::UnaryOperatorClass: 404 if (cast<UnaryOperator>(Arg0)->getOpcode() == 405 UnaryOperator::Opcode::UO_AddrOf) 406 // Check form 3: 407 return Arg1CV && Arg1CV->isOne(); 408 break; 409 case Stmt::CallExprClass: 410 if (const auto *CE = dyn_cast<CallExpr>(Arg0)) { 411 const auto FnDecl = CE->getDirectCallee(); 412 if (FnDecl && FnDecl->getNameAsString() == "addressof" && 413 FnDecl->isInStdNamespace()) { 414 return Arg1CV && Arg1CV->isOne(); 415 } 416 } 417 break; 418 default: 419 break; 420 } 421 422 QualType Arg0Ty = Arg0->IgnoreImplicit()->getType(); 423 424 if (auto *ConstArrTy = 425 Finder->getASTContext().getAsConstantArrayType(Arg0Ty)) { 426 const APSInt ConstArrSize = APSInt(ConstArrTy->getSize()); 427 428 // Check form 4: 429 return Arg1CV && APSInt::compareValues(ConstArrSize, *Arg1CV) == 0; 430 } 431 return false; 432 } 433 434 AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) { 435 // FIXME: Proper solution: 436 // - refactor Sema::CheckArrayAccess 437 // - split safe/OOB/unknown decision logic from diagnostics emitting code 438 // - e. g. "Try harder to find a NamedDecl to point at in the note." 439 // already duplicated 440 // - call both from Sema and from here 441 442 uint64_t limit; 443 if (const auto *CATy = 444 dyn_cast<ConstantArrayType>(Node.getBase() 445 ->IgnoreParenImpCasts() 446 ->getType() 447 ->getUnqualifiedDesugaredType())) { 448 limit = CATy->getLimitedSize(); 449 } else if (const auto *SLiteral = dyn_cast<StringLiteral>( 450 Node.getBase()->IgnoreParenImpCasts())) { 451 limit = SLiteral->getLength() + 1; 452 } else { 453 return false; 454 } 455 456 Expr::EvalResult EVResult; 457 const Expr *IndexExpr = Node.getIdx(); 458 if (!IndexExpr->isValueDependent() && 459 IndexExpr->EvaluateAsInt(EVResult, Finder->getASTContext())) { 460 llvm::APSInt ArrIdx = EVResult.Val.getInt(); 461 // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a 462 // bug 463 if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < limit) 464 return true; 465 } 466 return false; 467 } 468 469 AST_MATCHER_P(CallExpr, hasNumArgs, unsigned, Num) { 470 return Node.getNumArgs() == Num; 471 } 472 473 namespace libc_func_matchers { 474 // Under `libc_func_matchers`, define a set of matchers that match unsafe 475 // functions in libc and unsafe calls to them. 476 477 // A tiny parser to strip off common prefix and suffix of libc function names 478 // in real code. 479 // 480 // Given a function name, `matchName` returns `CoreName` according to the 481 // following grammar: 482 // 483 // LibcName := CoreName | CoreName + "_s" 484 // MatchingName := "__builtin_" + LibcName | 485 // "__builtin___" + LibcName + "_chk" | 486 // "__asan_" + LibcName 487 // 488 struct LibcFunNamePrefixSuffixParser { 489 StringRef matchName(StringRef FunName, bool isBuiltin) { 490 // Try to match __builtin_: 491 if (isBuiltin && FunName.starts_with("__builtin_")) 492 // Then either it is __builtin_LibcName or __builtin___LibcName_chk or 493 // no match: 494 return matchLibcNameOrBuiltinChk( 495 FunName.drop_front(10 /* truncate "__builtin_" */)); 496 // Try to match __asan_: 497 if (FunName.starts_with("__asan_")) 498 return matchLibcName(FunName.drop_front(7 /* truncate of "__asan_" */)); 499 return matchLibcName(FunName); 500 } 501 502 // Parameter `Name` is the substring after stripping off the prefix 503 // "__builtin_". 504 StringRef matchLibcNameOrBuiltinChk(StringRef Name) { 505 if (Name.starts_with("__") && Name.ends_with("_chk")) 506 return matchLibcName( 507 Name.drop_front(2).drop_back(4) /* truncate "__" and "_chk" */); 508 return matchLibcName(Name); 509 } 510 511 StringRef matchLibcName(StringRef Name) { 512 if (Name.ends_with("_s")) 513 return Name.drop_back(2 /* truncate "_s" */); 514 return Name; 515 } 516 }; 517 518 // A pointer type expression is known to be null-terminated, if it has the 519 // form: E.c_str(), for any expression E of `std::string` type. 520 static bool isNullTermPointer(const Expr *Ptr) { 521 if (isa<StringLiteral>(Ptr->IgnoreParenImpCasts())) 522 return true; 523 if (isa<PredefinedExpr>(Ptr->IgnoreParenImpCasts())) 524 return true; 525 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Ptr->IgnoreParenImpCasts())) { 526 const CXXMethodDecl *MD = MCE->getMethodDecl(); 527 const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl(); 528 529 if (MD && RD && RD->isInStdNamespace()) 530 if (MD->getName() == "c_str" && RD->getName() == "basic_string") 531 return true; 532 } 533 return false; 534 } 535 536 // Return true iff at least one of following cases holds: 537 // 1. Format string is a literal and there is an unsafe pointer argument 538 // corresponding to an `s` specifier; 539 // 2. Format string is not a literal and there is least an unsafe pointer 540 // argument (including the formatter argument). 541 // 542 // `UnsafeArg` is the output argument that will be set only if this function 543 // returns true. 544 static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg, 545 const unsigned FmtArgIdx, ASTContext &Ctx, 546 bool isKprintf = false) { 547 class StringFormatStringHandler 548 : public analyze_format_string::FormatStringHandler { 549 const CallExpr *Call; 550 unsigned FmtArgIdx; 551 const Expr *&UnsafeArg; 552 553 public: 554 StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx, 555 const Expr *&UnsafeArg) 556 : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {} 557 558 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 559 const char *startSpecifier, 560 unsigned specifierLen, 561 const TargetInfo &Target) override { 562 if (FS.getConversionSpecifier().getKind() == 563 analyze_printf::PrintfConversionSpecifier::sArg) { 564 unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx; 565 566 if (0 < ArgIdx && ArgIdx < Call->getNumArgs()) 567 if (!isNullTermPointer(Call->getArg(ArgIdx))) { 568 UnsafeArg = Call->getArg(ArgIdx); // output 569 // returning false stops parsing immediately 570 return false; 571 } 572 } 573 return true; // continue parsing 574 } 575 }; 576 577 const Expr *Fmt = Call->getArg(FmtArgIdx); 578 579 if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) { 580 StringRef FmtStr; 581 582 if (SL->getCharByteWidth() == 1) 583 FmtStr = SL->getString(); 584 else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx)) 585 FmtStr = *EvaledFmtStr; 586 else 587 goto CHECK_UNSAFE_PTR; 588 589 StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg); 590 591 return analyze_format_string::ParsePrintfString( 592 Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(), 593 Ctx.getTargetInfo(), isKprintf); 594 } 595 CHECK_UNSAFE_PTR: 596 // If format is not a string literal, we cannot analyze the format string. 597 // In this case, this call is considered unsafe if at least one argument 598 // (including the format argument) is unsafe pointer. 599 return llvm::any_of( 600 llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()), 601 [&UnsafeArg](const Expr *Arg) -> bool { 602 if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) { 603 UnsafeArg = Arg; 604 return true; 605 } 606 return false; 607 }); 608 } 609 610 // Matches a FunctionDecl node such that 611 // 1. It's name, after stripping off predefined prefix and suffix, is 612 // `CoreName`; and 613 // 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which 614 // is a set of libc function names. 615 // 616 // Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`. 617 // The notation `CoreName[str/wcs]` means a new name obtained from replace 618 // string "wcs" with "str" in `CoreName`. 619 AST_MATCHER(FunctionDecl, isPredefinedUnsafeLibcFunc) { 620 static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr; 621 if (!PredefinedNames) 622 PredefinedNames = 623 std::make_unique<std::set<StringRef>, std::set<StringRef>>({ 624 // numeric conversion: 625 "atof", 626 "atoi", 627 "atol", 628 "atoll", 629 "strtol", 630 "strtoll", 631 "strtoul", 632 "strtoull", 633 "strtof", 634 "strtod", 635 "strtold", 636 "strtoimax", 637 "strtoumax", 638 // "strfromf", "strfromd", "strfroml", // C23? 639 // string manipulation: 640 "strcpy", 641 "strncpy", 642 "strlcpy", 643 "strcat", 644 "strncat", 645 "strlcat", 646 "strxfrm", 647 "strdup", 648 "strndup", 649 // string examination: 650 "strlen", 651 "strnlen", 652 "strcmp", 653 "strncmp", 654 "stricmp", 655 "strcasecmp", 656 "strcoll", 657 "strchr", 658 "strrchr", 659 "strspn", 660 "strcspn", 661 "strpbrk", 662 "strstr", 663 "strtok", 664 // "mem-" functions 665 "memchr", 666 "wmemchr", 667 "memcmp", 668 "wmemcmp", 669 "memcpy", 670 "memccpy", 671 "mempcpy", 672 "wmemcpy", 673 "memmove", 674 "wmemmove", 675 "memset", 676 "wmemset", 677 // IO: 678 "fread", 679 "fwrite", 680 "fgets", 681 "fgetws", 682 "gets", 683 "fputs", 684 "fputws", 685 "puts", 686 // others 687 "strerror_s", 688 "strerror_r", 689 "bcopy", 690 "bzero", 691 "bsearch", 692 "qsort", 693 }); 694 695 auto *II = Node.getIdentifier(); 696 697 if (!II) 698 return false; 699 700 StringRef Name = LibcFunNamePrefixSuffixParser().matchName( 701 II->getName(), Node.getBuiltinID()); 702 703 // Match predefined names: 704 if (PredefinedNames->find(Name) != PredefinedNames->end()) 705 return true; 706 707 std::string NameWCS = Name.str(); 708 size_t WcsPos = NameWCS.find("wcs"); 709 710 while (WcsPos != std::string::npos) { 711 NameWCS[WcsPos++] = 's'; 712 NameWCS[WcsPos++] = 't'; 713 NameWCS[WcsPos++] = 'r'; 714 WcsPos = NameWCS.find("wcs", WcsPos); 715 } 716 if (PredefinedNames->find(NameWCS) != PredefinedNames->end()) 717 return true; 718 // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They 719 // all should end with "scanf"): 720 return Name.ends_with("scanf"); 721 } 722 723 // Match a call to one of the `v*printf` functions taking `va_list`. We cannot 724 // check safety for these functions so they should be changed to their 725 // non-va_list versions. 726 AST_MATCHER(FunctionDecl, isUnsafeVaListPrintfFunc) { 727 auto *II = Node.getIdentifier(); 728 729 if (!II) 730 return false; 731 732 StringRef Name = LibcFunNamePrefixSuffixParser().matchName( 733 II->getName(), Node.getBuiltinID()); 734 735 if (!Name.ends_with("printf")) 736 return false; // neither printf nor scanf 737 return Name.starts_with("v"); 738 } 739 740 // Matches a call to one of the `sprintf` functions as they are always unsafe 741 // and should be changed to `snprintf`. 742 AST_MATCHER(FunctionDecl, isUnsafeSprintfFunc) { 743 auto *II = Node.getIdentifier(); 744 745 if (!II) 746 return false; 747 748 StringRef Name = LibcFunNamePrefixSuffixParser().matchName( 749 II->getName(), Node.getBuiltinID()); 750 751 if (!Name.ends_with("printf") || 752 // Let `isUnsafeVaListPrintfFunc` check for cases with va-list: 753 Name.starts_with("v")) 754 return false; 755 756 StringRef Prefix = Name.drop_back(6); 757 758 if (Prefix.ends_with("w")) 759 Prefix = Prefix.drop_back(1); 760 return Prefix == "s"; 761 } 762 763 // Match function declarations of `printf`, `fprintf`, `snprintf` and their wide 764 // character versions. Calls to these functions can be safe if their arguments 765 // are carefully made safe. 766 AST_MATCHER(FunctionDecl, isNormalPrintfFunc) { 767 auto *II = Node.getIdentifier(); 768 769 if (!II) 770 return false; 771 772 StringRef Name = LibcFunNamePrefixSuffixParser().matchName( 773 II->getName(), Node.getBuiltinID()); 774 775 if (!Name.ends_with("printf") || Name.starts_with("v")) 776 return false; 777 778 StringRef Prefix = Name.drop_back(6); 779 780 if (Prefix.ends_with("w")) 781 Prefix = Prefix.drop_back(1); 782 783 return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn"; 784 } 785 786 // This matcher requires that it is known that the callee `isNormalPrintf`. 787 // Then if the format string is a string literal, this matcher matches when at 788 // least one string argument is unsafe. If the format is not a string literal, 789 // this matcher matches when at least one pointer type argument is unsafe. 790 AST_MATCHER_P(CallExpr, hasUnsafePrintfStringArg, 791 clang::ast_matchers::internal::Matcher<Expr>, 792 UnsafeStringArgMatcher) { 793 // Determine what printf it is by examining formal parameters: 794 const FunctionDecl *FD = Node.getDirectCallee(); 795 796 assert(FD && "It should have been checked that FD is non-null."); 797 798 unsigned NumParms = FD->getNumParams(); 799 800 if (NumParms < 1) 801 return false; // possibly some user-defined printf function 802 803 ASTContext &Ctx = Finder->getASTContext(); 804 QualType FirstParmTy = FD->getParamDecl(0)->getType(); 805 806 if (!FirstParmTy->isPointerType()) 807 return false; // possibly some user-defined printf function 808 809 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType(); 810 811 if (!Ctx.getFILEType() 812 .isNull() && //`FILE *` must be in the context if it is fprintf 813 FirstPteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) { 814 // It is a fprintf: 815 const Expr *UnsafeArg; 816 817 if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 1, Ctx, false)) 818 return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); 819 return false; 820 } 821 822 if (FirstPteTy.isConstQualified()) { 823 // If the first parameter is a `const char *`, it is a printf/kprintf: 824 bool isKprintf = false; 825 const Expr *UnsafeArg; 826 827 if (auto *II = FD->getIdentifier()) 828 isKprintf = II->getName() == "kprintf"; 829 if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 0, Ctx, isKprintf)) 830 return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); 831 return false; 832 } 833 834 if (NumParms > 2) { 835 QualType SecondParmTy = FD->getParamDecl(1)->getType(); 836 837 if (!FirstPteTy.isConstQualified() && SecondParmTy->isIntegerType()) { 838 // If the first parameter type is non-const qualified `char *` and the 839 // second is an integer, it is a snprintf: 840 const Expr *UnsafeArg; 841 842 if (hasUnsafeFormatOrSArg(&Node, UnsafeArg, 2, Ctx, false)) 843 return UnsafeStringArgMatcher.matches(*UnsafeArg, Finder, Builder); 844 return false; 845 } 846 } 847 // We don't really recognize this "normal" printf, the only thing we 848 // can do is to require all pointers to be null-terminated: 849 for (auto Arg : Node.arguments()) 850 if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) 851 if (UnsafeStringArgMatcher.matches(*Arg, Finder, Builder)) 852 return true; 853 return false; 854 } 855 856 // This matcher requires that it is known that the callee `isNormalPrintf`. 857 // Then it matches if the first two arguments of the call is a pointer and an 858 // integer and they are not in a safe pattern. 859 // 860 // For the first two arguments: `ptr` and `size`, they are safe if in the 861 // following patterns: 862 // 863 // Pattern 1: 864 // ptr := DRE.data(); 865 // size:= DRE.size()/DRE.size_bytes() 866 // And DRE is a hardened container or view. 867 // 868 // Pattern 2: 869 // ptr := Constant-Array-DRE; 870 // size:= any expression that has compile-time constant value equivalent to 871 // sizeof (Constant-Array-DRE) 872 AST_MATCHER(CallExpr, hasUnsafeSnprintfBuffer) { 873 const FunctionDecl *FD = Node.getDirectCallee(); 874 875 assert(FD && "It should have been checked that FD is non-null."); 876 877 if (FD->getNumParams() < 3) 878 return false; // Not an snprint 879 880 QualType FirstParmTy = FD->getParamDecl(0)->getType(); 881 882 if (!FirstParmTy->isPointerType()) 883 return false; // Not an snprint 884 885 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType(); 886 const Expr *Buf = Node.getArg(0), *Size = Node.getArg(1); 887 888 if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() || 889 !Size->getType()->isIntegerType()) 890 return false; // not an snprintf call 891 892 // Pattern 1: 893 static StringRef SizedObjs[] = {"span", "array", "vector", 894 "basic_string_view", "basic_string"}; 895 Buf = Buf->IgnoreParenImpCasts(); 896 Size = Size->IgnoreParenImpCasts(); 897 if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Buf)) 898 if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Size)) { 899 auto *DREOfPtr = dyn_cast<DeclRefExpr>( 900 MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts()); 901 auto *DREOfSize = dyn_cast<DeclRefExpr>( 902 MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts()); 903 904 if (!DREOfPtr || !DREOfSize) 905 return true; // not in safe pattern 906 if (DREOfPtr->getDecl() != DREOfSize->getDecl()) 907 return true; // not in safe pattern 908 if (MCEPtr->getMethodDecl()->getName() != "data") 909 return true; // not in safe pattern 910 911 if (MCESize->getMethodDecl()->getName() == "size_bytes" || 912 // Note here the pointer must be a pointer-to-char type unless there 913 // is explicit casting. If there is explicit casting, this branch 914 // is unreachable. Thus, at this branch "size" and "size_bytes" are 915 // equivalent as the pointer is a char pointer: 916 MCESize->getMethodDecl()->getName() == "size") 917 for (StringRef SizedObj : SizedObjs) 918 if (MCEPtr->getRecordDecl()->isInStdNamespace() && 919 MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() == 920 SizedObj) 921 return false; // It is in fact safe 922 } 923 924 // Pattern 2: 925 if (auto *DRE = dyn_cast<DeclRefExpr>(Buf->IgnoreParenImpCasts())) { 926 ASTContext &Ctx = Finder->getASTContext(); 927 928 if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) { 929 Expr::EvalResult ER; 930 // The array element type must be compatible with `char` otherwise an 931 // explicit cast will be needed, which will make this check unreachable. 932 // Therefore, the array extent is same as its' bytewise size. 933 if (Size->EvaluateAsConstantExpr(ER, Ctx)) { 934 APSInt EVal = ER.Val.getInt(); // Size must have integer type 935 936 return APSInt::compareValues(EVal, APSInt(CAT->getSize(), true)) != 0; 937 } 938 } 939 } 940 return true; // ptr and size are not in safe pattern 941 } 942 } // namespace libc_func_matchers 943 } // namespace clang::ast_matchers 944 945 namespace { 946 // Because the analysis revolves around variables and their types, we'll need to 947 // track uses of variables (aka DeclRefExprs). 948 using DeclUseList = SmallVector<const DeclRefExpr *, 1>; 949 950 // Convenience typedef. 951 using FixItList = SmallVector<FixItHint, 4>; 952 } // namespace 953 954 namespace { 955 /// Gadget is an individual operation in the code that may be of interest to 956 /// this analysis. Each (non-abstract) subclass corresponds to a specific 957 /// rigid AST structure that constitutes an operation on a pointer-type object. 958 /// Discovery of a gadget in the code corresponds to claiming that we understand 959 /// what this part of code is doing well enough to potentially improve it. 960 /// Gadgets can be warning (immediately deserving a warning) or fixable (not 961 /// always deserving a warning per se, but requires our attention to identify 962 /// it warrants a fixit). 963 class Gadget { 964 public: 965 enum class Kind { 966 #define GADGET(x) x, 967 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 968 }; 969 970 /// Common type of ASTMatchers used for discovering gadgets. 971 /// Useful for implementing the static matcher() methods 972 /// that are expected from all non-abstract subclasses. 973 using Matcher = decltype(stmt()); 974 975 Gadget(Kind K) : K(K) {} 976 977 Kind getKind() const { return K; } 978 979 #ifndef NDEBUG 980 StringRef getDebugName() const { 981 switch (K) { 982 #define GADGET(x) \ 983 case Kind::x: \ 984 return #x; 985 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 986 } 987 llvm_unreachable("Unhandled Gadget::Kind enum"); 988 } 989 #endif 990 991 virtual bool isWarningGadget() const = 0; 992 // TODO remove this method from WarningGadget interface. It's only used for 993 // debug prints in FixableGadget. 994 virtual SourceLocation getSourceLoc() const = 0; 995 996 /// Returns the list of pointer-type variables on which this gadget performs 997 /// its operation. Typically, there's only one variable. This isn't a list 998 /// of all DeclRefExprs in the gadget's AST! 999 virtual DeclUseList getClaimedVarUseSites() const = 0; 1000 1001 virtual ~Gadget() = default; 1002 1003 private: 1004 Kind K; 1005 }; 1006 1007 /// Warning gadgets correspond to unsafe code patterns that warrants 1008 /// an immediate warning. 1009 class WarningGadget : public Gadget { 1010 public: 1011 WarningGadget(Kind K) : Gadget(K) {} 1012 1013 static bool classof(const Gadget *G) { return G->isWarningGadget(); } 1014 bool isWarningGadget() const final { return true; } 1015 1016 virtual void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1017 bool IsRelatedToDecl, 1018 ASTContext &Ctx) const = 0; 1019 }; 1020 1021 /// Fixable gadgets correspond to code patterns that aren't always unsafe but 1022 /// need to be properly recognized in order to emit fixes. For example, if a raw 1023 /// pointer-type variable is replaced by a safe C++ container, every use of such 1024 /// variable must be carefully considered and possibly updated. 1025 class FixableGadget : public Gadget { 1026 public: 1027 FixableGadget(Kind K) : Gadget(K) {} 1028 1029 static bool classof(const Gadget *G) { return !G->isWarningGadget(); } 1030 bool isWarningGadget() const final { return false; } 1031 1032 /// Returns a fixit that would fix the current gadget according to 1033 /// the current strategy. Returns std::nullopt if the fix cannot be produced; 1034 /// returns an empty list if no fixes are necessary. 1035 virtual std::optional<FixItList> getFixits(const FixitStrategy &) const { 1036 return std::nullopt; 1037 } 1038 1039 /// Returns a list of two elements where the first element is the LHS of a 1040 /// pointer assignment statement and the second element is the RHS. This 1041 /// two-element list represents the fact that the LHS buffer gets its bounds 1042 /// information from the RHS buffer. This information will be used later to 1043 /// group all those variables whose types must be modified together to prevent 1044 /// type mismatches. 1045 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 1046 getStrategyImplications() const { 1047 return std::nullopt; 1048 } 1049 }; 1050 1051 static auto toSupportedVariable() { return to(varDecl()); } 1052 1053 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; 1054 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; 1055 1056 /// An increment of a pointer-type value is unsafe as it may run the pointer 1057 /// out of bounds. 1058 class IncrementGadget : public WarningGadget { 1059 static constexpr const char *const OpTag = "op"; 1060 const UnaryOperator *Op; 1061 1062 public: 1063 IncrementGadget(const MatchFinder::MatchResult &Result) 1064 : WarningGadget(Kind::Increment), 1065 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 1066 1067 static bool classof(const Gadget *G) { 1068 return G->getKind() == Kind::Increment; 1069 } 1070 1071 static Matcher matcher() { 1072 return stmt( 1073 unaryOperator(hasOperatorName("++"), 1074 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))) 1075 .bind(OpTag)); 1076 } 1077 1078 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1079 bool IsRelatedToDecl, 1080 ASTContext &Ctx) const override { 1081 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); 1082 } 1083 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1084 1085 DeclUseList getClaimedVarUseSites() const override { 1086 SmallVector<const DeclRefExpr *, 2> Uses; 1087 if (const auto *DRE = 1088 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 1089 Uses.push_back(DRE); 1090 } 1091 1092 return std::move(Uses); 1093 } 1094 }; 1095 1096 /// A decrement of a pointer-type value is unsafe as it may run the pointer 1097 /// out of bounds. 1098 class DecrementGadget : public WarningGadget { 1099 static constexpr const char *const OpTag = "op"; 1100 const UnaryOperator *Op; 1101 1102 public: 1103 DecrementGadget(const MatchFinder::MatchResult &Result) 1104 : WarningGadget(Kind::Decrement), 1105 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 1106 1107 static bool classof(const Gadget *G) { 1108 return G->getKind() == Kind::Decrement; 1109 } 1110 1111 static Matcher matcher() { 1112 return stmt( 1113 unaryOperator(hasOperatorName("--"), 1114 hasUnaryOperand(ignoringParenImpCasts(hasPointerType()))) 1115 .bind(OpTag)); 1116 } 1117 1118 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1119 bool IsRelatedToDecl, 1120 ASTContext &Ctx) const override { 1121 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); 1122 } 1123 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1124 1125 DeclUseList getClaimedVarUseSites() const override { 1126 if (const auto *DRE = 1127 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 1128 return {DRE}; 1129 } 1130 1131 return {}; 1132 } 1133 }; 1134 1135 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as 1136 /// it doesn't have any bounds checks for the array. 1137 class ArraySubscriptGadget : public WarningGadget { 1138 static constexpr const char *const ArraySubscrTag = "ArraySubscript"; 1139 const ArraySubscriptExpr *ASE; 1140 1141 public: 1142 ArraySubscriptGadget(const MatchFinder::MatchResult &Result) 1143 : WarningGadget(Kind::ArraySubscript), 1144 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} 1145 1146 static bool classof(const Gadget *G) { 1147 return G->getKind() == Kind::ArraySubscript; 1148 } 1149 1150 static Matcher matcher() { 1151 // clang-format off 1152 return stmt(arraySubscriptExpr( 1153 hasBase(ignoringParenImpCasts( 1154 anyOf(hasPointerType(), hasArrayType()))), 1155 unless(anyOf( 1156 isSafeArraySubscript(), 1157 hasIndex( 1158 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) 1159 ) 1160 ))).bind(ArraySubscrTag)); 1161 // clang-format on 1162 } 1163 1164 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1165 bool IsRelatedToDecl, 1166 ASTContext &Ctx) const override { 1167 Handler.handleUnsafeOperation(ASE, IsRelatedToDecl, Ctx); 1168 } 1169 SourceLocation getSourceLoc() const override { return ASE->getBeginLoc(); } 1170 1171 DeclUseList getClaimedVarUseSites() const override { 1172 if (const auto *DRE = 1173 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { 1174 return {DRE}; 1175 } 1176 1177 return {}; 1178 } 1179 }; 1180 1181 /// A pointer arithmetic expression of one of the forms: 1182 /// \code 1183 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n 1184 /// \endcode 1185 class PointerArithmeticGadget : public WarningGadget { 1186 static constexpr const char *const PointerArithmeticTag = "ptrAdd"; 1187 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; 1188 const BinaryOperator *PA; // pointer arithmetic expression 1189 const Expr *Ptr; // the pointer expression in `PA` 1190 1191 public: 1192 PointerArithmeticGadget(const MatchFinder::MatchResult &Result) 1193 : WarningGadget(Kind::PointerArithmetic), 1194 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), 1195 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} 1196 1197 static bool classof(const Gadget *G) { 1198 return G->getKind() == Kind::PointerArithmetic; 1199 } 1200 1201 static Matcher matcher() { 1202 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); 1203 auto PtrAtRight = 1204 allOf(hasOperatorName("+"), 1205 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 1206 hasLHS(HasIntegerType)); 1207 auto PtrAtLeft = 1208 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), 1209 hasOperatorName("+="), hasOperatorName("-=")), 1210 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 1211 hasRHS(HasIntegerType)); 1212 1213 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) 1214 .bind(PointerArithmeticTag)); 1215 } 1216 1217 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1218 bool IsRelatedToDecl, 1219 ASTContext &Ctx) const override { 1220 Handler.handleUnsafeOperation(PA, IsRelatedToDecl, Ctx); 1221 } 1222 SourceLocation getSourceLoc() const override { return PA->getBeginLoc(); } 1223 1224 DeclUseList getClaimedVarUseSites() const override { 1225 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { 1226 return {DRE}; 1227 } 1228 1229 return {}; 1230 } 1231 // FIXME: pointer adding zero should be fine 1232 // FIXME: this gadge will need a fix-it 1233 }; 1234 1235 class SpanTwoParamConstructorGadget : public WarningGadget { 1236 static constexpr const char *const SpanTwoParamConstructorTag = 1237 "spanTwoParamConstructor"; 1238 const CXXConstructExpr *Ctor; // the span constructor expression 1239 1240 public: 1241 SpanTwoParamConstructorGadget(const MatchFinder::MatchResult &Result) 1242 : WarningGadget(Kind::SpanTwoParamConstructor), 1243 Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>( 1244 SpanTwoParamConstructorTag)) {} 1245 1246 static bool classof(const Gadget *G) { 1247 return G->getKind() == Kind::SpanTwoParamConstructor; 1248 } 1249 1250 static Matcher matcher() { 1251 auto HasTwoParamSpanCtorDecl = hasDeclaration( 1252 cxxConstructorDecl(hasDeclContext(isInStdNamespace()), hasName("span"), 1253 parameterCountIs(2))); 1254 1255 return stmt(cxxConstructExpr(HasTwoParamSpanCtorDecl, 1256 unless(isSafeSpanTwoParamConstruct())) 1257 .bind(SpanTwoParamConstructorTag)); 1258 } 1259 1260 static Matcher matcher(const UnsafeBufferUsageHandler *Handler) { 1261 return stmt(unless(ignoreUnsafeBufferInContainer(Handler)), matcher()); 1262 } 1263 1264 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1265 bool IsRelatedToDecl, 1266 ASTContext &Ctx) const override { 1267 Handler.handleUnsafeOperationInContainer(Ctor, IsRelatedToDecl, Ctx); 1268 } 1269 SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); } 1270 1271 DeclUseList getClaimedVarUseSites() const override { 1272 // If the constructor call is of the form `std::span{var, n}`, `var` is 1273 // considered an unsafe variable. 1274 if (auto *DRE = dyn_cast<DeclRefExpr>(Ctor->getArg(0))) { 1275 if (isa<VarDecl>(DRE->getDecl())) 1276 return {DRE}; 1277 } 1278 return {}; 1279 } 1280 }; 1281 1282 /// A pointer initialization expression of the form: 1283 /// \code 1284 /// int *p = q; 1285 /// \endcode 1286 class PointerInitGadget : public FixableGadget { 1287 private: 1288 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; 1289 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; 1290 const VarDecl *PtrInitLHS; // the LHS pointer expression in `PI` 1291 const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI` 1292 1293 public: 1294 PointerInitGadget(const MatchFinder::MatchResult &Result) 1295 : FixableGadget(Kind::PointerInit), 1296 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), 1297 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} 1298 1299 static bool classof(const Gadget *G) { 1300 return G->getKind() == Kind::PointerInit; 1301 } 1302 1303 static Matcher matcher() { 1304 auto PtrInitStmt = declStmt(hasSingleDecl( 1305 varDecl(hasInitializer(ignoringImpCasts( 1306 declRefExpr(hasPointerType(), toSupportedVariable()) 1307 .bind(PointerInitRHSTag)))) 1308 .bind(PointerInitLHSTag))); 1309 1310 return stmt(PtrInitStmt); 1311 } 1312 1313 virtual std::optional<FixItList> 1314 getFixits(const FixitStrategy &S) const override; 1315 SourceLocation getSourceLoc() const override { 1316 return PtrInitRHS->getBeginLoc(); 1317 } 1318 1319 virtual DeclUseList getClaimedVarUseSites() const override { 1320 return DeclUseList{PtrInitRHS}; 1321 } 1322 1323 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 1324 getStrategyImplications() const override { 1325 return std::make_pair(PtrInitLHS, cast<VarDecl>(PtrInitRHS->getDecl())); 1326 } 1327 }; 1328 1329 /// A pointer assignment expression of the form: 1330 /// \code 1331 /// p = q; 1332 /// \endcode 1333 /// where both `p` and `q` are pointers. 1334 class PtrToPtrAssignmentGadget : public FixableGadget { 1335 private: 1336 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 1337 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 1338 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA` 1339 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA` 1340 1341 public: 1342 PtrToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) 1343 : FixableGadget(Kind::PtrToPtrAssignment), 1344 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 1345 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 1346 1347 static bool classof(const Gadget *G) { 1348 return G->getKind() == Kind::PtrToPtrAssignment; 1349 } 1350 1351 static Matcher matcher() { 1352 auto PtrAssignExpr = binaryOperator( 1353 allOf(hasOperatorName("="), 1354 hasRHS(ignoringParenImpCasts( 1355 declRefExpr(hasPointerType(), toSupportedVariable()) 1356 .bind(PointerAssignRHSTag))), 1357 hasLHS(declRefExpr(hasPointerType(), toSupportedVariable()) 1358 .bind(PointerAssignLHSTag)))); 1359 1360 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 1361 } 1362 1363 virtual std::optional<FixItList> 1364 getFixits(const FixitStrategy &S) const override; 1365 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); } 1366 1367 virtual DeclUseList getClaimedVarUseSites() const override { 1368 return DeclUseList{PtrLHS, PtrRHS}; 1369 } 1370 1371 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 1372 getStrategyImplications() const override { 1373 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), 1374 cast<VarDecl>(PtrRHS->getDecl())); 1375 } 1376 }; 1377 1378 /// An assignment expression of the form: 1379 /// \code 1380 /// ptr = array; 1381 /// \endcode 1382 /// where `p` is a pointer and `array` is a constant size array. 1383 class CArrayToPtrAssignmentGadget : public FixableGadget { 1384 private: 1385 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 1386 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 1387 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA` 1388 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA` 1389 1390 public: 1391 CArrayToPtrAssignmentGadget(const MatchFinder::MatchResult &Result) 1392 : FixableGadget(Kind::CArrayToPtrAssignment), 1393 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 1394 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 1395 1396 static bool classof(const Gadget *G) { 1397 return G->getKind() == Kind::CArrayToPtrAssignment; 1398 } 1399 1400 static Matcher matcher() { 1401 auto PtrAssignExpr = binaryOperator( 1402 allOf(hasOperatorName("="), 1403 hasRHS(ignoringParenImpCasts( 1404 declRefExpr(hasType(hasCanonicalType(constantArrayType())), 1405 toSupportedVariable()) 1406 .bind(PointerAssignRHSTag))), 1407 hasLHS(declRefExpr(hasPointerType(), toSupportedVariable()) 1408 .bind(PointerAssignLHSTag)))); 1409 1410 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 1411 } 1412 1413 virtual std::optional<FixItList> 1414 getFixits(const FixitStrategy &S) const override; 1415 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); } 1416 1417 virtual DeclUseList getClaimedVarUseSites() const override { 1418 return DeclUseList{PtrLHS, PtrRHS}; 1419 } 1420 1421 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 1422 getStrategyImplications() const override { 1423 return {}; 1424 } 1425 }; 1426 1427 /// A call of a function or method that performs unchecked buffer operations 1428 /// over one of its pointer parameters. 1429 class UnsafeBufferUsageAttrGadget : public WarningGadget { 1430 constexpr static const char *const OpTag = "attr_expr"; 1431 const Expr *Op; 1432 1433 public: 1434 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) 1435 : WarningGadget(Kind::UnsafeBufferUsageAttr), 1436 Op(Result.Nodes.getNodeAs<Expr>(OpTag)) {} 1437 1438 static bool classof(const Gadget *G) { 1439 return G->getKind() == Kind::UnsafeBufferUsageAttr; 1440 } 1441 1442 static Matcher matcher() { 1443 auto HasUnsafeFieldDecl = 1444 member(fieldDecl(hasAttr(attr::UnsafeBufferUsage))); 1445 1446 auto HasUnsafeFnDecl = 1447 callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))); 1448 1449 return stmt(anyOf(callExpr(HasUnsafeFnDecl).bind(OpTag), 1450 memberExpr(HasUnsafeFieldDecl).bind(OpTag))); 1451 } 1452 1453 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1454 bool IsRelatedToDecl, 1455 ASTContext &Ctx) const override { 1456 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); 1457 } 1458 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1459 1460 DeclUseList getClaimedVarUseSites() const override { return {}; } 1461 }; 1462 1463 /// A call of a constructor that performs unchecked buffer operations 1464 /// over one of its pointer parameters, or constructs a class object that will 1465 /// perform buffer operations that depend on the correctness of the parameters. 1466 class UnsafeBufferUsageCtorAttrGadget : public WarningGadget { 1467 constexpr static const char *const OpTag = "cxx_construct_expr"; 1468 const CXXConstructExpr *Op; 1469 1470 public: 1471 UnsafeBufferUsageCtorAttrGadget(const MatchFinder::MatchResult &Result) 1472 : WarningGadget(Kind::UnsafeBufferUsageCtorAttr), 1473 Op(Result.Nodes.getNodeAs<CXXConstructExpr>(OpTag)) {} 1474 1475 static bool classof(const Gadget *G) { 1476 return G->getKind() == Kind::UnsafeBufferUsageCtorAttr; 1477 } 1478 1479 static Matcher matcher() { 1480 auto HasUnsafeCtorDecl = 1481 hasDeclaration(cxxConstructorDecl(hasAttr(attr::UnsafeBufferUsage))); 1482 // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget. 1483 auto HasTwoParamSpanCtorDecl = SpanTwoParamConstructorGadget::matcher(); 1484 return stmt( 1485 cxxConstructExpr(HasUnsafeCtorDecl, unless(HasTwoParamSpanCtorDecl)) 1486 .bind(OpTag)); 1487 } 1488 1489 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1490 bool IsRelatedToDecl, 1491 ASTContext &Ctx) const override { 1492 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); 1493 } 1494 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1495 1496 DeclUseList getClaimedVarUseSites() const override { return {}; } 1497 }; 1498 1499 // Warning gadget for unsafe invocation of span::data method. 1500 // Triggers when the pointer returned by the invocation is immediately 1501 // cast to a larger type. 1502 1503 class DataInvocationGadget : public WarningGadget { 1504 constexpr static const char *const OpTag = "data_invocation_expr"; 1505 const ExplicitCastExpr *Op; 1506 1507 public: 1508 DataInvocationGadget(const MatchFinder::MatchResult &Result) 1509 : WarningGadget(Kind::DataInvocation), 1510 Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(OpTag)) {} 1511 1512 static bool classof(const Gadget *G) { 1513 return G->getKind() == Kind::DataInvocation; 1514 } 1515 1516 static Matcher matcher() { 1517 1518 Matcher callExpr = cxxMemberCallExpr(callee( 1519 cxxMethodDecl(hasName("data"), 1520 ofClass(anyOf(hasName("std::span"), hasName("std::array"), 1521 hasName("std::vector")))))); 1522 return stmt( 1523 explicitCastExpr(anyOf(has(callExpr), has(parenExpr(has(callExpr))))) 1524 .bind(OpTag)); 1525 } 1526 1527 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1528 bool IsRelatedToDecl, 1529 ASTContext &Ctx) const override { 1530 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx); 1531 } 1532 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1533 1534 DeclUseList getClaimedVarUseSites() const override { return {}; } 1535 }; 1536 1537 class UnsafeLibcFunctionCallGadget : public WarningGadget { 1538 const CallExpr *const Call; 1539 const Expr *UnsafeArg = nullptr; 1540 constexpr static const char *const Tag = "UnsafeLibcFunctionCall"; 1541 // Extra tags for additional information: 1542 constexpr static const char *const UnsafeSprintfTag = 1543 "UnsafeLibcFunctionCall_sprintf"; 1544 constexpr static const char *const UnsafeSizedByTag = 1545 "UnsafeLibcFunctionCall_sized_by"; 1546 constexpr static const char *const UnsafeStringTag = 1547 "UnsafeLibcFunctionCall_string"; 1548 constexpr static const char *const UnsafeVaListTag = 1549 "UnsafeLibcFunctionCall_va_list"; 1550 1551 enum UnsafeKind { 1552 OTHERS = 0, // no specific information, the callee function is unsafe 1553 SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead. 1554 SIZED_BY = 1555 2, // the first two arguments of `snprintf` function have 1556 // "__sized_by" relation but they do not conform to safe patterns 1557 STRING = 3, // an argument is a pointer-to-char-as-string but does not 1558 // guarantee null-termination 1559 VA_LIST = 4, // one of the `-printf`s function that take va_list, which is 1560 // considered unsafe as it is not compile-time check 1561 } WarnedFunKind = OTHERS; 1562 1563 public: 1564 UnsafeLibcFunctionCallGadget(const MatchFinder::MatchResult &Result) 1565 : WarningGadget(Kind::UnsafeLibcFunctionCall), 1566 Call(Result.Nodes.getNodeAs<CallExpr>(Tag)) { 1567 if (Result.Nodes.getNodeAs<Decl>(UnsafeSprintfTag)) 1568 WarnedFunKind = SPRINTF; 1569 else if (auto *E = Result.Nodes.getNodeAs<Expr>(UnsafeStringTag)) { 1570 WarnedFunKind = STRING; 1571 UnsafeArg = E; 1572 } else if (Result.Nodes.getNodeAs<CallExpr>(UnsafeSizedByTag)) { 1573 WarnedFunKind = SIZED_BY; 1574 UnsafeArg = Call->getArg(0); 1575 } else if (Result.Nodes.getNodeAs<Decl>(UnsafeVaListTag)) 1576 WarnedFunKind = VA_LIST; 1577 } 1578 1579 static Matcher matcher(const UnsafeBufferUsageHandler *Handler) { 1580 return stmt(unless(ignoreUnsafeLibcCall(Handler)), 1581 anyOf( 1582 callExpr( 1583 callee(functionDecl(anyOf( 1584 // Match a predefined unsafe libc 1585 // function: 1586 functionDecl(libc_func_matchers::isPredefinedUnsafeLibcFunc()), 1587 // Match a call to one of the `v*printf` functions 1588 // taking va-list, which cannot be checked at 1589 // compile-time: 1590 functionDecl(libc_func_matchers::isUnsafeVaListPrintfFunc()) 1591 .bind(UnsafeVaListTag), 1592 // Match a call to a `sprintf` function, which is never 1593 // safe: 1594 functionDecl(libc_func_matchers::isUnsafeSprintfFunc()) 1595 .bind(UnsafeSprintfTag)))), 1596 // (unless the call has a sole string literal argument): 1597 unless( 1598 allOf(hasArgument(0, expr(stringLiteral())), hasNumArgs(1)))), 1599 1600 // The following two cases require checking against actual 1601 // arguments of the call: 1602 1603 // Match a call to an `snprintf` function. And first two 1604 // arguments of the call (that describe a buffer) are not in 1605 // safe patterns: 1606 callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), 1607 libc_func_matchers::hasUnsafeSnprintfBuffer()) 1608 .bind(UnsafeSizedByTag), 1609 // Match a call to a `printf` function, which can be safe if 1610 // all arguments are null-terminated: 1611 callExpr(callee(functionDecl(libc_func_matchers::isNormalPrintfFunc())), 1612 libc_func_matchers::hasUnsafePrintfStringArg( 1613 expr().bind(UnsafeStringTag))))); 1614 } 1615 1616 const Stmt *getBaseStmt() const { return Call; } 1617 1618 SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); } 1619 1620 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler, 1621 bool IsRelatedToDecl, 1622 ASTContext &Ctx) const override { 1623 Handler.handleUnsafeLibcCall(Call, WarnedFunKind, Ctx, UnsafeArg); 1624 } 1625 1626 DeclUseList getClaimedVarUseSites() const override { return {}; } 1627 }; 1628 1629 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue 1630 // Context (see `isInUnspecifiedLvalueContext`). 1631 // Note here `[]` is the built-in subscript operator. 1632 class ULCArraySubscriptGadget : public FixableGadget { 1633 private: 1634 static constexpr const char *const ULCArraySubscriptTag = 1635 "ArraySubscriptUnderULC"; 1636 const ArraySubscriptExpr *Node; 1637 1638 public: 1639 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) 1640 : FixableGadget(Kind::ULCArraySubscript), 1641 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { 1642 assert(Node != nullptr && "Expecting a non-null matching result"); 1643 } 1644 1645 static bool classof(const Gadget *G) { 1646 return G->getKind() == Kind::ULCArraySubscript; 1647 } 1648 1649 static Matcher matcher() { 1650 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 1651 auto BaseIsArrayOrPtrDRE = hasBase( 1652 ignoringParenImpCasts(declRefExpr(ArrayOrPtr, toSupportedVariable()))); 1653 auto Target = 1654 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); 1655 1656 return expr(isInUnspecifiedLvalueContext(Target)); 1657 } 1658 1659 virtual std::optional<FixItList> 1660 getFixits(const FixitStrategy &S) const override; 1661 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } 1662 1663 virtual DeclUseList getClaimedVarUseSites() const override { 1664 if (const auto *DRE = 1665 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { 1666 return {DRE}; 1667 } 1668 return {}; 1669 } 1670 }; 1671 1672 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the 1673 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits 1674 // fixit of the form `UPC(DRE.data())`. 1675 class UPCStandalonePointerGadget : public FixableGadget { 1676 private: 1677 static constexpr const char *const DeclRefExprTag = "StandalonePointer"; 1678 const DeclRefExpr *Node; 1679 1680 public: 1681 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) 1682 : FixableGadget(Kind::UPCStandalonePointer), 1683 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { 1684 assert(Node != nullptr && "Expecting a non-null matching result"); 1685 } 1686 1687 static bool classof(const Gadget *G) { 1688 return G->getKind() == Kind::UPCStandalonePointer; 1689 } 1690 1691 static Matcher matcher() { 1692 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 1693 auto target = expr(ignoringParenImpCasts( 1694 declRefExpr(allOf(ArrayOrPtr, toSupportedVariable())) 1695 .bind(DeclRefExprTag))); 1696 return stmt(isInUnspecifiedPointerContext(target)); 1697 } 1698 1699 virtual std::optional<FixItList> 1700 getFixits(const FixitStrategy &S) const override; 1701 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } 1702 1703 virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; } 1704 }; 1705 1706 class PointerDereferenceGadget : public FixableGadget { 1707 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 1708 static constexpr const char *const OperatorTag = "op"; 1709 1710 const DeclRefExpr *BaseDeclRefExpr = nullptr; 1711 const UnaryOperator *Op = nullptr; 1712 1713 public: 1714 PointerDereferenceGadget(const MatchFinder::MatchResult &Result) 1715 : FixableGadget(Kind::PointerDereference), 1716 BaseDeclRefExpr( 1717 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 1718 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} 1719 1720 static bool classof(const Gadget *G) { 1721 return G->getKind() == Kind::PointerDereference; 1722 } 1723 1724 static Matcher matcher() { 1725 auto Target = 1726 unaryOperator( 1727 hasOperatorName("*"), 1728 has(expr(ignoringParenImpCasts( 1729 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag))))) 1730 .bind(OperatorTag); 1731 1732 return expr(isInUnspecifiedLvalueContext(Target)); 1733 } 1734 1735 DeclUseList getClaimedVarUseSites() const override { 1736 return {BaseDeclRefExpr}; 1737 } 1738 1739 virtual std::optional<FixItList> 1740 getFixits(const FixitStrategy &S) const override; 1741 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); } 1742 }; 1743 1744 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer 1745 // Context (see `isInUnspecifiedPointerContext`). 1746 // Note here `[]` is the built-in subscript operator. 1747 class UPCAddressofArraySubscriptGadget : public FixableGadget { 1748 private: 1749 static constexpr const char *const UPCAddressofArraySubscriptTag = 1750 "AddressofArraySubscriptUnderUPC"; 1751 const UnaryOperator *Node; // the `&DRE[any]` node 1752 1753 public: 1754 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) 1755 : FixableGadget(Kind::ULCArraySubscript), 1756 Node(Result.Nodes.getNodeAs<UnaryOperator>( 1757 UPCAddressofArraySubscriptTag)) { 1758 assert(Node != nullptr && "Expecting a non-null matching result"); 1759 } 1760 1761 static bool classof(const Gadget *G) { 1762 return G->getKind() == Kind::UPCAddressofArraySubscript; 1763 } 1764 1765 static Matcher matcher() { 1766 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 1767 unaryOperator( 1768 hasOperatorName("&"), 1769 hasUnaryOperand(arraySubscriptExpr(hasBase( 1770 ignoringParenImpCasts(declRefExpr(toSupportedVariable())))))) 1771 .bind(UPCAddressofArraySubscriptTag))))); 1772 } 1773 1774 virtual std::optional<FixItList> 1775 getFixits(const FixitStrategy &) const override; 1776 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } 1777 1778 virtual DeclUseList getClaimedVarUseSites() const override { 1779 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr()); 1780 const auto *DRE = 1781 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreParenImpCasts()); 1782 return {DRE}; 1783 } 1784 }; 1785 } // namespace 1786 1787 namespace { 1788 // An auxiliary tracking facility for the fixit analysis. It helps connect 1789 // declarations to its uses and make sure we've covered all uses with our 1790 // analysis before we try to fix the declaration. 1791 class DeclUseTracker { 1792 using UseSetTy = SmallSet<const DeclRefExpr *, 16>; 1793 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>; 1794 1795 // Allocate on the heap for easier move. 1796 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()}; 1797 DefMapTy Defs{}; 1798 1799 public: 1800 DeclUseTracker() = default; 1801 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. 1802 DeclUseTracker &operator=(const DeclUseTracker &) = delete; 1803 DeclUseTracker(DeclUseTracker &&) = default; 1804 DeclUseTracker &operator=(DeclUseTracker &&) = default; 1805 1806 // Start tracking a freshly discovered DRE. 1807 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } 1808 1809 // Stop tracking the DRE as it's been fully figured out. 1810 void claimUse(const DeclRefExpr *DRE) { 1811 assert(Uses->count(DRE) && 1812 "DRE not found or claimed by multiple matchers!"); 1813 Uses->erase(DRE); 1814 } 1815 1816 // A variable is unclaimed if at least one use is unclaimed. 1817 bool hasUnclaimedUses(const VarDecl *VD) const { 1818 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? 1819 return any_of(*Uses, [VD](const DeclRefExpr *DRE) { 1820 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); 1821 }); 1822 } 1823 1824 UseSetTy getUnclaimedUses(const VarDecl *VD) const { 1825 UseSetTy ReturnSet; 1826 for (auto use : *Uses) { 1827 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) { 1828 ReturnSet.insert(use); 1829 } 1830 } 1831 return ReturnSet; 1832 } 1833 1834 void discoverDecl(const DeclStmt *DS) { 1835 for (const Decl *D : DS->decls()) { 1836 if (const auto *VD = dyn_cast<VarDecl>(D)) { 1837 // FIXME: Assertion temporarily disabled due to a bug in 1838 // ASTMatcher internal behavior in presence of GNU 1839 // statement-expressions. We need to properly investigate this 1840 // because it can screw up our algorithm in other ways. 1841 // assert(Defs.count(VD) == 0 && "Definition already discovered!"); 1842 Defs[VD] = DS; 1843 } 1844 } 1845 } 1846 1847 const DeclStmt *lookupDecl(const VarDecl *VD) const { 1848 return Defs.lookup(VD); 1849 } 1850 }; 1851 } // namespace 1852 1853 // Representing a pointer type expression of the form `++Ptr` in an Unspecified 1854 // Pointer Context (UPC): 1855 class UPCPreIncrementGadget : public FixableGadget { 1856 private: 1857 static constexpr const char *const UPCPreIncrementTag = 1858 "PointerPreIncrementUnderUPC"; 1859 const UnaryOperator *Node; // the `++Ptr` node 1860 1861 public: 1862 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) 1863 : FixableGadget(Kind::UPCPreIncrement), 1864 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { 1865 assert(Node != nullptr && "Expecting a non-null matching result"); 1866 } 1867 1868 static bool classof(const Gadget *G) { 1869 return G->getKind() == Kind::UPCPreIncrement; 1870 } 1871 1872 static Matcher matcher() { 1873 // Note here we match `++Ptr` for any expression `Ptr` of pointer type. 1874 // Although currently we can only provide fix-its when `Ptr` is a DRE, we 1875 // can have the matcher be general, so long as `getClaimedVarUseSites` does 1876 // things right. 1877 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 1878 unaryOperator(isPreInc(), 1879 hasUnaryOperand(declRefExpr(toSupportedVariable()))) 1880 .bind(UPCPreIncrementTag))))); 1881 } 1882 1883 virtual std::optional<FixItList> 1884 getFixits(const FixitStrategy &S) const override; 1885 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } 1886 1887 virtual DeclUseList getClaimedVarUseSites() const override { 1888 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; 1889 } 1890 }; 1891 1892 // Representing a pointer type expression of the form `Ptr += n` in an 1893 // Unspecified Untyped Context (UUC): 1894 class UUCAddAssignGadget : public FixableGadget { 1895 private: 1896 static constexpr const char *const UUCAddAssignTag = 1897 "PointerAddAssignUnderUUC"; 1898 static constexpr const char *const OffsetTag = "Offset"; 1899 1900 const BinaryOperator *Node; // the `Ptr += n` node 1901 const Expr *Offset = nullptr; 1902 1903 public: 1904 UUCAddAssignGadget(const MatchFinder::MatchResult &Result) 1905 : FixableGadget(Kind::UUCAddAssign), 1906 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)), 1907 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) { 1908 assert(Node != nullptr && "Expecting a non-null matching result"); 1909 } 1910 1911 static bool classof(const Gadget *G) { 1912 return G->getKind() == Kind::UUCAddAssign; 1913 } 1914 1915 static Matcher matcher() { 1916 // clang-format off 1917 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts( 1918 binaryOperator(hasOperatorName("+="), 1919 hasLHS( 1920 declRefExpr( 1921 hasPointerType(), 1922 toSupportedVariable())), 1923 hasRHS(expr().bind(OffsetTag))) 1924 .bind(UUCAddAssignTag))))); 1925 // clang-format on 1926 } 1927 1928 virtual std::optional<FixItList> 1929 getFixits(const FixitStrategy &S) const override; 1930 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); } 1931 1932 virtual DeclUseList getClaimedVarUseSites() const override { 1933 return {dyn_cast<DeclRefExpr>(Node->getLHS())}; 1934 } 1935 }; 1936 1937 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + 1938 // ptr)`: 1939 class DerefSimplePtrArithFixableGadget : public FixableGadget { 1940 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 1941 static constexpr const char *const DerefOpTag = "DerefOp"; 1942 static constexpr const char *const AddOpTag = "AddOp"; 1943 static constexpr const char *const OffsetTag = "Offset"; 1944 1945 const DeclRefExpr *BaseDeclRefExpr = nullptr; 1946 const UnaryOperator *DerefOp = nullptr; 1947 const BinaryOperator *AddOp = nullptr; 1948 const IntegerLiteral *Offset = nullptr; 1949 1950 public: 1951 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) 1952 : FixableGadget(Kind::DerefSimplePtrArithFixable), 1953 BaseDeclRefExpr( 1954 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 1955 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), 1956 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), 1957 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} 1958 1959 static Matcher matcher() { 1960 // clang-format off 1961 auto ThePtr = expr(hasPointerType(), 1962 ignoringImpCasts(declRefExpr(toSupportedVariable()). 1963 bind(BaseDeclRefExprTag))); 1964 auto PlusOverPtrAndInteger = expr(anyOf( 1965 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), 1966 hasRHS(integerLiteral().bind(OffsetTag))) 1967 .bind(AddOpTag), 1968 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), 1969 hasLHS(integerLiteral().bind(OffsetTag))) 1970 .bind(AddOpTag))); 1971 return isInUnspecifiedLvalueContext(unaryOperator( 1972 hasOperatorName("*"), 1973 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) 1974 .bind(DerefOpTag)); 1975 // clang-format on 1976 } 1977 1978 virtual std::optional<FixItList> 1979 getFixits(const FixitStrategy &s) const final; 1980 SourceLocation getSourceLoc() const override { 1981 return DerefOp->getBeginLoc(); 1982 } 1983 1984 virtual DeclUseList getClaimedVarUseSites() const final { 1985 return {BaseDeclRefExpr}; 1986 } 1987 }; 1988 1989 /// Scan the function and return a list of gadgets found with provided kits. 1990 static void findGadgets(const Stmt *S, ASTContext &Ctx, 1991 const UnsafeBufferUsageHandler &Handler, 1992 bool EmitSuggestions, FixableGadgetList &FixableGadgets, 1993 WarningGadgetList &WarningGadgets, 1994 DeclUseTracker &Tracker) { 1995 1996 struct GadgetFinderCallback : MatchFinder::MatchCallback { 1997 GadgetFinderCallback(FixableGadgetList &FixableGadgets, 1998 WarningGadgetList &WarningGadgets, 1999 DeclUseTracker &Tracker) 2000 : FixableGadgets(FixableGadgets), WarningGadgets(WarningGadgets), 2001 Tracker(Tracker) {} 2002 2003 void run(const MatchFinder::MatchResult &Result) override { 2004 // In debug mode, assert that we've found exactly one gadget. 2005 // This helps us avoid conflicts in .bind() tags. 2006 #if NDEBUG 2007 #define NEXT return 2008 #else 2009 [[maybe_unused]] int numFound = 0; 2010 #define NEXT ++numFound 2011 #endif 2012 2013 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { 2014 Tracker.discoverUse(DRE); 2015 NEXT; 2016 } 2017 2018 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { 2019 Tracker.discoverDecl(DS); 2020 NEXT; 2021 } 2022 2023 // Figure out which matcher we've found, and call the appropriate 2024 // subclass constructor. 2025 // FIXME: Can we do this more logarithmically? 2026 #define FIXABLE_GADGET(name) \ 2027 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 2028 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 2029 NEXT; \ 2030 } 2031 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 2032 #define WARNING_GADGET(name) \ 2033 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 2034 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 2035 NEXT; \ 2036 } 2037 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 2038 2039 assert(numFound >= 1 && "Gadgets not found in match result!"); 2040 assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); 2041 } 2042 2043 FixableGadgetList &FixableGadgets; 2044 WarningGadgetList &WarningGadgets; 2045 DeclUseTracker &Tracker; 2046 }; 2047 2048 MatchFinder M; 2049 GadgetFinderCallback CB{FixableGadgets, WarningGadgets, Tracker}; 2050 2051 // clang-format off 2052 M.addMatcher( 2053 stmt( 2054 forEachDescendantEvaluatedStmt(stmt(anyOf( 2055 // Add Gadget::matcher() for every gadget in the registry. 2056 #define WARNING_GADGET(x) \ 2057 allOf(x ## Gadget::matcher().bind(#x), \ 2058 notInSafeBufferOptOut(&Handler)), 2059 #define WARNING_OPTIONAL_GADGET(x) \ 2060 allOf(x ## Gadget::matcher(&Handler).bind(#x), \ 2061 notInSafeBufferOptOut(&Handler)), 2062 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 2063 // Avoid a hanging comma. 2064 unless(stmt()) 2065 ))) 2066 ), 2067 &CB 2068 ); 2069 // clang-format on 2070 2071 if (EmitSuggestions) { 2072 // clang-format off 2073 M.addMatcher( 2074 stmt( 2075 forEachDescendantStmt(stmt(eachOf( 2076 #define FIXABLE_GADGET(x) \ 2077 x ## Gadget::matcher().bind(#x), 2078 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 2079 // In parallel, match all DeclRefExprs so that to find out 2080 // whether there are any uncovered by gadgets. 2081 declRefExpr(anyOf(hasPointerType(), hasArrayType()), 2082 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"), 2083 // Also match DeclStmts because we'll need them when fixing 2084 // their underlying VarDecls that otherwise don't have 2085 // any backreferences to DeclStmts. 2086 declStmt().bind("any_ds") 2087 ))) 2088 ), 2089 &CB 2090 ); 2091 // clang-format on 2092 } 2093 2094 M.match(*S, Ctx); 2095 } 2096 2097 // Compares AST nodes by source locations. 2098 template <typename NodeTy> struct CompareNode { 2099 bool operator()(const NodeTy *N1, const NodeTy *N2) const { 2100 return N1->getBeginLoc().getRawEncoding() < 2101 N2->getBeginLoc().getRawEncoding(); 2102 } 2103 }; 2104 2105 struct WarningGadgetSets { 2106 std::map<const VarDecl *, std::set<const WarningGadget *>, 2107 // To keep keys sorted by their locations in the map so that the 2108 // order is deterministic: 2109 CompareNode<VarDecl>> 2110 byVar; 2111 // These Gadgets are not related to pointer variables (e. g. temporaries). 2112 llvm::SmallVector<const WarningGadget *, 16> noVar; 2113 }; 2114 2115 static WarningGadgetSets 2116 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { 2117 WarningGadgetSets result; 2118 // If some gadgets cover more than one 2119 // variable, they'll appear more than once in the map. 2120 for (auto &G : AllUnsafeOperations) { 2121 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); 2122 2123 bool AssociatedWithVarDecl = false; 2124 for (const DeclRefExpr *DRE : ClaimedVarUseSites) { 2125 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 2126 result.byVar[VD].insert(G.get()); 2127 AssociatedWithVarDecl = true; 2128 } 2129 } 2130 2131 if (!AssociatedWithVarDecl) { 2132 result.noVar.push_back(G.get()); 2133 continue; 2134 } 2135 } 2136 return result; 2137 } 2138 2139 struct FixableGadgetSets { 2140 std::map<const VarDecl *, std::set<const FixableGadget *>, 2141 // To keep keys sorted by their locations in the map so that the 2142 // order is deterministic: 2143 CompareNode<VarDecl>> 2144 byVar; 2145 }; 2146 2147 static FixableGadgetSets 2148 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { 2149 FixableGadgetSets FixablesForUnsafeVars; 2150 for (auto &F : AllFixableOperations) { 2151 DeclUseList DREs = F->getClaimedVarUseSites(); 2152 2153 for (const DeclRefExpr *DRE : DREs) { 2154 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 2155 FixablesForUnsafeVars.byVar[VD].insert(F.get()); 2156 } 2157 } 2158 } 2159 return FixablesForUnsafeVars; 2160 } 2161 2162 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, 2163 const SourceManager &SM) { 2164 // A simple interval overlap detection algorithm. Sorts all ranges by their 2165 // begin location then finds the first overlap in one pass. 2166 std::vector<const FixItHint *> All; // a copy of `FixIts` 2167 2168 for (const FixItHint &H : FixIts) 2169 All.push_back(&H); 2170 std::sort(All.begin(), All.end(), 2171 [&SM](const FixItHint *H1, const FixItHint *H2) { 2172 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), 2173 H2->RemoveRange.getBegin()); 2174 }); 2175 2176 const FixItHint *CurrHint = nullptr; 2177 2178 for (const FixItHint *Hint : All) { 2179 if (!CurrHint || 2180 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), 2181 Hint->RemoveRange.getBegin())) { 2182 // Either to initialize `CurrHint` or `CurrHint` does not 2183 // overlap with `Hint`: 2184 CurrHint = Hint; 2185 } else 2186 // In case `Hint` overlaps the `CurrHint`, we found at least one 2187 // conflict: 2188 return true; 2189 } 2190 return false; 2191 } 2192 2193 std::optional<FixItList> 2194 PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { 2195 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 2196 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 2197 switch (S.lookup(LeftVD)) { 2198 case FixitStrategy::Kind::Span: 2199 if (S.lookup(RightVD) == FixitStrategy::Kind::Span) 2200 return FixItList{}; 2201 return std::nullopt; 2202 case FixitStrategy::Kind::Wontfix: 2203 return std::nullopt; 2204 case FixitStrategy::Kind::Iterator: 2205 case FixitStrategy::Kind::Array: 2206 return std::nullopt; 2207 case FixitStrategy::Kind::Vector: 2208 llvm_unreachable("unsupported strategies for FixableGadgets"); 2209 } 2210 return std::nullopt; 2211 } 2212 2213 /// \returns fixit that adds .data() call after \DRE. 2214 static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx, 2215 const DeclRefExpr *DRE); 2216 2217 std::optional<FixItList> 2218 CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const { 2219 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 2220 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 2221 // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is 2222 // non-trivial. 2223 // 2224 // CArrayToPtrAssignmentGadget doesn't have strategy implications because 2225 // constant size array propagates its bounds. Because of that LHS and RHS are 2226 // addressed by two different fixits. 2227 // 2228 // At the same time FixitStrategy S doesn't reflect what group a fixit belongs 2229 // to and can't be generally relied on in multi-variable Fixables! 2230 // 2231 // E. g. If an instance of this gadget is fixing variable on LHS then the 2232 // variable on RHS is fixed by a different fixit and its strategy for LHS 2233 // fixit is as if Wontfix. 2234 // 2235 // The only exception is Wontfix strategy for a given variable as that is 2236 // valid for any fixit produced for the given input source code. 2237 if (S.lookup(LeftVD) == FixitStrategy::Kind::Span) { 2238 if (S.lookup(RightVD) == FixitStrategy::Kind::Wontfix) { 2239 return FixItList{}; 2240 } 2241 } else if (S.lookup(LeftVD) == FixitStrategy::Kind::Wontfix) { 2242 if (S.lookup(RightVD) == FixitStrategy::Kind::Array) { 2243 return createDataFixit(RightVD->getASTContext(), PtrRHS); 2244 } 2245 } 2246 return std::nullopt; 2247 } 2248 2249 std::optional<FixItList> 2250 PointerInitGadget::getFixits(const FixitStrategy &S) const { 2251 const auto *LeftVD = PtrInitLHS; 2252 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); 2253 switch (S.lookup(LeftVD)) { 2254 case FixitStrategy::Kind::Span: 2255 if (S.lookup(RightVD) == FixitStrategy::Kind::Span) 2256 return FixItList{}; 2257 return std::nullopt; 2258 case FixitStrategy::Kind::Wontfix: 2259 return std::nullopt; 2260 case FixitStrategy::Kind::Iterator: 2261 case FixitStrategy::Kind::Array: 2262 return std::nullopt; 2263 case FixitStrategy::Kind::Vector: 2264 llvm_unreachable("unsupported strategies for FixableGadgets"); 2265 } 2266 return std::nullopt; 2267 } 2268 2269 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD, 2270 const ASTContext &Ctx) { 2271 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) { 2272 if (ConstVal->isNegative()) 2273 return false; 2274 } else if (!Expr->getType()->isUnsignedIntegerType()) 2275 return false; 2276 return true; 2277 } 2278 2279 std::optional<FixItList> 2280 ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const { 2281 if (const auto *DRE = 2282 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) 2283 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 2284 switch (S.lookup(VD)) { 2285 case FixitStrategy::Kind::Span: { 2286 2287 // If the index has a negative constant value, we give up as no valid 2288 // fix-it can be generated: 2289 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! 2290 VD->getASTContext(); 2291 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx)) 2292 return std::nullopt; 2293 // no-op is a good fix-it, otherwise 2294 return FixItList{}; 2295 } 2296 case FixitStrategy::Kind::Array: 2297 return FixItList{}; 2298 case FixitStrategy::Kind::Wontfix: 2299 case FixitStrategy::Kind::Iterator: 2300 case FixitStrategy::Kind::Vector: 2301 llvm_unreachable("unsupported strategies for FixableGadgets"); 2302 } 2303 } 2304 return std::nullopt; 2305 } 2306 2307 static std::optional<FixItList> // forward declaration 2308 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); 2309 2310 std::optional<FixItList> 2311 UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const { 2312 auto DREs = getClaimedVarUseSites(); 2313 const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); 2314 2315 switch (S.lookup(VD)) { 2316 case FixitStrategy::Kind::Span: 2317 return fixUPCAddressofArraySubscriptWithSpan(Node); 2318 case FixitStrategy::Kind::Wontfix: 2319 case FixitStrategy::Kind::Iterator: 2320 case FixitStrategy::Kind::Array: 2321 return std::nullopt; 2322 case FixitStrategy::Kind::Vector: 2323 llvm_unreachable("unsupported strategies for FixableGadgets"); 2324 } 2325 return std::nullopt; // something went wrong, no fix-it 2326 } 2327 2328 // FIXME: this function should be customizable through format 2329 static StringRef getEndOfLine() { 2330 static const char *const EOL = "\n"; 2331 return EOL; 2332 } 2333 2334 // Returns the text indicating that the user needs to provide input there: 2335 static std::string 2336 getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { 2337 std::string s = std::string("<# "); 2338 s += HintTextToUser; 2339 s += " #>"; 2340 return s; 2341 } 2342 2343 // Return the source location of the last character of the AST `Node`. 2344 template <typename NodeTy> 2345 static std::optional<SourceLocation> 2346 getEndCharLoc(const NodeTy *Node, const SourceManager &SM, 2347 const LangOptions &LangOpts) { 2348 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); 2349 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); 2350 2351 if (Loc.isValid()) 2352 return Loc; 2353 2354 return std::nullopt; 2355 } 2356 2357 // Return the source location just past the last character of the AST `Node`. 2358 template <typename NodeTy> 2359 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, 2360 const SourceManager &SM, 2361 const LangOptions &LangOpts) { 2362 SourceLocation Loc = 2363 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); 2364 if (Loc.isValid()) 2365 return Loc; 2366 return std::nullopt; 2367 } 2368 2369 // Return text representation of an `Expr`. 2370 static std::optional<StringRef> getExprText(const Expr *E, 2371 const SourceManager &SM, 2372 const LangOptions &LangOpts) { 2373 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); 2374 2375 if (LastCharLoc) 2376 return Lexer::getSourceText( 2377 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, 2378 LangOpts); 2379 2380 return std::nullopt; 2381 } 2382 2383 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range. 2384 static std::optional<StringRef> getRangeText(SourceRange SR, 2385 const SourceManager &SM, 2386 const LangOptions &LangOpts) { 2387 bool Invalid = false; 2388 CharSourceRange CSR = CharSourceRange::getCharRange(SR); 2389 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); 2390 2391 if (!Invalid) 2392 return Text; 2393 return std::nullopt; 2394 } 2395 2396 // Returns the begin location of the identifier of the given variable 2397 // declaration. 2398 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) { 2399 // According to the implementation of `VarDecl`, `VD->getLocation()` actually 2400 // returns the begin location of the identifier of the declaration: 2401 return VD->getLocation(); 2402 } 2403 2404 // Returns the literal text of the identifier of the given variable declaration. 2405 static std::optional<StringRef> 2406 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM, 2407 const LangOptions &LangOpts) { 2408 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD); 2409 SourceLocation ParmIdentEndLoc = 2410 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts); 2411 2412 if (ParmIdentEndLoc.isMacroID() && 2413 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts)) 2414 return std::nullopt; 2415 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts); 2416 } 2417 2418 // We cannot fix a variable declaration if it has some other specifiers than the 2419 // type specifier. Because the source ranges of those specifiers could overlap 2420 // with the source range that is being replaced using fix-its. Especially when 2421 // we often cannot obtain accurate source ranges of cv-qualified type 2422 // specifiers. 2423 // FIXME: also deal with type attributes 2424 static bool hasUnsupportedSpecifiers(const VarDecl *VD, 2425 const SourceManager &SM) { 2426 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the 2427 // source range of `VD`: 2428 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool { 2429 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(), 2430 VD->getBeginLoc())) && 2431 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(), 2432 At->getRange().getBegin())); 2433 }); 2434 return VD->isInlineSpecified() || VD->isConstexpr() || 2435 VD->hasConstantInitialization() || !VD->hasLocalStorage() || 2436 AttrRangeOverlapping; 2437 } 2438 2439 // Returns the `SourceRange` of `D`. The reason why this function exists is 2440 // that `D->getSourceRange()` may return a range where the end location is the 2441 // starting location of the last token. The end location of the source range 2442 // returned by this function is the last location of the last token. 2443 static SourceRange getSourceRangeToTokenEnd(const Decl *D, 2444 const SourceManager &SM, 2445 const LangOptions &LangOpts) { 2446 SourceLocation Begin = D->getBeginLoc(); 2447 SourceLocation 2448 End = // `D->getEndLoc` should always return the starting location of the 2449 // last token, so we should get the end of the token 2450 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts); 2451 2452 return SourceRange(Begin, End); 2453 } 2454 2455 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer 2456 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not 2457 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me 2458 // :( ), `Qualifiers` of the pointee type is returned separately through the 2459 // output parameter `QualifiersToAppend`. 2460 static std::optional<std::string> 2461 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM, 2462 const LangOptions &LangOpts, 2463 std::optional<Qualifiers> *QualifiersToAppend) { 2464 QualType Ty = VD->getType(); 2465 QualType PteTy; 2466 2467 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() && 2468 "Expecting a VarDecl of type of pointer to object type"); 2469 PteTy = Ty->getPointeeType(); 2470 2471 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc(); 2472 TypeLoc PteTyLoc; 2473 2474 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns 2475 // the `TypeLoc` of the pointee type: 2476 switch (TyLoc.getTypeLocClass()) { 2477 case TypeLoc::ConstantArray: 2478 case TypeLoc::IncompleteArray: 2479 case TypeLoc::VariableArray: 2480 case TypeLoc::DependentSizedArray: 2481 case TypeLoc::Decayed: 2482 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a " 2483 "pointer type unless it decays."); 2484 PteTyLoc = TyLoc.getNextTypeLoc(); 2485 break; 2486 case TypeLoc::Pointer: 2487 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc(); 2488 break; 2489 default: 2490 return std::nullopt; 2491 } 2492 if (PteTyLoc.isNull()) 2493 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g., 2494 // when the pointer type is `auto`. 2495 return std::nullopt; 2496 2497 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD); 2498 2499 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) { 2500 // We are expecting these locations to be valid. But in some cases, they are 2501 // not all valid. It is a Clang bug to me and we are not responsible for 2502 // fixing it. So we will just give up for now when it happens. 2503 return std::nullopt; 2504 } 2505 2506 // Note that TypeLoc.getEndLoc() returns the begin location of the last token: 2507 SourceLocation PteEndOfTokenLoc = 2508 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts); 2509 2510 if (!PteEndOfTokenLoc.isValid()) 2511 // Sometimes we cannot get the end location of the pointee type, e.g., when 2512 // there are macros involved. 2513 return std::nullopt; 2514 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) { 2515 // We only deal with the cases where the source text of the pointee type 2516 // appears on the left-hand side of the variable identifier completely, 2517 // including the following forms: 2518 // `T ident`, 2519 // `T ident[]`, where `T` is any type. 2520 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`. 2521 return std::nullopt; 2522 } 2523 if (PteTy.hasQualifiers()) { 2524 // TypeLoc does not provide source ranges for qualifiers (it says it's 2525 // intentional but seems fishy to me), so we cannot get the full text 2526 // `PteTy` via source ranges. 2527 *QualifiersToAppend = PteTy.getQualifiers(); 2528 } 2529 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) 2530 ->str(); 2531 } 2532 2533 // Returns the text of the name (with qualifiers) of a `FunctionDecl`. 2534 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, 2535 const SourceManager &SM, 2536 const LangOptions &LangOpts) { 2537 SourceLocation BeginLoc = FD->getQualifier() 2538 ? FD->getQualifierLoc().getBeginLoc() 2539 : FD->getNameInfo().getBeginLoc(); 2540 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the 2541 // last token: 2542 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 2543 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); 2544 SourceRange NameRange{BeginLoc, EndLoc}; 2545 2546 return getRangeText(NameRange, SM, LangOpts); 2547 } 2548 2549 // Returns the text representing a `std::span` type where the element type is 2550 // represented by `EltTyText`. 2551 // 2552 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers 2553 // explicitly if the element type needs to be qualified. 2554 static std::string 2555 getSpanTypeText(StringRef EltTyText, 2556 std::optional<Qualifiers> Quals = std::nullopt) { 2557 const char *const SpanOpen = "std::span<"; 2558 2559 if (Quals) 2560 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>'; 2561 return SpanOpen + EltTyText.str() + '>'; 2562 } 2563 2564 std::optional<FixItList> 2565 DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const { 2566 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); 2567 2568 if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) { 2569 ASTContext &Ctx = VD->getASTContext(); 2570 // std::span can't represent elements before its begin() 2571 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) 2572 if (ConstVal->isNegative()) 2573 return std::nullopt; 2574 2575 // note that the expr may (oddly) has multiple layers of parens 2576 // example: 2577 // *((..(pointer + 123)..)) 2578 // goal: 2579 // pointer[123] 2580 // Fix-It: 2581 // remove '*(' 2582 // replace ' + ' with '[' 2583 // replace ')' with ']' 2584 2585 // example: 2586 // *((..(123 + pointer)..)) 2587 // goal: 2588 // 123[pointer] 2589 // Fix-It: 2590 // remove '*(' 2591 // replace ' + ' with '[' 2592 // replace ')' with ']' 2593 2594 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); 2595 const SourceManager &SM = Ctx.getSourceManager(); 2596 const LangOptions &LangOpts = Ctx.getLangOpts(); 2597 CharSourceRange StarWithTrailWhitespace = 2598 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), 2599 LHS->getBeginLoc()); 2600 2601 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); 2602 if (!LHSLocation) 2603 return std::nullopt; 2604 2605 CharSourceRange PlusWithSurroundingWhitespace = 2606 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); 2607 2608 std::optional<SourceLocation> AddOpLocation = 2609 getPastLoc(AddOp, SM, LangOpts); 2610 std::optional<SourceLocation> DerefOpLocation = 2611 getPastLoc(DerefOp, SM, LangOpts); 2612 2613 if (!AddOpLocation || !DerefOpLocation) 2614 return std::nullopt; 2615 2616 CharSourceRange ClosingParenWithPrecWhitespace = 2617 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); 2618 2619 return FixItList{ 2620 {FixItHint::CreateRemoval(StarWithTrailWhitespace), 2621 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), 2622 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; 2623 } 2624 return std::nullopt; // something wrong or unsupported, give up 2625 } 2626 2627 std::optional<FixItList> 2628 PointerDereferenceGadget::getFixits(const FixitStrategy &S) const { 2629 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); 2630 switch (S.lookup(VD)) { 2631 case FixitStrategy::Kind::Span: { 2632 ASTContext &Ctx = VD->getASTContext(); 2633 SourceManager &SM = Ctx.getSourceManager(); 2634 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] 2635 // Deletes the *operand 2636 CharSourceRange derefRange = clang::CharSourceRange::getCharRange( 2637 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); 2638 // Inserts the [0] 2639 if (auto LocPastOperand = 2640 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) { 2641 return FixItList{{FixItHint::CreateRemoval(derefRange), 2642 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}}; 2643 } 2644 break; 2645 } 2646 case FixitStrategy::Kind::Iterator: 2647 case FixitStrategy::Kind::Array: 2648 return std::nullopt; 2649 case FixitStrategy::Kind::Vector: 2650 llvm_unreachable("FixitStrategy not implemented yet!"); 2651 case FixitStrategy::Kind::Wontfix: 2652 llvm_unreachable("Invalid strategy!"); 2653 } 2654 2655 return std::nullopt; 2656 } 2657 2658 static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx, 2659 const DeclRefExpr *DRE) { 2660 const SourceManager &SM = Ctx.getSourceManager(); 2661 // Inserts the .data() after the DRE 2662 std::optional<SourceLocation> EndOfOperand = 2663 getPastLoc(DRE, SM, Ctx.getLangOpts()); 2664 2665 if (EndOfOperand) 2666 return FixItList{{FixItHint::CreateInsertion(*EndOfOperand, ".data()")}}; 2667 2668 return std::nullopt; 2669 } 2670 2671 // Generates fix-its replacing an expression of the form UPC(DRE) with 2672 // `DRE.data()` 2673 std::optional<FixItList> 2674 UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const { 2675 const auto VD = cast<VarDecl>(Node->getDecl()); 2676 switch (S.lookup(VD)) { 2677 case FixitStrategy::Kind::Array: 2678 case FixitStrategy::Kind::Span: { 2679 return createDataFixit(VD->getASTContext(), Node); 2680 // FIXME: Points inside a macro expansion. 2681 break; 2682 } 2683 case FixitStrategy::Kind::Wontfix: 2684 case FixitStrategy::Kind::Iterator: 2685 return std::nullopt; 2686 case FixitStrategy::Kind::Vector: 2687 llvm_unreachable("unsupported strategies for FixableGadgets"); 2688 } 2689 2690 return std::nullopt; 2691 } 2692 2693 // Generates fix-its replacing an expression of the form `&DRE[e]` with 2694 // `&DRE.data()[e]`: 2695 static std::optional<FixItList> 2696 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { 2697 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); 2698 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); 2699 // FIXME: this `getASTContext` call is costly, we should pass the 2700 // ASTContext in: 2701 const ASTContext &Ctx = DRE->getDecl()->getASTContext(); 2702 const Expr *Idx = ArraySub->getIdx(); 2703 const SourceManager &SM = Ctx.getSourceManager(); 2704 const LangOptions &LangOpts = Ctx.getLangOpts(); 2705 std::stringstream SS; 2706 bool IdxIsLitZero = false; 2707 2708 if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) 2709 if ((*ICE).isZero()) 2710 IdxIsLitZero = true; 2711 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); 2712 if (!DreString) 2713 return std::nullopt; 2714 2715 if (IdxIsLitZero) { 2716 // If the index is literal zero, we produce the most concise fix-it: 2717 SS << (*DreString).str() << ".data()"; 2718 } else { 2719 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); 2720 if (!IndexString) 2721 return std::nullopt; 2722 2723 SS << "&" << (*DreString).str() << ".data()" 2724 << "[" << (*IndexString).str() << "]"; 2725 } 2726 return FixItList{ 2727 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; 2728 } 2729 2730 std::optional<FixItList> 2731 UUCAddAssignGadget::getFixits(const FixitStrategy &S) const { 2732 DeclUseList DREs = getClaimedVarUseSites(); 2733 2734 if (DREs.size() != 1) 2735 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we 2736 // give up 2737 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 2738 if (S.lookup(VD) == FixitStrategy::Kind::Span) { 2739 FixItList Fixes; 2740 2741 const Stmt *AddAssignNode = Node; 2742 StringRef varName = VD->getName(); 2743 const ASTContext &Ctx = VD->getASTContext(); 2744 2745 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx)) 2746 return std::nullopt; 2747 2748 // To transform UUC(p += n) to UUC(p = p.subspan(..)): 2749 bool NotParenExpr = 2750 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc()); 2751 std::string SS = varName.str() + " = " + varName.str() + ".subspan"; 2752 if (NotParenExpr) 2753 SS += "("; 2754 2755 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc( 2756 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 2757 if (!AddAssignLocation) 2758 return std::nullopt; 2759 2760 Fixes.push_back(FixItHint::CreateReplacement( 2761 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()), 2762 SS)); 2763 if (NotParenExpr) 2764 Fixes.push_back(FixItHint::CreateInsertion( 2765 Offset->getEndLoc().getLocWithOffset(1), ")")); 2766 return Fixes; 2767 } 2768 } 2769 return std::nullopt; // Not in the cases that we can handle for now, give up. 2770 } 2771 2772 std::optional<FixItList> 2773 UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const { 2774 DeclUseList DREs = getClaimedVarUseSites(); 2775 2776 if (DREs.size() != 1) 2777 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we 2778 // give up 2779 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 2780 if (S.lookup(VD) == FixitStrategy::Kind::Span) { 2781 FixItList Fixes; 2782 std::stringstream SS; 2783 StringRef varName = VD->getName(); 2784 const ASTContext &Ctx = VD->getASTContext(); 2785 2786 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()): 2787 SS << "(" << varName.data() << " = " << varName.data() 2788 << ".subspan(1)).data()"; 2789 std::optional<SourceLocation> PreIncLocation = 2790 getEndCharLoc(Node, Ctx.getSourceManager(), Ctx.getLangOpts()); 2791 if (!PreIncLocation) 2792 return std::nullopt; 2793 2794 Fixes.push_back(FixItHint::CreateReplacement( 2795 SourceRange(Node->getBeginLoc(), *PreIncLocation), SS.str())); 2796 return Fixes; 2797 } 2798 } 2799 return std::nullopt; // Not in the cases that we can handle for now, give up. 2800 } 2801 2802 // For a non-null initializer `Init` of `T *` type, this function returns 2803 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it 2804 // to output stream. 2805 // In many cases, this function cannot figure out the actual extent `S`. It 2806 // then will use a place holder to replace `S` to ask users to fill `S` in. The 2807 // initializer shall be used to initialize a variable of type `std::span<T>`. 2808 // In some cases (e. g. constant size array) the initializer should remain 2809 // unchanged and the function returns empty list. In case the function can't 2810 // provide the right fixit it will return nullopt. 2811 // 2812 // FIXME: Support multi-level pointers 2813 // 2814 // Parameters: 2815 // `Init` a pointer to the initializer expression 2816 // `Ctx` a reference to the ASTContext 2817 static std::optional<FixItList> 2818 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx, 2819 const StringRef UserFillPlaceHolder) { 2820 const SourceManager &SM = Ctx.getSourceManager(); 2821 const LangOptions &LangOpts = Ctx.getLangOpts(); 2822 2823 // If `Init` has a constant value that is (or equivalent to) a 2824 // NULL pointer, we use the default constructor to initialize the span 2825 // object, i.e., a `std:span` variable declaration with no initializer. 2826 // So the fix-it is just to remove the initializer. 2827 if (Init->isNullPointerConstant( 2828 Ctx, 2829 // FIXME: Why does this function not ask for `const ASTContext 2830 // &`? It should. Maybe worth an NFC patch later. 2831 Expr::NullPointerConstantValueDependence:: 2832 NPC_ValueDependentIsNotNull)) { 2833 std::optional<SourceLocation> InitLocation = 2834 getEndCharLoc(Init, SM, LangOpts); 2835 if (!InitLocation) 2836 return std::nullopt; 2837 2838 SourceRange SR(Init->getBeginLoc(), *InitLocation); 2839 2840 return FixItList{FixItHint::CreateRemoval(SR)}; 2841 } 2842 2843 FixItList FixIts{}; 2844 std::string ExtentText = UserFillPlaceHolder.data(); 2845 StringRef One = "1"; 2846 2847 // Insert `{` before `Init`: 2848 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); 2849 // Try to get the data extent. Break into different cases: 2850 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) { 2851 // In cases `Init` is `new T[n]` and there is no explicit cast over 2852 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects 2853 // of `T`. So the extent is `n` unless `n` has side effects. Similar but 2854 // simpler for the case where `Init` is `new T`. 2855 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { 2856 if (!Ext->HasSideEffects(Ctx)) { 2857 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts); 2858 if (!ExtentString) 2859 return std::nullopt; 2860 ExtentText = *ExtentString; 2861 } 2862 } else if (!CxxNew->isArray()) 2863 // Although the initializer is not allocating a buffer, the pointer 2864 // variable could still be used in buffer access operations. 2865 ExtentText = One; 2866 } else if (Ctx.getAsConstantArrayType(Init->IgnoreImpCasts()->getType())) { 2867 // std::span has a single parameter constructor for initialization with 2868 // constant size array. The size is auto-deduced as the constructor is a 2869 // function template. The correct fixit is empty - no changes should happen. 2870 return FixItList{}; 2871 } else { 2872 // In cases `Init` is of the form `&Var` after stripping of implicit 2873 // casts, where `&` is the built-in operator, the extent is 1. 2874 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts())) 2875 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && 2876 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())) 2877 ExtentText = One; 2878 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, 2879 // and explicit casting, etc. etc. 2880 } 2881 2882 SmallString<32> StrBuffer{}; 2883 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts); 2884 2885 if (!LocPassInit) 2886 return std::nullopt; 2887 2888 StrBuffer.append(", "); 2889 StrBuffer.append(ExtentText); 2890 StrBuffer.append("}"); 2891 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); 2892 return FixIts; 2893 } 2894 2895 #ifndef NDEBUG 2896 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \ 2897 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), \ 2898 "failed to produce fixit for declaration '" + \ 2899 (D)->getNameAsString() + "'" + (Msg)) 2900 #else 2901 #define DEBUG_NOTE_DECL_FAIL(D, Msg) 2902 #endif 2903 2904 // For the given variable declaration with a pointer-to-T type, returns the text 2905 // `std::span<T>`. If it is unable to generate the text, returns 2906 // `std::nullopt`. 2907 static std::optional<std::string> 2908 createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) { 2909 assert(VD->getType()->isPointerType()); 2910 2911 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2912 std::optional<std::string> PteTyText = getPointeeTypeText( 2913 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2914 2915 if (!PteTyText) 2916 return std::nullopt; 2917 2918 std::string SpanTyText = "std::span<"; 2919 2920 SpanTyText.append(*PteTyText); 2921 // Append qualifiers to span element type if any: 2922 if (PteTyQualifiers) { 2923 SpanTyText.append(" "); 2924 SpanTyText.append(PteTyQualifiers->getAsString()); 2925 } 2926 SpanTyText.append(">"); 2927 return SpanTyText; 2928 } 2929 2930 // For a `VarDecl` of the form `T * var (= Init)?`, this 2931 // function generates fix-its that 2932 // 1) replace `T * var` with `std::span<T> var`; and 2933 // 2) change `Init` accordingly to a span constructor, if it exists. 2934 // 2935 // FIXME: support Multi-level pointers 2936 // 2937 // Parameters: 2938 // `D` a pointer the variable declaration node 2939 // `Ctx` a reference to the ASTContext 2940 // `UserFillPlaceHolder` the user-input placeholder text 2941 // Returns: 2942 // the non-empty fix-it list, if fix-its are successfuly generated; empty 2943 // list otherwise. 2944 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, 2945 const StringRef UserFillPlaceHolder, 2946 UnsafeBufferUsageHandler &Handler) { 2947 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager())) 2948 return {}; 2949 2950 FixItList FixIts{}; 2951 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx); 2952 2953 if (!SpanTyText) { 2954 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type"); 2955 return {}; 2956 } 2957 2958 // Will hold the text for `std::span<T> Ident`: 2959 std::stringstream SS; 2960 2961 SS << *SpanTyText; 2962 // Fix the initializer if it exists: 2963 if (const Expr *Init = D->getInit()) { 2964 std::optional<FixItList> InitFixIts = 2965 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder); 2966 if (!InitFixIts) 2967 return {}; 2968 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts->begin()), 2969 std::make_move_iterator(InitFixIts->end())); 2970 } 2971 // For declaration of the form `T * ident = init;`, we want to replace 2972 // `T * ` with `std::span<T>`. 2973 // We ignore CV-qualifiers so for `T * const ident;` we also want to replace 2974 // just `T *` with `std::span<T>`. 2975 const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc(); 2976 if (!EndLocForReplacement.isValid()) { 2977 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration"); 2978 return {}; 2979 } 2980 // The only exception is that for `T *ident` we'll add a single space between 2981 // "std::span<T>" and "ident". 2982 // FIXME: The condition is false for identifiers expended from macros. 2983 if (EndLocForReplacement.getLocWithOffset(1) == getVarDeclIdentifierLoc(D)) 2984 SS << " "; 2985 2986 FixIts.push_back(FixItHint::CreateReplacement( 2987 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str())); 2988 return FixIts; 2989 } 2990 2991 static bool hasConflictingOverload(const FunctionDecl *FD) { 2992 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult(); 2993 } 2994 2995 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new 2996 // types, this function produces fix-its to make the change self-contained. Let 2997 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the 2998 // entity defined by the `FunctionDecl` after the change to the parameters. 2999 // Fix-its produced by this function are 3000 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration 3001 // of 'F'; 3002 // 2. Create a declaration of "NewF" next to each declaration of `F`; 3003 // 3. Create a definition of "F" (as its' original definition is now belongs 3004 // to "NewF") next to its original definition. The body of the creating 3005 // definition calls to "NewF". 3006 // 3007 // Example: 3008 // 3009 // void f(int *p); // original declaration 3010 // void f(int *p) { // original definition 3011 // p[5]; 3012 // } 3013 // 3014 // To change the parameter `p` to be of `std::span<int>` type, we 3015 // also add overloads: 3016 // 3017 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl 3018 // void f(std::span<int> p); // added overload decl 3019 // void f(std::span<int> p) { // original def where param is changed 3020 // p[5]; 3021 // } 3022 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def 3023 // return f(std::span(p, <# size #>)); 3024 // } 3025 // 3026 static std::optional<FixItList> 3027 createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD, 3028 const ASTContext &Ctx, 3029 UnsafeBufferUsageHandler &Handler) { 3030 // FIXME: need to make this conflict checking better: 3031 if (hasConflictingOverload(FD)) 3032 return std::nullopt; 3033 3034 const SourceManager &SM = Ctx.getSourceManager(); 3035 const LangOptions &LangOpts = Ctx.getLangOpts(); 3036 const unsigned NumParms = FD->getNumParams(); 3037 std::vector<std::string> NewTysTexts(NumParms); 3038 std::vector<bool> ParmsMask(NumParms, false); 3039 bool AtLeastOneParmToFix = false; 3040 3041 for (unsigned i = 0; i < NumParms; i++) { 3042 const ParmVarDecl *PVD = FD->getParamDecl(i); 3043 3044 if (S.lookup(PVD) == FixitStrategy::Kind::Wontfix) 3045 continue; 3046 if (S.lookup(PVD) != FixitStrategy::Kind::Span) 3047 // Not supported, not suppose to happen: 3048 return std::nullopt; 3049 3050 std::optional<Qualifiers> PteTyQuals = std::nullopt; 3051 std::optional<std::string> PteTyText = 3052 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals); 3053 3054 if (!PteTyText) 3055 // something wrong in obtaining the text of the pointee type, give up 3056 return std::nullopt; 3057 // FIXME: whether we should create std::span type depends on the 3058 // FixitStrategy. 3059 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals); 3060 ParmsMask[i] = true; 3061 AtLeastOneParmToFix = true; 3062 } 3063 if (!AtLeastOneParmToFix) 3064 // No need to create function overloads: 3065 return {}; 3066 // FIXME Respect indentation of the original code. 3067 3068 // A lambda that creates the text representation of a function declaration 3069 // with the new type signatures: 3070 const auto NewOverloadSignatureCreator = 3071 [&SM, &LangOpts, &NewTysTexts, 3072 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 3073 std::stringstream SS; 3074 3075 SS << ";"; 3076 SS << getEndOfLine().str(); 3077 // Append: ret-type func-name "(" 3078 if (auto Prefix = getRangeText( 3079 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), 3080 SM, LangOpts)) 3081 SS << Prefix->str(); 3082 else 3083 return std::nullopt; // give up 3084 // Append: parameter-type-list 3085 const unsigned NumParms = FD->getNumParams(); 3086 3087 for (unsigned i = 0; i < NumParms; i++) { 3088 const ParmVarDecl *Parm = FD->getParamDecl(i); 3089 3090 if (Parm->isImplicit()) 3091 continue; 3092 if (ParmsMask[i]) { 3093 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its 3094 // new type: 3095 SS << NewTysTexts[i]; 3096 // print parameter name if provided: 3097 if (IdentifierInfo *II = Parm->getIdentifier()) 3098 SS << ' ' << II->getName().str(); 3099 } else if (auto ParmTypeText = 3100 getRangeText(getSourceRangeToTokenEnd(Parm, SM, LangOpts), 3101 SM, LangOpts)) { 3102 // print the whole `Parm` without modification: 3103 SS << ParmTypeText->str(); 3104 } else 3105 return std::nullopt; // something wrong, give up 3106 if (i != NumParms - 1) 3107 SS << ", "; 3108 } 3109 SS << ")"; 3110 return SS.str(); 3111 }; 3112 3113 // A lambda that creates the text representation of a function definition with 3114 // the original signature: 3115 const auto OldOverloadDefCreator = 3116 [&Handler, &SM, &LangOpts, &NewTysTexts, 3117 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 3118 std::stringstream SS; 3119 3120 SS << getEndOfLine().str(); 3121 // Append: attr-name ret-type func-name "(" param-list ")" "{" 3122 if (auto FDPrefix = getRangeText( 3123 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, 3124 LangOpts)) 3125 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") 3126 << FDPrefix->str() << "{"; 3127 else 3128 return std::nullopt; 3129 // Append: "return" func-name "(" 3130 if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) 3131 SS << "return " << FunQualName->str() << "("; 3132 else 3133 return std::nullopt; 3134 3135 // Append: arg-list 3136 const unsigned NumParms = FD->getNumParams(); 3137 for (unsigned i = 0; i < NumParms; i++) { 3138 const ParmVarDecl *Parm = FD->getParamDecl(i); 3139 3140 if (Parm->isImplicit()) 3141 continue; 3142 // FIXME: If a parameter has no name, it is unused in the 3143 // definition. So we could just leave it as it is. 3144 if (!Parm->getIdentifier()) 3145 // If a parameter of a function definition has no name: 3146 return std::nullopt; 3147 if (ParmsMask[i]) 3148 // This is our spanified paramter! 3149 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str() 3150 << ", " << getUserFillPlaceHolder("size") << ")"; 3151 else 3152 SS << Parm->getIdentifier()->getName().str(); 3153 if (i != NumParms - 1) 3154 SS << ", "; 3155 } 3156 // finish call and the body 3157 SS << ");}" << getEndOfLine().str(); 3158 // FIXME: 80-char line formatting? 3159 return SS.str(); 3160 }; 3161 3162 FixItList FixIts{}; 3163 for (FunctionDecl *FReDecl : FD->redecls()) { 3164 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); 3165 3166 if (!Loc) 3167 return {}; 3168 if (FReDecl->isThisDeclarationADefinition()) { 3169 assert(FReDecl == FD && "inconsistent function definition"); 3170 // Inserts a definition with the old signature to the end of 3171 // `FReDecl`: 3172 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl)) 3173 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); 3174 else 3175 return {}; // give up 3176 } else { 3177 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: 3178 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { 3179 FixIts.emplace_back(FixItHint::CreateInsertion( 3180 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( 3181 FReDecl->getBeginLoc(), " "))); 3182 } 3183 // Inserts a declaration with the new signature to the end of `FReDecl`: 3184 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl)) 3185 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); 3186 else 3187 return {}; 3188 } 3189 } 3190 return FixIts; 3191 } 3192 3193 // To fix a `ParmVarDecl` to be of `std::span` type. 3194 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, 3195 UnsafeBufferUsageHandler &Handler) { 3196 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) { 3197 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)"); 3198 return {}; 3199 } 3200 if (PVD->hasDefaultArg()) { 3201 // FIXME: generate fix-its for default values: 3202 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg"); 3203 return {}; 3204 } 3205 3206 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 3207 std::optional<std::string> PteTyText = getPointeeTypeText( 3208 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 3209 3210 if (!PteTyText) { 3211 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type"); 3212 return {}; 3213 } 3214 3215 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); 3216 3217 if (!PVDNameText) { 3218 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name"); 3219 return {}; 3220 } 3221 3222 std::stringstream SS; 3223 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx); 3224 3225 if (PteTyQualifiers) 3226 // Append qualifiers if they exist: 3227 SS << getSpanTypeText(*PteTyText, PteTyQualifiers); 3228 else 3229 SS << getSpanTypeText(*PteTyText); 3230 // Append qualifiers to the type of the parameter: 3231 if (PVD->getType().hasQualifiers()) 3232 SS << ' ' << PVD->getType().getQualifiers().getAsString(); 3233 // Append parameter's name: 3234 SS << ' ' << PVDNameText->str(); 3235 // Add replacement fix-it: 3236 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())}; 3237 } 3238 3239 static FixItList fixVariableWithSpan(const VarDecl *VD, 3240 const DeclUseTracker &Tracker, 3241 ASTContext &Ctx, 3242 UnsafeBufferUsageHandler &Handler) { 3243 const DeclStmt *DS = Tracker.lookupDecl(VD); 3244 if (!DS) { 3245 DEBUG_NOTE_DECL_FAIL(VD, 3246 " : variables declared this way not implemented yet"); 3247 return {}; 3248 } 3249 if (!DS->isSingleDecl()) { 3250 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 3251 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls"); 3252 return {}; 3253 } 3254 // Currently DS is an unused variable but we'll need it when 3255 // non-single decls are implemented, where the pointee type name 3256 // and the '*' are spread around the place. 3257 (void)DS; 3258 3259 // FIXME: handle cases where DS has multiple declarations 3260 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); 3261 } 3262 3263 static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx, 3264 UnsafeBufferUsageHandler &Handler) { 3265 FixItList FixIts{}; 3266 3267 // Note: the code below expects the declaration to not use any type sugar like 3268 // typedef. 3269 if (auto CAT = Ctx.getAsConstantArrayType(D->getType())) { 3270 const QualType &ArrayEltT = CAT->getElementType(); 3271 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!"); 3272 // FIXME: support multi-dimensional arrays 3273 if (isa<clang::ArrayType>(ArrayEltT.getCanonicalType())) 3274 return {}; 3275 3276 const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D); 3277 3278 // Get the spelling of the element type as written in the source file 3279 // (including macros, etc.). 3280 auto MaybeElemTypeTxt = 3281 getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(), 3282 Ctx.getLangOpts()); 3283 if (!MaybeElemTypeTxt) 3284 return {}; 3285 const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim(); 3286 3287 // Find the '[' token. 3288 std::optional<Token> NextTok = Lexer::findNextToken( 3289 IdentifierLoc, Ctx.getSourceManager(), Ctx.getLangOpts()); 3290 while (NextTok && !NextTok->is(tok::l_square) && 3291 NextTok->getLocation() <= D->getSourceRange().getEnd()) 3292 NextTok = Lexer::findNextToken(NextTok->getLocation(), 3293 Ctx.getSourceManager(), Ctx.getLangOpts()); 3294 if (!NextTok) 3295 return {}; 3296 const SourceLocation LSqBracketLoc = NextTok->getLocation(); 3297 3298 // Get the spelling of the array size as written in the source file 3299 // (including macros, etc.). 3300 auto MaybeArraySizeTxt = getRangeText( 3301 {LSqBracketLoc.getLocWithOffset(1), D->getTypeSpecEndLoc()}, 3302 Ctx.getSourceManager(), Ctx.getLangOpts()); 3303 if (!MaybeArraySizeTxt) 3304 return {}; 3305 const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim(); 3306 if (ArraySizeTxt.empty()) { 3307 // FIXME: Support array size getting determined from the initializer. 3308 // Examples: 3309 // int arr1[] = {0, 1, 2}; 3310 // int arr2{3, 4, 5}; 3311 // We might be able to preserve the non-specified size with `auto` and 3312 // `std::to_array`: 3313 // auto arr1 = std::to_array<int>({0, 1, 2}); 3314 return {}; 3315 } 3316 3317 std::optional<StringRef> IdentText = 3318 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); 3319 3320 if (!IdentText) { 3321 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); 3322 return {}; 3323 } 3324 3325 SmallString<32> Replacement; 3326 raw_svector_ostream OS(Replacement); 3327 OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> " 3328 << IdentText->str(); 3329 3330 FixIts.push_back(FixItHint::CreateReplacement( 3331 SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, OS.str())); 3332 } 3333 3334 return FixIts; 3335 } 3336 3337 static FixItList fixVariableWithArray(const VarDecl *VD, 3338 const DeclUseTracker &Tracker, 3339 const ASTContext &Ctx, 3340 UnsafeBufferUsageHandler &Handler) { 3341 const DeclStmt *DS = Tracker.lookupDecl(VD); 3342 assert(DS && "Fixing non-local variables not implemented yet!"); 3343 if (!DS->isSingleDecl()) { 3344 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 3345 return {}; 3346 } 3347 // Currently DS is an unused variable but we'll need it when 3348 // non-single decls are implemented, where the pointee type name 3349 // and the '*' are spread around the place. 3350 (void)DS; 3351 3352 // FIXME: handle cases where DS has multiple declarations 3353 return fixVarDeclWithArray(VD, Ctx, Handler); 3354 } 3355 3356 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due 3357 // to any unexpected problem. 3358 static FixItList 3359 fixVariable(const VarDecl *VD, FixitStrategy::Kind K, 3360 /* The function decl under analysis */ const Decl *D, 3361 const DeclUseTracker &Tracker, ASTContext &Ctx, 3362 UnsafeBufferUsageHandler &Handler) { 3363 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) { 3364 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext()); 3365 if (!FD || FD != D) { 3366 // `FD != D` means that `PVD` belongs to a function that is not being 3367 // analyzed currently. Thus `FD` may not be complete. 3368 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed"); 3369 return {}; 3370 } 3371 3372 // TODO If function has a try block we can't change params unless we check 3373 // also its catch block for their use. 3374 // FIXME We might support static class methods, some select methods, 3375 // operators and possibly lamdas. 3376 if (FD->isMain() || FD->isConstexpr() || 3377 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate || 3378 FD->isVariadic() || 3379 // also covers call-operator of lamdas 3380 isa<CXXMethodDecl>(FD) || 3381 // skip when the function body is a try-block 3382 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) || 3383 FD->isOverloadedOperator()) { 3384 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl"); 3385 return {}; // TODO test all these cases 3386 } 3387 } 3388 3389 switch (K) { 3390 case FixitStrategy::Kind::Span: { 3391 if (VD->getType()->isPointerType()) { 3392 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) 3393 return fixParamWithSpan(PVD, Ctx, Handler); 3394 3395 if (VD->isLocalVarDecl()) 3396 return fixVariableWithSpan(VD, Tracker, Ctx, Handler); 3397 } 3398 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer"); 3399 return {}; 3400 } 3401 case FixitStrategy::Kind::Array: { 3402 if (VD->isLocalVarDecl() && Ctx.getAsConstantArrayType(VD->getType())) 3403 return fixVariableWithArray(VD, Tracker, Ctx, Handler); 3404 3405 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array"); 3406 return {}; 3407 } 3408 case FixitStrategy::Kind::Iterator: 3409 case FixitStrategy::Kind::Vector: 3410 llvm_unreachable("FixitStrategy not implemented yet!"); 3411 case FixitStrategy::Kind::Wontfix: 3412 llvm_unreachable("Invalid strategy!"); 3413 } 3414 llvm_unreachable("Unknown strategy!"); 3415 } 3416 3417 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the 3418 // `RemoveRange` of 'h' overlaps with a macro use. 3419 static bool overlapWithMacro(const FixItList &FixIts) { 3420 // FIXME: For now we only check if the range (or the first token) is (part of) 3421 // a macro expansion. Ideally, we want to check for all tokens in the range. 3422 return llvm::any_of(FixIts, [](const FixItHint &Hint) { 3423 auto Range = Hint.RemoveRange; 3424 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) 3425 // If the range (or the first token) is (part of) a macro expansion: 3426 return true; 3427 return false; 3428 }); 3429 } 3430 3431 // Returns true iff `VD` is a parameter of the declaration `D`: 3432 static bool isParameterOf(const VarDecl *VD, const Decl *D) { 3433 return isa<ParmVarDecl>(VD) && 3434 VD->getDeclContext() == dyn_cast<DeclContext>(D); 3435 } 3436 3437 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable 3438 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not 3439 // contain `v`. 3440 static void eraseVarsForUnfixableGroupMates( 3441 std::map<const VarDecl *, FixItList> &FixItsForVariable, 3442 const VariableGroupsManager &VarGrpMgr) { 3443 // Variables will be removed from `FixItsForVariable`: 3444 SmallVector<const VarDecl *, 8> ToErase; 3445 3446 for (const auto &[VD, Ignore] : FixItsForVariable) { 3447 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD); 3448 if (llvm::any_of(Grp, 3449 [&FixItsForVariable](const VarDecl *GrpMember) -> bool { 3450 return !FixItsForVariable.count(GrpMember); 3451 })) { 3452 // At least one group member cannot be fixed, so we have to erase the 3453 // whole group: 3454 for (const VarDecl *Member : Grp) 3455 ToErase.push_back(Member); 3456 } 3457 } 3458 for (auto *VarToErase : ToErase) 3459 FixItsForVariable.erase(VarToErase); 3460 } 3461 3462 // Returns the fix-its that create bounds-safe function overloads for the 3463 // function `D`, if `D`'s parameters will be changed to safe-types through 3464 // fix-its in `FixItsForVariable`. 3465 // 3466 // NOTE: In case `D`'s parameters will be changed but bounds-safe function 3467 // overloads cannot created, the whole group that contains the parameters will 3468 // be erased from `FixItsForVariable`. 3469 static FixItList createFunctionOverloadsForParms( 3470 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */, 3471 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD, 3472 const FixitStrategy &S, ASTContext &Ctx, 3473 UnsafeBufferUsageHandler &Handler) { 3474 FixItList FixItsSharedByParms{}; 3475 3476 std::optional<FixItList> OverloadFixes = 3477 createOverloadsForFixedParams(S, FD, Ctx, Handler); 3478 3479 if (OverloadFixes) { 3480 FixItsSharedByParms.append(*OverloadFixes); 3481 } else { 3482 // Something wrong in generating `OverloadFixes`, need to remove the 3483 // whole group, where parameters are in, from `FixItsForVariable` (Note 3484 // that all parameters should be in the same group): 3485 for (auto *Member : VarGrpMgr.getGroupOfParms()) 3486 FixItsForVariable.erase(Member); 3487 } 3488 return FixItsSharedByParms; 3489 } 3490 3491 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`. 3492 static std::map<const VarDecl *, FixItList> 3493 getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S, 3494 ASTContext &Ctx, 3495 /* The function decl under analysis */ const Decl *D, 3496 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler, 3497 const VariableGroupsManager &VarGrpMgr) { 3498 // `FixItsForVariable` will map each variable to a set of fix-its directly 3499 // associated to the variable itself. Fix-its of distinct variables in 3500 // `FixItsForVariable` are disjoint. 3501 std::map<const VarDecl *, FixItList> FixItsForVariable; 3502 3503 // Populate `FixItsForVariable` with fix-its directly associated with each 3504 // variable. Fix-its directly associated to a variable 'v' are the ones 3505 // produced by the `FixableGadget`s whose claimed variable is 'v'. 3506 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) { 3507 FixItsForVariable[VD] = 3508 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler); 3509 // If we fail to produce Fix-It for the declaration we have to skip the 3510 // variable entirely. 3511 if (FixItsForVariable[VD].empty()) { 3512 FixItsForVariable.erase(VD); 3513 continue; 3514 } 3515 for (const auto &F : Fixables) { 3516 std::optional<FixItList> Fixits = F->getFixits(S); 3517 3518 if (Fixits) { 3519 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), 3520 Fixits->begin(), Fixits->end()); 3521 continue; 3522 } 3523 #ifndef NDEBUG 3524 Handler.addDebugNoteForVar( 3525 VD, F->getSourceLoc(), 3526 ("gadget '" + F->getDebugName() + "' refused to produce a fix") 3527 .str()); 3528 #endif 3529 FixItsForVariable.erase(VD); 3530 break; 3531 } 3532 } 3533 3534 // `FixItsForVariable` now contains only variables that can be 3535 // fixed. A variable can be fixed if its' declaration and all Fixables 3536 // associated to it can all be fixed. 3537 3538 // To further remove from `FixItsForVariable` variables whose group mates 3539 // cannot be fixed... 3540 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr); 3541 // Now `FixItsForVariable` gets further reduced: a variable is in 3542 // `FixItsForVariable` iff it can be fixed and all its group mates can be 3543 // fixed. 3544 3545 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`. 3546 // That is, when fixing multiple parameters in one step, these fix-its will 3547 // be applied only once (instead of being applied per parameter). 3548 FixItList FixItsSharedByParms{}; 3549 3550 if (auto *FD = dyn_cast<FunctionDecl>(D)) 3551 FixItsSharedByParms = createFunctionOverloadsForParms( 3552 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler); 3553 3554 // The map that maps each variable `v` to fix-its for the whole group where 3555 // `v` is in: 3556 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{ 3557 FixItsForVariable}; 3558 3559 for (auto &[Var, Ignore] : FixItsForVariable) { 3560 bool AnyParm = false; 3561 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm); 3562 3563 for (const VarDecl *GrpMate : VarGroupForVD) { 3564 if (Var == GrpMate) 3565 continue; 3566 if (FixItsForVariable.count(GrpMate)) 3567 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]); 3568 } 3569 if (AnyParm) { 3570 // This assertion should never fail. Otherwise we have a bug. 3571 assert(!FixItsSharedByParms.empty() && 3572 "Should not try to fix a parameter that does not belong to a " 3573 "FunctionDecl"); 3574 FinalFixItsForVariable[Var].append(FixItsSharedByParms); 3575 } 3576 } 3577 // Fix-its that will be applied in one step shall NOT: 3578 // 1. overlap with macros or/and templates; or 3579 // 2. conflict with each other. 3580 // Otherwise, the fix-its will be dropped. 3581 for (auto Iter = FinalFixItsForVariable.begin(); 3582 Iter != FinalFixItsForVariable.end();) 3583 if (overlapWithMacro(Iter->second) || 3584 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) { 3585 Iter = FinalFixItsForVariable.erase(Iter); 3586 } else 3587 Iter++; 3588 return FinalFixItsForVariable; 3589 } 3590 3591 template <typename VarDeclIterTy> 3592 static FixitStrategy 3593 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) { 3594 FixitStrategy S; 3595 for (const VarDecl *VD : UnsafeVars) { 3596 if (isa<ConstantArrayType>(VD->getType().getCanonicalType())) 3597 S.set(VD, FixitStrategy::Kind::Array); 3598 else 3599 S.set(VD, FixitStrategy::Kind::Span); 3600 } 3601 return S; 3602 } 3603 3604 // Manages variable groups: 3605 class VariableGroupsManagerImpl : public VariableGroupsManager { 3606 const std::vector<VarGrpTy> Groups; 3607 const std::map<const VarDecl *, unsigned> &VarGrpMap; 3608 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms; 3609 3610 public: 3611 VariableGroupsManagerImpl( 3612 const std::vector<VarGrpTy> &Groups, 3613 const std::map<const VarDecl *, unsigned> &VarGrpMap, 3614 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms) 3615 : Groups(Groups), VarGrpMap(VarGrpMap), 3616 GrpsUnionForParms(GrpsUnionForParms) {} 3617 3618 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override { 3619 if (GrpsUnionForParms.contains(Var)) { 3620 if (HasParm) 3621 *HasParm = true; 3622 return GrpsUnionForParms.getArrayRef(); 3623 } 3624 if (HasParm) 3625 *HasParm = false; 3626 3627 auto It = VarGrpMap.find(Var); 3628 3629 if (It == VarGrpMap.end()) 3630 return {}; 3631 return Groups[It->second]; 3632 } 3633 3634 VarGrpRef getGroupOfParms() const override { 3635 return GrpsUnionForParms.getArrayRef(); 3636 } 3637 }; 3638 3639 void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets, 3640 WarningGadgetList WarningGadgets, DeclUseTracker Tracker, 3641 UnsafeBufferUsageHandler &Handler, bool EmitSuggestions) { 3642 if (!EmitSuggestions) { 3643 // Our job is very easy without suggestions. Just warn about 3644 // every problematic operation and consider it done. No need to deal 3645 // with fixable gadgets, no need to group operations by variable. 3646 for (const auto &G : WarningGadgets) { 3647 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false, 3648 D->getASTContext()); 3649 } 3650 3651 // This return guarantees that most of the machine doesn't run when 3652 // suggestions aren't requested. 3653 assert(FixableGadgets.size() == 0 && 3654 "Fixable gadgets found but suggestions not requested!"); 3655 return; 3656 } 3657 3658 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 3659 // function under the analysis. No need to fix any Fixables. 3660 if (!WarningGadgets.empty()) { 3661 // Gadgets "claim" variables they're responsible for. Once this loop 3662 // finishes, the tracker will only track DREs that weren't claimed by any 3663 // gadgets, i.e. not understood by the analysis. 3664 for (const auto &G : FixableGadgets) { 3665 for (const auto *DRE : G->getClaimedVarUseSites()) { 3666 Tracker.claimUse(DRE); 3667 } 3668 } 3669 } 3670 3671 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 3672 // function under the analysis. Thus, it early returns here as there is 3673 // nothing needs to be fixed. 3674 // 3675 // Note this claim is based on the assumption that there is no unsafe 3676 // variable whose declaration is invisible from the analyzing function. 3677 // Otherwise, we need to consider if the uses of those unsafe varuables needs 3678 // fix. 3679 // So far, we are not fixing any global variables or class members. And, 3680 // lambdas will be analyzed along with the enclosing function. So this early 3681 // return is correct for now. 3682 if (WarningGadgets.empty()) 3683 return; 3684 3685 WarningGadgetSets UnsafeOps = 3686 groupWarningGadgetsByVar(std::move(WarningGadgets)); 3687 FixableGadgetSets FixablesForAllVars = 3688 groupFixablesByVar(std::move(FixableGadgets)); 3689 3690 std::map<const VarDecl *, FixItList> FixItsForVariableGroup; 3691 3692 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. 3693 for (auto it = FixablesForAllVars.byVar.cbegin(); 3694 it != FixablesForAllVars.byVar.cend();) { 3695 // FIXME: need to deal with global variables later 3696 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) { 3697 #ifndef NDEBUG 3698 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 3699 ("failed to produce fixit for '" + 3700 it->first->getNameAsString() + 3701 "' : neither local nor a parameter")); 3702 #endif 3703 it = FixablesForAllVars.byVar.erase(it); 3704 } else if (it->first->getType().getCanonicalType()->isReferenceType()) { 3705 #ifndef NDEBUG 3706 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 3707 ("failed to produce fixit for '" + 3708 it->first->getNameAsString() + 3709 "' : has a reference type")); 3710 #endif 3711 it = FixablesForAllVars.byVar.erase(it); 3712 } else if (Tracker.hasUnclaimedUses(it->first)) { 3713 it = FixablesForAllVars.byVar.erase(it); 3714 } else if (it->first->isInitCapture()) { 3715 #ifndef NDEBUG 3716 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 3717 ("failed to produce fixit for '" + 3718 it->first->getNameAsString() + 3719 "' : init capture")); 3720 #endif 3721 it = FixablesForAllVars.byVar.erase(it); 3722 } else { 3723 ++it; 3724 } 3725 } 3726 3727 #ifndef NDEBUG 3728 for (const auto &it : UnsafeOps.byVar) { 3729 const VarDecl *const UnsafeVD = it.first; 3730 auto UnclaimedDREs = Tracker.getUnclaimedUses(UnsafeVD); 3731 if (UnclaimedDREs.empty()) 3732 continue; 3733 const auto UnfixedVDName = UnsafeVD->getNameAsString(); 3734 for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) { 3735 std::string UnclaimedUseTrace = 3736 getDREAncestorString(UnclaimedDRE, D->getASTContext()); 3737 3738 Handler.addDebugNoteForVar( 3739 UnsafeVD, UnclaimedDRE->getBeginLoc(), 3740 ("failed to produce fixit for '" + UnfixedVDName + 3741 "' : has an unclaimed use\nThe unclaimed DRE trace: " + 3742 UnclaimedUseTrace)); 3743 } 3744 } 3745 #endif 3746 3747 // Fixpoint iteration for pointer assignments 3748 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>; 3749 DepMapTy DependenciesMap{}; 3750 DepMapTy PtrAssignmentGraph{}; 3751 3752 for (auto it : FixablesForAllVars.byVar) { 3753 for (const FixableGadget *fixable : it.second) { 3754 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair = 3755 fixable->getStrategyImplications(); 3756 if (ImplPair) { 3757 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair); 3758 PtrAssignmentGraph[Impl.first].insert(Impl.second); 3759 } 3760 } 3761 } 3762 3763 /* 3764 The following code does a BFS traversal of the `PtrAssignmentGraph` 3765 considering all unsafe vars as starting nodes and constructs an undirected 3766 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner 3767 elimiates all variables that are unreachable from any unsafe var. In other 3768 words, this removes all dependencies that don't include any unsafe variable 3769 and consequently don't need any fixit generation. 3770 Note: A careful reader would observe that the code traverses 3771 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and 3772 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would 3773 achieve the same result but the one used here dramatically cuts the 3774 amount of hoops the second part of the algorithm needs to jump, given that 3775 a lot of these connections become "direct". The reader is advised not to 3776 imagine how the graph is transformed because of using `Var` instead of 3777 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used, 3778 and think about why it's equivalent later. 3779 */ 3780 std::set<const VarDecl *> VisitedVarsDirected{}; 3781 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 3782 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) { 3783 3784 std::queue<const VarDecl *> QueueDirected{}; 3785 QueueDirected.push(Var); 3786 while (!QueueDirected.empty()) { 3787 const VarDecl *CurrentVar = QueueDirected.front(); 3788 QueueDirected.pop(); 3789 VisitedVarsDirected.insert(CurrentVar); 3790 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar]; 3791 for (const VarDecl *Adj : AdjacentNodes) { 3792 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) { 3793 QueueDirected.push(Adj); 3794 } 3795 DependenciesMap[Var].insert(Adj); 3796 DependenciesMap[Adj].insert(Var); 3797 } 3798 } 3799 } 3800 } 3801 3802 // `Groups` stores the set of Connected Components in the graph. 3803 std::vector<VarGrpTy> Groups; 3804 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the 3805 // variables belong to. Group indexes refer to the elements in `Groups`. 3806 // `VarGrpMap` is complete in that every variable that needs fix is in it. 3807 std::map<const VarDecl *, unsigned> VarGrpMap; 3808 // The union group over the ones in "Groups" that contain parameters of `D`: 3809 llvm::SetVector<const VarDecl *> 3810 GrpsUnionForParms; // these variables need to be fixed in one step 3811 3812 // Group Connected Components for Unsafe Vars 3813 // (Dependencies based on pointer assignments) 3814 std::set<const VarDecl *> VisitedVars{}; 3815 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 3816 if (VisitedVars.find(Var) == VisitedVars.end()) { 3817 VarGrpTy &VarGroup = Groups.emplace_back(); 3818 std::queue<const VarDecl *> Queue{}; 3819 3820 Queue.push(Var); 3821 while (!Queue.empty()) { 3822 const VarDecl *CurrentVar = Queue.front(); 3823 Queue.pop(); 3824 VisitedVars.insert(CurrentVar); 3825 VarGroup.push_back(CurrentVar); 3826 auto AdjacentNodes = DependenciesMap[CurrentVar]; 3827 for (const VarDecl *Adj : AdjacentNodes) { 3828 if (VisitedVars.find(Adj) == VisitedVars.end()) { 3829 Queue.push(Adj); 3830 } 3831 } 3832 } 3833 3834 bool HasParm = false; 3835 unsigned GrpIdx = Groups.size() - 1; 3836 3837 for (const VarDecl *V : VarGroup) { 3838 VarGrpMap[V] = GrpIdx; 3839 if (!HasParm && isParameterOf(V, D)) 3840 HasParm = true; 3841 } 3842 if (HasParm) 3843 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end()); 3844 } 3845 } 3846 3847 // Remove a `FixableGadget` if the associated variable is not in the graph 3848 // computed above. We do not want to generate fix-its for such variables, 3849 // since they are neither warned nor reachable from a warned one. 3850 // 3851 // Note a variable is not warned if it is not directly used in any unsafe 3852 // operation. A variable `v` is NOT reachable from an unsafe variable, if it 3853 // does not exist another variable `u` such that `u` is warned and fixing `u` 3854 // (transitively) implicates fixing `v`. 3855 // 3856 // For example, 3857 // ``` 3858 // void f(int * p) { 3859 // int * a = p; *p = 0; 3860 // } 3861 // ``` 3862 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither 3863 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of 3864 // the function above, `p` becomes reachable from a warned variable. 3865 for (auto I = FixablesForAllVars.byVar.begin(); 3866 I != FixablesForAllVars.byVar.end();) { 3867 // Note `VisitedVars` contain all the variables in the graph: 3868 if (!VisitedVars.count((*I).first)) { 3869 // no such var in graph: 3870 I = FixablesForAllVars.byVar.erase(I); 3871 } else 3872 ++I; 3873 } 3874 3875 // We assign strategies to variables that are 1) in the graph and 2) can be 3876 // fixed. Other variables have the default "Won't fix" strategy. 3877 FixitStrategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range( 3878 VisitedVars, [&FixablesForAllVars](const VarDecl *V) { 3879 // If a warned variable has no "Fixable", it is considered unfixable: 3880 return FixablesForAllVars.byVar.count(V); 3881 })); 3882 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms); 3883 3884 if (isa<NamedDecl>(D)) 3885 // The only case where `D` is not a `NamedDecl` is when `D` is a 3886 // `BlockDecl`. Let's not fix variables in blocks for now 3887 FixItsForVariableGroup = 3888 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D, 3889 Tracker, Handler, VarGrpMgr); 3890 3891 for (const auto &G : UnsafeOps.noVar) { 3892 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false, 3893 D->getASTContext()); 3894 } 3895 3896 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { 3897 auto FixItsIt = FixItsForVariableGroup.find(VD); 3898 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr, 3899 FixItsIt != FixItsForVariableGroup.end() 3900 ? std::move(FixItsIt->second) 3901 : FixItList{}, 3902 D, NaiveStrategy); 3903 for (const auto &G : WarningGadgets) { 3904 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true, 3905 D->getASTContext()); 3906 } 3907 } 3908 } 3909 3910 void clang::checkUnsafeBufferUsage(const Decl *D, 3911 UnsafeBufferUsageHandler &Handler, 3912 bool EmitSuggestions) { 3913 #ifndef NDEBUG 3914 Handler.clearDebugNotes(); 3915 #endif 3916 3917 assert(D); 3918 3919 SmallVector<Stmt *> Stmts; 3920 3921 if (const auto *FD = dyn_cast<FunctionDecl>(D)) { 3922 // We do not want to visit a Lambda expression defined inside a method 3923 // independently. Instead, it should be visited along with the outer method. 3924 // FIXME: do we want to do the same thing for `BlockDecl`s? 3925 if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) { 3926 if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass()) 3927 return; 3928 } 3929 3930 for (FunctionDecl *FReDecl : FD->redecls()) { 3931 if (FReDecl->isExternC()) { 3932 // Do not emit fixit suggestions for functions declared in an 3933 // extern "C" block. 3934 EmitSuggestions = false; 3935 break; 3936 } 3937 } 3938 3939 Stmts.push_back(FD->getBody()); 3940 3941 if (const auto *ID = dyn_cast<CXXConstructorDecl>(D)) { 3942 for (const CXXCtorInitializer *CI : ID->inits()) { 3943 Stmts.push_back(CI->getInit()); 3944 } 3945 } 3946 } else if (isa<BlockDecl>(D) || isa<ObjCMethodDecl>(D)) { 3947 Stmts.push_back(D->getBody()); 3948 } 3949 3950 assert(!Stmts.empty()); 3951 3952 FixableGadgetList FixableGadgets; 3953 WarningGadgetList WarningGadgets; 3954 DeclUseTracker Tracker; 3955 for (Stmt *S : Stmts) { 3956 findGadgets(S, D->getASTContext(), Handler, EmitSuggestions, FixableGadgets, 3957 WarningGadgets, Tracker); 3958 } 3959 applyGadgets(D, std::move(FixableGadgets), std::move(WarningGadgets), 3960 std::move(Tracker), Handler, EmitSuggestions); 3961 } 3962