1 //===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/Expr.h" 12 #include "clang/AST/RecursiveASTVisitor.h" 13 #include "clang/AST/StmtVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Lex/Lexer.h" 16 #include "clang/Lex/Preprocessor.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include <memory> 19 #include <optional> 20 #include <sstream> 21 #include <queue> 22 23 using namespace llvm; 24 using namespace clang; 25 using namespace ast_matchers; 26 27 #ifndef NDEBUG 28 namespace { 29 class StmtDebugPrinter 30 : public ConstStmtVisitor<StmtDebugPrinter, std::string> { 31 public: 32 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); } 33 34 std::string VisitBinaryOperator(const BinaryOperator *BO) { 35 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")"; 36 } 37 38 std::string VisitUnaryOperator(const UnaryOperator *UO) { 39 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")"; 40 } 41 42 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) { 43 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")"; 44 } 45 }; 46 47 // Returns a string of ancestor `Stmt`s of the given `DRE` in such a form: 48 // "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...". 49 static std::string getDREAncestorString(const DeclRefExpr *DRE, 50 ASTContext &Ctx) { 51 std::stringstream SS; 52 const Stmt *St = DRE; 53 StmtDebugPrinter StmtPriner; 54 55 do { 56 SS << StmtPriner.Visit(St); 57 58 DynTypedNodeList StParents = Ctx.getParents(*St); 59 60 if (StParents.size() > 1) 61 return "unavailable due to multiple parents"; 62 if (StParents.size() == 0) 63 break; 64 St = StParents.begin()->get<Stmt>(); 65 if (St) 66 SS << " ==> "; 67 } while (St); 68 return SS.str(); 69 } 70 } // namespace 71 #endif /* NDEBUG */ 72 73 namespace clang::ast_matchers { 74 // A `RecursiveASTVisitor` that traverses all descendants of a given node "n" 75 // except for those belonging to a different callable of "n". 76 class MatchDescendantVisitor 77 : public RecursiveASTVisitor<MatchDescendantVisitor> { 78 public: 79 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase; 80 81 // Creates an AST visitor that matches `Matcher` on all 82 // descendants of a given node "n" except for the ones 83 // belonging to a different callable of "n". 84 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher, 85 internal::ASTMatchFinder *Finder, 86 internal::BoundNodesTreeBuilder *Builder, 87 internal::ASTMatchFinder::BindKind Bind, 88 const bool ignoreUnevaluatedContext) 89 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind), 90 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {} 91 92 // Returns true if a match is found in a subtree of `DynNode`, which belongs 93 // to the same callable of `DynNode`. 94 bool findMatch(const DynTypedNode &DynNode) { 95 Matches = false; 96 if (const Stmt *StmtNode = DynNode.get<Stmt>()) { 97 TraverseStmt(const_cast<Stmt *>(StmtNode)); 98 *Builder = ResultBindings; 99 return Matches; 100 } 101 return false; 102 } 103 104 // The following are overriding methods from the base visitor class. 105 // They are public only to allow CRTP to work. They are *not *part 106 // of the public API of this class. 107 108 // For the matchers so far used in safe buffers, we only need to match 109 // `Stmt`s. To override more as needed. 110 111 bool TraverseDecl(Decl *Node) { 112 if (!Node) 113 return true; 114 if (!match(*Node)) 115 return false; 116 // To skip callables: 117 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Node)) 118 return true; 119 // Traverse descendants 120 return VisitorBase::TraverseDecl(Node); 121 } 122 123 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) { 124 // These are unevaluated, except the result expression. 125 if(ignoreUnevaluatedContext) 126 return TraverseStmt(Node->getResultExpr()); 127 return VisitorBase::TraverseGenericSelectionExpr(Node); 128 } 129 130 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) { 131 // Unevaluated context. 132 if(ignoreUnevaluatedContext) 133 return true; 134 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node); 135 } 136 137 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) { 138 // Unevaluated context. 139 if(ignoreUnevaluatedContext) 140 return true; 141 return VisitorBase::TraverseTypeOfExprTypeLoc(Node); 142 } 143 144 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) { 145 // Unevaluated context. 146 if(ignoreUnevaluatedContext) 147 return true; 148 return VisitorBase::TraverseDecltypeTypeLoc(Node); 149 } 150 151 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) { 152 // Unevaluated context. 153 if(ignoreUnevaluatedContext) 154 return true; 155 return VisitorBase::TraverseCXXNoexceptExpr(Node); 156 } 157 158 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) { 159 // Unevaluated context. 160 if(ignoreUnevaluatedContext) 161 return true; 162 return VisitorBase::TraverseCXXTypeidExpr(Node); 163 } 164 165 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { 166 if (!Node) 167 return true; 168 if (!match(*Node)) 169 return false; 170 return VisitorBase::TraverseStmt(Node); 171 } 172 173 bool shouldVisitTemplateInstantiations() const { return true; } 174 bool shouldVisitImplicitCode() const { 175 // TODO: let's ignore implicit code for now 176 return false; 177 } 178 179 private: 180 // Sets 'Matched' to true if 'Matcher' matches 'Node' 181 // 182 // Returns 'true' if traversal should continue after this function 183 // returns, i.e. if no match is found or 'Bind' is 'BK_All'. 184 template <typename T> bool match(const T &Node) { 185 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder); 186 187 if (Matcher->matches(DynTypedNode::create(Node), Finder, 188 &RecursiveBuilder)) { 189 ResultBindings.addMatch(RecursiveBuilder); 190 Matches = true; 191 if (Bind != internal::ASTMatchFinder::BK_All) 192 return false; // Abort as soon as a match is found. 193 } 194 return true; 195 } 196 197 const internal::DynTypedMatcher *const Matcher; 198 internal::ASTMatchFinder *const Finder; 199 internal::BoundNodesTreeBuilder *const Builder; 200 internal::BoundNodesTreeBuilder ResultBindings; 201 const internal::ASTMatchFinder::BindKind Bind; 202 bool Matches; 203 bool ignoreUnevaluatedContext; 204 }; 205 206 // Because we're dealing with raw pointers, let's define what we mean by that. 207 static auto hasPointerType() { 208 return hasType(hasCanonicalType(pointerType())); 209 } 210 211 static auto hasArrayType() { 212 return hasType(hasCanonicalType(arrayType())); 213 } 214 215 AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) { 216 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 217 218 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true); 219 return Visitor.findMatch(DynTypedNode::create(Node)); 220 } 221 222 AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) { 223 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher); 224 225 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false); 226 return Visitor.findMatch(DynTypedNode::create(Node)); 227 } 228 229 // Matches a `Stmt` node iff the node is in a safe-buffer opt-out region 230 AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *, 231 Handler) { 232 return !Handler->isSafeBufferOptOut(Node.getBeginLoc()); 233 } 234 235 AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) { 236 return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder); 237 } 238 239 // Matches a `UnaryOperator` whose operator is pre-increment: 240 AST_MATCHER(UnaryOperator, isPreInc) { 241 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc; 242 } 243 244 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 245 // matches 'e' and 'e' is in an Unspecified Lvalue Context. 246 static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) { 247 // clang-format off 248 return 249 expr(anyOf( 250 implicitCastExpr( 251 hasCastKind(CastKind::CK_LValueToRValue), 252 castSubExpr(innerMatcher)), 253 binaryOperator( 254 hasAnyOperatorName("="), 255 hasLHS(innerMatcher) 256 ) 257 )); 258 // clang-format on 259 } 260 261 262 // Returns a matcher that matches any expression `e` such that `InnerMatcher` 263 // matches `e` and `e` is in an Unspecified Pointer Context (UPC). 264 static internal::Matcher<Stmt> 265 isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) { 266 // A UPC can be 267 // 1. an argument of a function call (except the callee has [[unsafe_...]] 268 // attribute), or 269 // 2. the operand of a pointer-to-(integer or bool) cast operation; or 270 // 3. the operand of a comparator operation; or 271 // 4. the operand of a pointer subtraction operation 272 // (i.e., computing the distance between two pointers); or ... 273 274 auto CallArgMatcher = 275 callExpr(forEachArgumentWithParam(InnerMatcher, 276 hasPointerType() /* array also decays to pointer type*/), 277 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))); 278 279 auto CastOperandMatcher = 280 castExpr(anyOf(hasCastKind(CastKind::CK_PointerToIntegral), 281 hasCastKind(CastKind::CK_PointerToBoolean)), 282 castSubExpr(allOf(hasPointerType(), InnerMatcher))); 283 284 auto CompOperandMatcher = 285 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="), 286 eachOf(hasLHS(allOf(hasPointerType(), InnerMatcher)), 287 hasRHS(allOf(hasPointerType(), InnerMatcher)))); 288 289 // A matcher that matches pointer subtractions: 290 auto PtrSubtractionMatcher = 291 binaryOperator(hasOperatorName("-"), 292 // Note that here we need both LHS and RHS to be 293 // pointer. Then the inner matcher can match any of 294 // them: 295 allOf(hasLHS(hasPointerType()), 296 hasRHS(hasPointerType())), 297 eachOf(hasLHS(InnerMatcher), 298 hasRHS(InnerMatcher))); 299 300 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher, 301 PtrSubtractionMatcher)); 302 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we 303 // don't have to check that.) 304 } 305 306 // Returns a matcher that matches any expression 'e' such that `innerMatcher` 307 // matches 'e' and 'e' is in an unspecified untyped context (i.e the expression 308 // 'e' isn't evaluated to an RValue). For example, consider the following code: 309 // int *p = new int[4]; 310 // int *q = new int[4]; 311 // if ((p = q)) {} 312 // p = q; 313 // The expression `p = q` in the conditional of the `if` statement 314 // `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;` 315 // in the assignment statement is in an untyped context. 316 static internal::Matcher<Stmt> 317 isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) { 318 // An unspecified context can be 319 // 1. A compound statement, 320 // 2. The body of an if statement 321 // 3. Body of a loop 322 auto CompStmt = compoundStmt(forEach(InnerMatcher)); 323 auto IfStmtThen = ifStmt(hasThen(InnerMatcher)); 324 auto IfStmtElse = ifStmt(hasElse(InnerMatcher)); 325 // FIXME: Handle loop bodies. 326 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse)); 327 } 328 } // namespace clang::ast_matchers 329 330 namespace { 331 // Because the analysis revolves around variables and their types, we'll need to 332 // track uses of variables (aka DeclRefExprs). 333 using DeclUseList = SmallVector<const DeclRefExpr *, 1>; 334 335 // Convenience typedef. 336 using FixItList = SmallVector<FixItHint, 4>; 337 338 // Defined below. 339 class Strategy; 340 } // namespace 341 342 namespace { 343 /// Gadget is an individual operation in the code that may be of interest to 344 /// this analysis. Each (non-abstract) subclass corresponds to a specific 345 /// rigid AST structure that constitutes an operation on a pointer-type object. 346 /// Discovery of a gadget in the code corresponds to claiming that we understand 347 /// what this part of code is doing well enough to potentially improve it. 348 /// Gadgets can be warning (immediately deserving a warning) or fixable (not 349 /// always deserving a warning per se, but requires our attention to identify 350 /// it warrants a fixit). 351 class Gadget { 352 public: 353 enum class Kind { 354 #define GADGET(x) x, 355 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 356 }; 357 358 /// Common type of ASTMatchers used for discovering gadgets. 359 /// Useful for implementing the static matcher() methods 360 /// that are expected from all non-abstract subclasses. 361 using Matcher = decltype(stmt()); 362 363 Gadget(Kind K) : K(K) {} 364 365 Kind getKind() const { return K; } 366 367 #ifndef NDEBUG 368 StringRef getDebugName() const { 369 switch (K) { 370 #define GADGET(x) case Kind::x: return #x; 371 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 372 } 373 llvm_unreachable("Unhandled Gadget::Kind enum"); 374 } 375 #endif 376 377 virtual bool isWarningGadget() const = 0; 378 virtual const Stmt *getBaseStmt() const = 0; 379 380 /// Returns the list of pointer-type variables on which this gadget performs 381 /// its operation. Typically, there's only one variable. This isn't a list 382 /// of all DeclRefExprs in the gadget's AST! 383 virtual DeclUseList getClaimedVarUseSites() const = 0; 384 385 virtual ~Gadget() = default; 386 387 private: 388 Kind K; 389 }; 390 391 392 /// Warning gadgets correspond to unsafe code patterns that warrants 393 /// an immediate warning. 394 class WarningGadget : public Gadget { 395 public: 396 WarningGadget(Kind K) : Gadget(K) {} 397 398 static bool classof(const Gadget *G) { return G->isWarningGadget(); } 399 bool isWarningGadget() const final { return true; } 400 }; 401 402 /// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be 403 /// properly recognized in order to emit fixes. For example, if a raw pointer-type 404 /// variable is replaced by a safe C++ container, every use of such variable must be 405 /// carefully considered and possibly updated. 406 class FixableGadget : public Gadget { 407 public: 408 FixableGadget(Kind K) : Gadget(K) {} 409 410 static bool classof(const Gadget *G) { return !G->isWarningGadget(); } 411 bool isWarningGadget() const final { return false; } 412 413 /// Returns a fixit that would fix the current gadget according to 414 /// the current strategy. Returns std::nullopt if the fix cannot be produced; 415 /// returns an empty list if no fixes are necessary. 416 virtual std::optional<FixItList> getFixits(const Strategy &) const { 417 return std::nullopt; 418 } 419 420 /// Returns a list of two elements where the first element is the LHS of a pointer assignment 421 /// statement and the second element is the RHS. This two-element list represents the fact that 422 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used 423 /// later to group all those variables whose types must be modified together to prevent type 424 /// mismatches. 425 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 426 getStrategyImplications() const { 427 return std::nullopt; 428 } 429 }; 430 431 static auto toSupportedVariable() { 432 return to(varDecl()); 433 } 434 435 using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>; 436 using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>; 437 438 /// An increment of a pointer-type value is unsafe as it may run the pointer 439 /// out of bounds. 440 class IncrementGadget : public WarningGadget { 441 static constexpr const char *const OpTag = "op"; 442 const UnaryOperator *Op; 443 444 public: 445 IncrementGadget(const MatchFinder::MatchResult &Result) 446 : WarningGadget(Kind::Increment), 447 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 448 449 static bool classof(const Gadget *G) { 450 return G->getKind() == Kind::Increment; 451 } 452 453 static Matcher matcher() { 454 return stmt(unaryOperator( 455 hasOperatorName("++"), 456 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 457 ).bind(OpTag)); 458 } 459 460 const UnaryOperator *getBaseStmt() const override { return Op; } 461 462 DeclUseList getClaimedVarUseSites() const override { 463 SmallVector<const DeclRefExpr *, 2> Uses; 464 if (const auto *DRE = 465 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 466 Uses.push_back(DRE); 467 } 468 469 return std::move(Uses); 470 } 471 }; 472 473 /// A decrement of a pointer-type value is unsafe as it may run the pointer 474 /// out of bounds. 475 class DecrementGadget : public WarningGadget { 476 static constexpr const char *const OpTag = "op"; 477 const UnaryOperator *Op; 478 479 public: 480 DecrementGadget(const MatchFinder::MatchResult &Result) 481 : WarningGadget(Kind::Decrement), 482 Op(Result.Nodes.getNodeAs<UnaryOperator>(OpTag)) {} 483 484 static bool classof(const Gadget *G) { 485 return G->getKind() == Kind::Decrement; 486 } 487 488 static Matcher matcher() { 489 return stmt(unaryOperator( 490 hasOperatorName("--"), 491 hasUnaryOperand(ignoringParenImpCasts(hasPointerType())) 492 ).bind(OpTag)); 493 } 494 495 const UnaryOperator *getBaseStmt() const override { return Op; } 496 497 DeclUseList getClaimedVarUseSites() const override { 498 if (const auto *DRE = 499 dyn_cast<DeclRefExpr>(Op->getSubExpr()->IgnoreParenImpCasts())) { 500 return {DRE}; 501 } 502 503 return {}; 504 } 505 }; 506 507 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as 508 /// it doesn't have any bounds checks for the array. 509 class ArraySubscriptGadget : public WarningGadget { 510 static constexpr const char *const ArraySubscrTag = "ArraySubscript"; 511 const ArraySubscriptExpr *ASE; 512 513 public: 514 ArraySubscriptGadget(const MatchFinder::MatchResult &Result) 515 : WarningGadget(Kind::ArraySubscript), 516 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ArraySubscrTag)) {} 517 518 static bool classof(const Gadget *G) { 519 return G->getKind() == Kind::ArraySubscript; 520 } 521 522 static Matcher matcher() { 523 // FIXME: What if the index is integer literal 0? Should this be 524 // a safe gadget in this case? 525 // clang-format off 526 return stmt(arraySubscriptExpr( 527 hasBase(ignoringParenImpCasts( 528 anyOf(hasPointerType(), hasArrayType()))), 529 unless(hasIndex( 530 anyOf(integerLiteral(equals(0)), arrayInitIndexExpr()) 531 ))) 532 .bind(ArraySubscrTag)); 533 // clang-format on 534 } 535 536 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; } 537 538 DeclUseList getClaimedVarUseSites() const override { 539 if (const auto *DRE = 540 dyn_cast<DeclRefExpr>(ASE->getBase()->IgnoreParenImpCasts())) { 541 return {DRE}; 542 } 543 544 return {}; 545 } 546 }; 547 548 /// A pointer arithmetic expression of one of the forms: 549 /// \code 550 /// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n 551 /// \endcode 552 class PointerArithmeticGadget : public WarningGadget { 553 static constexpr const char *const PointerArithmeticTag = "ptrAdd"; 554 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr"; 555 const BinaryOperator *PA; // pointer arithmetic expression 556 const Expr *Ptr; // the pointer expression in `PA` 557 558 public: 559 PointerArithmeticGadget(const MatchFinder::MatchResult &Result) 560 : WarningGadget(Kind::PointerArithmetic), 561 PA(Result.Nodes.getNodeAs<BinaryOperator>(PointerArithmeticTag)), 562 Ptr(Result.Nodes.getNodeAs<Expr>(PointerArithmeticPointerTag)) {} 563 564 static bool classof(const Gadget *G) { 565 return G->getKind() == Kind::PointerArithmetic; 566 } 567 568 static Matcher matcher() { 569 auto HasIntegerType = anyOf(hasType(isInteger()), hasType(enumType())); 570 auto PtrAtRight = 571 allOf(hasOperatorName("+"), 572 hasRHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 573 hasLHS(HasIntegerType)); 574 auto PtrAtLeft = 575 allOf(anyOf(hasOperatorName("+"), hasOperatorName("-"), 576 hasOperatorName("+="), hasOperatorName("-=")), 577 hasLHS(expr(hasPointerType()).bind(PointerArithmeticPointerTag)), 578 hasRHS(HasIntegerType)); 579 580 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight)) 581 .bind(PointerArithmeticTag)); 582 } 583 584 const Stmt *getBaseStmt() const override { return PA; } 585 586 DeclUseList getClaimedVarUseSites() const override { 587 if (const auto *DRE = dyn_cast<DeclRefExpr>(Ptr->IgnoreParenImpCasts())) { 588 return {DRE}; 589 } 590 591 return {}; 592 } 593 // FIXME: pointer adding zero should be fine 594 // FIXME: this gadge will need a fix-it 595 }; 596 597 /// A pointer initialization expression of the form: 598 /// \code 599 /// int *p = q; 600 /// \endcode 601 class PointerInitGadget : public FixableGadget { 602 private: 603 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS"; 604 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS"; 605 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI` 606 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI` 607 608 public: 609 PointerInitGadget(const MatchFinder::MatchResult &Result) 610 : FixableGadget(Kind::PointerInit), 611 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(PointerInitLHSTag)), 612 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerInitRHSTag)) {} 613 614 static bool classof(const Gadget *G) { 615 return G->getKind() == Kind::PointerInit; 616 } 617 618 static Matcher matcher() { 619 auto PtrInitStmt = declStmt(hasSingleDecl(varDecl( 620 hasInitializer(ignoringImpCasts(declRefExpr( 621 hasPointerType(), 622 toSupportedVariable()). 623 bind(PointerInitRHSTag)))). 624 bind(PointerInitLHSTag))); 625 626 return stmt(PtrInitStmt); 627 } 628 629 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 630 631 virtual const Stmt *getBaseStmt() const override { 632 // FIXME: This needs to be the entire DeclStmt, assuming that this method 633 // makes sense at all on a FixableGadget. 634 return PtrInitRHS; 635 } 636 637 virtual DeclUseList getClaimedVarUseSites() const override { 638 return DeclUseList{PtrInitRHS}; 639 } 640 641 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 642 getStrategyImplications() const override { 643 return std::make_pair(PtrInitLHS, 644 cast<VarDecl>(PtrInitRHS->getDecl())); 645 } 646 }; 647 648 /// A pointer assignment expression of the form: 649 /// \code 650 /// p = q; 651 /// \endcode 652 class PointerAssignmentGadget : public FixableGadget { 653 private: 654 static constexpr const char *const PointerAssignLHSTag = "ptrLHS"; 655 static constexpr const char *const PointerAssignRHSTag = "ptrRHS"; 656 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA` 657 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA` 658 659 public: 660 PointerAssignmentGadget(const MatchFinder::MatchResult &Result) 661 : FixableGadget(Kind::PointerAssignment), 662 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignLHSTag)), 663 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(PointerAssignRHSTag)) {} 664 665 static bool classof(const Gadget *G) { 666 return G->getKind() == Kind::PointerAssignment; 667 } 668 669 static Matcher matcher() { 670 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName("="), 671 hasRHS(ignoringParenImpCasts(declRefExpr(hasPointerType(), 672 toSupportedVariable()). 673 bind(PointerAssignRHSTag))), 674 hasLHS(declRefExpr(hasPointerType(), 675 toSupportedVariable()). 676 bind(PointerAssignLHSTag)))); 677 678 return stmt(isInUnspecifiedUntypedContext(PtrAssignExpr)); 679 } 680 681 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 682 683 virtual const Stmt *getBaseStmt() const override { 684 // FIXME: This should be the binary operator, assuming that this method 685 // makes sense at all on a FixableGadget. 686 return PtrLHS; 687 } 688 689 virtual DeclUseList getClaimedVarUseSites() const override { 690 return DeclUseList{PtrLHS, PtrRHS}; 691 } 692 693 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>> 694 getStrategyImplications() const override { 695 return std::make_pair(cast<VarDecl>(PtrLHS->getDecl()), 696 cast<VarDecl>(PtrRHS->getDecl())); 697 } 698 }; 699 700 /// A call of a function or method that performs unchecked buffer operations 701 /// over one of its pointer parameters. 702 class UnsafeBufferUsageAttrGadget : public WarningGadget { 703 constexpr static const char *const OpTag = "call_expr"; 704 const CallExpr *Op; 705 706 public: 707 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result) 708 : WarningGadget(Kind::UnsafeBufferUsageAttr), 709 Op(Result.Nodes.getNodeAs<CallExpr>(OpTag)) {} 710 711 static bool classof(const Gadget *G) { 712 return G->getKind() == Kind::UnsafeBufferUsageAttr; 713 } 714 715 static Matcher matcher() { 716 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))) 717 .bind(OpTag)); 718 } 719 const Stmt *getBaseStmt() const override { return Op; } 720 721 DeclUseList getClaimedVarUseSites() const override { return {}; } 722 }; 723 724 // Represents expressions of the form `DRE[*]` in the Unspecified Lvalue 725 // Context (see `isInUnspecifiedLvalueContext`). 726 // Note here `[]` is the built-in subscript operator. 727 class ULCArraySubscriptGadget : public FixableGadget { 728 private: 729 static constexpr const char *const ULCArraySubscriptTag = 730 "ArraySubscriptUnderULC"; 731 const ArraySubscriptExpr *Node; 732 733 public: 734 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result) 735 : FixableGadget(Kind::ULCArraySubscript), 736 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) { 737 assert(Node != nullptr && "Expecting a non-null matching result"); 738 } 739 740 static bool classof(const Gadget *G) { 741 return G->getKind() == Kind::ULCArraySubscript; 742 } 743 744 static Matcher matcher() { 745 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 746 auto BaseIsArrayOrPtrDRE = 747 hasBase(ignoringParenImpCasts(declRefExpr(ArrayOrPtr, 748 toSupportedVariable()))); 749 auto Target = 750 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ULCArraySubscriptTag); 751 752 return expr(isInUnspecifiedLvalueContext(Target)); 753 } 754 755 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 756 757 virtual const Stmt *getBaseStmt() const override { return Node; } 758 759 virtual DeclUseList getClaimedVarUseSites() const override { 760 if (const auto *DRE = 761 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) { 762 return {DRE}; 763 } 764 return {}; 765 } 766 }; 767 768 // Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the 769 // unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits 770 // fixit of the form `UPC(DRE.data())`. 771 class UPCStandalonePointerGadget : public FixableGadget { 772 private: 773 static constexpr const char *const DeclRefExprTag = "StandalonePointer"; 774 const DeclRefExpr *Node; 775 776 public: 777 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result) 778 : FixableGadget(Kind::UPCStandalonePointer), 779 Node(Result.Nodes.getNodeAs<DeclRefExpr>(DeclRefExprTag)) { 780 assert(Node != nullptr && "Expecting a non-null matching result"); 781 } 782 783 static bool classof(const Gadget *G) { 784 return G->getKind() == Kind::UPCStandalonePointer; 785 } 786 787 static Matcher matcher() { 788 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType()); 789 auto target = expr( 790 ignoringParenImpCasts(declRefExpr(allOf(ArrayOrPtr, 791 toSupportedVariable())).bind(DeclRefExprTag))); 792 return stmt(isInUnspecifiedPointerContext(target)); 793 } 794 795 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 796 797 virtual const Stmt *getBaseStmt() const override { return Node; } 798 799 virtual DeclUseList getClaimedVarUseSites() const override { 800 return {Node}; 801 } 802 }; 803 804 class PointerDereferenceGadget : public FixableGadget { 805 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 806 static constexpr const char *const OperatorTag = "op"; 807 808 const DeclRefExpr *BaseDeclRefExpr = nullptr; 809 const UnaryOperator *Op = nullptr; 810 811 public: 812 PointerDereferenceGadget(const MatchFinder::MatchResult &Result) 813 : FixableGadget(Kind::PointerDereference), 814 BaseDeclRefExpr( 815 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 816 Op(Result.Nodes.getNodeAs<UnaryOperator>(OperatorTag)) {} 817 818 static bool classof(const Gadget *G) { 819 return G->getKind() == Kind::PointerDereference; 820 } 821 822 static Matcher matcher() { 823 auto Target = 824 unaryOperator( 825 hasOperatorName("*"), 826 has(expr(ignoringParenImpCasts( 827 declRefExpr(toSupportedVariable()).bind(BaseDeclRefExprTag))))) 828 .bind(OperatorTag); 829 830 return expr(isInUnspecifiedLvalueContext(Target)); 831 } 832 833 DeclUseList getClaimedVarUseSites() const override { 834 return {BaseDeclRefExpr}; 835 } 836 837 virtual const Stmt *getBaseStmt() const final { return Op; } 838 839 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 840 }; 841 842 // Represents expressions of the form `&DRE[any]` in the Unspecified Pointer 843 // Context (see `isInUnspecifiedPointerContext`). 844 // Note here `[]` is the built-in subscript operator. 845 class UPCAddressofArraySubscriptGadget : public FixableGadget { 846 private: 847 static constexpr const char *const UPCAddressofArraySubscriptTag = 848 "AddressofArraySubscriptUnderUPC"; 849 const UnaryOperator *Node; // the `&DRE[any]` node 850 851 public: 852 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result) 853 : FixableGadget(Kind::ULCArraySubscript), 854 Node(Result.Nodes.getNodeAs<UnaryOperator>( 855 UPCAddressofArraySubscriptTag)) { 856 assert(Node != nullptr && "Expecting a non-null matching result"); 857 } 858 859 static bool classof(const Gadget *G) { 860 return G->getKind() == Kind::UPCAddressofArraySubscript; 861 } 862 863 static Matcher matcher() { 864 return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 865 unaryOperator(hasOperatorName("&"), 866 hasUnaryOperand(arraySubscriptExpr( 867 hasBase(ignoringParenImpCasts(declRefExpr( 868 toSupportedVariable())))))) 869 .bind(UPCAddressofArraySubscriptTag))))); 870 } 871 872 virtual std::optional<FixItList> getFixits(const Strategy &) const override; 873 874 virtual const Stmt *getBaseStmt() const override { return Node; } 875 876 virtual DeclUseList getClaimedVarUseSites() const override { 877 const auto *ArraySubst = cast<ArraySubscriptExpr>(Node->getSubExpr()); 878 const auto *DRE = 879 cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts()); 880 return {DRE}; 881 } 882 }; 883 } // namespace 884 885 namespace { 886 // An auxiliary tracking facility for the fixit analysis. It helps connect 887 // declarations to its uses and make sure we've covered all uses with our 888 // analysis before we try to fix the declaration. 889 class DeclUseTracker { 890 using UseSetTy = SmallSet<const DeclRefExpr *, 16>; 891 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>; 892 893 // Allocate on the heap for easier move. 894 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()}; 895 DefMapTy Defs{}; 896 897 public: 898 DeclUseTracker() = default; 899 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies. 900 DeclUseTracker &operator=(const DeclUseTracker &) = delete; 901 DeclUseTracker(DeclUseTracker &&) = default; 902 DeclUseTracker &operator=(DeclUseTracker &&) = default; 903 904 // Start tracking a freshly discovered DRE. 905 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(DRE); } 906 907 // Stop tracking the DRE as it's been fully figured out. 908 void claimUse(const DeclRefExpr *DRE) { 909 assert(Uses->count(DRE) && 910 "DRE not found or claimed by multiple matchers!"); 911 Uses->erase(DRE); 912 } 913 914 // A variable is unclaimed if at least one use is unclaimed. 915 bool hasUnclaimedUses(const VarDecl *VD) const { 916 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs? 917 return any_of(*Uses, [VD](const DeclRefExpr *DRE) { 918 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl(); 919 }); 920 } 921 922 UseSetTy getUnclaimedUses(const VarDecl *VD) const { 923 UseSetTy ReturnSet; 924 for (auto use : *Uses) { 925 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) { 926 ReturnSet.insert(use); 927 } 928 } 929 return ReturnSet; 930 } 931 932 void discoverDecl(const DeclStmt *DS) { 933 for (const Decl *D : DS->decls()) { 934 if (const auto *VD = dyn_cast<VarDecl>(D)) { 935 // FIXME: Assertion temporarily disabled due to a bug in 936 // ASTMatcher internal behavior in presence of GNU 937 // statement-expressions. We need to properly investigate this 938 // because it can screw up our algorithm in other ways. 939 // assert(Defs.count(VD) == 0 && "Definition already discovered!"); 940 Defs[VD] = DS; 941 } 942 } 943 } 944 945 const DeclStmt *lookupDecl(const VarDecl *VD) const { 946 return Defs.lookup(VD); 947 } 948 }; 949 } // namespace 950 951 namespace { 952 // Strategy is a map from variables to the way we plan to emit fixes for 953 // these variables. It is figured out gradually by trying different fixes 954 // for different variables depending on gadgets in which these variables 955 // participate. 956 class Strategy { 957 public: 958 enum class Kind { 959 Wontfix, // We don't plan to emit a fixit for this variable. 960 Span, // We recommend replacing the variable with std::span. 961 Iterator, // We recommend replacing the variable with std::span::iterator. 962 Array, // We recommend replacing the variable with std::array. 963 Vector // We recommend replacing the variable with std::vector. 964 }; 965 966 private: 967 using MapTy = llvm::DenseMap<const VarDecl *, Kind>; 968 969 MapTy Map; 970 971 public: 972 Strategy() = default; 973 Strategy(const Strategy &) = delete; // Let's avoid copies. 974 Strategy &operator=(const Strategy &) = delete; 975 Strategy(Strategy &&) = default; 976 Strategy &operator=(Strategy &&) = default; 977 978 void set(const VarDecl *VD, Kind K) { Map[VD] = K; } 979 980 Kind lookup(const VarDecl *VD) const { 981 auto I = Map.find(VD); 982 if (I == Map.end()) 983 return Kind::Wontfix; 984 985 return I->second; 986 } 987 }; 988 } // namespace 989 990 991 // Representing a pointer type expression of the form `++Ptr` in an Unspecified 992 // Pointer Context (UPC): 993 class UPCPreIncrementGadget : public FixableGadget { 994 private: 995 static constexpr const char *const UPCPreIncrementTag = 996 "PointerPreIncrementUnderUPC"; 997 const UnaryOperator *Node; // the `++Ptr` node 998 999 public: 1000 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result) 1001 : FixableGadget(Kind::UPCPreIncrement), 1002 Node(Result.Nodes.getNodeAs<UnaryOperator>(UPCPreIncrementTag)) { 1003 assert(Node != nullptr && "Expecting a non-null matching result"); 1004 } 1005 1006 static bool classof(const Gadget *G) { 1007 return G->getKind() == Kind::UPCPreIncrement; 1008 } 1009 1010 static Matcher matcher() { 1011 // Note here we match `++Ptr` for any expression `Ptr` of pointer type. 1012 // Although currently we can only provide fix-its when `Ptr` is a DRE, we 1013 // can have the matcher be general, so long as `getClaimedVarUseSites` does 1014 // things right. 1015 return stmt(isInUnspecifiedPointerContext(expr(ignoringImpCasts( 1016 unaryOperator(isPreInc(), 1017 hasUnaryOperand(declRefExpr( 1018 toSupportedVariable())) 1019 ).bind(UPCPreIncrementTag))))); 1020 } 1021 1022 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1023 1024 virtual const Stmt *getBaseStmt() const override { return Node; } 1025 1026 virtual DeclUseList getClaimedVarUseSites() const override { 1027 return {dyn_cast<DeclRefExpr>(Node->getSubExpr())}; 1028 } 1029 }; 1030 1031 // Representing a pointer type expression of the form `Ptr += n` in an 1032 // Unspecified Untyped Context (UUC): 1033 class UUCAddAssignGadget : public FixableGadget { 1034 private: 1035 static constexpr const char *const UUCAddAssignTag = 1036 "PointerAddAssignUnderUUC"; 1037 static constexpr const char *const OffsetTag = "Offset"; 1038 1039 const BinaryOperator *Node; // the `Ptr += n` node 1040 const Expr *Offset = nullptr; 1041 1042 public: 1043 UUCAddAssignGadget(const MatchFinder::MatchResult &Result) 1044 : FixableGadget(Kind::UUCAddAssign), 1045 Node(Result.Nodes.getNodeAs<BinaryOperator>(UUCAddAssignTag)), 1046 Offset(Result.Nodes.getNodeAs<Expr>(OffsetTag)) { 1047 assert(Node != nullptr && "Expecting a non-null matching result"); 1048 } 1049 1050 static bool classof(const Gadget *G) { 1051 return G->getKind() == Kind::UUCAddAssign; 1052 } 1053 1054 static Matcher matcher() { 1055 return stmt(isInUnspecifiedUntypedContext(expr(ignoringImpCasts( 1056 binaryOperator(hasOperatorName("+="), 1057 hasLHS(declRefExpr(toSupportedVariable())), 1058 hasRHS(expr().bind(OffsetTag))) 1059 .bind(UUCAddAssignTag))))); 1060 } 1061 1062 virtual std::optional<FixItList> getFixits(const Strategy &S) const override; 1063 1064 virtual const Stmt *getBaseStmt() const override { return Node; } 1065 1066 virtual DeclUseList getClaimedVarUseSites() const override { 1067 return {dyn_cast<DeclRefExpr>(Node->getLHS())}; 1068 } 1069 }; 1070 1071 // Representing a fixable expression of the form `*(ptr + 123)` or `*(123 + 1072 // ptr)`: 1073 class DerefSimplePtrArithFixableGadget : public FixableGadget { 1074 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; 1075 static constexpr const char *const DerefOpTag = "DerefOp"; 1076 static constexpr const char *const AddOpTag = "AddOp"; 1077 static constexpr const char *const OffsetTag = "Offset"; 1078 1079 const DeclRefExpr *BaseDeclRefExpr = nullptr; 1080 const UnaryOperator *DerefOp = nullptr; 1081 const BinaryOperator *AddOp = nullptr; 1082 const IntegerLiteral *Offset = nullptr; 1083 1084 public: 1085 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result) 1086 : FixableGadget(Kind::DerefSimplePtrArithFixable), 1087 BaseDeclRefExpr( 1088 Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)), 1089 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)), 1090 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)), 1091 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {} 1092 1093 static Matcher matcher() { 1094 // clang-format off 1095 auto ThePtr = expr(hasPointerType(), 1096 ignoringImpCasts(declRefExpr(toSupportedVariable()). 1097 bind(BaseDeclRefExprTag))); 1098 auto PlusOverPtrAndInteger = expr(anyOf( 1099 binaryOperator(hasOperatorName("+"), hasLHS(ThePtr), 1100 hasRHS(integerLiteral().bind(OffsetTag))) 1101 .bind(AddOpTag), 1102 binaryOperator(hasOperatorName("+"), hasRHS(ThePtr), 1103 hasLHS(integerLiteral().bind(OffsetTag))) 1104 .bind(AddOpTag))); 1105 return isInUnspecifiedLvalueContext(unaryOperator( 1106 hasOperatorName("*"), 1107 hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger))) 1108 .bind(DerefOpTag)); 1109 // clang-format on 1110 } 1111 1112 virtual std::optional<FixItList> getFixits(const Strategy &s) const final; 1113 1114 // TODO remove this method from FixableGadget interface 1115 virtual const Stmt *getBaseStmt() const final { return nullptr; } 1116 1117 virtual DeclUseList getClaimedVarUseSites() const final { 1118 return {BaseDeclRefExpr}; 1119 } 1120 }; 1121 1122 /// Scan the function and return a list of gadgets found with provided kits. 1123 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker> 1124 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, 1125 bool EmitSuggestions) { 1126 1127 struct GadgetFinderCallback : MatchFinder::MatchCallback { 1128 FixableGadgetList FixableGadgets; 1129 WarningGadgetList WarningGadgets; 1130 DeclUseTracker Tracker; 1131 1132 void run(const MatchFinder::MatchResult &Result) override { 1133 // In debug mode, assert that we've found exactly one gadget. 1134 // This helps us avoid conflicts in .bind() tags. 1135 #if NDEBUG 1136 #define NEXT return 1137 #else 1138 [[maybe_unused]] int numFound = 0; 1139 #define NEXT ++numFound 1140 #endif 1141 1142 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>("any_dre")) { 1143 Tracker.discoverUse(DRE); 1144 NEXT; 1145 } 1146 1147 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>("any_ds")) { 1148 Tracker.discoverDecl(DS); 1149 NEXT; 1150 } 1151 1152 // Figure out which matcher we've found, and call the appropriate 1153 // subclass constructor. 1154 // FIXME: Can we do this more logarithmically? 1155 #define FIXABLE_GADGET(name) \ 1156 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1157 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1158 NEXT; \ 1159 } 1160 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1161 #define WARNING_GADGET(name) \ 1162 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \ 1163 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \ 1164 NEXT; \ 1165 } 1166 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1167 1168 assert(numFound >= 1 && "Gadgets not found in match result!"); 1169 assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); 1170 } 1171 }; 1172 1173 MatchFinder M; 1174 GadgetFinderCallback CB; 1175 1176 // clang-format off 1177 M.addMatcher( 1178 stmt( 1179 forEachDescendantEvaluatedStmt(stmt(anyOf( 1180 // Add Gadget::matcher() for every gadget in the registry. 1181 #define WARNING_GADGET(x) \ 1182 allOf(x ## Gadget::matcher().bind(#x), \ 1183 notInSafeBufferOptOut(&Handler)), 1184 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1185 // Avoid a hanging comma. 1186 unless(stmt()) 1187 ))) 1188 ), 1189 &CB 1190 ); 1191 // clang-format on 1192 1193 if (EmitSuggestions) { 1194 // clang-format off 1195 M.addMatcher( 1196 stmt( 1197 forEachDescendantStmt(stmt(eachOf( 1198 #define FIXABLE_GADGET(x) \ 1199 x ## Gadget::matcher().bind(#x), 1200 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def" 1201 // In parallel, match all DeclRefExprs so that to find out 1202 // whether there are any uncovered by gadgets. 1203 declRefExpr(anyOf(hasPointerType(), hasArrayType()), 1204 to(anyOf(varDecl(), bindingDecl()))).bind("any_dre"), 1205 // Also match DeclStmts because we'll need them when fixing 1206 // their underlying VarDecls that otherwise don't have 1207 // any backreferences to DeclStmts. 1208 declStmt().bind("any_ds") 1209 ))) 1210 ), 1211 &CB 1212 ); 1213 // clang-format on 1214 } 1215 1216 M.match(*D->getBody(), D->getASTContext()); 1217 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), 1218 std::move(CB.Tracker)}; 1219 } 1220 1221 // Compares AST nodes by source locations. 1222 template <typename NodeTy> struct CompareNode { 1223 bool operator()(const NodeTy *N1, const NodeTy *N2) const { 1224 return N1->getBeginLoc().getRawEncoding() < 1225 N2->getBeginLoc().getRawEncoding(); 1226 } 1227 }; 1228 1229 struct WarningGadgetSets { 1230 std::map<const VarDecl *, std::set<const WarningGadget *>, 1231 // To keep keys sorted by their locations in the map so that the 1232 // order is deterministic: 1233 CompareNode<VarDecl>> 1234 byVar; 1235 // These Gadgets are not related to pointer variables (e. g. temporaries). 1236 llvm::SmallVector<const WarningGadget *, 16> noVar; 1237 }; 1238 1239 static WarningGadgetSets 1240 groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) { 1241 WarningGadgetSets result; 1242 // If some gadgets cover more than one 1243 // variable, they'll appear more than once in the map. 1244 for (auto &G : AllUnsafeOperations) { 1245 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites(); 1246 1247 bool AssociatedWithVarDecl = false; 1248 for (const DeclRefExpr *DRE : ClaimedVarUseSites) { 1249 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1250 result.byVar[VD].insert(G.get()); 1251 AssociatedWithVarDecl = true; 1252 } 1253 } 1254 1255 if (!AssociatedWithVarDecl) { 1256 result.noVar.push_back(G.get()); 1257 continue; 1258 } 1259 } 1260 return result; 1261 } 1262 1263 struct FixableGadgetSets { 1264 std::map<const VarDecl *, std::set<const FixableGadget *>, 1265 // To keep keys sorted by their locations in the map so that the 1266 // order is deterministic: 1267 CompareNode<VarDecl>> 1268 byVar; 1269 }; 1270 1271 static FixableGadgetSets 1272 groupFixablesByVar(FixableGadgetList &&AllFixableOperations) { 1273 FixableGadgetSets FixablesForUnsafeVars; 1274 for (auto &F : AllFixableOperations) { 1275 DeclUseList DREs = F->getClaimedVarUseSites(); 1276 1277 for (const DeclRefExpr *DRE : DREs) { 1278 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1279 FixablesForUnsafeVars.byVar[VD].insert(F.get()); 1280 } 1281 } 1282 } 1283 return FixablesForUnsafeVars; 1284 } 1285 1286 bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts, 1287 const SourceManager &SM) { 1288 // A simple interval overlap detection algorithm. Sorts all ranges by their 1289 // begin location then finds the first overlap in one pass. 1290 std::vector<const FixItHint *> All; // a copy of `FixIts` 1291 1292 for (const FixItHint &H : FixIts) 1293 All.push_back(&H); 1294 std::sort(All.begin(), All.end(), 1295 [&SM](const FixItHint *H1, const FixItHint *H2) { 1296 return SM.isBeforeInTranslationUnit(H1->RemoveRange.getBegin(), 1297 H2->RemoveRange.getBegin()); 1298 }); 1299 1300 const FixItHint *CurrHint = nullptr; 1301 1302 for (const FixItHint *Hint : All) { 1303 if (!CurrHint || 1304 SM.isBeforeInTranslationUnit(CurrHint->RemoveRange.getEnd(), 1305 Hint->RemoveRange.getBegin())) { 1306 // Either to initialize `CurrHint` or `CurrHint` does not 1307 // overlap with `Hint`: 1308 CurrHint = Hint; 1309 } else 1310 // In case `Hint` overlaps the `CurrHint`, we found at least one 1311 // conflict: 1312 return true; 1313 } 1314 return false; 1315 } 1316 1317 std::optional<FixItList> 1318 PointerAssignmentGadget::getFixits(const Strategy &S) const { 1319 const auto *LeftVD = cast<VarDecl>(PtrLHS->getDecl()); 1320 const auto *RightVD = cast<VarDecl>(PtrRHS->getDecl()); 1321 switch (S.lookup(LeftVD)) { 1322 case Strategy::Kind::Span: 1323 if (S.lookup(RightVD) == Strategy::Kind::Span) 1324 return FixItList{}; 1325 return std::nullopt; 1326 case Strategy::Kind::Wontfix: 1327 return std::nullopt; 1328 case Strategy::Kind::Iterator: 1329 case Strategy::Kind::Array: 1330 case Strategy::Kind::Vector: 1331 llvm_unreachable("unsupported strategies for FixableGadgets"); 1332 } 1333 return std::nullopt; 1334 } 1335 1336 std::optional<FixItList> 1337 PointerInitGadget::getFixits(const Strategy &S) const { 1338 const auto *LeftVD = PtrInitLHS; 1339 const auto *RightVD = cast<VarDecl>(PtrInitRHS->getDecl()); 1340 switch (S.lookup(LeftVD)) { 1341 case Strategy::Kind::Span: 1342 if (S.lookup(RightVD) == Strategy::Kind::Span) 1343 return FixItList{}; 1344 return std::nullopt; 1345 case Strategy::Kind::Wontfix: 1346 return std::nullopt; 1347 case Strategy::Kind::Iterator: 1348 case Strategy::Kind::Array: 1349 case Strategy::Kind::Vector: 1350 llvm_unreachable("unsupported strategies for FixableGadgets"); 1351 } 1352 return std::nullopt; 1353 } 1354 1355 static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD, 1356 const ASTContext &Ctx) { 1357 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) { 1358 if (ConstVal->isNegative()) 1359 return false; 1360 } else if (!Expr->getType()->isUnsignedIntegerType()) 1361 return false; 1362 return true; 1363 } 1364 1365 std::optional<FixItList> 1366 ULCArraySubscriptGadget::getFixits(const Strategy &S) const { 1367 if (const auto *DRE = 1368 dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) 1369 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { 1370 switch (S.lookup(VD)) { 1371 case Strategy::Kind::Span: { 1372 1373 // If the index has a negative constant value, we give up as no valid 1374 // fix-it can be generated: 1375 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in! 1376 VD->getASTContext(); 1377 if (!isNonNegativeIntegerExpr(Node->getIdx(), VD, Ctx)) 1378 return std::nullopt; 1379 // no-op is a good fix-it, otherwise 1380 return FixItList{}; 1381 } 1382 case Strategy::Kind::Wontfix: 1383 case Strategy::Kind::Iterator: 1384 case Strategy::Kind::Array: 1385 case Strategy::Kind::Vector: 1386 llvm_unreachable("unsupported strategies for FixableGadgets"); 1387 } 1388 } 1389 return std::nullopt; 1390 } 1391 1392 static std::optional<FixItList> // forward declaration 1393 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node); 1394 1395 std::optional<FixItList> 1396 UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const { 1397 auto DREs = getClaimedVarUseSites(); 1398 const auto *VD = cast<VarDecl>(DREs.front()->getDecl()); 1399 1400 switch (S.lookup(VD)) { 1401 case Strategy::Kind::Span: 1402 return fixUPCAddressofArraySubscriptWithSpan(Node); 1403 case Strategy::Kind::Wontfix: 1404 case Strategy::Kind::Iterator: 1405 case Strategy::Kind::Array: 1406 case Strategy::Kind::Vector: 1407 llvm_unreachable("unsupported strategies for FixableGadgets"); 1408 } 1409 return std::nullopt; // something went wrong, no fix-it 1410 } 1411 1412 // FIXME: this function should be customizable through format 1413 static StringRef getEndOfLine() { 1414 static const char *const EOL = "\n"; 1415 return EOL; 1416 } 1417 1418 // Returns the text indicating that the user needs to provide input there: 1419 std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") { 1420 std::string s = std::string("<# "); 1421 s += HintTextToUser; 1422 s += " #>"; 1423 return s; 1424 } 1425 1426 // Return the text representation of the given `APInt Val`: 1427 static std::string getAPIntText(APInt Val) { 1428 SmallVector<char> Txt; 1429 Val.toString(Txt, 10, true); 1430 // APInt::toString does not add '\0' to the end of the string for us: 1431 Txt.push_back('\0'); 1432 return Txt.data(); 1433 } 1434 1435 // Return the source location of the last character of the AST `Node`. 1436 template <typename NodeTy> 1437 static std::optional<SourceLocation> 1438 getEndCharLoc(const NodeTy *Node, const SourceManager &SM, 1439 const LangOptions &LangOpts) { 1440 unsigned TkLen = Lexer::MeasureTokenLength(Node->getEndLoc(), SM, LangOpts); 1441 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1); 1442 1443 if (Loc.isValid()) 1444 return Loc; 1445 1446 return std::nullopt; 1447 } 1448 1449 // Return the source location just past the last character of the AST `Node`. 1450 template <typename NodeTy> 1451 static std::optional<SourceLocation> getPastLoc(const NodeTy *Node, 1452 const SourceManager &SM, 1453 const LangOptions &LangOpts) { 1454 SourceLocation Loc = 1455 Lexer::getLocForEndOfToken(Node->getEndLoc(), 0, SM, LangOpts); 1456 if (Loc.isValid()) 1457 return Loc; 1458 return std::nullopt; 1459 } 1460 1461 // Return text representation of an `Expr`. 1462 static std::optional<StringRef> getExprText(const Expr *E, 1463 const SourceManager &SM, 1464 const LangOptions &LangOpts) { 1465 std::optional<SourceLocation> LastCharLoc = getPastLoc(E, SM, LangOpts); 1466 1467 if (LastCharLoc) 1468 return Lexer::getSourceText( 1469 CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM, 1470 LangOpts); 1471 1472 return std::nullopt; 1473 } 1474 1475 // Returns the literal text in `SourceRange SR`, if `SR` is a valid range. 1476 static std::optional<StringRef> getRangeText(SourceRange SR, 1477 const SourceManager &SM, 1478 const LangOptions &LangOpts) { 1479 bool Invalid = false; 1480 CharSourceRange CSR = CharSourceRange::getCharRange(SR); 1481 StringRef Text = Lexer::getSourceText(CSR, SM, LangOpts, &Invalid); 1482 1483 if (!Invalid) 1484 return Text; 1485 return std::nullopt; 1486 } 1487 1488 // Returns the begin location of the identifier of the given variable 1489 // declaration. 1490 static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) { 1491 // According to the implementation of `VarDecl`, `VD->getLocation()` actually 1492 // returns the begin location of the identifier of the declaration: 1493 return VD->getLocation(); 1494 } 1495 1496 // Returns the literal text of the identifier of the given variable declaration. 1497 static std::optional<StringRef> 1498 getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM, 1499 const LangOptions &LangOpts) { 1500 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD); 1501 SourceLocation ParmIdentEndLoc = 1502 Lexer::getLocForEndOfToken(ParmIdentBeginLoc, 0, SM, LangOpts); 1503 1504 if (ParmIdentEndLoc.isMacroID() && 1505 !Lexer::isAtEndOfMacroExpansion(ParmIdentEndLoc, SM, LangOpts)) 1506 return std::nullopt; 1507 return getRangeText({ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts); 1508 } 1509 1510 // We cannot fix a variable declaration if it has some other specifiers than the 1511 // type specifier. Because the source ranges of those specifiers could overlap 1512 // with the source range that is being replaced using fix-its. Especially when 1513 // we often cannot obtain accurate source ranges of cv-qualified type 1514 // specifiers. 1515 // FIXME: also deal with type attributes 1516 static bool hasUnsupportedSpecifiers(const VarDecl *VD, 1517 const SourceManager &SM) { 1518 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the 1519 // source range of `VD`: 1520 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool { 1521 return !(SM.isBeforeInTranslationUnit(At->getRange().getEnd(), 1522 VD->getBeginLoc())) && 1523 !(SM.isBeforeInTranslationUnit(VD->getEndLoc(), 1524 At->getRange().getBegin())); 1525 }); 1526 return VD->isInlineSpecified() || VD->isConstexpr() || 1527 VD->hasConstantInitialization() || !VD->hasLocalStorage() || 1528 AttrRangeOverlapping; 1529 } 1530 1531 // Returns the `SourceRange` of `D`. The reason why this function exists is 1532 // that `D->getSourceRange()` may return a range where the end location is the 1533 // starting location of the last token. The end location of the source range 1534 // returned by this function is the last location of the last token. 1535 static SourceRange getSourceRangeToTokenEnd(const Decl *D, 1536 const SourceManager &SM, 1537 const LangOptions &LangOpts) { 1538 SourceLocation Begin = D->getBeginLoc(); 1539 SourceLocation 1540 End = // `D->getEndLoc` should always return the starting location of the 1541 // last token, so we should get the end of the token 1542 Lexer::getLocForEndOfToken(D->getEndLoc(), 0, SM, LangOpts); 1543 1544 return SourceRange(Begin, End); 1545 } 1546 1547 // Returns the text of the pointee type of `T` from a `VarDecl` of a pointer 1548 // type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not 1549 // have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me 1550 // :( ), `Qualifiers` of the pointee type is returned separately through the 1551 // output parameter `QualifiersToAppend`. 1552 static std::optional<std::string> 1553 getPointeeTypeText(const VarDecl *VD, const SourceManager &SM, 1554 const LangOptions &LangOpts, 1555 std::optional<Qualifiers> *QualifiersToAppend) { 1556 QualType Ty = VD->getType(); 1557 QualType PteTy; 1558 1559 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() && 1560 "Expecting a VarDecl of type of pointer to object type"); 1561 PteTy = Ty->getPointeeType(); 1562 1563 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc(); 1564 TypeLoc PteTyLoc; 1565 1566 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns 1567 // the `TypeLoc` of the pointee type: 1568 switch (TyLoc.getTypeLocClass()) { 1569 case TypeLoc::ConstantArray: 1570 case TypeLoc::IncompleteArray: 1571 case TypeLoc::VariableArray: 1572 case TypeLoc::DependentSizedArray: 1573 case TypeLoc::Decayed: 1574 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a " 1575 "pointer type unless it decays."); 1576 PteTyLoc = TyLoc.getNextTypeLoc(); 1577 break; 1578 case TypeLoc::Pointer: 1579 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc(); 1580 break; 1581 default: 1582 return std::nullopt; 1583 } 1584 if (PteTyLoc.isNull()) 1585 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g., 1586 // when the pointer type is `auto`. 1587 return std::nullopt; 1588 1589 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD); 1590 1591 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) { 1592 // We are expecting these locations to be valid. But in some cases, they are 1593 // not all valid. It is a Clang bug to me and we are not responsible for 1594 // fixing it. So we will just give up for now when it happens. 1595 return std::nullopt; 1596 } 1597 1598 // Note that TypeLoc.getEndLoc() returns the begin location of the last token: 1599 SourceLocation PteEndOfTokenLoc = 1600 Lexer::getLocForEndOfToken(PteTyLoc.getEndLoc(), 0, SM, LangOpts); 1601 1602 if (!PteEndOfTokenLoc.isValid()) 1603 // Sometimes we cannot get the end location of the pointee type, e.g., when 1604 // there are macros involved. 1605 return std::nullopt; 1606 if (!SM.isBeforeInTranslationUnit(PteEndOfTokenLoc, IdentLoc)) { 1607 // We only deal with the cases where the source text of the pointee type 1608 // appears on the left-hand side of the variable identifier completely, 1609 // including the following forms: 1610 // `T ident`, 1611 // `T ident[]`, where `T` is any type. 1612 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`. 1613 return std::nullopt; 1614 } 1615 if (PteTy.hasQualifiers()) { 1616 // TypeLoc does not provide source ranges for qualifiers (it says it's 1617 // intentional but seems fishy to me), so we cannot get the full text 1618 // `PteTy` via source ranges. 1619 *QualifiersToAppend = PteTy.getQualifiers(); 1620 } 1621 return getRangeText({PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts) 1622 ->str(); 1623 } 1624 1625 // Returns the text of the name (with qualifiers) of a `FunctionDecl`. 1626 static std::optional<StringRef> getFunNameText(const FunctionDecl *FD, 1627 const SourceManager &SM, 1628 const LangOptions &LangOpts) { 1629 SourceLocation BeginLoc = FD->getQualifier() 1630 ? FD->getQualifierLoc().getBeginLoc() 1631 : FD->getNameInfo().getBeginLoc(); 1632 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the 1633 // last token: 1634 SourceLocation EndLoc = Lexer::getLocForEndOfToken( 1635 FD->getNameInfo().getEndLoc(), 0, SM, LangOpts); 1636 SourceRange NameRange{BeginLoc, EndLoc}; 1637 1638 return getRangeText(NameRange, SM, LangOpts); 1639 } 1640 1641 // Returns the text representing a `std::span` type where the element type is 1642 // represented by `EltTyText`. 1643 // 1644 // Note the optional parameter `Qualifiers`: one needs to pass qualifiers 1645 // explicitly if the element type needs to be qualified. 1646 static std::string 1647 getSpanTypeText(StringRef EltTyText, 1648 std::optional<Qualifiers> Quals = std::nullopt) { 1649 const char *const SpanOpen = "std::span<"; 1650 1651 if (Quals) 1652 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>'; 1653 return SpanOpen + EltTyText.str() + '>'; 1654 } 1655 1656 std::optional<FixItList> 1657 DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { 1658 const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1659 1660 if (VD && s.lookup(VD) == Strategy::Kind::Span) { 1661 ASTContext &Ctx = VD->getASTContext(); 1662 // std::span can't represent elements before its begin() 1663 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx)) 1664 if (ConstVal->isNegative()) 1665 return std::nullopt; 1666 1667 // note that the expr may (oddly) has multiple layers of parens 1668 // example: 1669 // *((..(pointer + 123)..)) 1670 // goal: 1671 // pointer[123] 1672 // Fix-It: 1673 // remove '*(' 1674 // replace ' + ' with '[' 1675 // replace ')' with ']' 1676 1677 // example: 1678 // *((..(123 + pointer)..)) 1679 // goal: 1680 // 123[pointer] 1681 // Fix-It: 1682 // remove '*(' 1683 // replace ' + ' with '[' 1684 // replace ')' with ']' 1685 1686 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS(); 1687 const SourceManager &SM = Ctx.getSourceManager(); 1688 const LangOptions &LangOpts = Ctx.getLangOpts(); 1689 CharSourceRange StarWithTrailWhitespace = 1690 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(), 1691 LHS->getBeginLoc()); 1692 1693 std::optional<SourceLocation> LHSLocation = getPastLoc(LHS, SM, LangOpts); 1694 if (!LHSLocation) 1695 return std::nullopt; 1696 1697 CharSourceRange PlusWithSurroundingWhitespace = 1698 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc()); 1699 1700 std::optional<SourceLocation> AddOpLocation = 1701 getPastLoc(AddOp, SM, LangOpts); 1702 std::optional<SourceLocation> DerefOpLocation = 1703 getPastLoc(DerefOp, SM, LangOpts); 1704 1705 if (!AddOpLocation || !DerefOpLocation) 1706 return std::nullopt; 1707 1708 CharSourceRange ClosingParenWithPrecWhitespace = 1709 clang::CharSourceRange::getCharRange(*AddOpLocation, *DerefOpLocation); 1710 1711 return FixItList{ 1712 {FixItHint::CreateRemoval(StarWithTrailWhitespace), 1713 FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["), 1714 FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}}; 1715 } 1716 return std::nullopt; // something wrong or unsupported, give up 1717 } 1718 1719 std::optional<FixItList> 1720 PointerDereferenceGadget::getFixits(const Strategy &S) const { 1721 const VarDecl *VD = cast<VarDecl>(BaseDeclRefExpr->getDecl()); 1722 switch (S.lookup(VD)) { 1723 case Strategy::Kind::Span: { 1724 ASTContext &Ctx = VD->getASTContext(); 1725 SourceManager &SM = Ctx.getSourceManager(); 1726 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] 1727 // Deletes the *operand 1728 CharSourceRange derefRange = clang::CharSourceRange::getCharRange( 1729 Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); 1730 // Inserts the [0] 1731 if (auto LocPastOperand = 1732 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) { 1733 return FixItList{{FixItHint::CreateRemoval(derefRange), 1734 FixItHint::CreateInsertion(*LocPastOperand, "[0]")}}; 1735 } 1736 break; 1737 } 1738 case Strategy::Kind::Iterator: 1739 case Strategy::Kind::Array: 1740 case Strategy::Kind::Vector: 1741 llvm_unreachable("Strategy not implemented yet!"); 1742 case Strategy::Kind::Wontfix: 1743 llvm_unreachable("Invalid strategy!"); 1744 } 1745 1746 return std::nullopt; 1747 } 1748 1749 // Generates fix-its replacing an expression of the form UPC(DRE) with 1750 // `DRE.data()` 1751 std::optional<FixItList> UPCStandalonePointerGadget::getFixits(const Strategy &S) 1752 const { 1753 const auto VD = cast<VarDecl>(Node->getDecl()); 1754 switch (S.lookup(VD)) { 1755 case Strategy::Kind::Span: { 1756 ASTContext &Ctx = VD->getASTContext(); 1757 SourceManager &SM = Ctx.getSourceManager(); 1758 // Inserts the .data() after the DRE 1759 std::optional<SourceLocation> EndOfOperand = 1760 getPastLoc(Node, SM, Ctx.getLangOpts()); 1761 1762 if (EndOfOperand) 1763 return FixItList{{FixItHint::CreateInsertion( 1764 *EndOfOperand, ".data()")}}; 1765 // FIXME: Points inside a macro expansion. 1766 break; 1767 } 1768 case Strategy::Kind::Wontfix: 1769 case Strategy::Kind::Iterator: 1770 case Strategy::Kind::Array: 1771 case Strategy::Kind::Vector: 1772 llvm_unreachable("unsupported strategies for FixableGadgets"); 1773 } 1774 1775 return std::nullopt; 1776 } 1777 1778 // Generates fix-its replacing an expression of the form `&DRE[e]` with 1779 // `&DRE.data()[e]`: 1780 static std::optional<FixItList> 1781 fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) { 1782 const auto *ArraySub = cast<ArraySubscriptExpr>(Node->getSubExpr()); 1783 const auto *DRE = cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts()); 1784 // FIXME: this `getASTContext` call is costly, we should pass the 1785 // ASTContext in: 1786 const ASTContext &Ctx = DRE->getDecl()->getASTContext(); 1787 const Expr *Idx = ArraySub->getIdx(); 1788 const SourceManager &SM = Ctx.getSourceManager(); 1789 const LangOptions &LangOpts = Ctx.getLangOpts(); 1790 std::stringstream SS; 1791 bool IdxIsLitZero = false; 1792 1793 if (auto ICE = Idx->getIntegerConstantExpr(Ctx)) 1794 if ((*ICE).isZero()) 1795 IdxIsLitZero = true; 1796 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts); 1797 if (!DreString) 1798 return std::nullopt; 1799 1800 if (IdxIsLitZero) { 1801 // If the index is literal zero, we produce the most concise fix-it: 1802 SS << (*DreString).str() << ".data()"; 1803 } else { 1804 std::optional<StringRef> IndexString = getExprText(Idx, SM, LangOpts); 1805 if (!IndexString) 1806 return std::nullopt; 1807 1808 SS << "&" << (*DreString).str() << ".data()" 1809 << "[" << (*IndexString).str() << "]"; 1810 } 1811 return FixItList{ 1812 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())}; 1813 } 1814 1815 std::optional<FixItList> 1816 UUCAddAssignGadget::getFixits(const Strategy &S) const { 1817 DeclUseList DREs = getClaimedVarUseSites(); 1818 1819 if (DREs.size() != 1) 1820 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we 1821 // give up 1822 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1823 if (S.lookup(VD) == Strategy::Kind::Span) { 1824 FixItList Fixes; 1825 1826 const Stmt *AddAssignNode = getBaseStmt(); 1827 StringRef varName = VD->getName(); 1828 const ASTContext &Ctx = VD->getASTContext(); 1829 1830 if (!isNonNegativeIntegerExpr(Offset, VD, Ctx)) 1831 return std::nullopt; 1832 1833 // To transform UUC(p += n) to UUC(p = p.subspan(..)): 1834 bool NotParenExpr = 1835 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc()); 1836 std::string SS = varName.str() + " = " + varName.str() + ".subspan"; 1837 if (NotParenExpr) 1838 SS += "("; 1839 1840 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc( 1841 AddAssignNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1842 if (!AddAssignLocation) 1843 return std::nullopt; 1844 1845 Fixes.push_back(FixItHint::CreateReplacement( 1846 SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()), 1847 SS)); 1848 if (NotParenExpr) 1849 Fixes.push_back(FixItHint::CreateInsertion( 1850 Offset->getEndLoc().getLocWithOffset(1), ")")); 1851 return Fixes; 1852 } 1853 } 1854 return std::nullopt; // Not in the cases that we can handle for now, give up. 1855 } 1856 1857 std::optional<FixItList> UPCPreIncrementGadget::getFixits(const Strategy &S) const { 1858 DeclUseList DREs = getClaimedVarUseSites(); 1859 1860 if (DREs.size() != 1) 1861 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we 1862 // give up 1863 if (const VarDecl *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) { 1864 if (S.lookup(VD) == Strategy::Kind::Span) { 1865 FixItList Fixes; 1866 std::stringstream SS; 1867 const Stmt *PreIncNode = getBaseStmt(); 1868 StringRef varName = VD->getName(); 1869 const ASTContext &Ctx = VD->getASTContext(); 1870 1871 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()): 1872 SS << "(" << varName.data() << " = " << varName.data() 1873 << ".subspan(1)).data()"; 1874 std::optional<SourceLocation> PreIncLocation = 1875 getEndCharLoc(PreIncNode, Ctx.getSourceManager(), Ctx.getLangOpts()); 1876 if (!PreIncLocation) 1877 return std::nullopt; 1878 1879 Fixes.push_back(FixItHint::CreateReplacement( 1880 SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), SS.str())); 1881 return Fixes; 1882 } 1883 } 1884 return std::nullopt; // Not in the cases that we can handle for now, give up. 1885 } 1886 1887 1888 // For a non-null initializer `Init` of `T *` type, this function returns 1889 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it 1890 // to output stream. 1891 // In many cases, this function cannot figure out the actual extent `S`. It 1892 // then will use a place holder to replace `S` to ask users to fill `S` in. The 1893 // initializer shall be used to initialize a variable of type `std::span<T>`. 1894 // 1895 // FIXME: Support multi-level pointers 1896 // 1897 // Parameters: 1898 // `Init` a pointer to the initializer expression 1899 // `Ctx` a reference to the ASTContext 1900 static FixItList 1901 FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx, 1902 const StringRef UserFillPlaceHolder) { 1903 const SourceManager &SM = Ctx.getSourceManager(); 1904 const LangOptions &LangOpts = Ctx.getLangOpts(); 1905 1906 // If `Init` has a constant value that is (or equivalent to) a 1907 // NULL pointer, we use the default constructor to initialize the span 1908 // object, i.e., a `std:span` variable declaration with no initializer. 1909 // So the fix-it is just to remove the initializer. 1910 if (Init->isNullPointerConstant(Ctx, 1911 // FIXME: Why does this function not ask for `const ASTContext 1912 // &`? It should. Maybe worth an NFC patch later. 1913 Expr::NullPointerConstantValueDependence:: 1914 NPC_ValueDependentIsNotNull)) { 1915 std::optional<SourceLocation> InitLocation = 1916 getEndCharLoc(Init, SM, LangOpts); 1917 if (!InitLocation) 1918 return {}; 1919 1920 SourceRange SR(Init->getBeginLoc(), *InitLocation); 1921 1922 return {FixItHint::CreateRemoval(SR)}; 1923 } 1924 1925 FixItList FixIts{}; 1926 std::string ExtentText = UserFillPlaceHolder.data(); 1927 StringRef One = "1"; 1928 1929 // Insert `{` before `Init`: 1930 FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{")); 1931 // Try to get the data extent. Break into different cases: 1932 if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) { 1933 // In cases `Init` is `new T[n]` and there is no explicit cast over 1934 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects 1935 // of `T`. So the extent is `n` unless `n` has side effects. Similar but 1936 // simpler for the case where `Init` is `new T`. 1937 if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) { 1938 if (!Ext->HasSideEffects(Ctx)) { 1939 std::optional<StringRef> ExtentString = getExprText(Ext, SM, LangOpts); 1940 if (!ExtentString) 1941 return {}; 1942 ExtentText = *ExtentString; 1943 } 1944 } else if (!CxxNew->isArray()) 1945 // Although the initializer is not allocating a buffer, the pointer 1946 // variable could still be used in buffer access operations. 1947 ExtentText = One; 1948 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType( 1949 Init->IgnoreImpCasts()->getType())) { 1950 // In cases `Init` is of an array type after stripping off implicit casts, 1951 // the extent is the array size. Note that if the array size is not a 1952 // constant, we cannot use it as the extent. 1953 ExtentText = getAPIntText(CArrTy->getSize()); 1954 } else { 1955 // In cases `Init` is of the form `&Var` after stripping of implicit 1956 // casts, where `&` is the built-in operator, the extent is 1. 1957 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts())) 1958 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf && 1959 isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr())) 1960 ExtentText = One; 1961 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`, 1962 // and explicit casting, etc. etc. 1963 } 1964 1965 SmallString<32> StrBuffer{}; 1966 std::optional<SourceLocation> LocPassInit = getPastLoc(Init, SM, LangOpts); 1967 1968 if (!LocPassInit) 1969 return {}; 1970 1971 StrBuffer.append(", "); 1972 StrBuffer.append(ExtentText); 1973 StrBuffer.append("}"); 1974 FixIts.push_back(FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str())); 1975 return FixIts; 1976 } 1977 1978 #ifndef NDEBUG 1979 #define DEBUG_NOTE_DECL_FAIL(D, Msg) \ 1980 Handler.addDebugNoteForVar((D), (D)->getBeginLoc(), "failed to produce fixit for declaration '" + (D)->getNameAsString() + "'" + (Msg)) 1981 #else 1982 #define DEBUG_NOTE_DECL_FAIL(D, Msg) 1983 #endif 1984 1985 // For the given variable declaration with a pointer-to-T type, returns the text 1986 // `std::span<T>`. If it is unable to generate the text, returns 1987 // `std::nullopt`. 1988 static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD, 1989 const ASTContext &Ctx) { 1990 assert(VD->getType()->isPointerType()); 1991 1992 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 1993 std::optional<std::string> PteTyText = getPointeeTypeText( 1994 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 1995 1996 if (!PteTyText) 1997 return std::nullopt; 1998 1999 std::string SpanTyText = "std::span<"; 2000 2001 SpanTyText.append(*PteTyText); 2002 // Append qualifiers to span element type if any: 2003 if (PteTyQualifiers) { 2004 SpanTyText.append(" "); 2005 SpanTyText.append(PteTyQualifiers->getAsString()); 2006 } 2007 SpanTyText.append(">"); 2008 return SpanTyText; 2009 } 2010 2011 // For a `VarDecl` of the form `T * var (= Init)?`, this 2012 // function generates fix-its that 2013 // 1) replace `T * var` with `std::span<T> var`; and 2014 // 2) change `Init` accordingly to a span constructor, if it exists. 2015 // 2016 // FIXME: support Multi-level pointers 2017 // 2018 // Parameters: 2019 // `D` a pointer the variable declaration node 2020 // `Ctx` a reference to the ASTContext 2021 // `UserFillPlaceHolder` the user-input placeholder text 2022 // Returns: 2023 // the non-empty fix-it list, if fix-its are successfuly generated; empty 2024 // list otherwise. 2025 static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx, 2026 const StringRef UserFillPlaceHolder, 2027 UnsafeBufferUsageHandler &Handler) { 2028 if (hasUnsupportedSpecifiers(D, Ctx.getSourceManager())) 2029 return {}; 2030 2031 FixItList FixIts{}; 2032 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(D, Ctx); 2033 2034 if (!SpanTyText) { 2035 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type"); 2036 return {}; 2037 } 2038 2039 // Will hold the text for `std::span<T> Ident`: 2040 std::stringstream SS; 2041 2042 SS << *SpanTyText; 2043 // Append qualifiers to the type of `D`, if any: 2044 if (D->getType().hasQualifiers()) 2045 SS << " " << D->getType().getQualifiers().getAsString(); 2046 2047 // The end of the range of the original source that will be replaced 2048 // by `std::span<T> ident`: 2049 SourceLocation EndLocForReplacement = D->getEndLoc(); 2050 std::optional<StringRef> IdentText = 2051 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts()); 2052 2053 if (!IdentText) { 2054 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier"); 2055 return {}; 2056 } 2057 // Fix the initializer if it exists: 2058 if (const Expr *Init = D->getInit()) { 2059 FixItList InitFixIts = 2060 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder); 2061 if (InitFixIts.empty()) 2062 return {}; 2063 FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()), 2064 std::make_move_iterator(InitFixIts.end())); 2065 // If the declaration has the form `T *ident = init`, we want to replace 2066 // `T *ident = ` with `std::span<T> ident`: 2067 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1); 2068 } 2069 SS << " " << IdentText->str(); 2070 if (!EndLocForReplacement.isValid()) { 2071 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration"); 2072 return {}; 2073 } 2074 FixIts.push_back(FixItHint::CreateReplacement( 2075 SourceRange(D->getBeginLoc(), EndLocForReplacement), SS.str())); 2076 return FixIts; 2077 } 2078 2079 static bool hasConflictingOverload(const FunctionDecl *FD) { 2080 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult(); 2081 } 2082 2083 // For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new 2084 // types, this function produces fix-its to make the change self-contained. Let 2085 // 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the 2086 // entity defined by the `FunctionDecl` after the change to the parameters. 2087 // Fix-its produced by this function are 2088 // 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration 2089 // of 'F'; 2090 // 2. Create a declaration of "NewF" next to each declaration of `F`; 2091 // 3. Create a definition of "F" (as its' original definition is now belongs 2092 // to "NewF") next to its original definition. The body of the creating 2093 // definition calls to "NewF". 2094 // 2095 // Example: 2096 // 2097 // void f(int *p); // original declaration 2098 // void f(int *p) { // original definition 2099 // p[5]; 2100 // } 2101 // 2102 // To change the parameter `p` to be of `std::span<int>` type, we 2103 // also add overloads: 2104 // 2105 // [[clang::unsafe_buffer_usage]] void f(int *p); // original decl 2106 // void f(std::span<int> p); // added overload decl 2107 // void f(std::span<int> p) { // original def where param is changed 2108 // p[5]; 2109 // } 2110 // [[clang::unsafe_buffer_usage]] void f(int *p) { // added def 2111 // return f(std::span(p, <# size #>)); 2112 // } 2113 // 2114 static std::optional<FixItList> 2115 createOverloadsForFixedParams(const Strategy &S, const FunctionDecl *FD, 2116 const ASTContext &Ctx, 2117 UnsafeBufferUsageHandler &Handler) { 2118 // FIXME: need to make this conflict checking better: 2119 if (hasConflictingOverload(FD)) 2120 return std::nullopt; 2121 2122 const SourceManager &SM = Ctx.getSourceManager(); 2123 const LangOptions &LangOpts = Ctx.getLangOpts(); 2124 const unsigned NumParms = FD->getNumParams(); 2125 std::vector<std::string> NewTysTexts(NumParms); 2126 std::vector<bool> ParmsMask(NumParms, false); 2127 bool AtLeastOneParmToFix = false; 2128 2129 for (unsigned i = 0; i < NumParms; i++) { 2130 const ParmVarDecl *PVD = FD->getParamDecl(i); 2131 2132 if (S.lookup(PVD) == Strategy::Kind::Wontfix) 2133 continue; 2134 if (S.lookup(PVD) != Strategy::Kind::Span) 2135 // Not supported, not suppose to happen: 2136 return std::nullopt; 2137 2138 std::optional<Qualifiers> PteTyQuals = std::nullopt; 2139 std::optional<std::string> PteTyText = 2140 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals); 2141 2142 if (!PteTyText) 2143 // something wrong in obtaining the text of the pointee type, give up 2144 return std::nullopt; 2145 // FIXME: whether we should create std::span type depends on the Strategy. 2146 NewTysTexts[i] = getSpanTypeText(*PteTyText, PteTyQuals); 2147 ParmsMask[i] = true; 2148 AtLeastOneParmToFix = true; 2149 } 2150 if (!AtLeastOneParmToFix) 2151 // No need to create function overloads: 2152 return {}; 2153 // FIXME Respect indentation of the original code. 2154 2155 // A lambda that creates the text representation of a function declaration 2156 // with the new type signatures: 2157 const auto NewOverloadSignatureCreator = 2158 [&SM, &LangOpts, &NewTysTexts, 2159 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2160 std::stringstream SS; 2161 2162 SS << ";"; 2163 SS << getEndOfLine().str(); 2164 // Append: ret-type func-name "(" 2165 if (auto Prefix = getRangeText( 2166 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()), 2167 SM, LangOpts)) 2168 SS << Prefix->str(); 2169 else 2170 return std::nullopt; // give up 2171 // Append: parameter-type-list 2172 const unsigned NumParms = FD->getNumParams(); 2173 2174 for (unsigned i = 0; i < NumParms; i++) { 2175 const ParmVarDecl *Parm = FD->getParamDecl(i); 2176 2177 if (Parm->isImplicit()) 2178 continue; 2179 if (ParmsMask[i]) { 2180 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its 2181 // new type: 2182 SS << NewTysTexts[i]; 2183 // print parameter name if provided: 2184 if (IdentifierInfo *II = Parm->getIdentifier()) 2185 SS << ' ' << II->getName().str(); 2186 } else if (auto ParmTypeText = getRangeText( 2187 getSourceRangeToTokenEnd(Parm, SM, LangOpts), 2188 SM, LangOpts)) { 2189 // print the whole `Parm` without modification: 2190 SS << ParmTypeText->str(); 2191 } else 2192 return std::nullopt; // something wrong, give up 2193 if (i != NumParms - 1) 2194 SS << ", "; 2195 } 2196 SS << ")"; 2197 return SS.str(); 2198 }; 2199 2200 // A lambda that creates the text representation of a function definition with 2201 // the original signature: 2202 const auto OldOverloadDefCreator = 2203 [&Handler, &SM, &LangOpts, &NewTysTexts, 2204 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> { 2205 std::stringstream SS; 2206 2207 SS << getEndOfLine().str(); 2208 // Append: attr-name ret-type func-name "(" param-list ")" "{" 2209 if (auto FDPrefix = getRangeText( 2210 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM, 2211 LangOpts)) 2212 SS << Handler.getUnsafeBufferUsageAttributeTextAt(FD->getBeginLoc(), " ") 2213 << FDPrefix->str() << "{"; 2214 else 2215 return std::nullopt; 2216 // Append: "return" func-name "(" 2217 if (auto FunQualName = getFunNameText(FD, SM, LangOpts)) 2218 SS << "return " << FunQualName->str() << "("; 2219 else 2220 return std::nullopt; 2221 2222 // Append: arg-list 2223 const unsigned NumParms = FD->getNumParams(); 2224 for (unsigned i = 0; i < NumParms; i++) { 2225 const ParmVarDecl *Parm = FD->getParamDecl(i); 2226 2227 if (Parm->isImplicit()) 2228 continue; 2229 // FIXME: If a parameter has no name, it is unused in the 2230 // definition. So we could just leave it as it is. 2231 if (!Parm->getIdentifier()) 2232 // If a parameter of a function definition has no name: 2233 return std::nullopt; 2234 if (ParmsMask[i]) 2235 // This is our spanified paramter! 2236 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str() 2237 << ", " << getUserFillPlaceHolder("size") << ")"; 2238 else 2239 SS << Parm->getIdentifier()->getName().str(); 2240 if (i != NumParms - 1) 2241 SS << ", "; 2242 } 2243 // finish call and the body 2244 SS << ");}" << getEndOfLine().str(); 2245 // FIXME: 80-char line formatting? 2246 return SS.str(); 2247 }; 2248 2249 FixItList FixIts{}; 2250 for (FunctionDecl *FReDecl : FD->redecls()) { 2251 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts); 2252 2253 if (!Loc) 2254 return {}; 2255 if (FReDecl->isThisDeclarationADefinition()) { 2256 assert(FReDecl == FD && "inconsistent function definition"); 2257 // Inserts a definition with the old signature to the end of 2258 // `FReDecl`: 2259 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl)) 2260 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef)); 2261 else 2262 return {}; // give up 2263 } else { 2264 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`: 2265 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) { 2266 FixIts.emplace_back(FixItHint::CreateInsertion( 2267 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt( 2268 FReDecl->getBeginLoc(), " "))); 2269 } 2270 // Inserts a declaration with the new signature to the end of `FReDecl`: 2271 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl)) 2272 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl)); 2273 else 2274 return {}; 2275 } 2276 } 2277 return FixIts; 2278 } 2279 2280 // To fix a `ParmVarDecl` to be of `std::span` type. 2281 static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx, 2282 UnsafeBufferUsageHandler &Handler) { 2283 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) { 2284 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)"); 2285 return {}; 2286 } 2287 if (PVD->hasDefaultArg()) { 2288 // FIXME: generate fix-its for default values: 2289 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg"); 2290 return {}; 2291 } 2292 2293 std::optional<Qualifiers> PteTyQualifiers = std::nullopt; 2294 std::optional<std::string> PteTyText = getPointeeTypeText( 2295 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers); 2296 2297 if (!PteTyText) { 2298 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type"); 2299 return {}; 2300 } 2301 2302 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName(); 2303 2304 if (!PVDNameText) { 2305 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name"); 2306 return {}; 2307 } 2308 2309 std::stringstream SS; 2310 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx); 2311 2312 if (PteTyQualifiers) 2313 // Append qualifiers if they exist: 2314 SS << getSpanTypeText(*PteTyText, PteTyQualifiers); 2315 else 2316 SS << getSpanTypeText(*PteTyText); 2317 // Append qualifiers to the type of the parameter: 2318 if (PVD->getType().hasQualifiers()) 2319 SS << ' ' << PVD->getType().getQualifiers().getAsString(); 2320 // Append parameter's name: 2321 SS << ' ' << PVDNameText->str(); 2322 // Add replacement fix-it: 2323 return {FixItHint::CreateReplacement(PVD->getSourceRange(), SS.str())}; 2324 } 2325 2326 static FixItList fixVariableWithSpan(const VarDecl *VD, 2327 const DeclUseTracker &Tracker, 2328 ASTContext &Ctx, 2329 UnsafeBufferUsageHandler &Handler) { 2330 const DeclStmt *DS = Tracker.lookupDecl(VD); 2331 if (!DS) { 2332 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet"); 2333 return {}; 2334 } 2335 if (!DS->isSingleDecl()) { 2336 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt` 2337 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls"); 2338 return {}; 2339 } 2340 // Currently DS is an unused variable but we'll need it when 2341 // non-single decls are implemented, where the pointee type name 2342 // and the '*' are spread around the place. 2343 (void)DS; 2344 2345 // FIXME: handle cases where DS has multiple declarations 2346 return fixLocalVarDeclWithSpan(VD, Ctx, getUserFillPlaceHolder(), Handler); 2347 } 2348 2349 // TODO: we should be consistent to use `std::nullopt` to represent no-fix due 2350 // to any unexpected problem. 2351 static FixItList 2352 fixVariable(const VarDecl *VD, Strategy::Kind K, 2353 /* The function decl under analysis */ const Decl *D, 2354 const DeclUseTracker &Tracker, ASTContext &Ctx, 2355 UnsafeBufferUsageHandler &Handler) { 2356 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) { 2357 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext()); 2358 if (!FD || FD != D) { 2359 // `FD != D` means that `PVD` belongs to a function that is not being 2360 // analyzed currently. Thus `FD` may not be complete. 2361 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed"); 2362 return {}; 2363 } 2364 2365 // TODO If function has a try block we can't change params unless we check 2366 // also its catch block for their use. 2367 // FIXME We might support static class methods, some select methods, 2368 // operators and possibly lamdas. 2369 if (FD->isMain() || FD->isConstexpr() || 2370 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate || 2371 FD->isVariadic() || 2372 // also covers call-operator of lamdas 2373 isa<CXXMethodDecl>(FD) || 2374 // skip when the function body is a try-block 2375 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) || 2376 FD->isOverloadedOperator()) { 2377 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl"); 2378 return {}; // TODO test all these cases 2379 } 2380 } 2381 2382 switch (K) { 2383 case Strategy::Kind::Span: { 2384 if (VD->getType()->isPointerType()) { 2385 if (const auto *PVD = dyn_cast<ParmVarDecl>(VD)) 2386 return fixParamWithSpan(PVD, Ctx, Handler); 2387 2388 if (VD->isLocalVarDecl()) 2389 return fixVariableWithSpan(VD, Tracker, Ctx, Handler); 2390 } 2391 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer"); 2392 return {}; 2393 } 2394 case Strategy::Kind::Iterator: 2395 case Strategy::Kind::Array: 2396 case Strategy::Kind::Vector: 2397 llvm_unreachable("Strategy not implemented yet!"); 2398 case Strategy::Kind::Wontfix: 2399 llvm_unreachable("Invalid strategy!"); 2400 } 2401 llvm_unreachable("Unknown strategy!"); 2402 } 2403 2404 // Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the 2405 // `RemoveRange` of 'h' overlaps with a macro use. 2406 static bool overlapWithMacro(const FixItList &FixIts) { 2407 // FIXME: For now we only check if the range (or the first token) is (part of) 2408 // a macro expansion. Ideally, we want to check for all tokens in the range. 2409 return llvm::any_of(FixIts, [](const FixItHint &Hint) { 2410 auto Range = Hint.RemoveRange; 2411 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID()) 2412 // If the range (or the first token) is (part of) a macro expansion: 2413 return true; 2414 return false; 2415 }); 2416 } 2417 2418 // Returns true iff `VD` is a parameter of the declaration `D`: 2419 static bool isParameterOf(const VarDecl *VD, const Decl *D) { 2420 return isa<ParmVarDecl>(VD) && 2421 VD->getDeclContext() == dyn_cast<DeclContext>(D); 2422 } 2423 2424 // Erases variables in `FixItsForVariable`, if such a variable has an unfixable 2425 // group mate. A variable `v` is unfixable iff `FixItsForVariable` does not 2426 // contain `v`. 2427 static void eraseVarsForUnfixableGroupMates( 2428 std::map<const VarDecl *, FixItList> &FixItsForVariable, 2429 const VariableGroupsManager &VarGrpMgr) { 2430 // Variables will be removed from `FixItsForVariable`: 2431 SmallVector<const VarDecl *, 8> ToErase; 2432 2433 for (const auto &[VD, Ignore] : FixItsForVariable) { 2434 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(VD); 2435 if (llvm::any_of(Grp, 2436 [&FixItsForVariable](const VarDecl *GrpMember) -> bool { 2437 return !FixItsForVariable.count(GrpMember); 2438 })) { 2439 // At least one group member cannot be fixed, so we have to erase the 2440 // whole group: 2441 for (const VarDecl *Member : Grp) 2442 ToErase.push_back(Member); 2443 } 2444 } 2445 for (auto *VarToErase : ToErase) 2446 FixItsForVariable.erase(VarToErase); 2447 } 2448 2449 // Returns the fix-its that create bounds-safe function overloads for the 2450 // function `D`, if `D`'s parameters will be changed to safe-types through 2451 // fix-its in `FixItsForVariable`. 2452 // 2453 // NOTE: In case `D`'s parameters will be changed but bounds-safe function 2454 // overloads cannot created, the whole group that contains the parameters will 2455 // be erased from `FixItsForVariable`. 2456 static FixItList createFunctionOverloadsForParms( 2457 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */, 2458 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD, 2459 const Strategy &S, ASTContext &Ctx, UnsafeBufferUsageHandler &Handler) { 2460 FixItList FixItsSharedByParms{}; 2461 2462 std::optional<FixItList> OverloadFixes = 2463 createOverloadsForFixedParams(S, FD, Ctx, Handler); 2464 2465 if (OverloadFixes) { 2466 FixItsSharedByParms.append(*OverloadFixes); 2467 } else { 2468 // Something wrong in generating `OverloadFixes`, need to remove the 2469 // whole group, where parameters are in, from `FixItsForVariable` (Note 2470 // that all parameters should be in the same group): 2471 for (auto *Member : VarGrpMgr.getGroupOfParms()) 2472 FixItsForVariable.erase(Member); 2473 } 2474 return FixItsSharedByParms; 2475 } 2476 2477 // Constructs self-contained fix-its for each variable in `FixablesForAllVars`. 2478 static std::map<const VarDecl *, FixItList> 2479 getFixIts(FixableGadgetSets &FixablesForAllVars, const Strategy &S, 2480 ASTContext &Ctx, 2481 /* The function decl under analysis */ const Decl *D, 2482 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler, 2483 const VariableGroupsManager &VarGrpMgr) { 2484 // `FixItsForVariable` will map each variable to a set of fix-its directly 2485 // associated to the variable itself. Fix-its of distinct variables in 2486 // `FixItsForVariable` are disjoint. 2487 std::map<const VarDecl *, FixItList> FixItsForVariable; 2488 2489 // Populate `FixItsForVariable` with fix-its directly associated with each 2490 // variable. Fix-its directly associated to a variable 'v' are the ones 2491 // produced by the `FixableGadget`s whose claimed variable is 'v'. 2492 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) { 2493 FixItsForVariable[VD] = 2494 fixVariable(VD, S.lookup(VD), D, Tracker, Ctx, Handler); 2495 // If we fail to produce Fix-It for the declaration we have to skip the 2496 // variable entirely. 2497 if (FixItsForVariable[VD].empty()) { 2498 FixItsForVariable.erase(VD); 2499 continue; 2500 } 2501 for (const auto &F : Fixables) { 2502 std::optional<FixItList> Fixits = F->getFixits(S); 2503 2504 if (Fixits) { 2505 FixItsForVariable[VD].insert(FixItsForVariable[VD].end(), 2506 Fixits->begin(), Fixits->end()); 2507 continue; 2508 } 2509 #ifndef NDEBUG 2510 Handler.addDebugNoteForVar( 2511 VD, F->getBaseStmt()->getBeginLoc(), 2512 ("gadget '" + F->getDebugName() + "' refused to produce a fix") 2513 .str()); 2514 #endif 2515 FixItsForVariable.erase(VD); 2516 break; 2517 } 2518 } 2519 2520 // `FixItsForVariable` now contains only variables that can be 2521 // fixed. A variable can be fixed if its' declaration and all Fixables 2522 // associated to it can all be fixed. 2523 2524 // To further remove from `FixItsForVariable` variables whose group mates 2525 // cannot be fixed... 2526 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr); 2527 // Now `FixItsForVariable` gets further reduced: a variable is in 2528 // `FixItsForVariable` iff it can be fixed and all its group mates can be 2529 // fixed. 2530 2531 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`. 2532 // That is, when fixing multiple parameters in one step, these fix-its will 2533 // be applied only once (instead of being applied per parameter). 2534 FixItList FixItsSharedByParms{}; 2535 2536 if (auto *FD = dyn_cast<FunctionDecl>(D)) 2537 FixItsSharedByParms = createFunctionOverloadsForParms( 2538 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler); 2539 2540 // The map that maps each variable `v` to fix-its for the whole group where 2541 // `v` is in: 2542 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{ 2543 FixItsForVariable}; 2544 2545 for (auto &[Var, Ignore] : FixItsForVariable) { 2546 bool AnyParm = false; 2547 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, &AnyParm); 2548 2549 for (const VarDecl *GrpMate : VarGroupForVD) { 2550 if (Var == GrpMate) 2551 continue; 2552 if (FixItsForVariable.count(GrpMate)) 2553 FinalFixItsForVariable[Var].append(FixItsForVariable[GrpMate]); 2554 } 2555 if (AnyParm) { 2556 // This assertion should never fail. Otherwise we have a bug. 2557 assert(!FixItsSharedByParms.empty() && 2558 "Should not try to fix a parameter that does not belong to a " 2559 "FunctionDecl"); 2560 FinalFixItsForVariable[Var].append(FixItsSharedByParms); 2561 } 2562 } 2563 // Fix-its that will be applied in one step shall NOT: 2564 // 1. overlap with macros or/and templates; or 2565 // 2. conflict with each other. 2566 // Otherwise, the fix-its will be dropped. 2567 for (auto Iter = FinalFixItsForVariable.begin(); 2568 Iter != FinalFixItsForVariable.end();) 2569 if (overlapWithMacro(Iter->second) || 2570 clang::internal::anyConflict(Iter->second, Ctx.getSourceManager())) { 2571 Iter = FinalFixItsForVariable.erase(Iter); 2572 } else 2573 Iter++; 2574 return FinalFixItsForVariable; 2575 } 2576 2577 template <typename VarDeclIterTy> 2578 static Strategy 2579 getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) { 2580 Strategy S; 2581 for (const VarDecl *VD : UnsafeVars) { 2582 S.set(VD, Strategy::Kind::Span); 2583 } 2584 return S; 2585 } 2586 2587 // Manages variable groups: 2588 class VariableGroupsManagerImpl : public VariableGroupsManager { 2589 const std::vector<VarGrpTy> Groups; 2590 const std::map<const VarDecl *, unsigned> &VarGrpMap; 2591 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms; 2592 2593 public: 2594 VariableGroupsManagerImpl( 2595 const std::vector<VarGrpTy> &Groups, 2596 const std::map<const VarDecl *, unsigned> &VarGrpMap, 2597 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms) 2598 : Groups(Groups), VarGrpMap(VarGrpMap), 2599 GrpsUnionForParms(GrpsUnionForParms) {} 2600 2601 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override { 2602 if (GrpsUnionForParms.contains(Var)) { 2603 if (HasParm) 2604 *HasParm = true; 2605 return GrpsUnionForParms.getArrayRef(); 2606 } 2607 if (HasParm) 2608 *HasParm = false; 2609 2610 auto It = VarGrpMap.find(Var); 2611 2612 if (It == VarGrpMap.end()) 2613 return std::nullopt; 2614 return Groups[It->second]; 2615 } 2616 2617 VarGrpRef getGroupOfParms() const override { 2618 return GrpsUnionForParms.getArrayRef(); 2619 } 2620 }; 2621 2622 void clang::checkUnsafeBufferUsage(const Decl *D, 2623 UnsafeBufferUsageHandler &Handler, 2624 bool EmitSuggestions) { 2625 #ifndef NDEBUG 2626 Handler.clearDebugNotes(); 2627 #endif 2628 2629 assert(D && D->getBody()); 2630 // We do not want to visit a Lambda expression defined inside a method independently. 2631 // Instead, it should be visited along with the outer method. 2632 // FIXME: do we want to do the same thing for `BlockDecl`s? 2633 if (const auto *fd = dyn_cast<CXXMethodDecl>(D)) { 2634 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) 2635 return; 2636 } 2637 2638 // Do not emit fixit suggestions for functions declared in an 2639 // extern "C" block. 2640 if (const auto *FD = dyn_cast<FunctionDecl>(D)) { 2641 for (FunctionDecl *FReDecl : FD->redecls()) { 2642 if (FReDecl->isExternC()) { 2643 EmitSuggestions = false; 2644 break; 2645 } 2646 } 2647 } 2648 2649 WarningGadgetSets UnsafeOps; 2650 FixableGadgetSets FixablesForAllVars; 2651 2652 auto [FixableGadgets, WarningGadgets, Tracker] = 2653 findGadgets(D, Handler, EmitSuggestions); 2654 2655 if (!EmitSuggestions) { 2656 // Our job is very easy without suggestions. Just warn about 2657 // every problematic operation and consider it done. No need to deal 2658 // with fixable gadgets, no need to group operations by variable. 2659 for (const auto &G : WarningGadgets) { 2660 Handler.handleUnsafeOperation(G->getBaseStmt(), 2661 /*IsRelatedToDecl=*/false); 2662 } 2663 2664 // This return guarantees that most of the machine doesn't run when 2665 // suggestions aren't requested. 2666 assert(FixableGadgets.size() == 0 && 2667 "Fixable gadgets found but suggestions not requested!"); 2668 return; 2669 } 2670 2671 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2672 // function under the analysis. No need to fix any Fixables. 2673 if (!WarningGadgets.empty()) { 2674 // Gadgets "claim" variables they're responsible for. Once this loop 2675 // finishes, the tracker will only track DREs that weren't claimed by any 2676 // gadgets, i.e. not understood by the analysis. 2677 for (const auto &G : FixableGadgets) { 2678 for (const auto *DRE : G->getClaimedVarUseSites()) { 2679 Tracker.claimUse(DRE); 2680 } 2681 } 2682 } 2683 2684 // If no `WarningGadget`s ever matched, there is no unsafe operations in the 2685 // function under the analysis. Thus, it early returns here as there is 2686 // nothing needs to be fixed. 2687 // 2688 // Note this claim is based on the assumption that there is no unsafe 2689 // variable whose declaration is invisible from the analyzing function. 2690 // Otherwise, we need to consider if the uses of those unsafe varuables needs 2691 // fix. 2692 // So far, we are not fixing any global variables or class members. And, 2693 // lambdas will be analyzed along with the enclosing function. So this early 2694 // return is correct for now. 2695 if (WarningGadgets.empty()) 2696 return; 2697 2698 UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); 2699 FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); 2700 2701 std::map<const VarDecl *, FixItList> FixItsForVariableGroup; 2702 2703 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s. 2704 for (auto it = FixablesForAllVars.byVar.cbegin(); 2705 it != FixablesForAllVars.byVar.cend();) { 2706 // FIXME: need to deal with global variables later 2707 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(it->first))) { 2708 #ifndef NDEBUG 2709 Handler.addDebugNoteForVar( 2710 it->first, it->first->getBeginLoc(), 2711 ("failed to produce fixit for '" + it->first->getNameAsString() + 2712 "' : neither local nor a parameter")); 2713 #endif 2714 it = FixablesForAllVars.byVar.erase(it); 2715 } else if (it->first->getType().getCanonicalType()->isReferenceType()) { 2716 #ifndef NDEBUG 2717 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(), 2718 ("failed to produce fixit for '" + 2719 it->first->getNameAsString() + 2720 "' : has a reference type")); 2721 #endif 2722 it = FixablesForAllVars.byVar.erase(it); 2723 } else if (Tracker.hasUnclaimedUses(it->first)) { 2724 #ifndef NDEBUG 2725 auto AllUnclaimed = Tracker.getUnclaimedUses(it->first); 2726 for (auto UnclaimedDRE : AllUnclaimed) { 2727 std::string UnclaimedUseTrace = 2728 getDREAncestorString(UnclaimedDRE, D->getASTContext()); 2729 2730 Handler.addDebugNoteForVar( 2731 it->first, UnclaimedDRE->getBeginLoc(), 2732 ("failed to produce fixit for '" + it->first->getNameAsString() + 2733 "' : has an unclaimed use\nThe unclaimed DRE trace: " + 2734 UnclaimedUseTrace)); 2735 } 2736 #endif 2737 it = FixablesForAllVars.byVar.erase(it); 2738 } else if (it->first->isInitCapture()) { 2739 #ifndef NDEBUG 2740 Handler.addDebugNoteForVar( 2741 it->first, it->first->getBeginLoc(), 2742 ("failed to produce fixit for '" + it->first->getNameAsString() + 2743 "' : init capture")); 2744 #endif 2745 it = FixablesForAllVars.byVar.erase(it); 2746 }else { 2747 ++it; 2748 } 2749 } 2750 2751 // Fixpoint iteration for pointer assignments 2752 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>; 2753 DepMapTy DependenciesMap{}; 2754 DepMapTy PtrAssignmentGraph{}; 2755 2756 for (auto it : FixablesForAllVars.byVar) { 2757 for (const FixableGadget *fixable : it.second) { 2758 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair = 2759 fixable->getStrategyImplications(); 2760 if (ImplPair) { 2761 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair); 2762 PtrAssignmentGraph[Impl.first].insert(Impl.second); 2763 } 2764 } 2765 } 2766 2767 /* 2768 The following code does a BFS traversal of the `PtrAssignmentGraph` 2769 considering all unsafe vars as starting nodes and constructs an undirected 2770 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner 2771 elimiates all variables that are unreachable from any unsafe var. In other 2772 words, this removes all dependencies that don't include any unsafe variable 2773 and consequently don't need any fixit generation. 2774 Note: A careful reader would observe that the code traverses 2775 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and 2776 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would 2777 achieve the same result but the one used here dramatically cuts the 2778 amount of hoops the second part of the algorithm needs to jump, given that 2779 a lot of these connections become "direct". The reader is advised not to 2780 imagine how the graph is transformed because of using `Var` instead of 2781 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used, 2782 and think about why it's equivalent later. 2783 */ 2784 std::set<const VarDecl *> VisitedVarsDirected{}; 2785 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2786 if (VisitedVarsDirected.find(Var) == VisitedVarsDirected.end()) { 2787 2788 std::queue<const VarDecl*> QueueDirected{}; 2789 QueueDirected.push(Var); 2790 while(!QueueDirected.empty()) { 2791 const VarDecl* CurrentVar = QueueDirected.front(); 2792 QueueDirected.pop(); 2793 VisitedVarsDirected.insert(CurrentVar); 2794 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar]; 2795 for (const VarDecl *Adj : AdjacentNodes) { 2796 if (VisitedVarsDirected.find(Adj) == VisitedVarsDirected.end()) { 2797 QueueDirected.push(Adj); 2798 } 2799 DependenciesMap[Var].insert(Adj); 2800 DependenciesMap[Adj].insert(Var); 2801 } 2802 } 2803 } 2804 } 2805 2806 // `Groups` stores the set of Connected Components in the graph. 2807 std::vector<VarGrpTy> Groups; 2808 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the 2809 // variables belong to. Group indexes refer to the elements in `Groups`. 2810 // `VarGrpMap` is complete in that every variable that needs fix is in it. 2811 std::map<const VarDecl *, unsigned> VarGrpMap; 2812 // The union group over the ones in "Groups" that contain parameters of `D`: 2813 llvm::SetVector<const VarDecl *> 2814 GrpsUnionForParms; // these variables need to be fixed in one step 2815 2816 // Group Connected Components for Unsafe Vars 2817 // (Dependencies based on pointer assignments) 2818 std::set<const VarDecl *> VisitedVars{}; 2819 for (const auto &[Var, ignore] : UnsafeOps.byVar) { 2820 if (VisitedVars.find(Var) == VisitedVars.end()) { 2821 VarGrpTy &VarGroup = Groups.emplace_back(); 2822 std::queue<const VarDecl*> Queue{}; 2823 2824 Queue.push(Var); 2825 while(!Queue.empty()) { 2826 const VarDecl* CurrentVar = Queue.front(); 2827 Queue.pop(); 2828 VisitedVars.insert(CurrentVar); 2829 VarGroup.push_back(CurrentVar); 2830 auto AdjacentNodes = DependenciesMap[CurrentVar]; 2831 for (const VarDecl *Adj : AdjacentNodes) { 2832 if (VisitedVars.find(Adj) == VisitedVars.end()) { 2833 Queue.push(Adj); 2834 } 2835 } 2836 } 2837 2838 bool HasParm = false; 2839 unsigned GrpIdx = Groups.size() - 1; 2840 2841 for (const VarDecl *V : VarGroup) { 2842 VarGrpMap[V] = GrpIdx; 2843 if (!HasParm && isParameterOf(V, D)) 2844 HasParm = true; 2845 } 2846 if (HasParm) 2847 GrpsUnionForParms.insert(VarGroup.begin(), VarGroup.end()); 2848 } 2849 } 2850 2851 // Remove a `FixableGadget` if the associated variable is not in the graph 2852 // computed above. We do not want to generate fix-its for such variables, 2853 // since they are neither warned nor reachable from a warned one. 2854 // 2855 // Note a variable is not warned if it is not directly used in any unsafe 2856 // operation. A variable `v` is NOT reachable from an unsafe variable, if it 2857 // does not exist another variable `u` such that `u` is warned and fixing `u` 2858 // (transitively) implicates fixing `v`. 2859 // 2860 // For example, 2861 // ``` 2862 // void f(int * p) { 2863 // int * a = p; *p = 0; 2864 // } 2865 // ``` 2866 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither 2867 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of 2868 // the function above, `p` becomes reachable from a warned variable. 2869 for (auto I = FixablesForAllVars.byVar.begin(); 2870 I != FixablesForAllVars.byVar.end();) { 2871 // Note `VisitedVars` contain all the variables in the graph: 2872 if (!VisitedVars.count((*I).first)) { 2873 // no such var in graph: 2874 I = FixablesForAllVars.byVar.erase(I); 2875 } else 2876 ++I; 2877 } 2878 2879 // We assign strategies to variables that are 1) in the graph and 2) can be 2880 // fixed. Other variables have the default "Won't fix" strategy. 2881 Strategy NaiveStrategy = getNaiveStrategy(llvm::make_filter_range( 2882 VisitedVars, [&FixablesForAllVars](const VarDecl *V) { 2883 // If a warned variable has no "Fixable", it is considered unfixable: 2884 return FixablesForAllVars.byVar.count(V); 2885 })); 2886 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms); 2887 2888 if (isa<NamedDecl>(D)) 2889 // The only case where `D` is not a `NamedDecl` is when `D` is a 2890 // `BlockDecl`. Let's not fix variables in blocks for now 2891 FixItsForVariableGroup = 2892 getFixIts(FixablesForAllVars, NaiveStrategy, D->getASTContext(), D, 2893 Tracker, Handler, VarGrpMgr); 2894 2895 for (const auto &G : UnsafeOps.noVar) { 2896 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/false); 2897 } 2898 2899 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) { 2900 auto FixItsIt = FixItsForVariableGroup.find(VD); 2901 Handler.handleUnsafeVariableGroup(VD, VarGrpMgr, 2902 FixItsIt != FixItsForVariableGroup.end() 2903 ? std::move(FixItsIt->second) 2904 : FixItList{}, 2905 D); 2906 for (const auto &G : WarningGadgets) { 2907 Handler.handleUnsafeOperation(G->getBaseStmt(), /*IsRelatedToDecl=*/true); 2908 } 2909 } 2910 } 2911