1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "UseNullptrCheck.h" 11 #include "clang/AST/ASTContext.h" 12 #include "clang/AST/RecursiveASTVisitor.h" 13 #include "clang/ASTMatchers/ASTMatchFinder.h" 14 #include "clang/Lex/Lexer.h" 15 16 using namespace clang; 17 using namespace clang::ast_matchers; 18 using namespace llvm; 19 20 namespace clang { 21 namespace tidy { 22 namespace modernize { 23 namespace { 24 25 const char CastSequence[] = "sequence"; 26 27 /// \brief Matches cast expressions that have a cast kind of CK_NullToPointer 28 /// or CK_NullToMemberPointer. 29 /// 30 /// Given 31 /// \code 32 /// int *p = 0; 33 /// \endcode 34 /// implicitCastExpr(isNullToPointer()) matches the implicit cast clang adds 35 /// around \c 0. 36 AST_MATCHER(CastExpr, isNullToPointer) { 37 return Node.getCastKind() == CK_NullToPointer || 38 Node.getCastKind() == CK_NullToMemberPointer; 39 } 40 41 AST_MATCHER(Type, sugaredNullptrType) { 42 const Type *DesugaredType = Node.getUnqualifiedDesugaredType(); 43 if (const BuiltinType *BT = dyn_cast<BuiltinType>(DesugaredType)) 44 return BT->getKind() == BuiltinType::NullPtr; 45 return false; 46 } 47 48 /// \brief Create a matcher that finds implicit casts as well as the head of a 49 /// sequence of zero or more nested explicit casts that have an implicit cast 50 /// to null within. 51 /// Finding sequences of explict casts is necessary so that an entire sequence 52 /// can be replaced instead of just the inner-most implicit cast. 
53 StatementMatcher makeCastSequenceMatcher() { 54 StatementMatcher ImplicitCastToNull = implicitCastExpr( 55 isNullToPointer(), 56 unless(hasSourceExpression(hasType(sugaredNullptrType())))); 57 58 return castExpr(anyOf(ImplicitCastToNull, 59 explicitCastExpr(hasDescendant(ImplicitCastToNull))), 60 unless(hasAncestor(explicitCastExpr()))) 61 .bind(CastSequence); 62 } 63 64 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc, 65 const SourceManager &SM) { 66 return SM.isWrittenInSameFile(StartLoc, EndLoc); 67 } 68 69 /// \brief Replaces the provided range with the text "nullptr", but only if 70 /// the start and end location are both in main file. 71 /// Returns true if and only if a replacement was made. 72 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM, 73 SourceLocation StartLoc, SourceLocation EndLoc) { 74 CharSourceRange Range(SourceRange(StartLoc, EndLoc), true); 75 // Add a space if nullptr follows an alphanumeric character. This happens 76 // whenever there is an c-style explicit cast to nullptr not surrounded by 77 // parentheses and right beside a return statement. 78 SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1); 79 bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation)); 80 Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement( 81 Range, NeedsSpace ? " nullptr" : "nullptr"); 82 } 83 84 /// \brief Returns the name of the outermost macro. 85 /// 86 /// Given 87 /// \code 88 /// #define MY_NULL NULL 89 /// \endcode 90 /// If \p Loc points to NULL, this function will return the name MY_NULL. 
StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
                                const LangOptions &LO) {
  assert(Loc.isMacroID());
  SourceLocation OutermostMacroLoc;

  // Walk up through immediate macro callers, remembering the last location
  // that was still a macro ID before a non-macro location is reached.
  while (Loc.isMacroID()) {
    OutermostMacroLoc = Loc;
    Loc = SM.getImmediateMacroCallerLoc(Loc);
  }

  return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
}

/// \brief RecursiveASTVisitor for ensuring all nodes rooted at a given AST
/// subtree that have file-level source locations corresponding to a macro
/// argument have implicit NullTo(Member)Pointer nodes as ancestors.
///
/// After traversal, foundInvalid() reports whether a subtree was seen that
/// contains statements located at \c CastLoc but no implicit
/// NullTo(Member)Pointer cast located there.
class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
public:
  /// \param CastLoc file-level location of the macro argument being checked;
  ///        must satisfy isFileID().
  /// \param SM source manager used to map statement locations to file
  ///        locations.
  MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
      : CastLoc(CastLoc), SM(SM), Visited(false), CastFound(false),
        InvalidFound(false) {
    assert(CastLoc.isFileID());
  }

  /// Traverses \p S, then, on the way back up, validates the subtree if it is
  /// the outermost one in which a statement at CastLoc was seen.
  /// Returns false to abort the whole traversal once an invalid use is found.
  bool TraverseStmt(Stmt *S) {
    // Snapshot taken before descending so we can tell whether the first
    // matching statement was encountered inside this subtree.
    bool VisitedPreviously = Visited;

    if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
      return false;

    // The point at which VisitedPreviously is false and Visited is true is the
    // root of a subtree containing nodes whose locations match CastLoc. It's
    // at this point we test that the Implicit NullTo(Member)Pointer cast was
    // found or not.
    if (!VisitedPreviously) {
      if (Visited && !CastFound) {
        // Found nodes with matching SourceLocations but didn't come across a
        // cast. This is an invalid macro arg use. Can stop traversal
        // completely now.
        InvalidFound = true;
        return false;
      }
      // Reset state as we unwind back up the tree.
      CastFound = false;
      Visited = false;
    }
    return true;
  }

  /// Records whether \p S starts at CastLoc (in file-location terms) and, if
  /// so, whether it is an implicit NullTo(Member)Pointer cast.
  bool VisitStmt(Stmt *S) {
    // Statements located elsewhere are irrelevant to this check.
    if (SM.getFileLoc(S->getLocStart()) != CastLoc)
      return true;
    Visited = true;

    const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
    if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
                 Cast->getCastKind() == CK_NullToMemberPointer))
      CastFound = true;

    return true;
  }

  bool TraverseInitListExpr(InitListExpr *S) {
    // Only go through the semantic form of the InitListExpr, because
    // ImplicitCast might not appear in the syntactic form, and this results in
    // finding usages of the macro argument that don't have a ImplicitCast as an
    // ancestor (thus invalidating the replacement) when they actually have.
    return RecursiveASTVisitor<MacroArgUsageVisitor>::
        TraverseSynOrSemInitListExpr(
            S->isSemanticForm() ? S : S->getSemanticForm());
  }

  /// Returns true if traversal found a use of the macro argument that did not
  /// involve an implicit NullTo(Member)Pointer cast.
  bool foundInvalid() const { return InvalidFound; }

private:
  SourceLocation CastLoc; // File-level location all candidate nodes must match.
  const SourceManager &SM;

  bool Visited;      // A statement at CastLoc was seen in the current subtree.
  bool CastFound;    // An implicit null cast at CastLoc was seen in it.
  bool InvalidFound; // Sticky result: an invalid use was detected.
};

/// \brief Looks for implicit casts as well as sequences of 0 or more explicit
/// casts with an implicit null-to-pointer cast within.
///
/// The matcher this visitor is used with will find a single implicit cast or a
/// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
/// an implicit cast is nested within. However, there is no guarantee that only
/// explicit casts exist between the found top-most explicit cast and the
/// possibly more than one nested implicit cast. This visitor finds all cast
/// sequences with an implicit cast to null within and creates a replacement
/// leaving the outermost explicit cast unchanged to avoid introducing
/// ambiguities.
class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
public:
  /// \param Context AST context; supplies the source manager, language
  ///        options and parent map used below.
  /// \param NullMacros names of macros whose expansions may be replaced.
  /// \param check the check through which diagnostics are emitted.
  CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
                      ClangTidyCheck &check)
      : SM(Context.getSourceManager()), Context(Context),
        NullMacros(NullMacros), Check(check), FirstSubExpr(nullptr),
        PruneSubtree(false) {}

  /// Traverses \p S unless a prune was requested by skipSubTree(); in that
  /// case the request is consumed and this call returns without descending.
  bool TraverseStmt(Stmt *S) {
    // Stop traversing down the tree if requested.
    if (PruneSubtree) {
      PruneSubtree = false;
      return true;
    }
    return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
  }

  // Only VisitStmt is overridden as we shouldn't find other base AST types
  // within a cast expression.
  //
  // Tracks the sub-expression of the first cast in a run of consecutive casts
  // (FirstSubExpr) and, once a NullTo(Member)Pointer cast is reached, replaces
  // that sub-expression's range with nullptr when permitted.
  bool VisitStmt(Stmt *S) {
    CastExpr *C = dyn_cast<CastExpr>(S);
    if (!C) {
      // A non-cast node interrupts the current cast sequence.
      FirstSubExpr = nullptr;
      return true;
    }
    if (!FirstSubExpr)
      FirstSubExpr = C->getSubExpr()->IgnoreParens();

    // Keep descending until the null-to-pointer cast itself is reached.
    if (C->getCastKind() != CK_NullToPointer &&
        C->getCastKind() != CK_NullToMemberPointer) {
      return true;
    }

    SourceLocation StartLoc = FirstSubExpr->getLocStart();
    SourceLocation EndLoc = FirstSubExpr->getLocEnd();

    // If the location comes from a macro arg expansion, *all* uses of that
    // arg must be checked to result in NullTo(Member)Pointer casts.
    //
    // If the location comes from a macro body expansion, check to see if its
    // coming from one of the allowed 'NULL' macros.
    if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
      SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
                     FileLocEnd = SM.getFileLoc(EndLoc);
      if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
          allArgUsesValid(C)) {
        replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
      }
      return skipSubTree();
    }

    if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
      StringRef OutermostMacroName =
          getOutermostMacroName(StartLoc, SM, Context.getLangOpts());

      // Check to see if the user wants to replace the macro being expanded.
      if (std::find(NullMacros.begin(), NullMacros.end(), OutermostMacroName) ==
          NullMacros.end()) {
        return skipSubTree();
      }

      // The macro is allowed: operate on the file-level locations instead.
      StartLoc = SM.getFileLoc(StartLoc);
      EndLoc = SM.getFileLoc(EndLoc);
    }

    if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
      return skipSubTree();
    }
    replaceWithNullptr(Check, SM, StartLoc, EndLoc);

    return skipSubTree();
  }

private:
  /// Requests that the next TraverseStmt() call return without descending.
  /// Always returns true so it can be used directly as a Visit* result.
  bool skipSubTree() {
    PruneSubtree = true;
    return true;
  }

  /// \brief Tests that all expansions of a macro arg, one of which expands to
  /// result in \p CE, yield NullTo(Member)Pointer casts.
  bool allArgUsesValid(const CastExpr *CE) {
    SourceLocation CastLoc = CE->getLocStart();

    // Step 1: Get location of macro arg and location of the macro the arg was
    // provided to.
    SourceLocation ArgLoc, MacroLoc;
    if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
      return false;

    // Step 2: Find the first ancestor that doesn't expand from this macro.
    ast_type_traits::DynTypedNode ContainingAncestor;
    if (!findContainingAncestor(
            ast_type_traits::DynTypedNode::create<Stmt>(*CE), MacroLoc,
            ContainingAncestor))
      return false;

    // Step 3:
    // Visit children of this containing parent looking for the least-descended
    // nodes of the containing parent which are macro arg expansions that expand
    // from the given arg location.
    // Visitor needs: arg loc.
    MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
    if (const auto *D = ContainingAncestor.get<Decl>())
      ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
    else if (const auto *S = ContainingAncestor.get<Stmt>())
      ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
    else
      llvm_unreachable("Unhandled ContainingAncestor node type");

    return !ArgUsageVisitor.foundInvalid();
  }

  /// \brief Given the SourceLocation for a macro arg expansion, finds the
  /// non-macro SourceLocation of the macro the arg was passed to and the
  /// non-macro SourceLocation of the argument in the arg list to that macro.
  /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
  /// These values are undefined if the return value is false.
  ///
  /// \returns false if one of the returned SourceLocations would be a
  /// SourceLocation pointing within the definition of another macro. When the
  /// unravelling reaches an expansion that is not a macro-argument expansion,
  /// the result is true only if \c MacroLoc is a file location and the
  /// immediate macro name at that point is listed in \c NullMacros.
  bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
                               SourceLocation &MacroLoc) {
    assert(Loc.isMacroID() && "Only reasonble to call this on macros");

    ArgLoc = Loc;

    // Find the location of the immediate macro expansion.
    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      // Remember the pre-step location; it is needed for the macro-name
      // lookup in the non-argument-expansion case below.
      SourceLocation OldArgLoc = ArgLoc;
      ArgLoc = Expansion.getExpansionLocStart();
      if (!Expansion.isMacroArgExpansion()) {
        if (!MacroLoc.isFileID())
          return false;

        StringRef Name =
            Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
        return std::find(NullMacros.begin(), NullMacros.end(), Name) !=
               NullMacros.end();
      }

      MacroLoc = SM.getExpansionRange(ArgLoc).first;

      // Step to where the argument was spelled, keeping the same offset
      // within the expansion entry.
      ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (ArgLoc.isFileID())
        return true;

      // If spelling location resides in the same FileID as macro expansion
      // location, it means there is no inner macro.
      FileID MacroFID = SM.getFileID(MacroLoc);
      if (SM.isInFileID(ArgLoc, MacroFID)) {
        // Don't transform this case. If the characters that caused the
        // null-conversion come from within a macro, they can't be changed.
        return false;
      }
    }

    llvm_unreachable("getMacroAndArgLocations");
  }

  /// \brief Tests if TestMacroLoc is found while recursively unravelling
  /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
  /// Implementation is very similar to getMacroAndArgLocations() except in this
  /// case, it's not assumed that TestLoc is expanded from a macro argument.
  /// While unravelling expansions macro arguments are handled as with
  /// getMacroAndArgLocations() but in this function macro body expansions are
  /// also handled.
  ///
  /// False means either:
  /// - TestLoc is not from a macro expansion.
  /// - TestLoc is from a different macro expansion.
  bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
    if (TestLoc.isFileID()) {
      return false;
    }

    SourceLocation Loc = TestLoc, MacroLoc;

    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      Loc = Expansion.getExpansionLocStart();

      if (!Expansion.isMacroArgExpansion()) {
        if (Loc.isFileID()) {
          return Loc == TestMacroLoc;
        }
        // Since Loc is still a macro ID and it's not an argument expansion, we
        // don't need to do the work of handling an argument expansion. Simply
        // keep recursively expanding until we hit a FileID or a macro arg
        // expansion.
        continue;
      }

      MacroLoc = SM.getImmediateExpansionRange(Loc).first;
      if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
        // Match made.
        return true;
      }

      Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (Loc.isFileID()) {
        // If we made it this far without finding a match, there is no match to
        // be made.
        return false;
      }
    }

    llvm_unreachable("expandsFrom");
  }

  /// \brief Given a starting point \c Start in the AST, find an ancestor that
  /// doesn't expand from the macro called at file location \c MacroLoc.
  ///
  /// \pre MacroLoc.isFileID()
  /// \returns true if such an ancestor was found (stored in \c Result), false
  /// otherwise, including when a node has several parents that are not all
  /// InitListExprs.
  bool findContainingAncestor(ast_type_traits::DynTypedNode Start,
                              SourceLocation MacroLoc,
                              ast_type_traits::DynTypedNode &Result) {
    // Below we're only following the first parent back up the AST. This should
    // be fine since for the statements we care about there should only be one
    // parent, except for the case specified below.

    assert(MacroLoc.isFileID());

    while (true) {
      const auto &Parents = Context.getParents(Start);
      if (Parents.empty())
        return false;
      if (Parents.size() > 1) {
        // If there are more than one parents, don't do the replacement unless
        // they are InitListsExpr (semantic and syntactic form). In this case we
        // can choose any one here, and the ASTVisitor will take care of
        // traversing the right one.
        for (const auto &Parent : Parents) {
          if (!Parent.get<InitListExpr>())
            return false;
        }
      }

      const ast_type_traits::DynTypedNode &Parent = Parents[0];

      // Loc stays invalid for parents that are neither a Decl nor a Stmt.
      SourceLocation Loc;
      if (const auto *D = Parent.get<Decl>())
        Loc = D->getLocStart();
      else if (const auto *S = Parent.get<Stmt>())
        Loc = S->getLocStart();

      // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
      // them and keep going up.
      if (Loc.isValid()) {
        if (!expandsFrom(Loc, MacroLoc)) {
          Result = Parent;
          return true;
        }
      }
      Start = Parent;
    }

    llvm_unreachable("findContainingAncestor");
  }

private:
  SourceManager &SM;
  ASTContext &Context;
  ArrayRef<StringRef> NullMacros; // Macro names the user allows replacing.
  ClangTidyCheck &Check;          // Sink for diagnostics and fix-its.
  Expr *FirstSubExpr; // Operand of the first cast in the current sequence.
  bool PruneSubtree;  // Set by skipSubTree(); consumed by TraverseStmt().
};

} // namespace

/// Reads the "NullMacros" option (empty string when unset) and splits it on
/// commas into the NullMacros list.
UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      NullMacrosStr(Options.get("NullMacros", "")) {
  StringRef(NullMacrosStr).split(NullMacros, ",");
}

/// Stores the unsplit "NullMacros" option string into \p Opts.
void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "NullMacros", NullMacrosStr);
}

void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
  // Only register the matcher for C++. Because this checker is used for
  // modernization, it is reasonable to run it on any C++ standard with the
  // assumption the user is trying to modernize their codebase.
  if (getLangOpts().CPlusPlus)
    Finder->addMatcher(makeCastSequenceMatcher(), this);
}

/// Match callback: runs a CastSequenceVisitor over the cast bound under
/// CastSequence.
void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
  const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
  assert(NullCast && "Bad Callback. No node provided");

  // Given an implicit null-ptr cast or an explicit cast with an implicit
  // null-to-pointer cast within use CastSequenceVisitor to identify sequences
  // of explicit casts that can be converted into 'nullptr'.
  CastSequenceVisitor(*Result.Context, NullMacros, *this)
      .TraverseStmt(const_cast<CastExpr *>(NullCast));
}

} // namespace modernize
} // namespace tidy
} // namespace clang