1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "UseNullptrCheck.h" 10 #include "../utils/Matchers.h" 11 #include "../utils/OptionsUtils.h" 12 #include "clang/AST/ASTContext.h" 13 #include "clang/AST/RecursiveASTVisitor.h" 14 #include "clang/ASTMatchers/ASTMatchFinder.h" 15 #include "clang/Lex/Lexer.h" 16 17 using namespace clang; 18 using namespace clang::ast_matchers; 19 using namespace llvm; 20 21 namespace clang::tidy::modernize { 22 namespace { 23 24 const char CastSequence[] = "sequence"; 25 26 AST_MATCHER(Type, sugaredNullptrType) { 27 const Type *DesugaredType = Node.getUnqualifiedDesugaredType(); 28 if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType)) 29 return BT->getKind() == BuiltinType::NullPtr; 30 return false; 31 } 32 33 /// Create a matcher that finds implicit casts as well as the head of a 34 /// sequence of zero or more nested explicit casts that have an implicit cast 35 /// to null within. 36 /// Finding sequences of explicit casts is necessary so that an entire sequence 37 /// can be replaced instead of just the inner-most implicit cast. 38 /// 39 /// TODO/NOTE: The second "anyOf" below discards matches on a substituted type, 40 /// since we don't know if that would _always_ be a pointer type for all other 41 /// specializations, unless the expression was "__null", in which case we assume 42 /// that all specializations are expected to be for pointer types. Ideally this 43 /// would check for the "NULL" macro instead, but that'd be harder to express. 44 /// In practice, "NULL" is often defined as "__null", and this is a useful 45 /// condition. 
46 StatementMatcher makeCastSequenceMatcher(llvm::ArrayRef<StringRef> NameList) { 47 auto ImplicitCastToNull = implicitCastExpr( 48 anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)), 49 anyOf(hasSourceExpression(gnuNullExpr()), 50 unless(hasImplicitDestinationType( 51 qualType(substTemplateTypeParmType())))), 52 unless(hasSourceExpression(hasType(sugaredNullptrType()))), 53 unless(hasImplicitDestinationType( 54 qualType(matchers::matchesAnyListedTypeName(NameList))))); 55 56 auto IsOrHasDescendant = [](auto InnerMatcher) { 57 return anyOf(InnerMatcher, hasDescendant(InnerMatcher)); 58 }; 59 60 return traverse( 61 TK_AsIs, 62 anyOf(castExpr(anyOf(ImplicitCastToNull, 63 explicitCastExpr(hasDescendant(ImplicitCastToNull))), 64 unless(hasAncestor(explicitCastExpr())), 65 unless(hasAncestor(cxxRewrittenBinaryOperator()))) 66 .bind(CastSequence), 67 cxxRewrittenBinaryOperator( 68 // Match rewritten operators, but verify (in the check method) 69 // that if an implicit cast is found, it is not from another 70 // nested rewritten operator. 71 expr().bind("matchBinopOperands"), 72 hasEitherOperand(IsOrHasDescendant( 73 implicitCastExpr( 74 ImplicitCastToNull, 75 hasAncestor(cxxRewrittenBinaryOperator().bind( 76 "checkBinopOperands"))) 77 .bind(CastSequence))), 78 // Skip defaulted comparison operators. 79 unless(hasAncestor(functionDecl(isDefaulted())))))); 80 } 81 82 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc, 83 const SourceManager &SM) { 84 return SM.isWrittenInSameFile(StartLoc, EndLoc); 85 } 86 87 /// Replaces the provided range with the text "nullptr", but only if 88 /// the start and end location are both in main file. 89 /// Returns true if and only if a replacement was made. 
90 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM, 91 SourceLocation StartLoc, SourceLocation EndLoc) { 92 CharSourceRange Range(SourceRange(StartLoc, EndLoc), true); 93 // Add a space if nullptr follows an alphanumeric character. This happens 94 // whenever there is an c-style explicit cast to nullptr not surrounded by 95 // parentheses and right beside a return statement. 96 SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1); 97 bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation)); 98 Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement( 99 Range, NeedsSpace ? " nullptr" : "nullptr"); 100 } 101 102 /// Returns the name of the outermost macro. 103 /// 104 /// Given 105 /// \code 106 /// #define MY_NULL NULL 107 /// \endcode 108 /// If \p Loc points to NULL, this function will return the name MY_NULL. 109 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM, 110 const LangOptions &LO) { 111 assert(Loc.isMacroID()); 112 SourceLocation OutermostMacroLoc; 113 114 while (Loc.isMacroID()) { 115 OutermostMacroLoc = Loc; 116 Loc = SM.getImmediateMacroCallerLoc(Loc); 117 } 118 119 return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO); 120 } 121 122 /// RecursiveASTVisitor for ensuring all nodes rooted at a given AST 123 /// subtree that have file-level source locations corresponding to a macro 124 /// argument have implicit NullTo(Member)Pointer nodes as ancestors. 
class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
public:
  /// \param CastLoc file-level location that visited statements are compared
  ///        against; asserted to be a file ID.
  /// \param SM source manager used to map statement locations to file
  ///        locations.
  MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
      : CastLoc(CastLoc), SM(SM) {
    assert(CastLoc.isFileID());
  }

  /// Traverses \p S with the base visitor and, once the traversal of a subtree
  /// that first set Visited has finished, checks whether a null cast was seen
  /// in it. Returns false to abort the traversal.
  bool TraverseStmt(Stmt *S) {
    // Remember whether a matching node had already been seen before
    // descending into S.
    bool VisitedPreviously = Visited;

    if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
      return false;

    // The point at which VisitedPreviously is false and Visited is true is the
    // root of a subtree containing nodes whose locations match CastLoc. It's
    // at this point we test that the Implicit NullTo(Member)Pointer cast was
    // found or not.
    if (!VisitedPreviously) {
      if (Visited && !CastFound) {
        // Found nodes with matching SourceLocations but didn't come across a
        // cast. This is an invalid macro arg use. Can stop traversal
        // completely now.
        InvalidFound = true;
        return false;
      }
      // Reset state as we unwind back up the tree.
      CastFound = false;
      Visited = false;
    }
    return true;
  }

  /// Records statements whose file location equals CastLoc, and notes when
  /// such a statement is an implicit NullTo(Member)Pointer cast.
  bool VisitStmt(Stmt *S) {
    // Statements located elsewhere are irrelevant to this check.
    if (SM.getFileLoc(S->getBeginLoc()) != CastLoc)
      return true;
    Visited = true;

    const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
    if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
                 Cast->getCastKind() == CK_NullToMemberPointer))
      CastFound = true;

    return true;
  }

  bool TraverseInitListExpr(InitListExpr *S) {
    // Only go through the semantic form of the InitListExpr, because
    // ImplicitCast might not appear in the syntactic form, and this results in
    // finding usages of the macro argument that don't have a ImplicitCast as an
    // ancestor (thus invalidating the replacement) when they actually have.
    return RecursiveASTVisitor<MacroArgUsageVisitor>::
        TraverseSynOrSemInitListExpr(
            S->isSemanticForm() ? S : S->getSemanticForm());
  }

  /// Returns true if the traversal found a subtree with nodes at CastLoc but
  /// without an implicit NullTo(Member)Pointer cast.
  bool foundInvalid() const { return InvalidFound; }

private:
  // File-level location every visited statement is compared against.
  SourceLocation CastLoc;
  const SourceManager &SM;

  // Set when a statement whose file location equals CastLoc has been visited.
  bool Visited = false;
  // Set when such a statement is an implicit NullTo(Member)Pointer cast.
  bool CastFound = false;
  // Set when a subtree had Visited set without CastFound.
  bool InvalidFound = false;
};

/// Looks for implicit casts as well as sequences of 0 or more explicit
/// casts with an implicit null-to-pointer cast within.
///
/// The matcher this visitor is used with will find a single implicit cast or a
/// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
/// an implicit cast is nested within. However, there is no guarantee that only
/// explicit casts exist between the found top-most explicit cast and the
/// possibly more than one nested implicit cast. This visitor finds all cast
/// sequences with an implicit cast to null within and creates a replacement
/// leaving the outermost explicit cast unchanged to avoid introducing
/// ambiguities.
class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
public:
  /// \param Context AST context; also supplies the source manager.
  /// \param NullMacros macro names compared against the expanded macro name
  ///        when deciding whether a macro expansion may be replaced.
  /// \param Check check object passed on to replaceWithNullptr().
  CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
                      ClangTidyCheck &Check)
      : SM(Context.getSourceManager()), Context(Context),
        NullMacros(NullMacros), Check(Check) {}

  /// Traverses \p S unless a prune was requested by skipSubTree(), in which
  /// case the request is cleared and \p S is not descended into.
  bool TraverseStmt(Stmt *S) {
    // Stop traversing down the tree if requested.
    if (PruneSubtree) {
      PruneSubtree = false;
      return true;
    }
    return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
  }

  // Only VisitStmt is overridden as we shouldn't find other base AST types
  // within a cast expression.
  bool VisitStmt(Stmt *S) {
    auto *C = dyn_cast<CastExpr>(S);
    // Catch the castExpr inside cxxDefaultArgExpr.
    if (auto *E = dyn_cast<CXXDefaultArgExpr>(S)) {
      C = dyn_cast<CastExpr>(E->getExpr());
      FirstSubExpr = nullptr;
    }
    // A non-cast statement ends the current cast sequence.
    if (!C) {
      FirstSubExpr = nullptr;
      return true;
    }

    auto* CastSubExpr = C->getSubExpr()->IgnoreParens();
    // Ignore cast expressions which cast nullptr literal.
    if (isa<CXXNullPtrLiteralExpr>(CastSubExpr)) {
      return true;
    }

    // Remember the sub-expression of the first cast in the sequence; its
    // source range is what gets replaced below.
    if (!FirstSubExpr)
      FirstSubExpr = CastSubExpr;

    // Keep descending until the null-to-(member-)pointer cast is reached.
    if (C->getCastKind() != CK_NullToPointer &&
        C->getCastKind() != CK_NullToMemberPointer) {
      return true;
    }

    SourceLocation StartLoc = FirstSubExpr->getBeginLoc();
    SourceLocation EndLoc = FirstSubExpr->getEndLoc();

    // If the location comes from a macro arg expansion, *all* uses of that
    // arg must be checked to result in NullTo(Member)Pointer casts.
    //
    // If the location comes from a macro body expansion, check to see if its
    // coming from one of the allowed 'NULL' macros.
    if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
      SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
                     FileLocEnd = SM.getFileLoc(EndLoc);
      SourceLocation ImmediateMacroArgLoc, MacroLoc;
      // Skip NULL macros used in macro.
      if (!getMacroAndArgLocations(StartLoc, ImmediateMacroArgLoc, MacroLoc) ||
          ImmediateMacroArgLoc != FileLocStart)
        return skipSubTree();

      if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
          allArgUsesValid(C)) {
        replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
      }
      return true;
    }

    if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
      StringRef OutermostMacroName =
          getOutermostMacroName(StartLoc, SM, Context.getLangOpts());

      // Check to see if the user wants to replace the macro being expanded.
      if (!llvm::is_contained(NullMacros, OutermostMacroName))
        return skipSubTree();

      // Replace at the file-level locations of the expansion.
      StartLoc = SM.getFileLoc(StartLoc);
      EndLoc = SM.getFileLoc(EndLoc);
    }

    if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
      return skipSubTree();
    }
    replaceWithNullptr(Check, SM, StartLoc, EndLoc);

    return true;
  }

private:
  /// Requests that the next TraverseStmt() call does not descend, and returns
  /// true so the overall traversal continues.
  bool skipSubTree() {
    PruneSubtree = true;
    return true;
  }

  /// Tests that all expansions of a macro arg, one of which expands to
  /// result in \p CE, yield NullTo(Member)Pointer casts.
  bool allArgUsesValid(const CastExpr *CE) {
    SourceLocation CastLoc = CE->getBeginLoc();

    // Step 1: Get location of macro arg and location of the macro the arg was
    // provided to.
    SourceLocation ArgLoc, MacroLoc;
    if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
      return false;

    // Step 2: Find the first ancestor that doesn't expand from this macro.
    DynTypedNode ContainingAncestor;
    if (!findContainingAncestor(DynTypedNode::create<Stmt>(*CE), MacroLoc,
                                ContainingAncestor))
      return false;

    // Step 3:
    // Visit children of this containing parent looking for the least-descended
    // nodes of the containing parent which are macro arg expansions that expand
    // from the given arg location.
    // Visitor needs: arg loc.
    MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
    if (const auto *D = ContainingAncestor.get<Decl>())
      ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
    else if (const auto *S = ContainingAncestor.get<Stmt>())
      ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
    else
      llvm_unreachable("Unhandled ContainingAncestor node type");

    return !ArgUsageVisitor.foundInvalid();
  }

  /// Given the SourceLocation for a macro arg expansion, finds the
  /// non-macro SourceLocation of the macro the arg was passed to and the
  /// non-macro SourceLocation of the argument in the arg list to that macro.
  /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
  /// These values are undefined if the return value is false.
  ///
  /// \returns false if one of the returned SourceLocations would be a
  /// SourceLocation pointing within the definition of another macro.
  bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
                               SourceLocation &MacroLoc) {
    assert(Loc.isMacroID() && "Only reasonable to call this on macros");

    ArgLoc = Loc;

    // Find the location of the immediate macro expansion.
    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      SourceLocation OldArgLoc = ArgLoc;
      ArgLoc = Expansion.getExpansionLocStart();
      if (!Expansion.isMacroArgExpansion()) {
        // NOTE(review): if this branch is reached on the first iteration,
        // MacroLoc still holds whatever value the caller passed in; callers
        // presumably only pass macro-argument locations -- confirm.
        if (!MacroLoc.isFileID())
          return false;

        // A macro body expansion is accepted only when the immediate macro is
        // one of the configured null macros.
        StringRef Name =
            Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
        return llvm::is_contained(NullMacros, Name);
      }

      MacroLoc = SM.getExpansionRange(ArgLoc).getBegin();

      ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (ArgLoc.isFileID())
        return true;

      // If spelling location resides in the same FileID as macro expansion
      // location, it means there is no inner macro.
      FileID MacroFID = SM.getFileID(MacroLoc);
      if (SM.isInFileID(ArgLoc, MacroFID)) {
        // Don't transform this case. If the characters that caused the
        // null-conversion come from within a macro, they can't be changed.
        return false;
      }
    }

    llvm_unreachable("getMacroAndArgLocations");
  }

  /// Tests if TestMacroLoc is found while recursively unravelling
  /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
  /// Implementation is very similar to getMacroAndArgLocations() except in this
  /// case, it's not assumed that TestLoc is expanded from a macro argument.
  /// While unravelling expansions macro arguments are handled as with
  /// getMacroAndArgLocations() but in this function macro body expansions are
  /// also handled.
  ///
  /// False means either:
  /// - TestLoc is not from a macro expansion.
  /// - TestLoc is from a different macro expansion.
  bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
    if (TestLoc.isFileID()) {
      return false;
    }

    SourceLocation Loc = TestLoc, MacroLoc;

    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      Loc = Expansion.getExpansionLocStart();

      if (!Expansion.isMacroArgExpansion()) {
        if (Loc.isFileID()) {
          return Loc == TestMacroLoc;
        }
        // Since Loc is still a macro ID and it's not an argument expansion, we
        // don't need to do the work of handling an argument expansion. Simply
        // keep recursively expanding until we hit a FileID or a macro arg
        // expansion.
        continue;
      }

      MacroLoc = SM.getImmediateExpansionRange(Loc).getBegin();
      if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
        // Match made.
        return true;
      }

      Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (Loc.isFileID()) {
        // If we made it this far without finding a match, there is no match to
        // be made.
        return false;
      }
    }

    llvm_unreachable("expandsFrom");
  }

  /// Given a starting point \c Start in the AST, find an ancestor that
  /// doesn't expand from the macro called at file location \c MacroLoc.
  ///
  /// \pre MacroLoc.isFileID()
  /// \returns true if such an ancestor was found, false otherwise.
  bool findContainingAncestor(DynTypedNode Start, SourceLocation MacroLoc,
                              DynTypedNode &Result) {
    // Below we're only following the first parent back up the AST. This should
    // be fine since for the statements we care about there should only be one
    // parent, except for the case specified below.

    assert(MacroLoc.isFileID());

    while (true) {
      const auto &Parents = Context.getParents(Start);
      if (Parents.empty())
        return false;
      if (Parents.size() > 1) {
        // If there is more than one parent, don't do the replacement unless
        // they are all InitListExprs (semantic and syntactic form). In this
        // case we can choose any one here, and the ASTVisitor will take care of
        // traversing the right one.
        for (const auto &Parent : Parents) {
          if (!Parent.get<InitListExpr>())
            return false;
        }
      }

      const DynTypedNode &Parent = Parents[0];

      // Loc stays invalid when the parent is neither a Decl nor a Stmt.
      SourceLocation Loc;
      if (const auto *D = Parent.get<Decl>())
        Loc = D->getBeginLoc();
      else if (const auto *S = Parent.get<Stmt>())
        Loc = S->getBeginLoc();

      // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
      // them and keep going up.
      if (Loc.isValid()) {
        if (!expandsFrom(Loc, MacroLoc)) {
          Result = Parent;
          return true;
        }
      }
      Start = Parent;
    }

    llvm_unreachable("findContainingAncestor");
  }

  SourceManager &SM;
  ASTContext &Context;
  // Macro names for which a macro body expansion may be replaced.
  ArrayRef<StringRef> NullMacros;
  ClangTidyCheck &Check;
  // Sub-expression of the first cast seen in the current cast sequence; reset
  // to nullptr when a non-cast statement or a default argument is visited.
  Expr *FirstSubExpr = nullptr;
  // When set, the next TraverseStmt() call returns without descending.
  bool PruneSubtree = false;
};

} // namespace

/// Reads the "NullMacros" option (default "NULL") and the "IgnoredTypes"
/// option, then splits the NullMacros string on ',' into NullMacros.
UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      NullMacrosStr(Options.get("NullMacros", "NULL")),
      IgnoredTypes(utils::options::parseStringList(Options.get(
          "IgnoredTypes",
          "std::_CmpUnspecifiedParam::;^std::__cmp_cat::__unspec"))) {
  StringRef(NullMacrosStr).split(NullMacros, ",");
}

/// Stores the "NullMacros" and "IgnoredTypes" options into \p Opts.
void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "NullMacros", NullMacrosStr);
  Options.store(Opts, "IgnoredTypes",
                utils::options::serializeStringList(IgnoredTypes));
}

/// Registers the cast-sequence matcher, built with IgnoredTypes, on \p Finder.
void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
  Finder->addMatcher(makeCastSequenceMatcher(IgnoredTypes), this);
}

/// Handles a match by running CastSequenceVisitor over the bound cast, unless
/// the two bound rewritten-binary-operator nodes differ.
void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
  const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
  assert(NullCast && "Bad Callback. No node provided");

  // Ignore the match when the operator that matched and the operator found as
  // an ancestor of the implicit cast are not the same node.
  if (Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>(
          "matchBinopOperands") !=
      Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>("checkBinopOperands"))
    return;

  // Given an implicit null-ptr cast or an explicit cast with an implicit
  // null-to-pointer cast within use CastSequenceVisitor to identify sequences
  // of explicit casts that can be converted into 'nullptr'.
  CastSequenceVisitor(*Result.Context, NullMacros, *this)
      .TraverseStmt(const_cast<CastExpr *>(NullCast));
}

} // namespace clang::tidy::modernize