xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (revision 7365802efadfa07b0c252dc92ae71b151eab413e)
1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "UseNullptrCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/AST/RecursiveASTVisitor.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Lex/Lexer.h"
14 
15 using namespace clang;
16 using namespace clang::ast_matchers;
17 using namespace llvm;
18 
19 namespace clang::tidy::modernize {
20 namespace {
21 
/// Binding ID attached to the matched cast node and read back in check().
constexpr char CastSequence[] = "sequence";
23 
24 AST_MATCHER(Type, sugaredNullptrType) {
25   const Type *DesugaredType = Node.getUnqualifiedDesugaredType();
26   if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType))
27     return BT->getKind() == BuiltinType::NullPtr;
28   return false;
29 }
30 
31 /// Create a matcher that finds implicit casts as well as the head of a
32 /// sequence of zero or more nested explicit casts that have an implicit cast
33 /// to null within.
34 /// Finding sequences of explicit casts is necessary so that an entire sequence
35 /// can be replaced instead of just the inner-most implicit cast.
36 StatementMatcher makeCastSequenceMatcher() {
37   StatementMatcher ImplicitCastToNull = implicitCastExpr(
38       anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)),
39       unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))),
40       unless(hasSourceExpression(hasType(sugaredNullptrType()))));
41 
42   auto IsOrHasDescendant = [](auto InnerMatcher) {
43     return anyOf(InnerMatcher, hasDescendant(InnerMatcher));
44   };
45 
46   return traverse(
47       TK_AsIs,
48       anyOf(castExpr(anyOf(ImplicitCastToNull,
49                            explicitCastExpr(hasDescendant(ImplicitCastToNull))),
50                      unless(hasAncestor(explicitCastExpr())),
51                      unless(hasAncestor(cxxRewrittenBinaryOperator())))
52                 .bind(CastSequence),
53             cxxRewrittenBinaryOperator(
54                 // Match rewritten operators, but verify (in the check method)
55                 // that if an implicit cast is found, it is not from another
56                 // nested rewritten operator.
57                 expr().bind("matchBinopOperands"),
58                 hasEitherOperand(IsOrHasDescendant(
59                     implicitCastExpr(
60                         ImplicitCastToNull,
61                         hasAncestor(cxxRewrittenBinaryOperator().bind(
62                             "checkBinopOperands")))
63                         .bind(CastSequence))),
64                 // Skip defaulted comparison operators.
65                 unless(hasAncestor(functionDecl(isDefaulted()))))));
66 }
67 
68 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc,
69                         const SourceManager &SM) {
70   return SM.isWrittenInSameFile(StartLoc, EndLoc);
71 }
72 
/// Emits a "use nullptr" diagnostic at the beginning of the range
/// [\p StartLoc, \p EndLoc] together with a fix-it that replaces that range
/// with the text "nullptr".
///
/// This function performs no validation of the range and returns nothing; the
/// callers in this file test isReplaceableRange() before calling it.
void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM,
                        SourceLocation StartLoc, SourceLocation EndLoc) {
  CharSourceRange Range(SourceRange(StartLoc, EndLoc), true);
  // Add a space if nullptr follows an alphanumeric character. This happens
  // whenever there is a C-style explicit cast to nullptr not surrounded by
  // parentheses and right beside a return statement.
  // NOTE(review): this reads the character one position before StartLoc and
  // therefore assumes StartLoc is not the very first character of its buffer.
  SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1);
  bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation));
  Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement(
      Range, NeedsSpace ? " nullptr" : "nullptr");
}
87 
88 /// Returns the name of the outermost macro.
89 ///
90 /// Given
91 /// \code
92 /// #define MY_NULL NULL
93 /// \endcode
94 /// If \p Loc points to NULL, this function will return the name MY_NULL.
95 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
96                                 const LangOptions &LO) {
97   assert(Loc.isMacroID());
98   SourceLocation OutermostMacroLoc;
99 
100   while (Loc.isMacroID()) {
101     OutermostMacroLoc = Loc;
102     Loc = SM.getImmediateMacroCallerLoc(Loc);
103   }
104 
105   return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
106 }
107 
108 /// RecursiveASTVisitor for ensuring all nodes rooted at a given AST
109 /// subtree that have file-level source locations corresponding to a macro
110 /// argument have implicit NullTo(Member)Pointer nodes as ancestors.
111 class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
112 public:
113   MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
114       : CastLoc(CastLoc), SM(SM) {
115     assert(CastLoc.isFileID());
116   }
117 
118   bool TraverseStmt(Stmt *S) {
119     bool VisitedPreviously = Visited;
120 
121     if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
122       return false;
123 
124     // The point at which VisitedPreviously is false and Visited is true is the
125     // root of a subtree containing nodes whose locations match CastLoc. It's
126     // at this point we test that the Implicit NullTo(Member)Pointer cast was
127     // found or not.
128     if (!VisitedPreviously) {
129       if (Visited && !CastFound) {
130         // Found nodes with matching SourceLocations but didn't come across a
131         // cast. This is an invalid macro arg use. Can stop traversal
132         // completely now.
133         InvalidFound = true;
134         return false;
135       }
136       // Reset state as we unwind back up the tree.
137       CastFound = false;
138       Visited = false;
139     }
140     return true;
141   }
142 
143   bool VisitStmt(Stmt *S) {
144     if (SM.getFileLoc(S->getBeginLoc()) != CastLoc)
145       return true;
146     Visited = true;
147 
148     const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
149     if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
150                  Cast->getCastKind() == CK_NullToMemberPointer))
151       CastFound = true;
152 
153     return true;
154   }
155 
156   bool TraverseInitListExpr(InitListExpr *S) {
157     // Only go through the semantic form of the InitListExpr, because
158     // ImplicitCast might not appear in the syntactic form, and this results in
159     // finding usages of the macro argument that don't have a ImplicitCast as an
160     // ancestor (thus invalidating the replacement) when they actually have.
161     return RecursiveASTVisitor<MacroArgUsageVisitor>::
162         TraverseSynOrSemInitListExpr(
163             S->isSemanticForm() ? S : S->getSemanticForm());
164   }
165 
166   bool foundInvalid() const { return InvalidFound; }
167 
168 private:
169   SourceLocation CastLoc;
170   const SourceManager &SM;
171 
172   bool Visited = false;
173   bool CastFound = false;
174   bool InvalidFound = false;
175 };
176 
/// Looks for implicit casts as well as sequences of 0 or more explicit
/// casts with an implicit null-to-pointer cast within.
///
/// The matcher this visitor is used with will find a single implicit cast or a
/// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
/// an implicit cast is nested within. However, there is no guarantee that only
/// explicit casts exist between the found top-most explicit cast and the
/// possibly more than one nested implicit cast. This visitor finds all cast
/// sequences with an implicit cast to null within and creates a replacement
/// leaving the outermost explicit cast unchanged to avoid introducing
/// ambiguities.
class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
public:
  /// \param Context AST context; also supplies the SourceManager and the
  /// language options used for macro-name lookups.
  /// \param NullMacros names of macros whose expansions may be replaced.
  /// \param Check the check through which diagnostics are emitted.
  CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
                      ClangTidyCheck &Check)
      : SM(Context.getSourceManager()), Context(Context),
        NullMacros(NullMacros), Check(Check) {}

  /// Traverses \p S unless skipSubTree() was requested since the previous
  /// call, in which case the request is cleared and \p S is not descended
  /// into.
  bool TraverseStmt(Stmt *S) {
    // Stop traversing down the tree if requested.
    if (PruneSubtree) {
      PruneSubtree = false;
      return true;
    }
    return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
  }

  // Only VisitStmt is overridden as we shouldn't find other base AST types
  // within a cast expression.
  //
  // Tracks the sub-expression of the first cast of the current cast sequence
  // in FirstSubExpr and, on reaching an implicit NullTo(Member)Pointer cast,
  // emits a replacement covering that sub-expression if the source range
  // qualifies. Always returns true so the traversal continues.
  bool VisitStmt(Stmt *S) {
    auto *C = dyn_cast<CastExpr>(S);
    // Catch the castExpr inside cxxDefaultArgExpr.
    if (auto *E = dyn_cast<CXXDefaultArgExpr>(S)) {
      C = dyn_cast<CastExpr>(E->getExpr());
      FirstSubExpr = nullptr;
    }
    // A non-cast node ends the current cast sequence.
    if (!C) {
      FirstSubExpr = nullptr;
      return true;
    }

    auto* CastSubExpr = C->getSubExpr()->IgnoreParens();
    // Ignore cast expressions which cast nullptr literal.
    if (isa<CXXNullPtrLiteralExpr>(CastSubExpr)) {
      return true;
    }

    // Remember the operand of the first cast in the sequence; its source
    // range is what gets replaced.
    if (!FirstSubExpr)
      FirstSubExpr = CastSubExpr;

    // Keep walking down the sequence until the null cast itself is reached.
    if (C->getCastKind() != CK_NullToPointer &&
        C->getCastKind() != CK_NullToMemberPointer) {
      return true;
    }

    SourceLocation StartLoc = FirstSubExpr->getBeginLoc();
    SourceLocation EndLoc = FirstSubExpr->getEndLoc();

    // If the location comes from a macro arg expansion, *all* uses of that
    // arg must be checked to result in NullTo(Member)Pointer casts.
    //
    // If the location comes from a macro body expansion, check to see if it's
    // coming from one of the allowed 'NULL' macros.
    if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
      SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
                     FileLocEnd = SM.getFileLoc(EndLoc);
      SourceLocation ImmediateMacroArgLoc, MacroLoc;
      // Skip NULL macros used in macro.
      if (!getMacroAndArgLocations(StartLoc, ImmediateMacroArgLoc, MacroLoc) ||
          ImmediateMacroArgLoc != FileLocStart)
        return skipSubTree();

      if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
          allArgUsesValid(C)) {
        replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
      }
      return true;
    }

    if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
      StringRef OutermostMacroName =
          getOutermostMacroName(StartLoc, SM, Context.getLangOpts());

      // Check to see if the user wants to replace the macro being expanded.
      if (!llvm::is_contained(NullMacros, OutermostMacroName))
        return skipSubTree();

      // Replace the macro use as written in the file, not its expansion.
      StartLoc = SM.getFileLoc(StartLoc);
      EndLoc = SM.getFileLoc(EndLoc);
    }

    if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
      return skipSubTree();
    }
    replaceWithNullptr(Check, SM, StartLoc, EndLoc);

    return true;
  }

private:
  /// Requests that the next call to TraverseStmt() return without descending.
  /// Always returns true so it can be used directly as a VisitStmt() result.
  bool skipSubTree() {
    PruneSubtree = true;
    return true;
  }

  /// Tests that all expansions of a macro arg, one of which expands to
  /// result in \p CE, yield NullTo(Member)Pointer casts.
  bool allArgUsesValid(const CastExpr *CE) {
    SourceLocation CastLoc = CE->getBeginLoc();

    // Step 1: Get location of macro arg and location of the macro the arg was
    // provided to.
    SourceLocation ArgLoc, MacroLoc;
    if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
      return false;

    // Step 2: Find the first ancestor that doesn't expand from this macro.
    DynTypedNode ContainingAncestor;
    if (!findContainingAncestor(DynTypedNode::create<Stmt>(*CE), MacroLoc,
                                ContainingAncestor))
      return false;

    // Step 3:
    // Visit children of this containing parent looking for the least-descended
    // nodes of the containing parent which are macro arg expansions that expand
    // from the given arg location.
    // Visitor needs: arg loc.
    MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
    if (const auto *D = ContainingAncestor.get<Decl>())
      ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
    else if (const auto *S = ContainingAncestor.get<Stmt>())
      ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
    else
      llvm_unreachable("Unhandled ContainingAncestor node type");

    return !ArgUsageVisitor.foundInvalid();
  }

  /// Given the SourceLocation for a macro arg expansion, finds the
  /// non-macro SourceLocation of the macro the arg was passed to and the
  /// non-macro SourceLocation of the argument in the arg list to that macro.
  /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
  /// These values are undefined if the return value is false.
  ///
  /// \returns false if one of the returned SourceLocations would be a
  /// SourceLocation pointing within the definition of another macro.
  bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
                               SourceLocation &MacroLoc) {
    assert(Loc.isMacroID() && "Only reasonable to call this on macros");

    ArgLoc = Loc;

    // Find the location of the immediate macro expansion.
    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      SourceLocation OldArgLoc = ArgLoc;
      ArgLoc = Expansion.getExpansionLocStart();
      if (!Expansion.isMacroArgExpansion()) {
        if (!MacroLoc.isFileID())
          return false;

        // A macro body expansion was reached: accept it only if the macro
        // being expanded is one of the configured null macros.
        StringRef Name =
            Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
        return llvm::is_contained(NullMacros, Name);
      }

      MacroLoc = SM.getExpansionRange(ArgLoc).getBegin();

      ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (ArgLoc.isFileID())
        return true;

      // If spelling location resides in the same FileID as macro expansion
      // location, it means there is no inner macro.
      FileID MacroFID = SM.getFileID(MacroLoc);
      if (SM.isInFileID(ArgLoc, MacroFID)) {
        // Don't transform this case. If the characters that caused the
        // null-conversion come from within a macro, they can't be changed.
        return false;
      }
    }

    llvm_unreachable("getMacroAndArgLocations");
  }

  /// Tests if TestMacroLoc is found while recursively unravelling
  /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
  /// Implementation is very similar to getMacroAndArgLocations() except in this
  /// case, it's not assumed that TestLoc is expanded from a macro argument.
  /// While unravelling expansions macro arguments are handled as with
  /// getMacroAndArgLocations() but in this function macro body expansions are
  /// also handled.
  ///
  /// False means either:
  /// - TestLoc is not from a macro expansion.
  /// - TestLoc is from a different macro expansion.
  bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
    if (TestLoc.isFileID()) {
      return false;
    }

    SourceLocation Loc = TestLoc, MacroLoc;

    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      Loc = Expansion.getExpansionLocStart();

      if (!Expansion.isMacroArgExpansion()) {
        if (Loc.isFileID()) {
          return Loc == TestMacroLoc;
        }
        // Since Loc is still a macro ID and it's not an argument expansion, we
        // don't need to do the work of handling an argument expansion. Simply
        // keep recursively expanding until we hit a FileID or a macro arg
        // expansion.
        continue;
      }

      MacroLoc = SM.getImmediateExpansionRange(Loc).getBegin();
      if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
        // Match made.
        return true;
      }

      Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (Loc.isFileID()) {
        // If we made it this far without finding a match, there is no match to
        // be made.
        return false;
      }
    }

    llvm_unreachable("expandsFrom");
  }

  /// Given a starting point \c Start in the AST, find an ancestor that
  /// doesn't expand from the macro called at file location \c MacroLoc.
  ///
  /// \pre MacroLoc.isFileID()
  /// \returns true if such an ancestor was found (stored in \c Result), false
  /// otherwise.
  bool findContainingAncestor(DynTypedNode Start, SourceLocation MacroLoc,
                              DynTypedNode &Result) {
    // Below we're only following the first parent back up the AST. This should
    // be fine since for the statements we care about there should only be one
    // parent, except for the case specified below.

    assert(MacroLoc.isFileID());

    while (true) {
      const auto &Parents = Context.getParents(Start);
      if (Parents.empty())
        return false;
      if (Parents.size() > 1) {
        // If there is more than one parent, don't do the replacement unless
        // they are all InitListExpr (semantic and syntactic form). In this
        // case we can choose any one here, and the ASTVisitor will take care
        // of traversing the right one.
        for (const auto &Parent : Parents) {
          if (!Parent.get<InitListExpr>())
            return false;
        }
      }

      const DynTypedNode &Parent = Parents[0];

      // Loc stays invalid when the parent is neither a Decl nor a Stmt.
      SourceLocation Loc;
      if (const auto *D = Parent.get<Decl>())
        Loc = D->getBeginLoc();
      else if (const auto *S = Parent.get<Stmt>())
        Loc = S->getBeginLoc();

      // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
      // them and keep going up.
      if (Loc.isValid()) {
        if (!expandsFrom(Loc, MacroLoc)) {
          Result = Parent;
          return true;
        }
      }
      Start = Parent;
    }

    llvm_unreachable("findContainingAncestor");
  }

  SourceManager &SM;
  ASTContext &Context;
  // Names of the macros whose expansions are allowed to be replaced.
  ArrayRef<StringRef> NullMacros;
  ClangTidyCheck &Check;
  // Operand (parentheses stripped) of the first cast in the cast sequence
  // currently being walked; null when no sequence is in progress.
  Expr *FirstSubExpr = nullptr;
  // When set, the next TraverseStmt() call returns without descending.
  bool PruneSubtree = false;
};
475 
476 } // namespace
477 
478 UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
479     : ClangTidyCheck(Name, Context),
480       NullMacrosStr(Options.get("NullMacros", "NULL")) {
481   StringRef(NullMacrosStr).split(NullMacros, ",");
482 }
483 
484 void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
485   Options.store(Opts, "NullMacros", NullMacrosStr);
486 }
487 
488 void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
489   Finder->addMatcher(makeCastSequenceMatcher(), this);
490 }
491 
492 void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
493   const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
494   assert(NullCast && "Bad Callback. No node provided");
495 
496   if (Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>(
497           "matchBinopOperands") !=
498       Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>("checkBinopOperands"))
499     return;
500 
501   // Given an implicit null-ptr cast or an explicit cast with an implicit
502   // null-to-pointer cast within use CastSequenceVisitor to identify sequences
503   // of explicit casts that can be converted into 'nullptr'.
504   CastSequenceVisitor(*Result.Context, NullMacros, *this)
505       .TraverseStmt(const_cast<CastExpr *>(NullCast));
506 }
507 
508 } // namespace clang::tidy::modernize
509