xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (revision 1aa5885f00bdd926357aade401cdb458cf102785)
1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "UseNullptrCheck.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Lex/Lexer.h"
15 
16 using namespace clang;
17 using namespace clang::ast_matchers;
18 using namespace llvm;
19 
20 namespace clang {
21 namespace tidy {
22 namespace modernize {
23 namespace {
24 
/// Binding ID under which the cast-sequence matcher publishes its root node.
constexpr char CastSequence[] = "sequence";
26 
27 AST_MATCHER(Type, sugaredNullptrType) {
28   const Type *DesugaredType = Node.getUnqualifiedDesugaredType();
29   if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType))
30     return BT->getKind() == BuiltinType::NullPtr;
31   return false;
32 }
33 
34 /// \brief Create a matcher that finds implicit casts as well as the head of a
35 /// sequence of zero or more nested explicit casts that have an implicit cast
36 /// to null within.
37 /// Finding sequences of explict casts is necessary so that an entire sequence
38 /// can be replaced instead of just the inner-most implicit cast.
39 StatementMatcher makeCastSequenceMatcher() {
40   StatementMatcher ImplicitCastToNull = implicitCastExpr(
41       anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)),
42       unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))),
43       unless(hasSourceExpression(hasType(sugaredNullptrType()))));
44 
45   return castExpr(anyOf(ImplicitCastToNull,
46                         explicitCastExpr(hasDescendant(ImplicitCastToNull))),
47                   unless(hasAncestor(explicitCastExpr())))
48       .bind(CastSequence);
49 }
50 
51 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc,
52                         const SourceManager &SM) {
53   return SM.isWrittenInSameFile(StartLoc, EndLoc);
54 }
55 
56 /// \brief Replaces the provided range with the text "nullptr", but only if
57 /// the start and end location are both in main file.
58 /// Returns true if and only if a replacement was made.
59 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM,
60                         SourceLocation StartLoc, SourceLocation EndLoc) {
61   CharSourceRange Range(SourceRange(StartLoc, EndLoc), true);
62   // Add a space if nullptr follows an alphanumeric character. This happens
63   // whenever there is an c-style explicit cast to nullptr not surrounded by
64   // parentheses and right beside a return statement.
65   SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1);
66   bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation));
67   Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement(
68       Range, NeedsSpace ? " nullptr" : "nullptr");
69 }
70 
71 /// \brief Returns the name of the outermost macro.
72 ///
73 /// Given
74 /// \code
75 /// #define MY_NULL NULL
76 /// \endcode
77 /// If \p Loc points to NULL, this function will return the name MY_NULL.
78 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
79                                 const LangOptions &LO) {
80   assert(Loc.isMacroID());
81   SourceLocation OutermostMacroLoc;
82 
83   while (Loc.isMacroID()) {
84     OutermostMacroLoc = Loc;
85     Loc = SM.getImmediateMacroCallerLoc(Loc);
86   }
87 
88   return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
89 }
90 
91 /// \brief RecursiveASTVisitor for ensuring all nodes rooted at a given AST
92 /// subtree that have file-level source locations corresponding to a macro
93 /// argument have implicit NullTo(Member)Pointer nodes as ancestors.
94 class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
95 public:
96   MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
97       : CastLoc(CastLoc), SM(SM), Visited(false), CastFound(false),
98         InvalidFound(false) {
99     assert(CastLoc.isFileID());
100   }
101 
102   bool TraverseStmt(Stmt *S) {
103     bool VisitedPreviously = Visited;
104 
105     if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
106       return false;
107 
108     // The point at which VisitedPreviously is false and Visited is true is the
109     // root of a subtree containing nodes whose locations match CastLoc. It's
110     // at this point we test that the Implicit NullTo(Member)Pointer cast was
111     // found or not.
112     if (!VisitedPreviously) {
113       if (Visited && !CastFound) {
114         // Found nodes with matching SourceLocations but didn't come across a
115         // cast. This is an invalid macro arg use. Can stop traversal
116         // completely now.
117         InvalidFound = true;
118         return false;
119       }
120       // Reset state as we unwind back up the tree.
121       CastFound = false;
122       Visited = false;
123     }
124     return true;
125   }
126 
127   bool VisitStmt(Stmt *S) {
128     if (SM.getFileLoc(S->getLocStart()) != CastLoc)
129       return true;
130     Visited = true;
131 
132     const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
133     if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
134                  Cast->getCastKind() == CK_NullToMemberPointer))
135       CastFound = true;
136 
137     return true;
138   }
139 
140   bool TraverseInitListExpr(InitListExpr *S) {
141     // Only go through the semantic form of the InitListExpr, because
142     // ImplicitCast might not appear in the syntactic form, and this results in
143     // finding usages of the macro argument that don't have a ImplicitCast as an
144     // ancestor (thus invalidating the replacement) when they actually have.
145     return RecursiveASTVisitor<MacroArgUsageVisitor>::
146         TraverseSynOrSemInitListExpr(
147             S->isSemanticForm() ? S : S->getSemanticForm());
148   }
149 
150   bool foundInvalid() const { return InvalidFound; }
151 
152 private:
153   SourceLocation CastLoc;
154   const SourceManager &SM;
155 
156   bool Visited;
157   bool CastFound;
158   bool InvalidFound;
159 };
160 
161 /// \brief Looks for implicit casts as well as sequences of 0 or more explicit
162 /// casts with an implicit null-to-pointer cast within.
163 ///
164 /// The matcher this visitor is used with will find a single implicit cast or a
165 /// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
166 /// an implicit cast is nested within. However, there is no guarantee that only
167 /// explicit casts exist between the found top-most explicit cast and the
168 /// possibly more than one nested implicit cast. This visitor finds all cast
169 /// sequences with an implicit cast to null within and creates a replacement
170 /// leaving the outermost explicit cast unchanged to avoid introducing
171 /// ambiguities.
172 class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
173 public:
174   CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
175                       ClangTidyCheck &check)
176       : SM(Context.getSourceManager()), Context(Context),
177         NullMacros(NullMacros), Check(check), FirstSubExpr(nullptr),
178         PruneSubtree(false) {}
179 
180   bool TraverseStmt(Stmt *S) {
181     // Stop traversing down the tree if requested.
182     if (PruneSubtree) {
183       PruneSubtree = false;
184       return true;
185     }
186     return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
187   }
188 
189   // Only VisitStmt is overridden as we shouldn't find other base AST types
190   // within a cast expression.
191   bool VisitStmt(Stmt *S) {
192     auto *C = dyn_cast<CastExpr>(S);
193     // Catch the castExpr inside cxxDefaultArgExpr.
194     if (auto *E = dyn_cast<CXXDefaultArgExpr>(S)) {
195       C = dyn_cast<CastExpr>(E->getExpr());
196       FirstSubExpr = nullptr;
197     }
198     if (!C) {
199       FirstSubExpr = nullptr;
200       return true;
201     }
202 
203     if (!FirstSubExpr)
204       FirstSubExpr = C->getSubExpr()->IgnoreParens();
205 
206     // Ignore the expr if it is already a nullptr literal expr.
207     if (isa<CXXNullPtrLiteralExpr>(FirstSubExpr))
208       return true;
209 
210     if (C->getCastKind() != CK_NullToPointer &&
211         C->getCastKind() != CK_NullToMemberPointer) {
212       return true;
213     }
214 
215     SourceLocation StartLoc = FirstSubExpr->getLocStart();
216     SourceLocation EndLoc = FirstSubExpr->getLocEnd();
217 
218     // If the location comes from a macro arg expansion, *all* uses of that
219     // arg must be checked to result in NullTo(Member)Pointer casts.
220     //
221     // If the location comes from a macro body expansion, check to see if its
222     // coming from one of the allowed 'NULL' macros.
223     if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
224       SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
225                      FileLocEnd = SM.getFileLoc(EndLoc);
226       SourceLocation ImmediateMarcoArgLoc, MacroLoc;
227       // Skip NULL macros used in macro.
228       if (!getMacroAndArgLocations(StartLoc, ImmediateMarcoArgLoc, MacroLoc) ||
229           ImmediateMarcoArgLoc != FileLocStart)
230         return skipSubTree();
231 
232       if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
233           allArgUsesValid(C)) {
234         replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
235       }
236       return skipSubTree();
237     }
238 
239     if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
240       StringRef OutermostMacroName =
241           getOutermostMacroName(StartLoc, SM, Context.getLangOpts());
242 
243       // Check to see if the user wants to replace the macro being expanded.
244       if (std::find(NullMacros.begin(), NullMacros.end(), OutermostMacroName) ==
245           NullMacros.end()) {
246         return skipSubTree();
247       }
248 
249       StartLoc = SM.getFileLoc(StartLoc);
250       EndLoc = SM.getFileLoc(EndLoc);
251     }
252 
253     if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
254       return skipSubTree();
255     }
256     replaceWithNullptr(Check, SM, StartLoc, EndLoc);
257 
258     return true;
259   }
260 
261 private:
262   bool skipSubTree() {
263     PruneSubtree = true;
264     return true;
265   }
266 
267   /// \brief Tests that all expansions of a macro arg, one of which expands to
268   /// result in \p CE, yield NullTo(Member)Pointer casts.
269   bool allArgUsesValid(const CastExpr *CE) {
270     SourceLocation CastLoc = CE->getLocStart();
271 
272     // Step 1: Get location of macro arg and location of the macro the arg was
273     // provided to.
274     SourceLocation ArgLoc, MacroLoc;
275     if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
276       return false;
277 
278     // Step 2: Find the first ancestor that doesn't expand from this macro.
279     ast_type_traits::DynTypedNode ContainingAncestor;
280     if (!findContainingAncestor(
281             ast_type_traits::DynTypedNode::create<Stmt>(*CE), MacroLoc,
282             ContainingAncestor))
283       return false;
284 
285     // Step 3:
286     // Visit children of this containing parent looking for the least-descended
287     // nodes of the containing parent which are macro arg expansions that expand
288     // from the given arg location.
289     // Visitor needs: arg loc.
290     MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
291     if (const auto *D = ContainingAncestor.get<Decl>())
292       ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
293     else if (const auto *S = ContainingAncestor.get<Stmt>())
294       ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
295     else
296       llvm_unreachable("Unhandled ContainingAncestor node type");
297 
298     return !ArgUsageVisitor.foundInvalid();
299   }
300 
301   /// \brief Given the SourceLocation for a macro arg expansion, finds the
302   /// non-macro SourceLocation of the macro the arg was passed to and the
303   /// non-macro SourceLocation of the argument in the arg list to that macro.
304   /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
305   /// These values are undefined if the return value is false.
306   ///
307   /// \returns false if one of the returned SourceLocations would be a
308   /// SourceLocation pointing within the definition of another macro.
309   bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
310                                SourceLocation &MacroLoc) {
311     assert(Loc.isMacroID() && "Only reasonble to call this on macros");
312 
313     ArgLoc = Loc;
314 
315     // Find the location of the immediate macro expansion.
316     while (true) {
317       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
318       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
319       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
320 
321       SourceLocation OldArgLoc = ArgLoc;
322       ArgLoc = Expansion.getExpansionLocStart();
323       if (!Expansion.isMacroArgExpansion()) {
324         if (!MacroLoc.isFileID())
325           return false;
326 
327         StringRef Name =
328             Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
329         return std::find(NullMacros.begin(), NullMacros.end(), Name) !=
330                NullMacros.end();
331       }
332 
333       MacroLoc = SM.getExpansionRange(ArgLoc).first;
334 
335       ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
336       if (ArgLoc.isFileID())
337         return true;
338 
339       // If spelling location resides in the same FileID as macro expansion
340       // location, it means there is no inner macro.
341       FileID MacroFID = SM.getFileID(MacroLoc);
342       if (SM.isInFileID(ArgLoc, MacroFID)) {
343         // Don't transform this case. If the characters that caused the
344         // null-conversion come from within a macro, they can't be changed.
345         return false;
346       }
347     }
348 
349     llvm_unreachable("getMacroAndArgLocations");
350   }
351 
352   /// \brief Tests if TestMacroLoc is found while recursively unravelling
353   /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
354   /// Implementation is very similar to getMacroAndArgLocations() except in this
355   /// case, it's not assumed that TestLoc is expanded from a macro argument.
356   /// While unravelling expansions macro arguments are handled as with
357   /// getMacroAndArgLocations() but in this function macro body expansions are
358   /// also handled.
359   ///
360   /// False means either:
361   /// - TestLoc is not from a macro expansion.
362   /// - TestLoc is from a different macro expansion.
363   bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
364     if (TestLoc.isFileID()) {
365       return false;
366     }
367 
368     SourceLocation Loc = TestLoc, MacroLoc;
369 
370     while (true) {
371       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
372       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
373       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
374 
375       Loc = Expansion.getExpansionLocStart();
376 
377       if (!Expansion.isMacroArgExpansion()) {
378         if (Loc.isFileID()) {
379           return Loc == TestMacroLoc;
380         }
381         // Since Loc is still a macro ID and it's not an argument expansion, we
382         // don't need to do the work of handling an argument expansion. Simply
383         // keep recursively expanding until we hit a FileID or a macro arg
384         // expansion or a macro arg expansion.
385         continue;
386       }
387 
388       MacroLoc = SM.getImmediateExpansionRange(Loc).first;
389       if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
390         // Match made.
391         return true;
392       }
393 
394       Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
395       if (Loc.isFileID()) {
396         // If we made it this far without finding a match, there is no match to
397         // be made.
398         return false;
399       }
400     }
401 
402     llvm_unreachable("expandsFrom");
403   }
404 
405   /// \brief Given a starting point \c Start in the AST, find an ancestor that
406   /// doesn't expand from the macro called at file location \c MacroLoc.
407   ///
408   /// \pre MacroLoc.isFileID()
409   /// \returns true if such an ancestor was found, false otherwise.
410   bool findContainingAncestor(ast_type_traits::DynTypedNode Start,
411                               SourceLocation MacroLoc,
412                               ast_type_traits::DynTypedNode &Result) {
413     // Below we're only following the first parent back up the AST. This should
414     // be fine since for the statements we care about there should only be one
415     // parent, except for the case specified below.
416 
417     assert(MacroLoc.isFileID());
418 
419     while (true) {
420       const auto &Parents = Context.getParents(Start);
421       if (Parents.empty())
422         return false;
423       if (Parents.size() > 1) {
424         // If there are more than one parents, don't do the replacement unless
425         // they are InitListsExpr (semantic and syntactic form). In this case we
426         // can choose any one here, and the ASTVisitor will take care of
427         // traversing the right one.
428         for (const auto &Parent : Parents) {
429           if (!Parent.get<InitListExpr>())
430             return false;
431         }
432       }
433 
434       const ast_type_traits::DynTypedNode &Parent = Parents[0];
435 
436       SourceLocation Loc;
437       if (const auto *D = Parent.get<Decl>())
438         Loc = D->getLocStart();
439       else if (const auto *S = Parent.get<Stmt>())
440         Loc = S->getLocStart();
441 
442       // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
443       // them and keep going up.
444       if (Loc.isValid()) {
445         if (!expandsFrom(Loc, MacroLoc)) {
446           Result = Parent;
447           return true;
448         }
449       }
450       Start = Parent;
451     }
452 
453     llvm_unreachable("findContainingAncestor");
454   }
455 
456 private:
457   SourceManager &SM;
458   ASTContext &Context;
459   ArrayRef<StringRef> NullMacros;
460   ClangTidyCheck &Check;
461   Expr *FirstSubExpr;
462   bool PruneSubtree;
463 };
464 
465 } // namespace
466 
467 UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
468     : ClangTidyCheck(Name, Context),
469       NullMacrosStr(Options.get("NullMacros", "")) {
470   StringRef(NullMacrosStr).split(NullMacros, ",");
471 }
472 
473 void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
474   Options.store(Opts, "NullMacros", NullMacrosStr);
475 }
476 
477 void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
478   // Only register the matcher for C++. Because this checker is used for
479   // modernization, it is reasonable to run it on any C++ standard with the
480   // assumption the user is trying to modernize their codebase.
481   if (getLangOpts().CPlusPlus)
482     Finder->addMatcher(makeCastSequenceMatcher(), this);
483 }
484 
485 void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
486   const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
487   assert(NullCast && "Bad Callback. No node provided");
488 
489   // Given an implicit null-ptr cast or an explicit cast with an implicit
490   // null-to-pointer cast within use CastSequenceVisitor to identify sequences
491   // of explicit casts that can be converted into 'nullptr'.
492   CastSequenceVisitor(*Result.Context, NullMacros, *this)
493       .TraverseStmt(const_cast<CastExpr *>(NullCast));
494 }
495 
496 } // namespace modernize
497 } // namespace tidy
498 } // namespace clang
499