xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (revision 98146c1f5d0c772aec56149724119d49990f4d0c)
1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "UseNullptrCheck.h"
10 #include "clang/AST/ASTContext.h"
11 #include "clang/AST/RecursiveASTVisitor.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Lex/Lexer.h"
14 
15 using namespace clang;
16 using namespace clang::ast_matchers;
17 using namespace llvm;
18 
19 namespace clang {
20 namespace tidy {
21 namespace modernize {
22 namespace {
23 
/// Identifier under which the matcher binds the cast node that check() later
/// retrieves from the match result.
constexpr char CastSequence[] = "sequence";
25 
26 AST_MATCHER(Type, sugaredNullptrType) {
27   const Type *DesugaredType = Node.getUnqualifiedDesugaredType();
28   if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType))
29     return BT->getKind() == BuiltinType::NullPtr;
30   return false;
31 }
32 
33 /// Create a matcher that finds implicit casts as well as the head of a
34 /// sequence of zero or more nested explicit casts that have an implicit cast
35 /// to null within.
36 /// Finding sequences of explicit casts is necessary so that an entire sequence
37 /// can be replaced instead of just the inner-most implicit cast.
38 StatementMatcher makeCastSequenceMatcher() {
39   StatementMatcher ImplicitCastToNull = implicitCastExpr(
40       anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)),
41       unless(hasImplicitDestinationType(qualType(substTemplateTypeParmType()))),
42       unless(hasSourceExpression(hasType(sugaredNullptrType()))));
43 
44   auto IsOrHasDescendant = [](auto InnerMatcher) {
45     return anyOf(InnerMatcher, hasDescendant(InnerMatcher));
46   };
47 
48   return traverse(
49       TK_AsIs,
50       anyOf(castExpr(anyOf(ImplicitCastToNull,
51                            explicitCastExpr(hasDescendant(ImplicitCastToNull))),
52                      unless(hasAncestor(explicitCastExpr())),
53                      unless(hasAncestor(cxxRewrittenBinaryOperator())))
54                 .bind(CastSequence),
55             cxxRewrittenBinaryOperator(
56                 // Match rewritten operators, but verify (in the check method)
57                 // that if an implicit cast is found, it is not from another
58                 // nested rewritten operator.
59                 expr().bind("matchBinopOperands"),
60                 hasEitherOperand(IsOrHasDescendant(
61                     implicitCastExpr(
62                         ImplicitCastToNull,
63                         hasAncestor(cxxRewrittenBinaryOperator().bind(
64                             "checkBinopOperands")))
65                         .bind(CastSequence))))));
66 }
67 
68 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc,
69                         const SourceManager &SM) {
70   return SM.isWrittenInSameFile(StartLoc, EndLoc);
71 }
72 
73 /// Replaces the provided range with the text "nullptr", but only if
74 /// the start and end location are both in main file.
75 /// Returns true if and only if a replacement was made.
76 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM,
77                         SourceLocation StartLoc, SourceLocation EndLoc) {
78   CharSourceRange Range(SourceRange(StartLoc, EndLoc), true);
79   // Add a space if nullptr follows an alphanumeric character. This happens
80   // whenever there is an c-style explicit cast to nullptr not surrounded by
81   // parentheses and right beside a return statement.
82   SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1);
83   bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation));
84   Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement(
85       Range, NeedsSpace ? " nullptr" : "nullptr");
86 }
87 
88 /// Returns the name of the outermost macro.
89 ///
90 /// Given
91 /// \code
92 /// #define MY_NULL NULL
93 /// \endcode
94 /// If \p Loc points to NULL, this function will return the name MY_NULL.
95 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
96                                 const LangOptions &LO) {
97   assert(Loc.isMacroID());
98   SourceLocation OutermostMacroLoc;
99 
100   while (Loc.isMacroID()) {
101     OutermostMacroLoc = Loc;
102     Loc = SM.getImmediateMacroCallerLoc(Loc);
103   }
104 
105   return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
106 }
107 
108 /// RecursiveASTVisitor for ensuring all nodes rooted at a given AST
109 /// subtree that have file-level source locations corresponding to a macro
110 /// argument have implicit NullTo(Member)Pointer nodes as ancestors.
111 class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
112 public:
113   MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
114       : CastLoc(CastLoc), SM(SM), Visited(false), CastFound(false),
115         InvalidFound(false) {
116     assert(CastLoc.isFileID());
117   }
118 
119   bool TraverseStmt(Stmt *S) {
120     bool VisitedPreviously = Visited;
121 
122     if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
123       return false;
124 
125     // The point at which VisitedPreviously is false and Visited is true is the
126     // root of a subtree containing nodes whose locations match CastLoc. It's
127     // at this point we test that the Implicit NullTo(Member)Pointer cast was
128     // found or not.
129     if (!VisitedPreviously) {
130       if (Visited && !CastFound) {
131         // Found nodes with matching SourceLocations but didn't come across a
132         // cast. This is an invalid macro arg use. Can stop traversal
133         // completely now.
134         InvalidFound = true;
135         return false;
136       }
137       // Reset state as we unwind back up the tree.
138       CastFound = false;
139       Visited = false;
140     }
141     return true;
142   }
143 
144   bool VisitStmt(Stmt *S) {
145     if (SM.getFileLoc(S->getBeginLoc()) != CastLoc)
146       return true;
147     Visited = true;
148 
149     const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
150     if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
151                  Cast->getCastKind() == CK_NullToMemberPointer))
152       CastFound = true;
153 
154     return true;
155   }
156 
157   bool TraverseInitListExpr(InitListExpr *S) {
158     // Only go through the semantic form of the InitListExpr, because
159     // ImplicitCast might not appear in the syntactic form, and this results in
160     // finding usages of the macro argument that don't have a ImplicitCast as an
161     // ancestor (thus invalidating the replacement) when they actually have.
162     return RecursiveASTVisitor<MacroArgUsageVisitor>::
163         TraverseSynOrSemInitListExpr(
164             S->isSemanticForm() ? S : S->getSemanticForm());
165   }
166 
167   bool foundInvalid() const { return InvalidFound; }
168 
169 private:
170   SourceLocation CastLoc;
171   const SourceManager &SM;
172 
173   bool Visited;
174   bool CastFound;
175   bool InvalidFound;
176 };
177 
178 /// Looks for implicit casts as well as sequences of 0 or more explicit
179 /// casts with an implicit null-to-pointer cast within.
180 ///
181 /// The matcher this visitor is used with will find a single implicit cast or a
182 /// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
183 /// an implicit cast is nested within. However, there is no guarantee that only
184 /// explicit casts exist between the found top-most explicit cast and the
185 /// possibly more than one nested implicit cast. This visitor finds all cast
186 /// sequences with an implicit cast to null within and creates a replacement
187 /// leaving the outermost explicit cast unchanged to avoid introducing
188 /// ambiguities.
189 class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
190 public:
191   CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
192                       ClangTidyCheck &Check)
193       : SM(Context.getSourceManager()), Context(Context),
194         NullMacros(NullMacros), Check(Check), FirstSubExpr(nullptr),
195         PruneSubtree(false) {}
196 
197   bool TraverseStmt(Stmt *S) {
198     // Stop traversing down the tree if requested.
199     if (PruneSubtree) {
200       PruneSubtree = false;
201       return true;
202     }
203     return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
204   }
205 
206   // Only VisitStmt is overridden as we shouldn't find other base AST types
207   // within a cast expression.
208   bool VisitStmt(Stmt *S) {
209     auto *C = dyn_cast<CastExpr>(S);
210     // Catch the castExpr inside cxxDefaultArgExpr.
211     if (auto *E = dyn_cast<CXXDefaultArgExpr>(S)) {
212       C = dyn_cast<CastExpr>(E->getExpr());
213       FirstSubExpr = nullptr;
214     }
215     if (!C) {
216       FirstSubExpr = nullptr;
217       return true;
218     }
219 
220     auto* CastSubExpr = C->getSubExpr()->IgnoreParens();
221     // Ignore cast expressions which cast nullptr literal.
222     if (isa<CXXNullPtrLiteralExpr>(CastSubExpr)) {
223       return true;
224     }
225 
226     if (!FirstSubExpr)
227       FirstSubExpr = CastSubExpr;
228 
229     if (C->getCastKind() != CK_NullToPointer &&
230         C->getCastKind() != CK_NullToMemberPointer) {
231       return true;
232     }
233 
234     SourceLocation StartLoc = FirstSubExpr->getBeginLoc();
235     SourceLocation EndLoc = FirstSubExpr->getEndLoc();
236 
237     // If the location comes from a macro arg expansion, *all* uses of that
238     // arg must be checked to result in NullTo(Member)Pointer casts.
239     //
240     // If the location comes from a macro body expansion, check to see if its
241     // coming from one of the allowed 'NULL' macros.
242     if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
243       SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
244                      FileLocEnd = SM.getFileLoc(EndLoc);
245       SourceLocation ImmediateMacroArgLoc, MacroLoc;
246       // Skip NULL macros used in macro.
247       if (!getMacroAndArgLocations(StartLoc, ImmediateMacroArgLoc, MacroLoc) ||
248           ImmediateMacroArgLoc != FileLocStart)
249         return skipSubTree();
250 
251       if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
252           allArgUsesValid(C)) {
253         replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
254       }
255       return true;
256     }
257 
258     if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
259       StringRef OutermostMacroName =
260           getOutermostMacroName(StartLoc, SM, Context.getLangOpts());
261 
262       // Check to see if the user wants to replace the macro being expanded.
263       if (!llvm::is_contained(NullMacros, OutermostMacroName))
264         return skipSubTree();
265 
266       StartLoc = SM.getFileLoc(StartLoc);
267       EndLoc = SM.getFileLoc(EndLoc);
268     }
269 
270     if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
271       return skipSubTree();
272     }
273     replaceWithNullptr(Check, SM, StartLoc, EndLoc);
274 
275     return true;
276   }
277 
278 private:
279   bool skipSubTree() {
280     PruneSubtree = true;
281     return true;
282   }
283 
284   /// Tests that all expansions of a macro arg, one of which expands to
285   /// result in \p CE, yield NullTo(Member)Pointer casts.
286   bool allArgUsesValid(const CastExpr *CE) {
287     SourceLocation CastLoc = CE->getBeginLoc();
288 
289     // Step 1: Get location of macro arg and location of the macro the arg was
290     // provided to.
291     SourceLocation ArgLoc, MacroLoc;
292     if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
293       return false;
294 
295     // Step 2: Find the first ancestor that doesn't expand from this macro.
296     DynTypedNode ContainingAncestor;
297     if (!findContainingAncestor(DynTypedNode::create<Stmt>(*CE), MacroLoc,
298                                 ContainingAncestor))
299       return false;
300 
301     // Step 3:
302     // Visit children of this containing parent looking for the least-descended
303     // nodes of the containing parent which are macro arg expansions that expand
304     // from the given arg location.
305     // Visitor needs: arg loc.
306     MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
307     if (const auto *D = ContainingAncestor.get<Decl>())
308       ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
309     else if (const auto *S = ContainingAncestor.get<Stmt>())
310       ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
311     else
312       llvm_unreachable("Unhandled ContainingAncestor node type");
313 
314     return !ArgUsageVisitor.foundInvalid();
315   }
316 
317   /// Given the SourceLocation for a macro arg expansion, finds the
318   /// non-macro SourceLocation of the macro the arg was passed to and the
319   /// non-macro SourceLocation of the argument in the arg list to that macro.
320   /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
321   /// These values are undefined if the return value is false.
322   ///
323   /// \returns false if one of the returned SourceLocations would be a
324   /// SourceLocation pointing within the definition of another macro.
325   bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
326                                SourceLocation &MacroLoc) {
327     assert(Loc.isMacroID() && "Only reasonable to call this on macros");
328 
329     ArgLoc = Loc;
330 
331     // Find the location of the immediate macro expansion.
332     while (true) {
333       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
334       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
335       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
336 
337       SourceLocation OldArgLoc = ArgLoc;
338       ArgLoc = Expansion.getExpansionLocStart();
339       if (!Expansion.isMacroArgExpansion()) {
340         if (!MacroLoc.isFileID())
341           return false;
342 
343         StringRef Name =
344             Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
345         return llvm::is_contained(NullMacros, Name);
346       }
347 
348       MacroLoc = SM.getExpansionRange(ArgLoc).getBegin();
349 
350       ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
351       if (ArgLoc.isFileID())
352         return true;
353 
354       // If spelling location resides in the same FileID as macro expansion
355       // location, it means there is no inner macro.
356       FileID MacroFID = SM.getFileID(MacroLoc);
357       if (SM.isInFileID(ArgLoc, MacroFID)) {
358         // Don't transform this case. If the characters that caused the
359         // null-conversion come from within a macro, they can't be changed.
360         return false;
361       }
362     }
363 
364     llvm_unreachable("getMacroAndArgLocations");
365   }
366 
367   /// Tests if TestMacroLoc is found while recursively unravelling
368   /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
369   /// Implementation is very similar to getMacroAndArgLocations() except in this
370   /// case, it's not assumed that TestLoc is expanded from a macro argument.
371   /// While unravelling expansions macro arguments are handled as with
372   /// getMacroAndArgLocations() but in this function macro body expansions are
373   /// also handled.
374   ///
375   /// False means either:
376   /// - TestLoc is not from a macro expansion.
377   /// - TestLoc is from a different macro expansion.
378   bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
379     if (TestLoc.isFileID()) {
380       return false;
381     }
382 
383     SourceLocation Loc = TestLoc, MacroLoc;
384 
385     while (true) {
386       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
387       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
388       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
389 
390       Loc = Expansion.getExpansionLocStart();
391 
392       if (!Expansion.isMacroArgExpansion()) {
393         if (Loc.isFileID()) {
394           return Loc == TestMacroLoc;
395         }
396         // Since Loc is still a macro ID and it's not an argument expansion, we
397         // don't need to do the work of handling an argument expansion. Simply
398         // keep recursively expanding until we hit a FileID or a macro arg
399         // expansion or a macro arg expansion.
400         continue;
401       }
402 
403       MacroLoc = SM.getImmediateExpansionRange(Loc).getBegin();
404       if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
405         // Match made.
406         return true;
407       }
408 
409       Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
410       if (Loc.isFileID()) {
411         // If we made it this far without finding a match, there is no match to
412         // be made.
413         return false;
414       }
415     }
416 
417     llvm_unreachable("expandsFrom");
418   }
419 
420   /// Given a starting point \c Start in the AST, find an ancestor that
421   /// doesn't expand from the macro called at file location \c MacroLoc.
422   ///
423   /// \pre MacroLoc.isFileID()
424   /// \returns true if such an ancestor was found, false otherwise.
425   bool findContainingAncestor(DynTypedNode Start, SourceLocation MacroLoc,
426                               DynTypedNode &Result) {
427     // Below we're only following the first parent back up the AST. This should
428     // be fine since for the statements we care about there should only be one
429     // parent, except for the case specified below.
430 
431     assert(MacroLoc.isFileID());
432 
433     while (true) {
434       const auto &Parents = Context.getParents(Start);
435       if (Parents.empty())
436         return false;
437       if (Parents.size() > 1) {
438         // If there are more than one parents, don't do the replacement unless
439         // they are InitListsExpr (semantic and syntactic form). In this case we
440         // can choose any one here, and the ASTVisitor will take care of
441         // traversing the right one.
442         for (const auto &Parent : Parents) {
443           if (!Parent.get<InitListExpr>())
444             return false;
445         }
446       }
447 
448       const DynTypedNode &Parent = Parents[0];
449 
450       SourceLocation Loc;
451       if (const auto *D = Parent.get<Decl>())
452         Loc = D->getBeginLoc();
453       else if (const auto *S = Parent.get<Stmt>())
454         Loc = S->getBeginLoc();
455 
456       // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
457       // them and keep going up.
458       if (Loc.isValid()) {
459         if (!expandsFrom(Loc, MacroLoc)) {
460           Result = Parent;
461           return true;
462         }
463       }
464       Start = Parent;
465     }
466 
467     llvm_unreachable("findContainingAncestor");
468   }
469 
470 private:
471   SourceManager &SM;
472   ASTContext &Context;
473   ArrayRef<StringRef> NullMacros;
474   ClangTidyCheck &Check;
475   Expr *FirstSubExpr;
476   bool PruneSubtree;
477 };
478 
479 } // namespace
480 
481 UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
482     : ClangTidyCheck(Name, Context),
483       NullMacrosStr(Options.get("NullMacros", "")) {
484   StringRef(NullMacrosStr).split(NullMacros, ",");
485 }
486 
487 void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
488   Options.store(Opts, "NullMacros", NullMacrosStr);
489 }
490 
491 void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
492   Finder->addMatcher(makeCastSequenceMatcher(), this);
493 }
494 
495 void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
496   const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
497   assert(NullCast && "Bad Callback. No node provided");
498 
499   if (Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>(
500           "matchBinopOperands") !=
501       Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>("checkBinopOperands"))
502     return;
503 
504   // Given an implicit null-ptr cast or an explicit cast with an implicit
505   // null-to-pointer cast within use CastSequenceVisitor to identify sequences
506   // of explicit casts that can be converted into 'nullptr'.
507   CastSequenceVisitor(*Result.Context, NullMacros, *this)
508       .TraverseStmt(const_cast<CastExpr *>(NullCast));
509 }
510 
511 } // namespace modernize
512 } // namespace tidy
513 } // namespace clang
514