xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (revision cebb7c010854e39a77065cfd681db91a79e7ce15)
1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "UseNullptrCheck.h"
10 #include "../utils/Matchers.h"
11 #include "../utils/OptionsUtils.h"
12 #include "clang/AST/ASTContext.h"
13 #include "clang/AST/RecursiveASTVisitor.h"
14 #include "clang/ASTMatchers/ASTMatchFinder.h"
15 #include "clang/Lex/Lexer.h"
16 
17 using namespace clang;
18 using namespace clang::ast_matchers;
19 using namespace llvm;
20 
21 namespace clang::tidy::modernize {
22 namespace {
23 
// Binding ID attached to the matched cast expression by the matcher built in
// this file; check() looks the node up again under the same ID.
const char CastSequence[] = "sequence";
25 
26 AST_MATCHER(Type, sugaredNullptrType) {
27   const Type *DesugaredType = Node.getUnqualifiedDesugaredType();
28   if (const auto *BT = dyn_cast<BuiltinType>(DesugaredType))
29     return BT->getKind() == BuiltinType::NullPtr;
30   return false;
31 }
32 
33 /// Create a matcher that finds implicit casts as well as the head of a
34 /// sequence of zero or more nested explicit casts that have an implicit cast
35 /// to null within.
36 /// Finding sequences of explicit casts is necessary so that an entire sequence
37 /// can be replaced instead of just the inner-most implicit cast.
38 ///
39 /// TODO/NOTE: The second "anyOf" below discards matches on a substituted type,
40 /// since we don't know if that would _always_ be a pointer type for all other
41 /// specializations, unless the expression was "__null", in which case we assume
42 /// that all specializations are expected to be for pointer types. Ideally this
43 /// would check for the "NULL" macro instead, but that'd be harder to express.
44 /// In practice, "NULL" is often defined as "__null", and this is a useful
45 /// condition.
46 StatementMatcher makeCastSequenceMatcher(llvm::ArrayRef<StringRef> NameList) {
47   auto ImplicitCastToNull = implicitCastExpr(
48       anyOf(hasCastKind(CK_NullToPointer), hasCastKind(CK_NullToMemberPointer)),
49       anyOf(hasSourceExpression(gnuNullExpr()),
50             unless(hasImplicitDestinationType(
51                 qualType(substTemplateTypeParmType())))),
52       unless(hasSourceExpression(hasType(sugaredNullptrType()))),
53       unless(hasImplicitDestinationType(
54           qualType(matchers::matchesAnyListedTypeName(NameList)))));
55 
56   auto IsOrHasDescendant = [](auto InnerMatcher) {
57     return anyOf(InnerMatcher, hasDescendant(InnerMatcher));
58   };
59 
60   return traverse(
61       TK_AsIs,
62       anyOf(castExpr(anyOf(ImplicitCastToNull,
63                            explicitCastExpr(hasDescendant(ImplicitCastToNull))),
64                      unless(hasAncestor(explicitCastExpr())),
65                      unless(hasAncestor(cxxRewrittenBinaryOperator())))
66                 .bind(CastSequence),
67             cxxRewrittenBinaryOperator(
68                 // Match rewritten operators, but verify (in the check method)
69                 // that if an implicit cast is found, it is not from another
70                 // nested rewritten operator.
71                 expr().bind("matchBinopOperands"),
72                 hasEitherOperand(IsOrHasDescendant(
73                     implicitCastExpr(
74                         ImplicitCastToNull,
75                         hasAncestor(cxxRewrittenBinaryOperator().bind(
76                             "checkBinopOperands")))
77                         .bind(CastSequence))),
78                 // Skip defaulted comparison operators.
79                 unless(hasAncestor(functionDecl(isDefaulted()))))));
80 }
81 
82 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc,
83                         const SourceManager &SM) {
84   return SM.isWrittenInSameFile(StartLoc, EndLoc);
85 }
86 
87 /// Replaces the provided range with the text "nullptr", but only if
88 /// the start and end location are both in main file.
89 /// Returns true if and only if a replacement was made.
90 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM,
91                         SourceLocation StartLoc, SourceLocation EndLoc) {
92   CharSourceRange Range(SourceRange(StartLoc, EndLoc), true);
93   // Add a space if nullptr follows an alphanumeric character. This happens
94   // whenever there is an c-style explicit cast to nullptr not surrounded by
95   // parentheses and right beside a return statement.
96   SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1);
97   bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation));
98   Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement(
99       Range, NeedsSpace ? " nullptr" : "nullptr");
100 }
101 
102 /// Returns the name of the outermost macro.
103 ///
104 /// Given
105 /// \code
106 /// #define MY_NULL NULL
107 /// \endcode
108 /// If \p Loc points to NULL, this function will return the name MY_NULL.
109 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
110                                 const LangOptions &LO) {
111   assert(Loc.isMacroID());
112   SourceLocation OutermostMacroLoc;
113 
114   while (Loc.isMacroID()) {
115     OutermostMacroLoc = Loc;
116     Loc = SM.getImmediateMacroCallerLoc(Loc);
117   }
118 
119   return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
120 }
121 
122 /// RecursiveASTVisitor for ensuring all nodes rooted at a given AST
123 /// subtree that have file-level source locations corresponding to a macro
124 /// argument have implicit NullTo(Member)Pointer nodes as ancestors.
125 class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
126 public:
127   MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
128       : CastLoc(CastLoc), SM(SM) {
129     assert(CastLoc.isFileID());
130   }
131 
132   bool TraverseStmt(Stmt *S) {
133     bool VisitedPreviously = Visited;
134 
135     if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
136       return false;
137 
138     // The point at which VisitedPreviously is false and Visited is true is the
139     // root of a subtree containing nodes whose locations match CastLoc. It's
140     // at this point we test that the Implicit NullTo(Member)Pointer cast was
141     // found or not.
142     if (!VisitedPreviously) {
143       if (Visited && !CastFound) {
144         // Found nodes with matching SourceLocations but didn't come across a
145         // cast. This is an invalid macro arg use. Can stop traversal
146         // completely now.
147         InvalidFound = true;
148         return false;
149       }
150       // Reset state as we unwind back up the tree.
151       CastFound = false;
152       Visited = false;
153     }
154     return true;
155   }
156 
157   bool VisitStmt(Stmt *S) {
158     if (SM.getFileLoc(S->getBeginLoc()) != CastLoc)
159       return true;
160     Visited = true;
161 
162     const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
163     if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
164                  Cast->getCastKind() == CK_NullToMemberPointer))
165       CastFound = true;
166 
167     return true;
168   }
169 
170   bool TraverseInitListExpr(InitListExpr *S) {
171     // Only go through the semantic form of the InitListExpr, because
172     // ImplicitCast might not appear in the syntactic form, and this results in
173     // finding usages of the macro argument that don't have a ImplicitCast as an
174     // ancestor (thus invalidating the replacement) when they actually have.
175     return RecursiveASTVisitor<MacroArgUsageVisitor>::
176         TraverseSynOrSemInitListExpr(
177             S->isSemanticForm() ? S : S->getSemanticForm());
178   }
179 
180   bool foundInvalid() const { return InvalidFound; }
181 
182 private:
183   SourceLocation CastLoc;
184   const SourceManager &SM;
185 
186   bool Visited = false;
187   bool CastFound = false;
188   bool InvalidFound = false;
189 };
190 
/// Looks for implicit casts as well as sequences of 0 or more explicit
/// casts with an implicit null-to-pointer cast within.
///
/// The matcher this visitor is used with will find a single implicit cast or a
/// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
/// an implicit cast is nested within. However, there is no guarantee that only
/// explicit casts exist between the found top-most explicit cast and the
/// possibly more than one nested implicit cast. This visitor finds all cast
/// sequences with an implicit cast to null within and creates a replacement
/// leaving the outermost explicit cast unchanged to avoid introducing
/// ambiguities.
class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
public:
  /// \param Context AST context; also supplies the source manager.
  /// \param NullMacros names of the macros that may be replaced by nullptr.
  /// \param Check the check through which diagnostics are emitted.
  CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
                      ClangTidyCheck &Check)
      : SM(Context.getSourceManager()), Context(Context),
        NullMacros(NullMacros), Check(Check) {}

  /// Traverses \p S unless a prune was requested via skipSubTree(), in which
  /// case the request is consumed and \p S is not descended into.
  bool TraverseStmt(Stmt *S) {
    // Stop traversing down the tree if requested.
    if (PruneSubtree) {
      PruneSubtree = false;
      return true;
    }
    return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
  }

  // Only VisitStmt is overridden as we shouldn't find other base AST types
  // within a cast expression.
  //
  // FirstSubExpr remembers the sub-expression of the first cast of the current
  // run of casts; it is cleared whenever a non-cast statement (or a
  // CXXDefaultArgExpr) is visited.
  bool VisitStmt(Stmt *S) {
    auto *C = dyn_cast<CastExpr>(S);
    // Catch the castExpr inside cxxDefaultArgExpr.
    if (auto *E = dyn_cast<CXXDefaultArgExpr>(S)) {
      C = dyn_cast<CastExpr>(E->getExpr());
      FirstSubExpr = nullptr;
    }
    if (!C) {
      FirstSubExpr = nullptr;
      return true;
    }

    auto* CastSubExpr = C->getSubExpr()->IgnoreParens();
    // Ignore cast expressions which cast nullptr literal.
    if (isa<CXXNullPtrLiteralExpr>(CastSubExpr)) {
      return true;
    }

    if (!FirstSubExpr)
      FirstSubExpr = CastSubExpr;

    // Casts of any other kind only contribute to the sequence; the
    // replacement is triggered by the null-to-(member-)pointer cast itself.
    if (C->getCastKind() != CK_NullToPointer &&
        C->getCastKind() != CK_NullToMemberPointer) {
      return true;
    }

    SourceLocation StartLoc = FirstSubExpr->getBeginLoc();
    SourceLocation EndLoc = FirstSubExpr->getEndLoc();

    // If the location comes from a macro arg expansion, *all* uses of that
    // arg must be checked to result in NullTo(Member)Pointer casts.
    //
    // If the location comes from a macro body expansion, check to see if it's
    // coming from one of the allowed 'NULL' macros.
    if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
      SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
                     FileLocEnd = SM.getFileLoc(EndLoc);
      SourceLocation ImmediateMacroArgLoc, MacroLoc;
      // Skip NULL macros used in macro.
      if (!getMacroAndArgLocations(StartLoc, ImmediateMacroArgLoc, MacroLoc) ||
          ImmediateMacroArgLoc != FileLocStart)
        return skipSubTree();

      if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
          allArgUsesValid(C)) {
        replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
      }
      return true;
    }

    if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
      StringRef OutermostMacroName =
          getOutermostMacroName(StartLoc, SM, Context.getLangOpts());

      // Check to see if the user wants to replace the macro being expanded.
      if (!llvm::is_contained(NullMacros, OutermostMacroName))
        return skipSubTree();

      // The replacement is applied at the file-level locations of the macro
      // use.
      StartLoc = SM.getFileLoc(StartLoc);
      EndLoc = SM.getFileLoc(EndLoc);
    }

    if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
      return skipSubTree();
    }
    replaceWithNullptr(Check, SM, StartLoc, EndLoc);

    return true;
  }

private:
  /// Requests that the next TraverseStmt() call does not descend. Always
  /// returns true so it can be used directly as a Visit* return value.
  bool skipSubTree() {
    PruneSubtree = true;
    return true;
  }

  /// Tests that all expansions of a macro arg, one of which expands to
  /// result in \p CE, yield NullTo(Member)Pointer casts.
  bool allArgUsesValid(const CastExpr *CE) {
    SourceLocation CastLoc = CE->getBeginLoc();

    // Step 1: Get location of macro arg and location of the macro the arg was
    // provided to.
    SourceLocation ArgLoc, MacroLoc;
    if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
      return false;

    // Step 2: Find the first ancestor that doesn't expand from this macro.
    DynTypedNode ContainingAncestor;
    if (!findContainingAncestor(DynTypedNode::create<Stmt>(*CE), MacroLoc,
                                ContainingAncestor))
      return false;

    // Step 3:
    // Visit children of this containing parent looking for the least-descended
    // nodes of the containing parent which are macro arg expansions that expand
    // from the given arg location.
    // Visitor needs: arg loc.
    MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
    if (const auto *D = ContainingAncestor.get<Decl>())
      ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
    else if (const auto *S = ContainingAncestor.get<Stmt>())
      ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
    else
      llvm_unreachable("Unhandled ContainingAncestor node type");

    return !ArgUsageVisitor.foundInvalid();
  }

  /// Given the SourceLocation for a macro arg expansion, finds the
  /// non-macro SourceLocation of the macro the arg was passed to and the
  /// non-macro SourceLocation of the argument in the arg list to that macro.
  /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
  /// These values are undefined if the return value is false.
  ///
  /// When the walk reaches an expansion that is not a macro-argument
  /// expansion, the result is true only if \c MacroLoc is a file location and
  /// the immediate macro name at the previous location is one of NullMacros.
  ///
  /// NOTE(review): \c MacroLoc is read before being assigned when the very
  /// first expansion is not a macro-argument expansion; the callers in this
  /// class pass default-constructed locations.
  ///
  /// \returns false if one of the returned SourceLocations would be a
  /// SourceLocation pointing within the definition of another macro.
  bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
                               SourceLocation &MacroLoc) {
    assert(Loc.isMacroID() && "Only reasonable to call this on macros");

    ArgLoc = Loc;

    // Find the location of the immediate macro expansion.
    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      SourceLocation OldArgLoc = ArgLoc;
      ArgLoc = Expansion.getExpansionLocStart();
      if (!Expansion.isMacroArgExpansion()) {
        if (!MacroLoc.isFileID())
          return false;

        StringRef Name =
            Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
        return llvm::is_contained(NullMacros, Name);
      }

      MacroLoc = SM.getExpansionRange(ArgLoc).getBegin();

      // Continue from the spelling of the argument, at the same offset the
      // original location had within its expansion entry.
      ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (ArgLoc.isFileID())
        return true;

      // If spelling location resides in the same FileID as macro expansion
      // location, it means there is no inner macro.
      FileID MacroFID = SM.getFileID(MacroLoc);
      if (SM.isInFileID(ArgLoc, MacroFID)) {
        // Don't transform this case. If the characters that caused the
        // null-conversion come from within a macro, they can't be changed.
        return false;
      }
    }

    llvm_unreachable("getMacroAndArgLocations");
  }

  /// Tests if TestMacroLoc is found while recursively unravelling
  /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
  /// Implementation is very similar to getMacroAndArgLocations() except in this
  /// case, it's not assumed that TestLoc is expanded from a macro argument.
  /// While unravelling expansions macro arguments are handled as with
  /// getMacroAndArgLocations() but in this function macro body expansions are
  /// also handled.
  ///
  /// False means either:
  /// - TestLoc is not from a macro expansion.
  /// - TestLoc is from a different macro expansion.
  bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
    if (TestLoc.isFileID()) {
      return false;
    }

    SourceLocation Loc = TestLoc, MacroLoc;

    while (true) {
      std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
      const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
      const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();

      Loc = Expansion.getExpansionLocStart();

      if (!Expansion.isMacroArgExpansion()) {
        if (Loc.isFileID()) {
          return Loc == TestMacroLoc;
        }
        // Since Loc is still a macro ID and it's not an argument expansion, we
        // don't need to do the work of handling an argument expansion. Simply
        // keep recursively expanding until we hit a FileID or a macro arg
        // expansion.
        continue;
      }

      MacroLoc = SM.getImmediateExpansionRange(Loc).getBegin();
      if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
        // Match made.
        return true;
      }

      Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
      if (Loc.isFileID()) {
        // If we made it this far without finding a match, there is no match to
        // be made.
        return false;
      }
    }

    llvm_unreachable("expandsFrom");
  }

  /// Given a starting point \c Start in the AST, find an ancestor that
  /// doesn't expand from the macro called at file location \c MacroLoc.
  /// On success the ancestor is stored in \c Result.
  ///
  /// \pre MacroLoc.isFileID()
  /// \returns true if such an ancestor was found, false otherwise.
  bool findContainingAncestor(DynTypedNode Start, SourceLocation MacroLoc,
                              DynTypedNode &Result) {
    // Below we're only following the first parent back up the AST. This should
    // be fine since for the statements we care about there should only be one
    // parent, except for the case specified below.

    assert(MacroLoc.isFileID());

    while (true) {
      const auto &Parents = Context.getParents(Start);
      if (Parents.empty())
        return false;
      if (Parents.size() > 1) {
        // If there is more than one parent, don't do the replacement unless
        // they are all InitListExpr (semantic and syntactic form). In this
        // case we can choose any one here, and the ASTVisitor will take care
        // of traversing the right one.
        for (const auto &Parent : Parents) {
          if (!Parent.get<InitListExpr>())
            return false;
        }
      }

      const DynTypedNode &Parent = Parents[0];

      // Loc stays invalid for parents that are neither a Decl nor a Stmt.
      SourceLocation Loc;
      if (const auto *D = Parent.get<Decl>())
        Loc = D->getBeginLoc();
      else if (const auto *S = Parent.get<Stmt>())
        Loc = S->getBeginLoc();

      // TypeLoc and NestedNameSpecifierLoc are members of the parent map. Skip
      // them and keep going up.
      if (Loc.isValid()) {
        if (!expandsFrom(Loc, MacroLoc)) {
          Result = Parent;
          return true;
        }
      }
      Start = Parent;
    }

    llvm_unreachable("findContainingAncestor");
  }

  SourceManager &SM;
  ASTContext &Context;
  // Names of the macros the user allows to be replaced by nullptr.
  ArrayRef<StringRef> NullMacros;
  ClangTidyCheck &Check;
  // Sub-expression of the first cast in the cast sequence currently being
  // visited; null when no sequence is in progress.
  Expr *FirstSubExpr = nullptr;
  // Set by skipSubTree(); consumed by the next TraverseStmt() call.
  bool PruneSubtree = false;
};
489 
490 } // namespace
491 
492 UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
493     : ClangTidyCheck(Name, Context),
494       NullMacrosStr(Options.get("NullMacros", "NULL")),
495       IgnoredTypes(utils::options::parseStringList(Options.get(
496           "IgnoredTypes",
497           "std::_CmpUnspecifiedParam::;^std::__cmp_cat::__unspec"))) {
498   StringRef(NullMacrosStr).split(NullMacros, ",");
499 }
500 
501 void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
502   Options.store(Opts, "NullMacros", NullMacrosStr);
503   Options.store(Opts, "IgnoredTypes",
504                 utils::options::serializeStringList(IgnoredTypes));
505 }
506 
507 void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
508   Finder->addMatcher(makeCastSequenceMatcher(IgnoredTypes), this);
509 }
510 
511 void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
512   const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
513   assert(NullCast && "Bad Callback. No node provided");
514 
515   if (Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>(
516           "matchBinopOperands") !=
517       Result.Nodes.getNodeAs<CXXRewrittenBinaryOperator>("checkBinopOperands"))
518     return;
519 
520   // Given an implicit null-ptr cast or an explicit cast with an implicit
521   // null-to-pointer cast within use CastSequenceVisitor to identify sequences
522   // of explicit casts that can be converted into 'nullptr'.
523   CastSequenceVisitor(*Result.Context, NullMacros, *this)
524       .TraverseStmt(const_cast<CastExpr *>(NullCast));
525 }
526 
527 } // namespace clang::tidy::modernize
528