xref: /llvm-project/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp (revision 20ce95f2940778da4b877865141e6cbecc73d43a)
1 //===--- UseNullptrCheck.cpp - clang-tidy----------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "UseNullptrCheck.h"
11 #include "clang/AST/ASTContext.h"
12 #include "clang/AST/RecursiveASTVisitor.h"
13 #include "clang/ASTMatchers/ASTMatchFinder.h"
14 #include "clang/Lex/Lexer.h"
15 
16 using namespace clang;
17 using namespace clang::ast_matchers;
18 using namespace llvm;
19 
20 namespace clang {
21 namespace tidy {
22 namespace modernize {
23 
/// \brief Identifier under which the cast-sequence matcher binds the node it
/// matches; check() retrieves the matched cast using the same identifier.
static const char CastSequence[] = "sequence";
25 
26 /// \brief Matches cast expressions that have a cast kind of CK_NullToPointer
27 /// or CK_NullToMemberPointer.
28 ///
29 /// Given
30 /// \code
31 ///   int *p = 0;
32 /// \endcode
33 /// implicitCastExpr(isNullToPointer()) matches the implicit cast clang adds
34 /// around \c 0.
35 AST_MATCHER(CastExpr, isNullToPointer) {
36   return Node.getCastKind() == CK_NullToPointer ||
37          Node.getCastKind() == CK_NullToMemberPointer;
38 }
39 
40 AST_MATCHER(Type, sugaredNullptrType) {
41   const Type *DesugaredType = Node.getUnqualifiedDesugaredType();
42   if (const BuiltinType *BT = dyn_cast<BuiltinType>(DesugaredType))
43     return BT->getKind() == BuiltinType::NullPtr;
44   return false;
45 }
46 
47 /// \brief Create a matcher that finds implicit casts as well as the head of a
48 /// sequence of zero or more nested explicit casts that have an implicit cast
49 /// to null within.
50 /// Finding sequences of explict casts is necessary so that an entire sequence
51 /// can be replaced instead of just the inner-most implicit cast.
52 StatementMatcher makeCastSequenceMatcher() {
53   StatementMatcher ImplicitCastToNull = implicitCastExpr(
54       isNullToPointer(),
55       unless(hasSourceExpression(hasType(sugaredNullptrType()))));
56 
57   return castExpr(anyOf(ImplicitCastToNull,
58                         explicitCastExpr(hasDescendant(ImplicitCastToNull))),
59                   unless(hasAncestor(explicitCastExpr())))
60       .bind(CastSequence);
61 }
62 
63 bool isReplaceableRange(SourceLocation StartLoc, SourceLocation EndLoc,
64                         const SourceManager &SM) {
65   return SM.isWrittenInSameFile(StartLoc, EndLoc);
66 }
67 
68 /// \brief Replaces the provided range with the text "nullptr", but only if
69 /// the start and end location are both in main file.
70 /// Returns true if and only if a replacement was made.
71 void replaceWithNullptr(ClangTidyCheck &Check, SourceManager &SM,
72                         SourceLocation StartLoc, SourceLocation EndLoc) {
73   CharSourceRange Range(SourceRange(StartLoc, EndLoc), true);
74   // Add a space if nullptr follows an alphanumeric character. This happens
75   // whenever there is an c-style explicit cast to nullptr not surrounded by
76   // parentheses and right beside a return statement.
77   SourceLocation PreviousLocation = StartLoc.getLocWithOffset(-1);
78   bool NeedsSpace = isAlphanumeric(*SM.getCharacterData(PreviousLocation));
79   Check.diag(Range.getBegin(), "use nullptr") << FixItHint::CreateReplacement(
80       Range, NeedsSpace ? " nullptr" : "nullptr");
81 }
82 
83 /// \brief Returns the name of the outermost macro.
84 ///
85 /// Given
86 /// \code
87 /// #define MY_NULL NULL
88 /// \endcode
89 /// If \p Loc points to NULL, this function will return the name MY_NULL.
90 StringRef getOutermostMacroName(SourceLocation Loc, const SourceManager &SM,
91                                 const LangOptions &LO) {
92   assert(Loc.isMacroID());
93   SourceLocation OutermostMacroLoc;
94 
95   while (Loc.isMacroID()) {
96     OutermostMacroLoc = Loc;
97     Loc = SM.getImmediateMacroCallerLoc(Loc);
98   }
99 
100   return Lexer::getImmediateMacroName(OutermostMacroLoc, SM, LO);
101 }
102 
103 /// \brief RecursiveASTVisitor for ensuring all nodes rooted at a given AST
104 /// subtree that have file-level source locations corresponding to a macro
105 /// argument have implicit NullTo(Member)Pointer nodes as ancestors.
106 class MacroArgUsageVisitor : public RecursiveASTVisitor<MacroArgUsageVisitor> {
107 public:
108   MacroArgUsageVisitor(SourceLocation CastLoc, const SourceManager &SM)
109       : CastLoc(CastLoc), SM(SM), Visited(false), CastFound(false),
110         InvalidFound(false) {
111     assert(CastLoc.isFileID());
112   }
113 
114   bool TraverseStmt(Stmt *S) {
115     bool VisitedPreviously = Visited;
116 
117     if (!RecursiveASTVisitor<MacroArgUsageVisitor>::TraverseStmt(S))
118       return false;
119 
120     // The point at which VisitedPreviously is false and Visited is true is the
121     // root of a subtree containing nodes whose locations match CastLoc. It's
122     // at this point we test that the Implicit NullTo(Member)Pointer cast was
123     // found or not.
124     if (!VisitedPreviously) {
125       if (Visited && !CastFound) {
126         // Found nodes with matching SourceLocations but didn't come across a
127         // cast. This is an invalid macro arg use. Can stop traversal
128         // completely now.
129         InvalidFound = true;
130         return false;
131       }
132       // Reset state as we unwind back up the tree.
133       CastFound = false;
134       Visited = false;
135     }
136     return true;
137   }
138 
139   bool VisitStmt(Stmt *S) {
140     if (SM.getFileLoc(S->getLocStart()) != CastLoc)
141       return true;
142     Visited = true;
143 
144     const ImplicitCastExpr *Cast = dyn_cast<ImplicitCastExpr>(S);
145     if (Cast && (Cast->getCastKind() == CK_NullToPointer ||
146                  Cast->getCastKind() == CK_NullToMemberPointer))
147       CastFound = true;
148 
149     return true;
150   }
151 
152   bool foundInvalid() const { return InvalidFound; }
153 
154 private:
155   SourceLocation CastLoc;
156   const SourceManager &SM;
157 
158   bool Visited;
159   bool CastFound;
160   bool InvalidFound;
161 };
162 
163 /// \brief Looks for implicit casts as well as sequences of 0 or more explicit
164 /// casts with an implicit null-to-pointer cast within.
165 ///
166 /// The matcher this visitor is used with will find a single implicit cast or a
167 /// top-most explicit cast (i.e. it has no explicit casts as an ancestor) where
168 /// an implicit cast is nested within. However, there is no guarantee that only
169 /// explicit casts exist between the found top-most explicit cast and the
170 /// possibly more than one nested implicit cast. This visitor finds all cast
171 /// sequences with an implicit cast to null within and creates a replacement
172 /// leaving the outermost explicit cast unchanged to avoid introducing
173 /// ambiguities.
174 class CastSequenceVisitor : public RecursiveASTVisitor<CastSequenceVisitor> {
175 public:
176   CastSequenceVisitor(ASTContext &Context, ArrayRef<StringRef> NullMacros,
177                       ClangTidyCheck &check)
178       : SM(Context.getSourceManager()), Context(Context),
179         NullMacros(NullMacros), Check(check), FirstSubExpr(nullptr),
180         PruneSubtree(false) {}
181 
182   bool TraverseStmt(Stmt *S) {
183     // Stop traversing down the tree if requested.
184     if (PruneSubtree) {
185       PruneSubtree = false;
186       return true;
187     }
188     return RecursiveASTVisitor<CastSequenceVisitor>::TraverseStmt(S);
189   }
190 
191   // Only VisitStmt is overridden as we shouldn't find other base AST types
192   // within a cast expression.
193   bool VisitStmt(Stmt *S) {
194     CastExpr *C = dyn_cast<CastExpr>(S);
195     if (!C) {
196       FirstSubExpr = nullptr;
197       return true;
198     }
199     if (!FirstSubExpr)
200       FirstSubExpr = C->getSubExpr()->IgnoreParens();
201 
202     if (C->getCastKind() != CK_NullToPointer &&
203         C->getCastKind() != CK_NullToMemberPointer) {
204       return true;
205     }
206 
207     SourceLocation StartLoc = FirstSubExpr->getLocStart();
208     SourceLocation EndLoc = FirstSubExpr->getLocEnd();
209 
210     // If the location comes from a macro arg expansion, *all* uses of that
211     // arg must be checked to result in NullTo(Member)Pointer casts.
212     //
213     // If the location comes from a macro body expansion, check to see if its
214     // coming from one of the allowed 'NULL' macros.
215     if (SM.isMacroArgExpansion(StartLoc) && SM.isMacroArgExpansion(EndLoc)) {
216       SourceLocation FileLocStart = SM.getFileLoc(StartLoc),
217                      FileLocEnd = SM.getFileLoc(EndLoc);
218       if (isReplaceableRange(FileLocStart, FileLocEnd, SM) &&
219           allArgUsesValid(C)) {
220         replaceWithNullptr(Check, SM, FileLocStart, FileLocEnd);
221       }
222       return skipSubTree();
223     }
224 
225     if (SM.isMacroBodyExpansion(StartLoc) && SM.isMacroBodyExpansion(EndLoc)) {
226       StringRef OutermostMacroName =
227           getOutermostMacroName(StartLoc, SM, Context.getLangOpts());
228 
229       // Check to see if the user wants to replace the macro being expanded.
230       if (std::find(NullMacros.begin(), NullMacros.end(), OutermostMacroName) ==
231           NullMacros.end()) {
232         return skipSubTree();
233       }
234 
235       StartLoc = SM.getFileLoc(StartLoc);
236       EndLoc = SM.getFileLoc(EndLoc);
237     }
238 
239     if (!isReplaceableRange(StartLoc, EndLoc, SM)) {
240       return skipSubTree();
241     }
242     replaceWithNullptr(Check, SM, StartLoc, EndLoc);
243 
244     return skipSubTree();
245   }
246 
247 private:
248   bool skipSubTree() {
249     PruneSubtree = true;
250     return true;
251   }
252 
253   /// \brief Tests that all expansions of a macro arg, one of which expands to
254   /// result in \p CE, yield NullTo(Member)Pointer casts.
255   bool allArgUsesValid(const CastExpr *CE) {
256     SourceLocation CastLoc = CE->getLocStart();
257 
258     // Step 1: Get location of macro arg and location of the macro the arg was
259     // provided to.
260     SourceLocation ArgLoc, MacroLoc;
261     if (!getMacroAndArgLocations(CastLoc, ArgLoc, MacroLoc))
262       return false;
263 
264     // Step 2: Find the first ancestor that doesn't expand from this macro.
265     ast_type_traits::DynTypedNode ContainingAncestor;
266     if (!findContainingAncestor(
267             ast_type_traits::DynTypedNode::create<Stmt>(*CE), MacroLoc,
268             ContainingAncestor))
269       return false;
270 
271     // Step 3:
272     // Visit children of this containing parent looking for the least-descended
273     // nodes of the containing parent which are macro arg expansions that expand
274     // from the given arg location.
275     // Visitor needs: arg loc
276     MacroArgUsageVisitor ArgUsageVisitor(SM.getFileLoc(CastLoc), SM);
277     if (const auto *D = ContainingAncestor.get<Decl>())
278       ArgUsageVisitor.TraverseDecl(const_cast<Decl *>(D));
279     else if (const auto *S = ContainingAncestor.get<Stmt>())
280       ArgUsageVisitor.TraverseStmt(const_cast<Stmt *>(S));
281     else
282       llvm_unreachable("Unhandled ContainingAncestor node type");
283 
284     return !ArgUsageVisitor.foundInvalid();
285   }
286 
287   /// \brief Given the SourceLocation for a macro arg expansion, finds the
288   /// non-macro SourceLocation of the macro the arg was passed to and the
289   /// non-macro SourceLocation of the argument in the arg list to that macro.
290   /// These results are returned via \c MacroLoc and \c ArgLoc respectively.
291   /// These values are undefined if the return value is false.
292   ///
293   /// \returns false if one of the returned SourceLocations would be a
294   /// SourceLocation pointing within the definition of another macro.
295   bool getMacroAndArgLocations(SourceLocation Loc, SourceLocation &ArgLoc,
296                                SourceLocation &MacroLoc) {
297     assert(Loc.isMacroID() && "Only reasonble to call this on macros");
298 
299     ArgLoc = Loc;
300 
301     // Find the location of the immediate macro expansion.
302     while (true) {
303       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(ArgLoc);
304       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
305       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
306 
307       SourceLocation OldArgLoc = ArgLoc;
308       ArgLoc = Expansion.getExpansionLocStart();
309       if (!Expansion.isMacroArgExpansion()) {
310         if (!MacroLoc.isFileID())
311           return false;
312 
313         StringRef Name =
314             Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts());
315         return std::find(NullMacros.begin(), NullMacros.end(), Name) !=
316                NullMacros.end();
317       }
318 
319       MacroLoc = SM.getImmediateExpansionRange(ArgLoc).first;
320 
321       ArgLoc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
322       if (ArgLoc.isFileID())
323         return true;
324 
325       // If spelling location resides in the same FileID as macro expansion
326       // location, it means there is no inner macro.
327       FileID MacroFID = SM.getFileID(MacroLoc);
328       if (SM.isInFileID(ArgLoc, MacroFID)) {
329         // Don't transform this case. If the characters that caused the
330         // null-conversion come from within a macro, they can't be changed.
331         return false;
332       }
333     }
334 
335     llvm_unreachable("getMacroAndArgLocations");
336   }
337 
338   /// \brief Tests if TestMacroLoc is found while recursively unravelling
339   /// expansions starting at TestLoc. TestMacroLoc.isFileID() must be true.
340   /// Implementation is very similar to getMacroAndArgLocations() except in this
341   /// case, it's not assumed that TestLoc is expanded from a macro argument.
342   /// While unravelling expansions macro arguments are handled as with
343   /// getMacroAndArgLocations() but in this function macro body expansions are
344   /// also handled.
345   ///
346   /// False means either:
347   /// - TestLoc is not from a macro expansion
348   /// - TestLoc is from a different macro expansion
349   bool expandsFrom(SourceLocation TestLoc, SourceLocation TestMacroLoc) {
350     if (TestLoc.isFileID()) {
351       return false;
352     }
353 
354     SourceLocation Loc = TestLoc, MacroLoc;
355 
356     while (true) {
357       std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
358       const SrcMgr::SLocEntry *E = &SM.getSLocEntry(LocInfo.first);
359       const SrcMgr::ExpansionInfo &Expansion = E->getExpansion();
360 
361       Loc = Expansion.getExpansionLocStart();
362 
363       if (!Expansion.isMacroArgExpansion()) {
364         if (Loc.isFileID()) {
365           return Loc == TestMacroLoc;
366         }
367         // Since Loc is still a macro ID and it's not an argument expansion, we
368         // don't need to do the work of handling an argument expansion. Simply
369         // keep recursively expanding until we hit a FileID or a macro arg
370         // expansion or a macro arg expansion.
371         continue;
372       }
373 
374       MacroLoc = SM.getImmediateExpansionRange(Loc).first;
375       if (MacroLoc.isFileID() && MacroLoc == TestMacroLoc) {
376         // Match made.
377         return true;
378       }
379 
380       Loc = Expansion.getSpellingLoc().getLocWithOffset(LocInfo.second);
381       if (Loc.isFileID()) {
382         // If we made it this far without finding a match, there is no match to
383         // be made.
384         return false;
385       }
386     }
387 
388     llvm_unreachable("expandsFrom");
389   }
390 
391   /// \brief Given a starting point \c Start in the AST, find an ancestor that
392   /// doesn't expand from the macro called at file location \c MacroLoc.
393   ///
394   /// \pre MacroLoc.isFileID()
395   /// \returns true if such an ancestor was found, false otherwise.
396   bool findContainingAncestor(ast_type_traits::DynTypedNode Start,
397                               SourceLocation MacroLoc,
398                               ast_type_traits::DynTypedNode &Result) {
399     // Below we're only following the first parent back up the AST. This should
400     // be fine since for the statements we care about there should only be one
401     // parent as far up as we care. If this assumption doesn't hold, need to
402     // revisit what to do here.
403 
404     assert(MacroLoc.isFileID());
405 
406     while (true) {
407       const auto &Parents = Context.getParents(Start);
408       if (Parents.empty())
409         return false;
410       assert(Parents.size() == 1 &&
411              "Found an ancestor with more than one parent!");
412 
413       const ast_type_traits::DynTypedNode &Parent = Parents[0];
414 
415       SourceLocation Loc;
416       if (const auto *D = Parent.get<Decl>())
417         Loc = D->getLocStart();
418       else if (const auto *S = Parent.get<Stmt>())
419         Loc = S->getLocStart();
420       else
421         llvm_unreachable("Expected to find Decl or Stmt containing ancestor");
422 
423       if (!expandsFrom(Loc, MacroLoc)) {
424         Result = Parent;
425         return true;
426       }
427       Start = Parent;
428     }
429 
430     llvm_unreachable("findContainingAncestor");
431   }
432 
433 private:
434   SourceManager &SM;
435   ASTContext &Context;
436   ArrayRef<StringRef> NullMacros;
437   ClangTidyCheck &Check;
438   Expr *FirstSubExpr;
439   bool PruneSubtree;
440 };
441 
442 UseNullptrCheck::UseNullptrCheck(StringRef Name, ClangTidyContext *Context)
443     : ClangTidyCheck(Name, Context),
444       NullMacrosStr(Options.get("NullMacros", "")) {
445   StringRef(NullMacrosStr).split(NullMacros, ",");
446 }
447 
448 void UseNullptrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
449   Options.store(Opts, "NullMacros", NullMacrosStr);
450 }
451 
452 void UseNullptrCheck::registerMatchers(MatchFinder *Finder) {
453   Finder->addMatcher(makeCastSequenceMatcher(), this);
454 }
455 
456 void UseNullptrCheck::check(const MatchFinder::MatchResult &Result) {
457   const auto *NullCast = Result.Nodes.getNodeAs<CastExpr>(CastSequence);
458   assert(NullCast && "Bad Callback. No node provided");
459 
460   // Given an implicit null-ptr cast or an explicit cast with an implicit
461   // null-to-pointer cast within use CastSequenceVisitor to identify sequences
462   // of explicit casts that can be converted into 'nullptr'.
463   CastSequenceVisitor(*Result.Context, NullMacros, *this)
464       .TraverseStmt(const_cast<CastExpr *>(NullCast));
465 }
466 
467 } // namespace modernize
468 } // namespace tidy
469 } // namespace clang
470