xref: /llvm-project/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (revision fbd86d05fe51d45f19df8d63aee41d979c268f8f)
1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "LexerUtils.h"
10 #include "clang/AST/AST.h"
11 #include "clang/Basic/SourceManager.h"
12 #include <optional>
13 #include <utility>
14 
15 namespace clang::tidy::utils::lexer {
16 
17 std::pair<Token, SourceLocation>
18 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM,
19                          const LangOptions &LangOpts, bool SkipComments) {
20   const std::optional<Token> Tok =
21       Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments);
22 
23   if (Tok.has_value()) {
24     return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)};
25   }
26 
27   Token Token;
28   Token.setKind(tok::unknown);
29   return {Token, SourceLocation()};
30 }
31 
32 Token getPreviousToken(SourceLocation Location, const SourceManager &SM,
33                        const LangOptions &LangOpts, bool SkipComments) {
34   auto [Token, Start] =
35       getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments);
36   return Token;
37 }
38 
39 SourceLocation findPreviousTokenStart(SourceLocation Start,
40                                       const SourceManager &SM,
41                                       const LangOptions &LangOpts) {
42   if (Start.isInvalid() || Start.isMacroID())
43     return {};
44 
45   SourceLocation BeforeStart = Start.getLocWithOffset(-1);
46   if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
47     return {};
48 
49   return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
50 }
51 
52 SourceLocation findPreviousTokenKind(SourceLocation Start,
53                                      const SourceManager &SM,
54                                      const LangOptions &LangOpts,
55                                      tok::TokenKind TK) {
56   if (Start.isInvalid() || Start.isMacroID())
57     return {};
58 
59   while (true) {
60     SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
61     if (L.isInvalid() || L.isMacroID())
62       return {};
63 
64     Token T;
65     if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
66       return {};
67 
68     if (T.is(TK))
69       return T.getLocation();
70 
71     Start = L;
72   }
73 }
74 
75 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM,
76                                   const LangOptions &LangOpts) {
77   return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi);
78 }
79 
80 std::optional<Token>
81 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM,
82                               const LangOptions &LangOpts) {
83   while (Start.isValid()) {
84     std::optional<Token> CurrentToken =
85         Lexer::findNextToken(Start, SM, LangOpts);
86     if (!CurrentToken || !CurrentToken->is(tok::comment))
87       return CurrentToken;
88 
89     Start = CurrentToken->getLocation();
90   }
91 
92   return std::nullopt;
93 }
94 
95 bool rangeContainsExpansionsOrDirectives(SourceRange Range,
96                                          const SourceManager &SM,
97                                          const LangOptions &LangOpts) {
98   assert(Range.isValid() && "Invalid Range for relexing provided");
99   SourceLocation Loc = Range.getBegin();
100 
101   while (Loc <= Range.getEnd()) {
102     if (Loc.isMacroID())
103       return true;
104 
105     std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts);
106 
107     if (!Tok)
108       return true;
109 
110     if (Tok->is(tok::hash))
111       return true;
112 
113     Loc = Tok->getLocation();
114   }
115 
116   return false;
117 }
118 
119 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
120                                         CharSourceRange Range,
121                                         const ASTContext &Context,
122                                         const SourceManager &SM) {
123   assert((TK == tok::kw_const || TK == tok::kw_volatile ||
124           TK == tok::kw_restrict) &&
125          "TK is not a qualifier keyword");
126   std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin());
127   StringRef File = SM.getBufferData(LocInfo.first);
128   Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(),
129                  File.begin(), File.data() + LocInfo.second, File.end());
130   std::optional<Token> LastMatchBeforeTemplate;
131   std::optional<Token> LastMatchAfterTemplate;
132   bool SawTemplate = false;
133   Token Tok;
134   while (!RawLexer.LexFromRawLexer(Tok) &&
135          Range.getEnd() != Tok.getLocation() &&
136          !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) {
137     if (Tok.is(tok::raw_identifier)) {
138       IdentifierInfo &Info = Context.Idents.get(
139           StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength()));
140       Tok.setIdentifierInfo(&Info);
141       Tok.setKind(Info.getTokenID());
142     }
143     if (Tok.is(tok::less))
144       SawTemplate = true;
145     else if (Tok.isOneOf(tok::greater, tok::greatergreater))
146       LastMatchAfterTemplate = std::nullopt;
147     else if (Tok.is(TK)) {
148       if (SawTemplate)
149         LastMatchAfterTemplate = Tok;
150       else
151         LastMatchBeforeTemplate = Tok;
152     }
153   }
154   return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate
155                                                 : LastMatchBeforeTemplate;
156 }
157 
158 static bool breakAndReturnEnd(const Stmt &S) {
159   return isa<CompoundStmt, DeclStmt, NullStmt>(S);
160 }
161 
162 static bool breakAndReturnEndPlus1Token(const Stmt &S) {
163   return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt,
164              SEHLeaveStmt>(S);
165 }
166 
167 // Given a Stmt which does not include it's semicolon this method returns the
168 // SourceLocation of the semicolon.
169 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc,
170                                                   const SourceManager &SM,
171                                                   const LangOptions &LangOpts) {
172 
173   if (EndLoc.isMacroID()) {
174     // Assuming EndLoc points to a function call foo within macro F.
175     // This method is supposed to return location of the semicolon within
176     // those macro arguments:
177     //  F     (      foo()               ;   )
178     //  ^ EndLoc         ^ SpellingLoc   ^ next token of SpellingLoc
179     const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc);
180     std::optional<Token> NextTok =
181         findNextTokenSkippingComments(SpellingLoc, SM, LangOpts);
182 
183     // Was the next token found successfully?
184     // All macro issues are simply resolved by ensuring it's a semicolon.
185     if (NextTok && NextTok->is(tok::TokenKind::semi)) {
186       // Ideally this would return `F` with spelling location `;` (NextTok)
187       // following the example above. For now simply return NextTok location.
188       return NextTok->getLocation();
189     }
190 
191     // Fallthrough to 'normal handling'.
192     //  F     (      foo()              ) ;
193     //  ^ EndLoc         ^ SpellingLoc  ) ^ next token of EndLoc
194   }
195 
196   std::optional<Token> NextTok =
197       findNextTokenSkippingComments(EndLoc, SM, LangOpts);
198 
199   // Testing for semicolon again avoids some issues with macros.
200   if (NextTok && NextTok->is(tok::TokenKind::semi))
201     return NextTok->getLocation();
202 
203   return {};
204 }
205 
206 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM,
207                                 const LangOptions &LangOpts) {
208 
209   const Stmt *LastChild = &S;
210   while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) &&
211          !breakAndReturnEndPlus1Token(*LastChild)) {
212     for (const Stmt *Child : LastChild->children())
213       LastChild = Child;
214   }
215 
216   if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild))
217     return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts);
218 
219   return S.getEndLoc();
220 }
221 
222 SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl,
223                                                const SourceManager &SM) {
224   if (!FuncDecl)
225     return {};
226 
227   const LangOptions &LangOpts = FuncDecl->getLangOpts();
228 
229   if (FuncDecl->getNumParams() == 0) {
230     // Start at the beginning of the function declaration, and find the closing
231     // parenthesis after which we would place the noexcept specifier.
232     Token CurrentToken;
233     SourceLocation CurrentLocation = FuncDecl->getBeginLoc();
234     while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts,
235                                true)) {
236       if (CurrentToken.is(tok::r_paren))
237         return CurrentLocation.getLocWithOffset(1);
238 
239       CurrentLocation = CurrentToken.getEndLoc();
240     }
241 
242     // Failed to find the closing parenthesis, so just return an invalid
243     // SourceLocation.
244     return {};
245   }
246 
247   // FunctionDecl with parameters
248   const SourceLocation NoexceptLoc =
249       FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc();
250   if (NoexceptLoc.isValid())
251     return Lexer::findLocationAfterToken(
252         NoexceptLoc, tok::r_paren, SM, LangOpts,
253         /*SkipTrailingWhitespaceAndNewLine=*/true);
254 
255   return {};
256 }
257 
258 } // namespace clang::tidy::utils::lexer
259