1 //===--- LexerUtils.cpp - clang-tidy---------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "LexerUtils.h" 10 #include "clang/AST/AST.h" 11 #include "clang/Basic/SourceManager.h" 12 #include <optional> 13 #include <utility> 14 15 namespace clang::tidy::utils::lexer { 16 17 std::pair<Token, SourceLocation> 18 getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, 19 const LangOptions &LangOpts, bool SkipComments) { 20 const std::optional<Token> Tok = 21 Lexer::findPreviousToken(Location, SM, LangOpts, !SkipComments); 22 23 if (Tok.has_value()) { 24 return {*Tok, Lexer::GetBeginningOfToken(Tok->getLocation(), SM, LangOpts)}; 25 } 26 27 Token Token; 28 Token.setKind(tok::unknown); 29 return {Token, SourceLocation()}; 30 } 31 32 Token getPreviousToken(SourceLocation Location, const SourceManager &SM, 33 const LangOptions &LangOpts, bool SkipComments) { 34 auto [Token, Start] = 35 getPreviousTokenAndStart(Location, SM, LangOpts, SkipComments); 36 return Token; 37 } 38 39 SourceLocation findPreviousTokenStart(SourceLocation Start, 40 const SourceManager &SM, 41 const LangOptions &LangOpts) { 42 if (Start.isInvalid() || Start.isMacroID()) 43 return {}; 44 45 SourceLocation BeforeStart = Start.getLocWithOffset(-1); 46 if (BeforeStart.isInvalid() || BeforeStart.isMacroID()) 47 return {}; 48 49 return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts); 50 } 51 52 SourceLocation findPreviousTokenKind(SourceLocation Start, 53 const SourceManager &SM, 54 const LangOptions &LangOpts, 55 tok::TokenKind TK) { 56 if (Start.isInvalid() || Start.isMacroID()) 57 return {}; 58 59 while (true) { 60 SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts); 61 if (L.isInvalid() || L.isMacroID()) 62 return {}; 63 64 Token T; 65 if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true)) 66 return {}; 67 68 if (T.is(TK)) 69 return T.getLocation(); 70 71 Start = L; 72 } 73 } 74 75 SourceLocation findNextTerminator(SourceLocation Start, const SourceManager &SM, 76 const LangOptions &LangOpts) { 77 return findNextAnyTokenKind(Start, SM, LangOpts, tok::comma, tok::semi); 78 } 79 80 std::optional<Token> 81 findNextTokenSkippingComments(SourceLocation Start, const SourceManager &SM, 82 const LangOptions &LangOpts) { 83 while (Start.isValid()) { 84 std::optional<Token> CurrentToken = 85 Lexer::findNextToken(Start, SM, LangOpts); 86 if (!CurrentToken || !CurrentToken->is(tok::comment)) 87 return CurrentToken; 88 89 Start = CurrentToken->getLocation(); 90 } 91 92 return std::nullopt; 93 } 94 95 bool rangeContainsExpansionsOrDirectives(SourceRange Range, 96 const SourceManager &SM, 97 const LangOptions &LangOpts) { 98 assert(Range.isValid() && "Invalid Range for relexing provided"); 99 SourceLocation Loc = Range.getBegin(); 100 101 while (Loc <= Range.getEnd()) { 102 if (Loc.isMacroID()) 103 return true; 104 105 std::optional<Token> Tok = Lexer::findNextToken(Loc, SM, LangOpts); 106 107 if (!Tok) 108 return true; 109 110 if (Tok->is(tok::hash)) 111 return true; 112 113 Loc = Tok->getLocation(); 114 } 115 116 return false; 117 } 118 119 std::optional<Token> getQualifyingToken(tok::TokenKind TK, 120 CharSourceRange Range, 121 const ASTContext &Context, 122 const SourceManager &SM) { 123 assert((TK == tok::kw_const || TK == tok::kw_volatile || 124 TK == tok::kw_restrict) && 125 "TK is not a qualifier keyword"); 126 std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Range.getBegin()); 127 StringRef File = SM.getBufferData(LocInfo.first); 128 Lexer RawLexer(SM.getLocForStartOfFile(LocInfo.first), Context.getLangOpts(), 129 File.begin(), File.data() + LocInfo.second, File.end()); 130 std::optional<Token> LastMatchBeforeTemplate; 131 std::optional<Token> LastMatchAfterTemplate; 132 bool SawTemplate = false; 133 Token Tok; 134 while (!RawLexer.LexFromRawLexer(Tok) && 135 Range.getEnd() != Tok.getLocation() && 136 !SM.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) { 137 if (Tok.is(tok::raw_identifier)) { 138 IdentifierInfo &Info = Context.Idents.get( 139 StringRef(SM.getCharacterData(Tok.getLocation()), Tok.getLength())); 140 Tok.setIdentifierInfo(&Info); 141 Tok.setKind(Info.getTokenID()); 142 } 143 if (Tok.is(tok::less)) 144 SawTemplate = true; 145 else if (Tok.isOneOf(tok::greater, tok::greatergreater)) 146 LastMatchAfterTemplate = std::nullopt; 147 else if (Tok.is(TK)) { 148 if (SawTemplate) 149 LastMatchAfterTemplate = Tok; 150 else 151 LastMatchBeforeTemplate = Tok; 152 } 153 } 154 return LastMatchAfterTemplate != std::nullopt ? LastMatchAfterTemplate 155 : LastMatchBeforeTemplate; 156 } 157 158 static bool breakAndReturnEnd(const Stmt &S) { 159 return isa<CompoundStmt, DeclStmt, NullStmt>(S); 160 } 161 162 static bool breakAndReturnEndPlus1Token(const Stmt &S) { 163 return isa<Expr, DoStmt, ReturnStmt, BreakStmt, ContinueStmt, GotoStmt, 164 SEHLeaveStmt>(S); 165 } 166 167 // Given a Stmt which does not include it's semicolon this method returns the 168 // SourceLocation of the semicolon. 169 static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, 170 const SourceManager &SM, 171 const LangOptions &LangOpts) { 172 173 if (EndLoc.isMacroID()) { 174 // Assuming EndLoc points to a function call foo within macro F. 175 // This method is supposed to return location of the semicolon within 176 // those macro arguments: 177 // F ( foo() ; ) 178 // ^ EndLoc ^ SpellingLoc ^ next token of SpellingLoc 179 const SourceLocation SpellingLoc = SM.getSpellingLoc(EndLoc); 180 std::optional<Token> NextTok = 181 findNextTokenSkippingComments(SpellingLoc, SM, LangOpts); 182 183 // Was the next token found successfully? 184 // All macro issues are simply resolved by ensuring it's a semicolon. 185 if (NextTok && NextTok->is(tok::TokenKind::semi)) { 186 // Ideally this would return `F` with spelling location `;` (NextTok) 187 // following the example above. For now simply return NextTok location. 188 return NextTok->getLocation(); 189 } 190 191 // Fallthrough to 'normal handling'. 192 // F ( foo() ) ; 193 // ^ EndLoc ^ SpellingLoc ) ^ next token of EndLoc 194 } 195 196 std::optional<Token> NextTok = 197 findNextTokenSkippingComments(EndLoc, SM, LangOpts); 198 199 // Testing for semicolon again avoids some issues with macros. 200 if (NextTok && NextTok->is(tok::TokenKind::semi)) 201 return NextTok->getLocation(); 202 203 return {}; 204 } 205 206 SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, 207 const LangOptions &LangOpts) { 208 209 const Stmt *LastChild = &S; 210 while (!LastChild->children().empty() && !breakAndReturnEnd(*LastChild) && 211 !breakAndReturnEndPlus1Token(*LastChild)) { 212 for (const Stmt *Child : LastChild->children()) 213 LastChild = Child; 214 } 215 216 if (!breakAndReturnEnd(*LastChild) && breakAndReturnEndPlus1Token(*LastChild)) 217 return getSemicolonAfterStmtEndLoc(S.getEndLoc(), SM, LangOpts); 218 219 return S.getEndLoc(); 220 } 221 222 SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, 223 const SourceManager &SM) { 224 if (!FuncDecl) 225 return {}; 226 227 const LangOptions &LangOpts = FuncDecl->getLangOpts(); 228 229 if (FuncDecl->getNumParams() == 0) { 230 // Start at the beginning of the function declaration, and find the closing 231 // parenthesis after which we would place the noexcept specifier. 232 Token CurrentToken; 233 SourceLocation CurrentLocation = FuncDecl->getBeginLoc(); 234 while (!Lexer::getRawToken(CurrentLocation, CurrentToken, SM, LangOpts, 235 true)) { 236 if (CurrentToken.is(tok::r_paren)) 237 return CurrentLocation.getLocWithOffset(1); 238 239 CurrentLocation = CurrentToken.getEndLoc(); 240 } 241 242 // Failed to find the closing parenthesis, so just return an invalid 243 // SourceLocation. 244 return {}; 245 } 246 247 // FunctionDecl with parameters 248 const SourceLocation NoexceptLoc = 249 FuncDecl->getParamDecl(FuncDecl->getNumParams() - 1)->getEndLoc(); 250 if (NoexceptLoc.isValid()) 251 return Lexer::findLocationAfterToken( 252 NoexceptLoc, tok::r_paren, SM, LangOpts, 253 /*SkipTrailingWhitespaceAndNewLine=*/true); 254 255 return {}; 256 } 257 258 } // namespace clang::tidy::utils::lexer 259