181ad6265SDimitry Andric //===- DependencyDirectivesScanner.cpp ------------------------------------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric /// 981ad6265SDimitry Andric /// \file 1081ad6265SDimitry Andric /// This is the interface for scanning header and source files to get the 1181ad6265SDimitry Andric /// minimum necessary preprocessor directives for evaluating includes. It 1281ad6265SDimitry Andric /// reduces the source down to #define, #include, #import, @import, and any 1381ad6265SDimitry Andric /// conditional preprocessor logic that contains one of those. 1481ad6265SDimitry Andric /// 1581ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1681ad6265SDimitry Andric 1781ad6265SDimitry Andric #include "clang/Lex/DependencyDirectivesScanner.h" 1881ad6265SDimitry Andric #include "clang/Basic/CharInfo.h" 1981ad6265SDimitry Andric #include "clang/Basic/Diagnostic.h" 2081ad6265SDimitry Andric #include "clang/Lex/LexDiagnostic.h" 2181ad6265SDimitry Andric #include "clang/Lex/Lexer.h" 2206c3fb27SDimitry Andric #include "clang/Lex/Pragma.h" 2381ad6265SDimitry Andric #include "llvm/ADT/ScopeExit.h" 2481ad6265SDimitry Andric #include "llvm/ADT/SmallString.h" 2581ad6265SDimitry Andric #include "llvm/ADT/StringMap.h" 2681ad6265SDimitry Andric #include "llvm/ADT/StringSwitch.h" 27bdd1243dSDimitry Andric #include <optional> 2881ad6265SDimitry Andric 2981ad6265SDimitry Andric using namespace clang; 3081ad6265SDimitry Andric using namespace clang::dependency_directives_scan; 3181ad6265SDimitry Andric using namespace llvm; 3281ad6265SDimitry Andric 3381ad6265SDimitry Andric namespace { 3481ad6265SDimitry Andric 3581ad6265SDimitry Andric struct DirectiveWithTokens { 3681ad6265SDimitry Andric DirectiveKind Kind; 3781ad6265SDimitry Andric unsigned NumTokens; 3881ad6265SDimitry Andric 3981ad6265SDimitry Andric DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) 4081ad6265SDimitry Andric : Kind(Kind), NumTokens(NumTokens) {} 4181ad6265SDimitry Andric }; 4281ad6265SDimitry Andric 4381ad6265SDimitry Andric /// Does an efficient "scan" of the sources to detect the presence of 4481ad6265SDimitry Andric /// preprocessor (or module import) directives and collects the raw lexed tokens 4581ad6265SDimitry Andric /// for those directives so that the \p Lexer can "replay" them when the file is 4681ad6265SDimitry Andric /// included. 4781ad6265SDimitry Andric /// 4881ad6265SDimitry Andric /// Note that the behavior of the raw lexer is affected by the language mode, 4981ad6265SDimitry Andric /// while at this point we want to do a scan and collect tokens once, 5081ad6265SDimitry Andric /// irrespective of the language mode that the file will get included in. To 5181ad6265SDimitry Andric /// compensate for that the \p Lexer, while "replaying", will adjust a token 5281ad6265SDimitry Andric /// where appropriate, when it could affect the preprocessor's state. 5381ad6265SDimitry Andric /// For example in a directive like 5481ad6265SDimitry Andric /// 5581ad6265SDimitry Andric /// \code 5681ad6265SDimitry Andric /// #if __has_cpp_attribute(clang::fallthrough) 5781ad6265SDimitry Andric /// \endcode 5881ad6265SDimitry Andric /// 5981ad6265SDimitry Andric /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 6081ad6265SDimitry Andric /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' 6181ad6265SDimitry Andric /// while in C++ mode. 6281ad6265SDimitry Andric struct Scanner { 6381ad6265SDimitry Andric Scanner(StringRef Input, 6481ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 6581ad6265SDimitry Andric DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) 6681ad6265SDimitry Andric : Input(Input), Tokens(Tokens), Diags(Diags), 6781ad6265SDimitry Andric InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), 6881ad6265SDimitry Andric TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), 6981ad6265SDimitry Andric Input.end()) {} 7081ad6265SDimitry Andric 7181ad6265SDimitry Andric static LangOptions getLangOptsForDepScanning() { 7281ad6265SDimitry Andric LangOptions LangOpts; 7381ad6265SDimitry Andric // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 7481ad6265SDimitry Andric LangOpts.ObjC = true; 7581ad6265SDimitry Andric LangOpts.LineComment = true; 7606c3fb27SDimitry Andric // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and 7706c3fb27SDimitry Andric // R"()" literals. 7881ad6265SDimitry Andric return LangOpts; 7981ad6265SDimitry Andric } 8081ad6265SDimitry Andric 8181ad6265SDimitry Andric /// Lex the provided source and emit the directive tokens. 8281ad6265SDimitry Andric /// 8381ad6265SDimitry Andric /// \returns True on error. 8481ad6265SDimitry Andric bool scan(SmallVectorImpl<Directive> &Directives); 8581ad6265SDimitry Andric 8681ad6265SDimitry Andric private: 8781ad6265SDimitry Andric /// Lexes next token and advances \p First and the \p Lexer. 88bdd1243dSDimitry Andric [[nodiscard]] dependency_directives_scan::Token & 8981ad6265SDimitry Andric lexToken(const char *&First, const char *const End); 9081ad6265SDimitry Andric 9181ad6265SDimitry Andric dependency_directives_scan::Token &lexIncludeFilename(const char *&First, 9281ad6265SDimitry Andric const char *const End); 9381ad6265SDimitry Andric 94bdd1243dSDimitry Andric void skipLine(const char *&First, const char *const End); 95bdd1243dSDimitry Andric void skipDirective(StringRef Name, const char *&First, const char *const End); 96bdd1243dSDimitry Andric 9706c3fb27SDimitry Andric /// Returns the spelling of a string literal or identifier after performing 9806c3fb27SDimitry Andric /// any processing needed to handle \c clang::Token::NeedsCleaning. 9906c3fb27SDimitry Andric StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok); 10006c3fb27SDimitry Andric 10181ad6265SDimitry Andric /// Lexes next token and if it is identifier returns its string, otherwise 102bdd1243dSDimitry Andric /// it skips the current line and returns \p std::nullopt. 10381ad6265SDimitry Andric /// 10481ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 10581ad6265SDimitry Andric /// advance beyond the token. 106bdd1243dSDimitry Andric [[nodiscard]] std::optional<StringRef> 10781ad6265SDimitry Andric tryLexIdentifierOrSkipLine(const char *&First, const char *const End); 10881ad6265SDimitry Andric 10981ad6265SDimitry Andric /// Used when it is certain that next token is an identifier. 110bdd1243dSDimitry Andric [[nodiscard]] StringRef lexIdentifier(const char *&First, 11181ad6265SDimitry Andric const char *const End); 11281ad6265SDimitry Andric 11381ad6265SDimitry Andric /// Lexes next token and returns true iff it is an identifier that matches \p 11481ad6265SDimitry Andric /// Id, otherwise it skips the current line and returns false. 11581ad6265SDimitry Andric /// 11681ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 11781ad6265SDimitry Andric /// advance beyond the token. 118bdd1243dSDimitry Andric [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, 11981ad6265SDimitry Andric const char *&First, 12081ad6265SDimitry Andric const char *const End); 12181ad6265SDimitry Andric 12206c3fb27SDimitry Andric /// Lexes next token and returns true iff it matches the kind \p K. 12306c3fb27SDimitry Andric /// Otherwise it skips the current line and returns false. 12406c3fb27SDimitry Andric /// 12506c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 12606c3fb27SDimitry Andric /// advance beyond the token. 12706c3fb27SDimitry Andric [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 12806c3fb27SDimitry Andric const char *const End); 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric /// Lexes next token and if it is string literal, returns its string. 13106c3fb27SDimitry Andric /// Otherwise, it skips the current line and returns \p std::nullopt. 13206c3fb27SDimitry Andric /// 13306c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 13406c3fb27SDimitry Andric /// advance beyond the token. 13506c3fb27SDimitry Andric [[nodiscard]] std::optional<StringRef> 13606c3fb27SDimitry Andric tryLexStringLiteralOrSkipLine(const char *&First, const char *const End); 13706c3fb27SDimitry Andric 138bdd1243dSDimitry Andric [[nodiscard]] bool scanImpl(const char *First, const char *const End); 139bdd1243dSDimitry Andric [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); 140bdd1243dSDimitry Andric [[nodiscard]] bool lexAt(const char *&First, const char *const End); 141bdd1243dSDimitry Andric [[nodiscard]] bool lexModule(const char *&First, const char *const End); 142bdd1243dSDimitry Andric [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, 14381ad6265SDimitry Andric const char *const End); 144bdd1243dSDimitry Andric [[nodiscard]] bool lexPragma(const char *&First, const char *const End); 14506c3fb27SDimitry Andric [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End); 146bdd1243dSDimitry Andric [[nodiscard]] bool lexEndif(const char *&First, const char *const End); 147bdd1243dSDimitry Andric [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, 14881ad6265SDimitry Andric const char *const End); 149bdd1243dSDimitry Andric [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, 15081ad6265SDimitry Andric const char *&First, 15181ad6265SDimitry Andric const char *const End); 15281ad6265SDimitry Andric void lexPPDirectiveBody(const char *&First, const char *const End); 15381ad6265SDimitry Andric 15481ad6265SDimitry Andric DirectiveWithTokens &pushDirective(DirectiveKind Kind) { 15581ad6265SDimitry Andric Tokens.append(CurDirToks); 15681ad6265SDimitry Andric DirsWithToks.emplace_back(Kind, CurDirToks.size()); 15781ad6265SDimitry Andric CurDirToks.clear(); 15881ad6265SDimitry Andric return DirsWithToks.back(); 15981ad6265SDimitry Andric } 16081ad6265SDimitry Andric void popDirective() { 16181ad6265SDimitry Andric Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); 16281ad6265SDimitry Andric } 16381ad6265SDimitry Andric DirectiveKind topDirective() const { 16481ad6265SDimitry Andric return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; 16581ad6265SDimitry Andric } 16681ad6265SDimitry Andric 16781ad6265SDimitry Andric unsigned getOffsetAt(const char *CurPtr) const { 16881ad6265SDimitry Andric return CurPtr - Input.data(); 16981ad6265SDimitry Andric } 17081ad6265SDimitry Andric 17181ad6265SDimitry Andric /// Reports a diagnostic if the diagnostic engine is provided. Always returns 17281ad6265SDimitry Andric /// true at the end. 17381ad6265SDimitry Andric bool reportError(const char *CurPtr, unsigned Err); 17481ad6265SDimitry Andric 17581ad6265SDimitry Andric StringMap<char> SplitIds; 17681ad6265SDimitry Andric StringRef Input; 17781ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens; 17881ad6265SDimitry Andric DiagnosticsEngine *Diags; 17981ad6265SDimitry Andric SourceLocation InputSourceLoc; 18081ad6265SDimitry Andric 181bdd1243dSDimitry Andric const char *LastTokenPtr = nullptr; 18281ad6265SDimitry Andric /// Keeps track of the tokens for the currently lexed directive. Once a 18381ad6265SDimitry Andric /// directive is fully lexed and "committed" then the tokens get appended to 18481ad6265SDimitry Andric /// \p Tokens and \p CurDirToks is cleared for the next directive. 18581ad6265SDimitry Andric SmallVector<dependency_directives_scan::Token, 32> CurDirToks; 18681ad6265SDimitry Andric /// The directives that were lexed along with the number of tokens that each 18781ad6265SDimitry Andric /// directive contains. The tokens of all the directives are kept in \p Tokens 18881ad6265SDimitry Andric /// vector, in the same order as the directives order in \p DirsWithToks. 18981ad6265SDimitry Andric SmallVector<DirectiveWithTokens, 64> DirsWithToks; 19081ad6265SDimitry Andric LangOptions LangOpts; 19181ad6265SDimitry Andric Lexer TheLexer; 19281ad6265SDimitry Andric }; 19381ad6265SDimitry Andric 19481ad6265SDimitry Andric } // end anonymous namespace 19581ad6265SDimitry Andric 19681ad6265SDimitry Andric bool Scanner::reportError(const char *CurPtr, unsigned Err) { 19781ad6265SDimitry Andric if (!Diags) 19881ad6265SDimitry Andric return true; 19981ad6265SDimitry Andric assert(CurPtr >= Input.data() && "invalid buffer ptr"); 20081ad6265SDimitry Andric Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); 20181ad6265SDimitry Andric return true; 20281ad6265SDimitry Andric } 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric static void skipOverSpaces(const char *&First, const char *const End) { 20581ad6265SDimitry Andric while (First != End && isHorizontalWhitespace(*First)) 20681ad6265SDimitry Andric ++First; 20781ad6265SDimitry Andric } 20881ad6265SDimitry Andric 209bdd1243dSDimitry Andric [[nodiscard]] static bool isRawStringLiteral(const char *First, 21081ad6265SDimitry Andric const char *Current) { 21181ad6265SDimitry Andric assert(First <= Current); 21281ad6265SDimitry Andric 21381ad6265SDimitry Andric // Check if we can even back up. 21481ad6265SDimitry Andric if (*Current != '"' || First == Current) 21581ad6265SDimitry Andric return false; 21681ad6265SDimitry Andric 21781ad6265SDimitry Andric // Check for an "R". 21881ad6265SDimitry Andric --Current; 21981ad6265SDimitry Andric if (*Current != 'R') 22081ad6265SDimitry Andric return false; 22181ad6265SDimitry Andric if (First == Current || !isAsciiIdentifierContinue(*--Current)) 22281ad6265SDimitry Andric return true; 22381ad6265SDimitry Andric 22481ad6265SDimitry Andric // Check for a prefix of "u", "U", or "L". 22581ad6265SDimitry Andric if (*Current == 'u' || *Current == 'U' || *Current == 'L') 22681ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 22781ad6265SDimitry Andric 22881ad6265SDimitry Andric // Check for a prefix of "u8". 22981ad6265SDimitry Andric if (*Current != '8' || First == Current || *Current-- != 'u') 23081ad6265SDimitry Andric return false; 23181ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 23281ad6265SDimitry Andric } 23381ad6265SDimitry Andric 23481ad6265SDimitry Andric static void skipRawString(const char *&First, const char *const End) { 23581ad6265SDimitry Andric assert(First[0] == '"'); 23681ad6265SDimitry Andric assert(First[-1] == 'R'); 23781ad6265SDimitry Andric 23881ad6265SDimitry Andric const char *Last = ++First; 23981ad6265SDimitry Andric while (Last != End && *Last != '(') 24081ad6265SDimitry Andric ++Last; 24181ad6265SDimitry Andric if (Last == End) { 24281ad6265SDimitry Andric First = Last; // Hit the end... just give up. 24381ad6265SDimitry Andric return; 24481ad6265SDimitry Andric } 24581ad6265SDimitry Andric 24681ad6265SDimitry Andric StringRef Terminator(First, Last - First); 24781ad6265SDimitry Andric for (;;) { 24881ad6265SDimitry Andric // Move First to just past the next ")". 24981ad6265SDimitry Andric First = Last; 25081ad6265SDimitry Andric while (First != End && *First != ')') 25181ad6265SDimitry Andric ++First; 25281ad6265SDimitry Andric if (First == End) 25381ad6265SDimitry Andric return; 25481ad6265SDimitry Andric ++First; 25581ad6265SDimitry Andric 25681ad6265SDimitry Andric // Look ahead for the terminator sequence. 25781ad6265SDimitry Andric Last = First; 25881ad6265SDimitry Andric while (Last != End && size_t(Last - First) < Terminator.size() && 25981ad6265SDimitry Andric Terminator[Last - First] == *Last) 26081ad6265SDimitry Andric ++Last; 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric // Check if we hit it (or the end of the file). 26381ad6265SDimitry Andric if (Last == End) { 26481ad6265SDimitry Andric First = Last; 26581ad6265SDimitry Andric return; 26681ad6265SDimitry Andric } 26781ad6265SDimitry Andric if (size_t(Last - First) < Terminator.size()) 26881ad6265SDimitry Andric continue; 26981ad6265SDimitry Andric if (*Last != '"') 27081ad6265SDimitry Andric continue; 27181ad6265SDimitry Andric First = Last + 1; 27281ad6265SDimitry Andric return; 27381ad6265SDimitry Andric } 27481ad6265SDimitry Andric } 27581ad6265SDimitry Andric 27681ad6265SDimitry Andric // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 27781ad6265SDimitry Andric static unsigned isEOL(const char *First, const char *const End) { 27881ad6265SDimitry Andric if (First == End) 27981ad6265SDimitry Andric return 0; 28081ad6265SDimitry Andric if (End - First > 1 && isVerticalWhitespace(First[0]) && 28181ad6265SDimitry Andric isVerticalWhitespace(First[1]) && First[0] != First[1]) 28281ad6265SDimitry Andric return 2; 28381ad6265SDimitry Andric return !!isVerticalWhitespace(First[0]); 28481ad6265SDimitry Andric } 28581ad6265SDimitry Andric 28681ad6265SDimitry Andric static void skipString(const char *&First, const char *const End) { 28781ad6265SDimitry Andric assert(*First == '\'' || *First == '"' || *First == '<'); 28881ad6265SDimitry Andric const char Terminator = *First == '<' ? '>' : *First; 28981ad6265SDimitry Andric for (++First; First != End && *First != Terminator; ++First) { 29081ad6265SDimitry Andric // String and character literals don't extend past the end of the line. 29181ad6265SDimitry Andric if (isVerticalWhitespace(*First)) 29281ad6265SDimitry Andric return; 29381ad6265SDimitry Andric if (*First != '\\') 29481ad6265SDimitry Andric continue; 29581ad6265SDimitry Andric // Skip past backslash to the next character. This ensures that the 29681ad6265SDimitry Andric // character right after it is skipped as well, which matters if it's 29781ad6265SDimitry Andric // the terminator. 29881ad6265SDimitry Andric if (++First == End) 29981ad6265SDimitry Andric return; 30081ad6265SDimitry Andric if (!isWhitespace(*First)) 30181ad6265SDimitry Andric continue; 30281ad6265SDimitry Andric // Whitespace after the backslash might indicate a line continuation. 30381ad6265SDimitry Andric const char *FirstAfterBackslashPastSpace = First; 30481ad6265SDimitry Andric skipOverSpaces(FirstAfterBackslashPastSpace, End); 30581ad6265SDimitry Andric if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 30681ad6265SDimitry Andric // Advance the character pointer to the next line for the next 30781ad6265SDimitry Andric // iteration. 30881ad6265SDimitry Andric First = FirstAfterBackslashPastSpace + NLSize - 1; 30981ad6265SDimitry Andric } 31081ad6265SDimitry Andric } 31181ad6265SDimitry Andric if (First != End) 31281ad6265SDimitry Andric ++First; // Finish off the string. 31381ad6265SDimitry Andric } 31481ad6265SDimitry Andric 31581ad6265SDimitry Andric // Returns the length of the skipped newline 31681ad6265SDimitry Andric static unsigned skipNewline(const char *&First, const char *End) { 31781ad6265SDimitry Andric if (First == End) 31881ad6265SDimitry Andric return 0; 31981ad6265SDimitry Andric assert(isVerticalWhitespace(*First)); 32081ad6265SDimitry Andric unsigned Len = isEOL(First, End); 32181ad6265SDimitry Andric assert(Len && "expected newline"); 32281ad6265SDimitry Andric First += Len; 32381ad6265SDimitry Andric return Len; 32481ad6265SDimitry Andric } 32581ad6265SDimitry Andric 32681ad6265SDimitry Andric static bool wasLineContinuation(const char *First, unsigned EOLLen) { 32781ad6265SDimitry Andric return *(First - (int)EOLLen - 1) == '\\'; 32881ad6265SDimitry Andric } 32981ad6265SDimitry Andric 33081ad6265SDimitry Andric static void skipToNewlineRaw(const char *&First, const char *const End) { 33181ad6265SDimitry Andric for (;;) { 33281ad6265SDimitry Andric if (First == End) 33381ad6265SDimitry Andric return; 33481ad6265SDimitry Andric 33581ad6265SDimitry Andric unsigned Len = isEOL(First, End); 33681ad6265SDimitry Andric if (Len) 33781ad6265SDimitry Andric return; 33881ad6265SDimitry Andric 33981ad6265SDimitry Andric do { 34081ad6265SDimitry Andric if (++First == End) 34181ad6265SDimitry Andric return; 34281ad6265SDimitry Andric Len = isEOL(First, End); 34381ad6265SDimitry Andric } while (!Len); 34481ad6265SDimitry Andric 34581ad6265SDimitry Andric if (First[-1] != '\\') 34681ad6265SDimitry Andric return; 34781ad6265SDimitry Andric 34881ad6265SDimitry Andric First += Len; 34981ad6265SDimitry Andric // Keep skipping lines... 35081ad6265SDimitry Andric } 35181ad6265SDimitry Andric } 35281ad6265SDimitry Andric 35381ad6265SDimitry Andric static void skipLineComment(const char *&First, const char *const End) { 35481ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '/'); 35581ad6265SDimitry Andric First += 2; 35681ad6265SDimitry Andric skipToNewlineRaw(First, End); 35781ad6265SDimitry Andric } 35881ad6265SDimitry Andric 35981ad6265SDimitry Andric static void skipBlockComment(const char *&First, const char *const End) { 36081ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '*'); 36181ad6265SDimitry Andric if (End - First < 4) { 36281ad6265SDimitry Andric First = End; 36381ad6265SDimitry Andric return; 36481ad6265SDimitry Andric } 36581ad6265SDimitry Andric for (First += 3; First != End; ++First) 36681ad6265SDimitry Andric if (First[-1] == '*' && First[0] == '/') { 36781ad6265SDimitry Andric ++First; 36881ad6265SDimitry Andric return; 36981ad6265SDimitry Andric } 37081ad6265SDimitry Andric } 37181ad6265SDimitry Andric 37281ad6265SDimitry Andric /// \returns True if the current single quotation mark character is a C++ 14 37381ad6265SDimitry Andric /// digit separator. 37481ad6265SDimitry Andric static bool isQuoteCppDigitSeparator(const char *const Start, 37581ad6265SDimitry Andric const char *const Cur, 37681ad6265SDimitry Andric const char *const End) { 37781ad6265SDimitry Andric assert(*Cur == '\'' && "expected quotation character"); 37881ad6265SDimitry Andric // skipLine called in places where we don't expect a valid number 37981ad6265SDimitry Andric // body before `start` on the same line, so always return false at the start. 38081ad6265SDimitry Andric if (Start == Cur) 38181ad6265SDimitry Andric return false; 38281ad6265SDimitry Andric // The previous character must be a valid PP number character. 38381ad6265SDimitry Andric // Make sure that the L, u, U, u8 prefixes don't get marked as a 38481ad6265SDimitry Andric // separator though. 38581ad6265SDimitry Andric char Prev = *(Cur - 1); 38681ad6265SDimitry Andric if (Prev == 'L' || Prev == 'U' || Prev == 'u') 38781ad6265SDimitry Andric return false; 38881ad6265SDimitry Andric if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 38981ad6265SDimitry Andric return false; 39081ad6265SDimitry Andric if (!isPreprocessingNumberBody(Prev)) 39181ad6265SDimitry Andric return false; 39281ad6265SDimitry Andric // The next character should be a valid identifier body character. 39381ad6265SDimitry Andric return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); 39481ad6265SDimitry Andric } 39581ad6265SDimitry Andric 396bdd1243dSDimitry Andric void Scanner::skipLine(const char *&First, const char *const End) { 39781ad6265SDimitry Andric for (;;) { 39881ad6265SDimitry Andric assert(First <= End); 39981ad6265SDimitry Andric if (First == End) 40081ad6265SDimitry Andric return; 40181ad6265SDimitry Andric 40281ad6265SDimitry Andric if (isVerticalWhitespace(*First)) { 40381ad6265SDimitry Andric skipNewline(First, End); 40481ad6265SDimitry Andric return; 40581ad6265SDimitry Andric } 40681ad6265SDimitry Andric const char *Start = First; 40781ad6265SDimitry Andric while (First != End && !isVerticalWhitespace(*First)) { 40881ad6265SDimitry Andric // Iterate over strings correctly to avoid comments and newlines. 40981ad6265SDimitry Andric if (*First == '"' || 41081ad6265SDimitry Andric (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 411bdd1243dSDimitry Andric LastTokenPtr = First; 41281ad6265SDimitry Andric if (isRawStringLiteral(Start, First)) 41381ad6265SDimitry Andric skipRawString(First, End); 41481ad6265SDimitry Andric else 41581ad6265SDimitry Andric skipString(First, End); 41681ad6265SDimitry Andric continue; 41781ad6265SDimitry Andric } 41881ad6265SDimitry Andric 41981ad6265SDimitry Andric // Iterate over comments correctly. 42081ad6265SDimitry Andric if (*First != '/' || End - First < 2) { 421bdd1243dSDimitry Andric LastTokenPtr = First; 42281ad6265SDimitry Andric ++First; 42381ad6265SDimitry Andric continue; 42481ad6265SDimitry Andric } 42581ad6265SDimitry Andric 42681ad6265SDimitry Andric if (First[1] == '/') { 42781ad6265SDimitry Andric // "//...". 42881ad6265SDimitry Andric skipLineComment(First, End); 42981ad6265SDimitry Andric continue; 43081ad6265SDimitry Andric } 43181ad6265SDimitry Andric 43281ad6265SDimitry Andric if (First[1] != '*') { 433bdd1243dSDimitry Andric LastTokenPtr = First; 43481ad6265SDimitry Andric ++First; 43581ad6265SDimitry Andric continue; 43681ad6265SDimitry Andric } 43781ad6265SDimitry Andric 43881ad6265SDimitry Andric // "/*...*/". 43981ad6265SDimitry Andric skipBlockComment(First, End); 44081ad6265SDimitry Andric } 44181ad6265SDimitry Andric if (First == End) 44281ad6265SDimitry Andric return; 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric // Skip over the newline. 44581ad6265SDimitry Andric unsigned Len = skipNewline(First, End); 44681ad6265SDimitry Andric if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 44781ad6265SDimitry Andric break; 44881ad6265SDimitry Andric } 44981ad6265SDimitry Andric } 45081ad6265SDimitry Andric 451bdd1243dSDimitry Andric void Scanner::skipDirective(StringRef Name, const char *&First, 45281ad6265SDimitry Andric const char *const End) { 45381ad6265SDimitry Andric if (llvm::StringSwitch<bool>(Name) 45481ad6265SDimitry Andric .Case("warning", true) 45581ad6265SDimitry Andric .Case("error", true) 45681ad6265SDimitry Andric .Default(false)) 45781ad6265SDimitry Andric // Do not process quotes or comments. 45881ad6265SDimitry Andric skipToNewlineRaw(First, End); 45981ad6265SDimitry Andric else 46081ad6265SDimitry Andric skipLine(First, End); 46181ad6265SDimitry Andric } 46281ad6265SDimitry Andric 46381ad6265SDimitry Andric static void skipWhitespace(const char *&First, const char *const End) { 46481ad6265SDimitry Andric for (;;) { 46581ad6265SDimitry Andric assert(First <= End); 46681ad6265SDimitry Andric skipOverSpaces(First, End); 46781ad6265SDimitry Andric 46881ad6265SDimitry Andric if (End - First < 2) 46981ad6265SDimitry Andric return; 47081ad6265SDimitry Andric 47181ad6265SDimitry Andric if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 47281ad6265SDimitry Andric skipNewline(++First, End); 47381ad6265SDimitry Andric continue; 47481ad6265SDimitry Andric } 47581ad6265SDimitry Andric 47681ad6265SDimitry Andric // Check for a non-comment character. 47781ad6265SDimitry Andric if (First[0] != '/') 47881ad6265SDimitry Andric return; 47981ad6265SDimitry Andric 48081ad6265SDimitry Andric // "// ...". 48181ad6265SDimitry Andric if (First[1] == '/') { 48281ad6265SDimitry Andric skipLineComment(First, End); 48381ad6265SDimitry Andric return; 48481ad6265SDimitry Andric } 48581ad6265SDimitry Andric 48681ad6265SDimitry Andric // Cannot be a comment. 48781ad6265SDimitry Andric if (First[1] != '*') 48881ad6265SDimitry Andric return; 48981ad6265SDimitry Andric 49081ad6265SDimitry Andric // "/*...*/". 49181ad6265SDimitry Andric skipBlockComment(First, End); 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric } 49481ad6265SDimitry Andric 49581ad6265SDimitry Andric bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, 49681ad6265SDimitry Andric const char *const End) { 49781ad6265SDimitry Andric const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; 49881ad6265SDimitry Andric for (;;) { 49981ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 50081ad6265SDimitry Andric if (Tok.is(tok::eof)) 50181ad6265SDimitry Andric return reportError( 50281ad6265SDimitry Andric DirectiveLoc, 50381ad6265SDimitry Andric diag::err_dep_source_scanner_missing_semi_after_at_import); 50481ad6265SDimitry Andric if (Tok.is(tok::semi)) 50581ad6265SDimitry Andric break; 50681ad6265SDimitry Andric } 50781ad6265SDimitry Andric pushDirective(Kind); 50881ad6265SDimitry Andric skipWhitespace(First, End); 50981ad6265SDimitry Andric if (First == End) 51081ad6265SDimitry Andric return false; 51181ad6265SDimitry Andric if (!isVerticalWhitespace(*First)) 51281ad6265SDimitry Andric return reportError( 51381ad6265SDimitry Andric DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); 51481ad6265SDimitry Andric skipNewline(First, End); 51581ad6265SDimitry Andric return false; 51681ad6265SDimitry Andric } 51781ad6265SDimitry Andric 51881ad6265SDimitry Andric dependency_directives_scan::Token &Scanner::lexToken(const char *&First, 51981ad6265SDimitry Andric const char *const End) { 52081ad6265SDimitry Andric clang::Token Tok; 52181ad6265SDimitry Andric TheLexer.LexFromRawLexer(Tok); 52281ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 52381ad6265SDimitry Andric assert(First <= End); 52481ad6265SDimitry Andric 52581ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 52681ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 52781ad6265SDimitry Andric Tok.getFlags()); 52881ad6265SDimitry Andric return CurDirToks.back(); 52981ad6265SDimitry Andric } 53081ad6265SDimitry Andric 53181ad6265SDimitry Andric dependency_directives_scan::Token & 53281ad6265SDimitry Andric Scanner::lexIncludeFilename(const char *&First, const char *const End) { 53381ad6265SDimitry Andric clang::Token Tok; 53481ad6265SDimitry Andric TheLexer.LexIncludeFilename(Tok); 53581ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 53681ad6265SDimitry Andric assert(First <= End); 53781ad6265SDimitry Andric 53881ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 53981ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 54081ad6265SDimitry Andric Tok.getFlags()); 54181ad6265SDimitry Andric return CurDirToks.back(); 54281ad6265SDimitry Andric } 54381ad6265SDimitry Andric 54481ad6265SDimitry Andric void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { 54581ad6265SDimitry Andric while (true) { 54681ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 54781ad6265SDimitry Andric if (Tok.is(tok::eod)) 54881ad6265SDimitry Andric break; 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric } 55181ad6265SDimitry Andric 55206c3fb27SDimitry Andric StringRef 55306c3fb27SDimitry Andric Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { 55481ad6265SDimitry Andric bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; 55581ad6265SDimitry Andric if (LLVM_LIKELY(!NeedsCleaning)) 55681ad6265SDimitry Andric return Input.slice(Tok.Offset, Tok.getEnd()); 55781ad6265SDimitry Andric 55881ad6265SDimitry Andric SmallString<64> Spelling; 55981ad6265SDimitry Andric Spelling.resize(Tok.Length); 56081ad6265SDimitry Andric 56106c3fb27SDimitry Andric // FIXME: C++11 raw string literals need special handling (see getSpellingSlow 56206c3fb27SDimitry Andric // in the Lexer). Currently we cannot see them due to our LangOpts. 56306c3fb27SDimitry Andric 56481ad6265SDimitry Andric unsigned SpellingLength = 0; 56581ad6265SDimitry Andric const char *BufPtr = Input.begin() + Tok.Offset; 56681ad6265SDimitry Andric const char *AfterIdent = Input.begin() + Tok.getEnd(); 56781ad6265SDimitry Andric while (BufPtr < AfterIdent) { 568*5f757f3fSDimitry Andric auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); 569*5f757f3fSDimitry Andric Spelling[SpellingLength++] = Char; 57081ad6265SDimitry Andric BufPtr += Size; 57181ad6265SDimitry Andric } 57281ad6265SDimitry Andric 57381ad6265SDimitry Andric return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) 57481ad6265SDimitry Andric .first->first(); 57581ad6265SDimitry Andric } 57681ad6265SDimitry Andric 57706c3fb27SDimitry Andric std::optional<StringRef> 57806c3fb27SDimitry Andric Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { 57906c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 58006c3fb27SDimitry Andric if (Tok.isNot(tok::raw_identifier)) { 58106c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 58206c3fb27SDimitry Andric skipLine(First, End); 58306c3fb27SDimitry Andric return std::nullopt; 58406c3fb27SDimitry Andric } 58506c3fb27SDimitry Andric 58606c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 58706c3fb27SDimitry Andric } 58806c3fb27SDimitry Andric 58981ad6265SDimitry Andric StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { 590bdd1243dSDimitry Andric std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); 59181ad6265SDimitry Andric assert(Id && "expected identifier token"); 592bdd1243dSDimitry Andric return *Id; 59381ad6265SDimitry Andric } 59481ad6265SDimitry Andric 59581ad6265SDimitry Andric bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, 59681ad6265SDimitry Andric const char *const End) { 597bdd1243dSDimitry Andric if (std::optional<StringRef> FoundId = 598bdd1243dSDimitry Andric tryLexIdentifierOrSkipLine(First, End)) { 59981ad6265SDimitry Andric if (*FoundId == Id) 60081ad6265SDimitry Andric return true; 60181ad6265SDimitry Andric skipLine(First, End); 60281ad6265SDimitry Andric } 60381ad6265SDimitry Andric return false; 60481ad6265SDimitry Andric } 60581ad6265SDimitry Andric 60606c3fb27SDimitry Andric bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 60706c3fb27SDimitry Andric const char *const End) { 60806c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 60906c3fb27SDimitry Andric if (Tok.is(K)) 61006c3fb27SDimitry Andric return true; 61106c3fb27SDimitry Andric skipLine(First, End); 61206c3fb27SDimitry Andric return false; 61306c3fb27SDimitry Andric } 61406c3fb27SDimitry Andric 61506c3fb27SDimitry Andric std::optional<StringRef> 61606c3fb27SDimitry Andric Scanner::tryLexStringLiteralOrSkipLine(const char *&First, 61706c3fb27SDimitry Andric const char *const End) { 61806c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 61906c3fb27SDimitry Andric if (!tok::isStringLiteral(Tok.Kind)) { 62006c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 62106c3fb27SDimitry Andric skipLine(First, End); 62206c3fb27SDimitry Andric return std::nullopt; 62306c3fb27SDimitry Andric } 62406c3fb27SDimitry Andric 62506c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 62606c3fb27SDimitry Andric } 62706c3fb27SDimitry Andric 62881ad6265SDimitry Andric bool Scanner::lexAt(const char *&First, const char *const End) { 62981ad6265SDimitry Andric // Handle "@import". 63081ad6265SDimitry Andric 63181ad6265SDimitry Andric // Lex '@'. 63281ad6265SDimitry Andric const dependency_directives_scan::Token &AtTok = lexToken(First, End); 63381ad6265SDimitry Andric assert(AtTok.is(tok::at)); 63481ad6265SDimitry Andric (void)AtTok; 63581ad6265SDimitry Andric 63681ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 63781ad6265SDimitry Andric return false; 63881ad6265SDimitry Andric return lexModuleDirectiveBody(decl_at_import, First, End); 63981ad6265SDimitry Andric } 64081ad6265SDimitry Andric 64181ad6265SDimitry Andric bool Scanner::lexModule(const char *&First, const char *const End) { 64281ad6265SDimitry Andric StringRef Id = lexIdentifier(First, End); 64381ad6265SDimitry Andric bool Export = false; 64481ad6265SDimitry Andric if (Id == "export") { 64581ad6265SDimitry Andric Export = true; 646bdd1243dSDimitry Andric std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); 64781ad6265SDimitry Andric if (!NextId) 64881ad6265SDimitry Andric return false; 64981ad6265SDimitry Andric Id = *NextId; 65081ad6265SDimitry Andric } 65181ad6265SDimitry Andric 65281ad6265SDimitry Andric if (Id != "module" && Id != "import") { 65381ad6265SDimitry Andric skipLine(First, End); 65481ad6265SDimitry Andric return false; 65581ad6265SDimitry Andric } 65681ad6265SDimitry Andric 65781ad6265SDimitry Andric skipWhitespace(First, End); 65881ad6265SDimitry Andric 65981ad6265SDimitry Andric // Ignore this as a module directive if the next character can't be part of 66081ad6265SDimitry Andric // an import. 66181ad6265SDimitry Andric 66281ad6265SDimitry Andric switch (*First) { 66381ad6265SDimitry Andric case ':': 66481ad6265SDimitry Andric case '<': 66581ad6265SDimitry Andric case '"': 66681ad6265SDimitry Andric break; 66781ad6265SDimitry Andric default: 66881ad6265SDimitry Andric if (!isAsciiIdentifierContinue(*First)) { 66981ad6265SDimitry Andric skipLine(First, End); 67081ad6265SDimitry Andric return false; 67181ad6265SDimitry Andric } 67281ad6265SDimitry Andric } 67381ad6265SDimitry Andric 67481ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); 67581ad6265SDimitry Andric 67681ad6265SDimitry Andric DirectiveKind Kind; 67781ad6265SDimitry Andric if (Id == "module") 67881ad6265SDimitry Andric Kind = Export ? cxx_export_module_decl : cxx_module_decl; 67981ad6265SDimitry Andric else 68081ad6265SDimitry Andric Kind = Export ? cxx_export_import_decl : cxx_import_decl; 68181ad6265SDimitry Andric 68281ad6265SDimitry Andric return lexModuleDirectiveBody(Kind, First, End); 68381ad6265SDimitry Andric } 68481ad6265SDimitry Andric 68506c3fb27SDimitry Andric bool Scanner::lex_Pragma(const char *&First, const char *const End) { 68606c3fb27SDimitry Andric if (!isNextTokenOrSkipLine(tok::l_paren, First, End)) 68706c3fb27SDimitry Andric return false; 68806c3fb27SDimitry Andric 68906c3fb27SDimitry Andric std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End); 69006c3fb27SDimitry Andric 69106c3fb27SDimitry Andric if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End)) 69206c3fb27SDimitry Andric return false; 69306c3fb27SDimitry Andric 69406c3fb27SDimitry Andric SmallString<64> Buffer(*Str); 69506c3fb27SDimitry Andric prepare_PragmaString(Buffer); 69606c3fb27SDimitry Andric 69706c3fb27SDimitry Andric // Use a new scanner instance since the tokens will be inside the allocated 69806c3fb27SDimitry Andric // string. We should already have captured all the relevant tokens in the 69906c3fb27SDimitry Andric // current scanner. 70006c3fb27SDimitry Andric SmallVector<dependency_directives_scan::Token> DiscardTokens; 70106c3fb27SDimitry Andric const char *Begin = Buffer.c_str(); 70206c3fb27SDimitry Andric Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags, 70306c3fb27SDimitry Andric InputSourceLoc}; 70406c3fb27SDimitry Andric 70506c3fb27SDimitry Andric PragmaScanner.TheLexer.setParsingPreprocessorDirective(true); 70606c3fb27SDimitry Andric if (PragmaScanner.lexPragma(Begin, Buffer.end())) 70706c3fb27SDimitry Andric return true; 70806c3fb27SDimitry Andric 70906c3fb27SDimitry Andric DirectiveKind K = PragmaScanner.topDirective(); 71006c3fb27SDimitry Andric if (K == pp_none) { 71106c3fb27SDimitry Andric skipLine(First, End); 71206c3fb27SDimitry Andric return false; 71306c3fb27SDimitry Andric } 71406c3fb27SDimitry Andric 71506c3fb27SDimitry Andric assert(Begin == Buffer.end()); 71606c3fb27SDimitry Andric pushDirective(K); 71706c3fb27SDimitry Andric return false; 71806c3fb27SDimitry Andric } 71906c3fb27SDimitry Andric 72081ad6265SDimitry Andric bool Scanner::lexPragma(const char *&First, const char *const End) { 721bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 72281ad6265SDimitry Andric if (!FoundId) 72381ad6265SDimitry Andric return false; 72481ad6265SDimitry Andric 72581ad6265SDimitry Andric StringRef Id = *FoundId; 72681ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 72781ad6265SDimitry Andric .Case("once", pp_pragma_once) 72881ad6265SDimitry Andric .Case("push_macro", pp_pragma_push_macro) 72981ad6265SDimitry Andric .Case("pop_macro", pp_pragma_pop_macro) 73081ad6265SDimitry Andric .Case("include_alias", pp_pragma_include_alias) 73181ad6265SDimitry Andric .Default(pp_none); 73281ad6265SDimitry Andric if (Kind != pp_none) { 73381ad6265SDimitry Andric lexPPDirectiveBody(First, End); 73481ad6265SDimitry Andric pushDirective(Kind); 73581ad6265SDimitry Andric return false; 73681ad6265SDimitry Andric } 73781ad6265SDimitry Andric 73881ad6265SDimitry Andric if (Id != "clang") { 73981ad6265SDimitry Andric skipLine(First, End); 74081ad6265SDimitry Andric return false; 74181ad6265SDimitry Andric } 74281ad6265SDimitry Andric 74306c3fb27SDimitry Andric FoundId = tryLexIdentifierOrSkipLine(First, End); 74406c3fb27SDimitry Andric if (!FoundId) 74581ad6265SDimitry Andric return false; 74606c3fb27SDimitry Andric Id = *FoundId; 74706c3fb27SDimitry Andric 74806c3fb27SDimitry Andric // #pragma clang system_header 74906c3fb27SDimitry Andric if (Id == "system_header") { 75006c3fb27SDimitry Andric lexPPDirectiveBody(First, End); 75106c3fb27SDimitry Andric pushDirective(pp_pragma_system_header); 75206c3fb27SDimitry Andric return false; 75306c3fb27SDimitry Andric } 75406c3fb27SDimitry Andric 75506c3fb27SDimitry Andric if (Id != "module") { 75606c3fb27SDimitry Andric skipLine(First, End); 75706c3fb27SDimitry Andric return false; 75806c3fb27SDimitry Andric } 75981ad6265SDimitry Andric 76081ad6265SDimitry Andric // #pragma clang module. 76181ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 76281ad6265SDimitry Andric return false; 76381ad6265SDimitry Andric 76481ad6265SDimitry Andric // #pragma clang module import. 76581ad6265SDimitry Andric lexPPDirectiveBody(First, End); 76681ad6265SDimitry Andric pushDirective(pp_pragma_import); 76781ad6265SDimitry Andric return false; 76881ad6265SDimitry Andric } 76981ad6265SDimitry Andric 77081ad6265SDimitry Andric bool Scanner::lexEndif(const char *&First, const char *const End) { 77181ad6265SDimitry Andric // Strip out "#else" if it's empty. 77281ad6265SDimitry Andric if (topDirective() == pp_else) 77381ad6265SDimitry Andric popDirective(); 77481ad6265SDimitry Andric 77581ad6265SDimitry Andric // If "#ifdef" is empty, strip it and skip the "#endif". 77681ad6265SDimitry Andric // 77781ad6265SDimitry Andric // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 77881ad6265SDimitry Andric // we can skip empty `#if` and `#elif` blocks as well after scanning for a 77981ad6265SDimitry Andric // literal __has_include in the condition. Even without that rule we could 78081ad6265SDimitry Andric // drop the tokens if we scan for identifiers in the condition and find none. 78181ad6265SDimitry Andric if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { 78281ad6265SDimitry Andric popDirective(); 78381ad6265SDimitry Andric skipLine(First, End); 78481ad6265SDimitry Andric return false; 78581ad6265SDimitry Andric } 78681ad6265SDimitry Andric 78781ad6265SDimitry Andric return lexDefault(pp_endif, First, End); 78881ad6265SDimitry Andric } 78981ad6265SDimitry Andric 79081ad6265SDimitry Andric bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, 79181ad6265SDimitry Andric const char *const End) { 79281ad6265SDimitry Andric lexPPDirectiveBody(First, End); 79381ad6265SDimitry Andric pushDirective(Kind); 79481ad6265SDimitry Andric return false; 79581ad6265SDimitry Andric } 79681ad6265SDimitry Andric 79781ad6265SDimitry Andric static bool isStartOfRelevantLine(char First) { 79881ad6265SDimitry Andric switch (First) { 79981ad6265SDimitry Andric case '#': 80081ad6265SDimitry Andric case '@': 80181ad6265SDimitry Andric case 'i': 80281ad6265SDimitry Andric case 'e': 80381ad6265SDimitry Andric case 'm': 80406c3fb27SDimitry Andric case '_': 80581ad6265SDimitry Andric return true; 80681ad6265SDimitry Andric } 80781ad6265SDimitry Andric return false; 80881ad6265SDimitry Andric } 80981ad6265SDimitry Andric 81081ad6265SDimitry Andric bool Scanner::lexPPLine(const char *&First, const char *const End) { 81181ad6265SDimitry Andric assert(First != End); 81281ad6265SDimitry Andric 81381ad6265SDimitry Andric skipWhitespace(First, End); 81481ad6265SDimitry Andric assert(First <= End); 81581ad6265SDimitry Andric if (First == End) 81681ad6265SDimitry Andric return false; 81781ad6265SDimitry Andric 81881ad6265SDimitry Andric if (!isStartOfRelevantLine(*First)) { 81981ad6265SDimitry Andric skipLine(First, End); 82081ad6265SDimitry Andric assert(First <= End); 82181ad6265SDimitry Andric return false; 82281ad6265SDimitry Andric } 82381ad6265SDimitry Andric 824bdd1243dSDimitry Andric LastTokenPtr = First; 825bdd1243dSDimitry Andric 82681ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); 82781ad6265SDimitry Andric 82881ad6265SDimitry Andric auto ScEx1 = make_scope_exit([&]() { 82981ad6265SDimitry Andric /// Clear Scanner's CurDirToks before returning, in case we didn't push a 83081ad6265SDimitry Andric /// new directive. 83181ad6265SDimitry Andric CurDirToks.clear(); 83281ad6265SDimitry Andric }); 83381ad6265SDimitry Andric 83481ad6265SDimitry Andric // Handle "@import". 83581ad6265SDimitry Andric if (*First == '@') 83681ad6265SDimitry Andric return lexAt(First, End); 83781ad6265SDimitry Andric 83881ad6265SDimitry Andric if (*First == 'i' || *First == 'e' || *First == 'm') 83981ad6265SDimitry Andric return lexModule(First, End); 84081ad6265SDimitry Andric 84106c3fb27SDimitry Andric if (*First == '_') { 84206c3fb27SDimitry Andric if (isNextIdentifierOrSkipLine("_Pragma", First, End)) 84306c3fb27SDimitry Andric return lex_Pragma(First, End); 84406c3fb27SDimitry Andric return false; 84506c3fb27SDimitry Andric } 84606c3fb27SDimitry Andric 84781ad6265SDimitry Andric // Handle preprocessing directives. 84881ad6265SDimitry Andric 84981ad6265SDimitry Andric TheLexer.setParsingPreprocessorDirective(true); 85081ad6265SDimitry Andric auto ScEx2 = make_scope_exit( 85181ad6265SDimitry Andric [&]() { TheLexer.setParsingPreprocessorDirective(false); }); 85281ad6265SDimitry Andric 85381ad6265SDimitry Andric // Lex '#'. 85481ad6265SDimitry Andric const dependency_directives_scan::Token &HashTok = lexToken(First, End); 855bdd1243dSDimitry Andric if (HashTok.is(tok::hashhash)) { 856bdd1243dSDimitry Andric // A \p tok::hashhash at this location is passed by the preprocessor to the 857bdd1243dSDimitry Andric // parser to interpret, like any other token. So for dependency scanning 858bdd1243dSDimitry Andric // skip it like a normal token not affecting the preprocessor. 859bdd1243dSDimitry Andric skipLine(First, End); 860bdd1243dSDimitry Andric assert(First <= End); 861bdd1243dSDimitry Andric return false; 862bdd1243dSDimitry Andric } 86381ad6265SDimitry Andric assert(HashTok.is(tok::hash)); 86481ad6265SDimitry Andric (void)HashTok; 86581ad6265SDimitry Andric 866bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 86781ad6265SDimitry Andric if (!FoundId) 86881ad6265SDimitry Andric return false; 86981ad6265SDimitry Andric 87081ad6265SDimitry Andric StringRef Id = *FoundId; 87181ad6265SDimitry Andric 87281ad6265SDimitry Andric if (Id == "pragma") 87381ad6265SDimitry Andric return lexPragma(First, End); 87481ad6265SDimitry Andric 87581ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 87681ad6265SDimitry Andric .Case("include", pp_include) 87781ad6265SDimitry Andric .Case("__include_macros", pp___include_macros) 87881ad6265SDimitry Andric .Case("define", pp_define) 87981ad6265SDimitry Andric .Case("undef", pp_undef) 88081ad6265SDimitry Andric .Case("import", pp_import) 88181ad6265SDimitry Andric .Case("include_next", pp_include_next) 88281ad6265SDimitry Andric .Case("if", pp_if) 88381ad6265SDimitry Andric .Case("ifdef", pp_ifdef) 88481ad6265SDimitry Andric .Case("ifndef", pp_ifndef) 88581ad6265SDimitry Andric .Case("elif", pp_elif) 88681ad6265SDimitry Andric .Case("elifdef", pp_elifdef) 88781ad6265SDimitry Andric .Case("elifndef", pp_elifndef) 88881ad6265SDimitry Andric .Case("else", pp_else) 88981ad6265SDimitry Andric .Case("endif", pp_endif) 89081ad6265SDimitry Andric .Default(pp_none); 89181ad6265SDimitry Andric if (Kind == pp_none) { 89281ad6265SDimitry Andric skipDirective(Id, First, End); 89381ad6265SDimitry Andric return false; 89481ad6265SDimitry Andric } 89581ad6265SDimitry Andric 89681ad6265SDimitry Andric if (Kind == pp_endif) 89781ad6265SDimitry Andric return lexEndif(First, End); 89881ad6265SDimitry Andric 89981ad6265SDimitry Andric switch (Kind) { 90081ad6265SDimitry Andric case pp_include: 90181ad6265SDimitry Andric case pp___include_macros: 90281ad6265SDimitry Andric case pp_include_next: 90381ad6265SDimitry Andric case pp_import: 90481ad6265SDimitry Andric lexIncludeFilename(First, End); 90581ad6265SDimitry Andric break; 90681ad6265SDimitry Andric default: 90781ad6265SDimitry Andric break; 90881ad6265SDimitry Andric } 90981ad6265SDimitry Andric 91081ad6265SDimitry Andric // Everything else. 91181ad6265SDimitry Andric return lexDefault(Kind, First, End); 91281ad6265SDimitry Andric } 91381ad6265SDimitry Andric 91481ad6265SDimitry Andric static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 91581ad6265SDimitry Andric if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 91681ad6265SDimitry Andric First[2] == '\xbf') 91781ad6265SDimitry Andric First += 3; 91881ad6265SDimitry Andric } 91981ad6265SDimitry Andric 92081ad6265SDimitry Andric bool Scanner::scanImpl(const char *First, const char *const End) { 92181ad6265SDimitry Andric skipUTF8ByteOrderMark(First, End); 92281ad6265SDimitry Andric while (First != End) 92381ad6265SDimitry Andric if (lexPPLine(First, End)) 92481ad6265SDimitry Andric return true; 92581ad6265SDimitry Andric return false; 92681ad6265SDimitry Andric } 92781ad6265SDimitry Andric 92881ad6265SDimitry Andric bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { 92981ad6265SDimitry Andric bool Error = scanImpl(Input.begin(), Input.end()); 93081ad6265SDimitry Andric 93181ad6265SDimitry Andric if (!Error) { 93281ad6265SDimitry Andric // Add an EOF on success. 933bdd1243dSDimitry Andric if (LastTokenPtr && 934bdd1243dSDimitry Andric (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) 935bdd1243dSDimitry Andric pushDirective(tokens_present_before_eof); 93681ad6265SDimitry Andric pushDirective(pp_eof); 93781ad6265SDimitry Andric } 93881ad6265SDimitry Andric 93981ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens; 94081ad6265SDimitry Andric for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { 94181ad6265SDimitry Andric assert(RemainingTokens.size() >= DirWithToks.NumTokens); 94281ad6265SDimitry Andric Directives.emplace_back(DirWithToks.Kind, 94381ad6265SDimitry Andric RemainingTokens.take_front(DirWithToks.NumTokens)); 94481ad6265SDimitry Andric RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); 94581ad6265SDimitry Andric } 94681ad6265SDimitry Andric assert(RemainingTokens.empty()); 94781ad6265SDimitry Andric 94881ad6265SDimitry Andric return Error; 94981ad6265SDimitry Andric } 95081ad6265SDimitry Andric 95181ad6265SDimitry Andric bool clang::scanSourceForDependencyDirectives( 95281ad6265SDimitry Andric StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 95381ad6265SDimitry Andric SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags, 95481ad6265SDimitry Andric SourceLocation InputSourceLoc) { 95581ad6265SDimitry Andric return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); 95681ad6265SDimitry Andric } 95781ad6265SDimitry Andric 95881ad6265SDimitry Andric void clang::printDependencyDirectivesAsSource( 95981ad6265SDimitry Andric StringRef Source, 96081ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Directive> Directives, 96181ad6265SDimitry Andric llvm::raw_ostream &OS) { 96281ad6265SDimitry Andric // Add a space separator where it is convenient for testing purposes. 96381ad6265SDimitry Andric auto needsSpaceSeparator = 96481ad6265SDimitry Andric [](tok::TokenKind Prev, 96581ad6265SDimitry Andric const dependency_directives_scan::Token &Tok) -> bool { 96681ad6265SDimitry Andric if (Prev == Tok.Kind) 96781ad6265SDimitry Andric return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 96881ad6265SDimitry Andric tok::r_square); 96981ad6265SDimitry Andric if (Prev == tok::raw_identifier && 97081ad6265SDimitry Andric Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, 97181ad6265SDimitry Andric tok::char_constant, tok::header_name)) 97281ad6265SDimitry Andric return true; 97381ad6265SDimitry Andric if (Prev == tok::r_paren && 97481ad6265SDimitry Andric Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, 97581ad6265SDimitry Andric tok::char_constant, tok::unknown)) 97681ad6265SDimitry Andric return true; 97781ad6265SDimitry Andric if (Prev == tok::comma && 97881ad6265SDimitry Andric Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) 97981ad6265SDimitry Andric return true; 98081ad6265SDimitry Andric return false; 98181ad6265SDimitry Andric }; 98281ad6265SDimitry Andric 98381ad6265SDimitry Andric for (const dependency_directives_scan::Directive &Directive : Directives) { 984bdd1243dSDimitry Andric if (Directive.Kind == tokens_present_before_eof) 985bdd1243dSDimitry Andric OS << "<TokBeforeEOF>"; 986bdd1243dSDimitry Andric std::optional<tok::TokenKind> PrevTokenKind; 98781ad6265SDimitry Andric for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { 98881ad6265SDimitry Andric if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) 98981ad6265SDimitry Andric OS << ' '; 99081ad6265SDimitry Andric PrevTokenKind = Tok.Kind; 99181ad6265SDimitry Andric OS << Source.slice(Tok.Offset, Tok.getEnd()); 99281ad6265SDimitry Andric } 99381ad6265SDimitry Andric } 99481ad6265SDimitry Andric } 995