//===- DependencyDirectivesScanner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the interface for scanning header and source files to get the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "clang/Lex/Pragma.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include <optional>

using namespace clang;
using namespace clang::dependency_directives_scan;
using namespace llvm; 3281ad6265SDimitry Andric 3381ad6265SDimitry Andric namespace { 3481ad6265SDimitry Andric 3581ad6265SDimitry Andric struct DirectiveWithTokens { 3681ad6265SDimitry Andric DirectiveKind Kind; 3781ad6265SDimitry Andric unsigned NumTokens; 3881ad6265SDimitry Andric 3981ad6265SDimitry Andric DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) 4081ad6265SDimitry Andric : Kind(Kind), NumTokens(NumTokens) {} 4181ad6265SDimitry Andric }; 4281ad6265SDimitry Andric 4381ad6265SDimitry Andric /// Does an efficient "scan" of the sources to detect the presence of 4481ad6265SDimitry Andric /// preprocessor (or module import) directives and collects the raw lexed tokens 4581ad6265SDimitry Andric /// for those directives so that the \p Lexer can "replay" them when the file is 4681ad6265SDimitry Andric /// included. 4781ad6265SDimitry Andric /// 4881ad6265SDimitry Andric /// Note that the behavior of the raw lexer is affected by the language mode, 4981ad6265SDimitry Andric /// while at this point we want to do a scan and collect tokens once, 5081ad6265SDimitry Andric /// irrespective of the language mode that the file will get included in. To 5181ad6265SDimitry Andric /// compensate for that the \p Lexer, while "replaying", will adjust a token 5281ad6265SDimitry Andric /// where appropriate, when it could affect the preprocessor's state. 5381ad6265SDimitry Andric /// For example in a directive like 5481ad6265SDimitry Andric /// 5581ad6265SDimitry Andric /// \code 5681ad6265SDimitry Andric /// #if __has_cpp_attribute(clang::fallthrough) 5781ad6265SDimitry Andric /// \endcode 5881ad6265SDimitry Andric /// 5981ad6265SDimitry Andric /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 6081ad6265SDimitry Andric /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' 6181ad6265SDimitry Andric /// while in C++ mode. 
6281ad6265SDimitry Andric struct Scanner { 6381ad6265SDimitry Andric Scanner(StringRef Input, 6481ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 6581ad6265SDimitry Andric DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) 6681ad6265SDimitry Andric : Input(Input), Tokens(Tokens), Diags(Diags), 6781ad6265SDimitry Andric InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), 6881ad6265SDimitry Andric TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), 6981ad6265SDimitry Andric Input.end()) {} 7081ad6265SDimitry Andric 7181ad6265SDimitry Andric static LangOptions getLangOptsForDepScanning() { 7281ad6265SDimitry Andric LangOptions LangOpts; 7381ad6265SDimitry Andric // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 7481ad6265SDimitry Andric LangOpts.ObjC = true; 7581ad6265SDimitry Andric LangOpts.LineComment = true; 76*06c3fb27SDimitry Andric // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and 77*06c3fb27SDimitry Andric // R"()" literals. 7881ad6265SDimitry Andric return LangOpts; 7981ad6265SDimitry Andric } 8081ad6265SDimitry Andric 8181ad6265SDimitry Andric /// Lex the provided source and emit the directive tokens. 8281ad6265SDimitry Andric /// 8381ad6265SDimitry Andric /// \returns True on error. 8481ad6265SDimitry Andric bool scan(SmallVectorImpl<Directive> &Directives); 8581ad6265SDimitry Andric 8681ad6265SDimitry Andric private: 8781ad6265SDimitry Andric /// Lexes next token and advances \p First and the \p Lexer. 
88bdd1243dSDimitry Andric [[nodiscard]] dependency_directives_scan::Token & 8981ad6265SDimitry Andric lexToken(const char *&First, const char *const End); 9081ad6265SDimitry Andric 9181ad6265SDimitry Andric dependency_directives_scan::Token &lexIncludeFilename(const char *&First, 9281ad6265SDimitry Andric const char *const End); 9381ad6265SDimitry Andric 94bdd1243dSDimitry Andric void skipLine(const char *&First, const char *const End); 95bdd1243dSDimitry Andric void skipDirective(StringRef Name, const char *&First, const char *const End); 96bdd1243dSDimitry Andric 97*06c3fb27SDimitry Andric /// Returns the spelling of a string literal or identifier after performing 98*06c3fb27SDimitry Andric /// any processing needed to handle \c clang::Token::NeedsCleaning. 99*06c3fb27SDimitry Andric StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok); 100*06c3fb27SDimitry Andric 10181ad6265SDimitry Andric /// Lexes next token and if it is identifier returns its string, otherwise 102bdd1243dSDimitry Andric /// it skips the current line and returns \p std::nullopt. 10381ad6265SDimitry Andric /// 10481ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 10581ad6265SDimitry Andric /// advance beyond the token. 106bdd1243dSDimitry Andric [[nodiscard]] std::optional<StringRef> 10781ad6265SDimitry Andric tryLexIdentifierOrSkipLine(const char *&First, const char *const End); 10881ad6265SDimitry Andric 10981ad6265SDimitry Andric /// Used when it is certain that next token is an identifier. 110bdd1243dSDimitry Andric [[nodiscard]] StringRef lexIdentifier(const char *&First, 11181ad6265SDimitry Andric const char *const End); 11281ad6265SDimitry Andric 11381ad6265SDimitry Andric /// Lexes next token and returns true iff it is an identifier that matches \p 11481ad6265SDimitry Andric /// Id, otherwise it skips the current line and returns false. 
11581ad6265SDimitry Andric /// 11681ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 11781ad6265SDimitry Andric /// advance beyond the token. 118bdd1243dSDimitry Andric [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, 11981ad6265SDimitry Andric const char *&First, 12081ad6265SDimitry Andric const char *const End); 12181ad6265SDimitry Andric 122*06c3fb27SDimitry Andric /// Lexes next token and returns true iff it matches the kind \p K. 123*06c3fb27SDimitry Andric /// Otherwise it skips the current line and returns false. 124*06c3fb27SDimitry Andric /// 125*06c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 126*06c3fb27SDimitry Andric /// advance beyond the token. 127*06c3fb27SDimitry Andric [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 128*06c3fb27SDimitry Andric const char *const End); 129*06c3fb27SDimitry Andric 130*06c3fb27SDimitry Andric /// Lexes next token and if it is string literal, returns its string. 131*06c3fb27SDimitry Andric /// Otherwise, it skips the current line and returns \p std::nullopt. 132*06c3fb27SDimitry Andric /// 133*06c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 134*06c3fb27SDimitry Andric /// advance beyond the token. 
135*06c3fb27SDimitry Andric [[nodiscard]] std::optional<StringRef> 136*06c3fb27SDimitry Andric tryLexStringLiteralOrSkipLine(const char *&First, const char *const End); 137*06c3fb27SDimitry Andric 138bdd1243dSDimitry Andric [[nodiscard]] bool scanImpl(const char *First, const char *const End); 139bdd1243dSDimitry Andric [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); 140bdd1243dSDimitry Andric [[nodiscard]] bool lexAt(const char *&First, const char *const End); 141bdd1243dSDimitry Andric [[nodiscard]] bool lexModule(const char *&First, const char *const End); 142bdd1243dSDimitry Andric [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, 14381ad6265SDimitry Andric const char *const End); 144bdd1243dSDimitry Andric [[nodiscard]] bool lexPragma(const char *&First, const char *const End); 145*06c3fb27SDimitry Andric [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End); 146bdd1243dSDimitry Andric [[nodiscard]] bool lexEndif(const char *&First, const char *const End); 147bdd1243dSDimitry Andric [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, 14881ad6265SDimitry Andric const char *const End); 149bdd1243dSDimitry Andric [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, 15081ad6265SDimitry Andric const char *&First, 15181ad6265SDimitry Andric const char *const End); 15281ad6265SDimitry Andric void lexPPDirectiveBody(const char *&First, const char *const End); 15381ad6265SDimitry Andric 15481ad6265SDimitry Andric DirectiveWithTokens &pushDirective(DirectiveKind Kind) { 15581ad6265SDimitry Andric Tokens.append(CurDirToks); 15681ad6265SDimitry Andric DirsWithToks.emplace_back(Kind, CurDirToks.size()); 15781ad6265SDimitry Andric CurDirToks.clear(); 15881ad6265SDimitry Andric return DirsWithToks.back(); 15981ad6265SDimitry Andric } 16081ad6265SDimitry Andric void popDirective() { 16181ad6265SDimitry Andric Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); 16281ad6265SDimitry 
Andric } 16381ad6265SDimitry Andric DirectiveKind topDirective() const { 16481ad6265SDimitry Andric return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; 16581ad6265SDimitry Andric } 16681ad6265SDimitry Andric 16781ad6265SDimitry Andric unsigned getOffsetAt(const char *CurPtr) const { 16881ad6265SDimitry Andric return CurPtr - Input.data(); 16981ad6265SDimitry Andric } 17081ad6265SDimitry Andric 17181ad6265SDimitry Andric /// Reports a diagnostic if the diagnostic engine is provided. Always returns 17281ad6265SDimitry Andric /// true at the end. 17381ad6265SDimitry Andric bool reportError(const char *CurPtr, unsigned Err); 17481ad6265SDimitry Andric 17581ad6265SDimitry Andric StringMap<char> SplitIds; 17681ad6265SDimitry Andric StringRef Input; 17781ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens; 17881ad6265SDimitry Andric DiagnosticsEngine *Diags; 17981ad6265SDimitry Andric SourceLocation InputSourceLoc; 18081ad6265SDimitry Andric 181bdd1243dSDimitry Andric const char *LastTokenPtr = nullptr; 18281ad6265SDimitry Andric /// Keeps track of the tokens for the currently lexed directive. Once a 18381ad6265SDimitry Andric /// directive is fully lexed and "committed" then the tokens get appended to 18481ad6265SDimitry Andric /// \p Tokens and \p CurDirToks is cleared for the next directive. 18581ad6265SDimitry Andric SmallVector<dependency_directives_scan::Token, 32> CurDirToks; 18681ad6265SDimitry Andric /// The directives that were lexed along with the number of tokens that each 18781ad6265SDimitry Andric /// directive contains. The tokens of all the directives are kept in \p Tokens 18881ad6265SDimitry Andric /// vector, in the same order as the directives order in \p DirsWithToks. 
18981ad6265SDimitry Andric SmallVector<DirectiveWithTokens, 64> DirsWithToks; 19081ad6265SDimitry Andric LangOptions LangOpts; 19181ad6265SDimitry Andric Lexer TheLexer; 19281ad6265SDimitry Andric }; 19381ad6265SDimitry Andric 19481ad6265SDimitry Andric } // end anonymous namespace 19581ad6265SDimitry Andric 19681ad6265SDimitry Andric bool Scanner::reportError(const char *CurPtr, unsigned Err) { 19781ad6265SDimitry Andric if (!Diags) 19881ad6265SDimitry Andric return true; 19981ad6265SDimitry Andric assert(CurPtr >= Input.data() && "invalid buffer ptr"); 20081ad6265SDimitry Andric Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); 20181ad6265SDimitry Andric return true; 20281ad6265SDimitry Andric } 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric static void skipOverSpaces(const char *&First, const char *const End) { 20581ad6265SDimitry Andric while (First != End && isHorizontalWhitespace(*First)) 20681ad6265SDimitry Andric ++First; 20781ad6265SDimitry Andric } 20881ad6265SDimitry Andric 209bdd1243dSDimitry Andric [[nodiscard]] static bool isRawStringLiteral(const char *First, 21081ad6265SDimitry Andric const char *Current) { 21181ad6265SDimitry Andric assert(First <= Current); 21281ad6265SDimitry Andric 21381ad6265SDimitry Andric // Check if we can even back up. 21481ad6265SDimitry Andric if (*Current != '"' || First == Current) 21581ad6265SDimitry Andric return false; 21681ad6265SDimitry Andric 21781ad6265SDimitry Andric // Check for an "R". 21881ad6265SDimitry Andric --Current; 21981ad6265SDimitry Andric if (*Current != 'R') 22081ad6265SDimitry Andric return false; 22181ad6265SDimitry Andric if (First == Current || !isAsciiIdentifierContinue(*--Current)) 22281ad6265SDimitry Andric return true; 22381ad6265SDimitry Andric 22481ad6265SDimitry Andric // Check for a prefix of "u", "U", or "L". 
22581ad6265SDimitry Andric if (*Current == 'u' || *Current == 'U' || *Current == 'L') 22681ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 22781ad6265SDimitry Andric 22881ad6265SDimitry Andric // Check for a prefix of "u8". 22981ad6265SDimitry Andric if (*Current != '8' || First == Current || *Current-- != 'u') 23081ad6265SDimitry Andric return false; 23181ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 23281ad6265SDimitry Andric } 23381ad6265SDimitry Andric 23481ad6265SDimitry Andric static void skipRawString(const char *&First, const char *const End) { 23581ad6265SDimitry Andric assert(First[0] == '"'); 23681ad6265SDimitry Andric assert(First[-1] == 'R'); 23781ad6265SDimitry Andric 23881ad6265SDimitry Andric const char *Last = ++First; 23981ad6265SDimitry Andric while (Last != End && *Last != '(') 24081ad6265SDimitry Andric ++Last; 24181ad6265SDimitry Andric if (Last == End) { 24281ad6265SDimitry Andric First = Last; // Hit the end... just give up. 24381ad6265SDimitry Andric return; 24481ad6265SDimitry Andric } 24581ad6265SDimitry Andric 24681ad6265SDimitry Andric StringRef Terminator(First, Last - First); 24781ad6265SDimitry Andric for (;;) { 24881ad6265SDimitry Andric // Move First to just past the next ")". 24981ad6265SDimitry Andric First = Last; 25081ad6265SDimitry Andric while (First != End && *First != ')') 25181ad6265SDimitry Andric ++First; 25281ad6265SDimitry Andric if (First == End) 25381ad6265SDimitry Andric return; 25481ad6265SDimitry Andric ++First; 25581ad6265SDimitry Andric 25681ad6265SDimitry Andric // Look ahead for the terminator sequence. 
25781ad6265SDimitry Andric Last = First; 25881ad6265SDimitry Andric while (Last != End && size_t(Last - First) < Terminator.size() && 25981ad6265SDimitry Andric Terminator[Last - First] == *Last) 26081ad6265SDimitry Andric ++Last; 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric // Check if we hit it (or the end of the file). 26381ad6265SDimitry Andric if (Last == End) { 26481ad6265SDimitry Andric First = Last; 26581ad6265SDimitry Andric return; 26681ad6265SDimitry Andric } 26781ad6265SDimitry Andric if (size_t(Last - First) < Terminator.size()) 26881ad6265SDimitry Andric continue; 26981ad6265SDimitry Andric if (*Last != '"') 27081ad6265SDimitry Andric continue; 27181ad6265SDimitry Andric First = Last + 1; 27281ad6265SDimitry Andric return; 27381ad6265SDimitry Andric } 27481ad6265SDimitry Andric } 27581ad6265SDimitry Andric 27681ad6265SDimitry Andric // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 27781ad6265SDimitry Andric static unsigned isEOL(const char *First, const char *const End) { 27881ad6265SDimitry Andric if (First == End) 27981ad6265SDimitry Andric return 0; 28081ad6265SDimitry Andric if (End - First > 1 && isVerticalWhitespace(First[0]) && 28181ad6265SDimitry Andric isVerticalWhitespace(First[1]) && First[0] != First[1]) 28281ad6265SDimitry Andric return 2; 28381ad6265SDimitry Andric return !!isVerticalWhitespace(First[0]); 28481ad6265SDimitry Andric } 28581ad6265SDimitry Andric 28681ad6265SDimitry Andric static void skipString(const char *&First, const char *const End) { 28781ad6265SDimitry Andric assert(*First == '\'' || *First == '"' || *First == '<'); 28881ad6265SDimitry Andric const char Terminator = *First == '<' ? '>' : *First; 28981ad6265SDimitry Andric for (++First; First != End && *First != Terminator; ++First) { 29081ad6265SDimitry Andric // String and character literals don't extend past the end of the line. 
29181ad6265SDimitry Andric if (isVerticalWhitespace(*First)) 29281ad6265SDimitry Andric return; 29381ad6265SDimitry Andric if (*First != '\\') 29481ad6265SDimitry Andric continue; 29581ad6265SDimitry Andric // Skip past backslash to the next character. This ensures that the 29681ad6265SDimitry Andric // character right after it is skipped as well, which matters if it's 29781ad6265SDimitry Andric // the terminator. 29881ad6265SDimitry Andric if (++First == End) 29981ad6265SDimitry Andric return; 30081ad6265SDimitry Andric if (!isWhitespace(*First)) 30181ad6265SDimitry Andric continue; 30281ad6265SDimitry Andric // Whitespace after the backslash might indicate a line continuation. 30381ad6265SDimitry Andric const char *FirstAfterBackslashPastSpace = First; 30481ad6265SDimitry Andric skipOverSpaces(FirstAfterBackslashPastSpace, End); 30581ad6265SDimitry Andric if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 30681ad6265SDimitry Andric // Advance the character pointer to the next line for the next 30781ad6265SDimitry Andric // iteration. 30881ad6265SDimitry Andric First = FirstAfterBackslashPastSpace + NLSize - 1; 30981ad6265SDimitry Andric } 31081ad6265SDimitry Andric } 31181ad6265SDimitry Andric if (First != End) 31281ad6265SDimitry Andric ++First; // Finish off the string. 
31381ad6265SDimitry Andric } 31481ad6265SDimitry Andric 31581ad6265SDimitry Andric // Returns the length of the skipped newline 31681ad6265SDimitry Andric static unsigned skipNewline(const char *&First, const char *End) { 31781ad6265SDimitry Andric if (First == End) 31881ad6265SDimitry Andric return 0; 31981ad6265SDimitry Andric assert(isVerticalWhitespace(*First)); 32081ad6265SDimitry Andric unsigned Len = isEOL(First, End); 32181ad6265SDimitry Andric assert(Len && "expected newline"); 32281ad6265SDimitry Andric First += Len; 32381ad6265SDimitry Andric return Len; 32481ad6265SDimitry Andric } 32581ad6265SDimitry Andric 32681ad6265SDimitry Andric static bool wasLineContinuation(const char *First, unsigned EOLLen) { 32781ad6265SDimitry Andric return *(First - (int)EOLLen - 1) == '\\'; 32881ad6265SDimitry Andric } 32981ad6265SDimitry Andric 33081ad6265SDimitry Andric static void skipToNewlineRaw(const char *&First, const char *const End) { 33181ad6265SDimitry Andric for (;;) { 33281ad6265SDimitry Andric if (First == End) 33381ad6265SDimitry Andric return; 33481ad6265SDimitry Andric 33581ad6265SDimitry Andric unsigned Len = isEOL(First, End); 33681ad6265SDimitry Andric if (Len) 33781ad6265SDimitry Andric return; 33881ad6265SDimitry Andric 33981ad6265SDimitry Andric do { 34081ad6265SDimitry Andric if (++First == End) 34181ad6265SDimitry Andric return; 34281ad6265SDimitry Andric Len = isEOL(First, End); 34381ad6265SDimitry Andric } while (!Len); 34481ad6265SDimitry Andric 34581ad6265SDimitry Andric if (First[-1] != '\\') 34681ad6265SDimitry Andric return; 34781ad6265SDimitry Andric 34881ad6265SDimitry Andric First += Len; 34981ad6265SDimitry Andric // Keep skipping lines... 
35081ad6265SDimitry Andric } 35181ad6265SDimitry Andric } 35281ad6265SDimitry Andric 35381ad6265SDimitry Andric static void skipLineComment(const char *&First, const char *const End) { 35481ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '/'); 35581ad6265SDimitry Andric First += 2; 35681ad6265SDimitry Andric skipToNewlineRaw(First, End); 35781ad6265SDimitry Andric } 35881ad6265SDimitry Andric 35981ad6265SDimitry Andric static void skipBlockComment(const char *&First, const char *const End) { 36081ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '*'); 36181ad6265SDimitry Andric if (End - First < 4) { 36281ad6265SDimitry Andric First = End; 36381ad6265SDimitry Andric return; 36481ad6265SDimitry Andric } 36581ad6265SDimitry Andric for (First += 3; First != End; ++First) 36681ad6265SDimitry Andric if (First[-1] == '*' && First[0] == '/') { 36781ad6265SDimitry Andric ++First; 36881ad6265SDimitry Andric return; 36981ad6265SDimitry Andric } 37081ad6265SDimitry Andric } 37181ad6265SDimitry Andric 37281ad6265SDimitry Andric /// \returns True if the current single quotation mark character is a C++ 14 37381ad6265SDimitry Andric /// digit separator. 37481ad6265SDimitry Andric static bool isQuoteCppDigitSeparator(const char *const Start, 37581ad6265SDimitry Andric const char *const Cur, 37681ad6265SDimitry Andric const char *const End) { 37781ad6265SDimitry Andric assert(*Cur == '\'' && "expected quotation character"); 37881ad6265SDimitry Andric // skipLine called in places where we don't expect a valid number 37981ad6265SDimitry Andric // body before `start` on the same line, so always return false at the start. 38081ad6265SDimitry Andric if (Start == Cur) 38181ad6265SDimitry Andric return false; 38281ad6265SDimitry Andric // The previous character must be a valid PP number character. 38381ad6265SDimitry Andric // Make sure that the L, u, U, u8 prefixes don't get marked as a 38481ad6265SDimitry Andric // separator though. 
38581ad6265SDimitry Andric char Prev = *(Cur - 1); 38681ad6265SDimitry Andric if (Prev == 'L' || Prev == 'U' || Prev == 'u') 38781ad6265SDimitry Andric return false; 38881ad6265SDimitry Andric if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 38981ad6265SDimitry Andric return false; 39081ad6265SDimitry Andric if (!isPreprocessingNumberBody(Prev)) 39181ad6265SDimitry Andric return false; 39281ad6265SDimitry Andric // The next character should be a valid identifier body character. 39381ad6265SDimitry Andric return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); 39481ad6265SDimitry Andric } 39581ad6265SDimitry Andric 396bdd1243dSDimitry Andric void Scanner::skipLine(const char *&First, const char *const End) { 39781ad6265SDimitry Andric for (;;) { 39881ad6265SDimitry Andric assert(First <= End); 39981ad6265SDimitry Andric if (First == End) 40081ad6265SDimitry Andric return; 40181ad6265SDimitry Andric 40281ad6265SDimitry Andric if (isVerticalWhitespace(*First)) { 40381ad6265SDimitry Andric skipNewline(First, End); 40481ad6265SDimitry Andric return; 40581ad6265SDimitry Andric } 40681ad6265SDimitry Andric const char *Start = First; 40781ad6265SDimitry Andric while (First != End && !isVerticalWhitespace(*First)) { 40881ad6265SDimitry Andric // Iterate over strings correctly to avoid comments and newlines. 40981ad6265SDimitry Andric if (*First == '"' || 41081ad6265SDimitry Andric (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 411bdd1243dSDimitry Andric LastTokenPtr = First; 41281ad6265SDimitry Andric if (isRawStringLiteral(Start, First)) 41381ad6265SDimitry Andric skipRawString(First, End); 41481ad6265SDimitry Andric else 41581ad6265SDimitry Andric skipString(First, End); 41681ad6265SDimitry Andric continue; 41781ad6265SDimitry Andric } 41881ad6265SDimitry Andric 41981ad6265SDimitry Andric // Iterate over comments correctly. 
42081ad6265SDimitry Andric if (*First != '/' || End - First < 2) { 421bdd1243dSDimitry Andric LastTokenPtr = First; 42281ad6265SDimitry Andric ++First; 42381ad6265SDimitry Andric continue; 42481ad6265SDimitry Andric } 42581ad6265SDimitry Andric 42681ad6265SDimitry Andric if (First[1] == '/') { 42781ad6265SDimitry Andric // "//...". 42881ad6265SDimitry Andric skipLineComment(First, End); 42981ad6265SDimitry Andric continue; 43081ad6265SDimitry Andric } 43181ad6265SDimitry Andric 43281ad6265SDimitry Andric if (First[1] != '*') { 433bdd1243dSDimitry Andric LastTokenPtr = First; 43481ad6265SDimitry Andric ++First; 43581ad6265SDimitry Andric continue; 43681ad6265SDimitry Andric } 43781ad6265SDimitry Andric 43881ad6265SDimitry Andric // "/*...*/". 43981ad6265SDimitry Andric skipBlockComment(First, End); 44081ad6265SDimitry Andric } 44181ad6265SDimitry Andric if (First == End) 44281ad6265SDimitry Andric return; 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric // Skip over the newline. 44581ad6265SDimitry Andric unsigned Len = skipNewline(First, End); 44681ad6265SDimitry Andric if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 44781ad6265SDimitry Andric break; 44881ad6265SDimitry Andric } 44981ad6265SDimitry Andric } 45081ad6265SDimitry Andric 451bdd1243dSDimitry Andric void Scanner::skipDirective(StringRef Name, const char *&First, 45281ad6265SDimitry Andric const char *const End) { 45381ad6265SDimitry Andric if (llvm::StringSwitch<bool>(Name) 45481ad6265SDimitry Andric .Case("warning", true) 45581ad6265SDimitry Andric .Case("error", true) 45681ad6265SDimitry Andric .Default(false)) 45781ad6265SDimitry Andric // Do not process quotes or comments. 
45881ad6265SDimitry Andric skipToNewlineRaw(First, End); 45981ad6265SDimitry Andric else 46081ad6265SDimitry Andric skipLine(First, End); 46181ad6265SDimitry Andric } 46281ad6265SDimitry Andric 46381ad6265SDimitry Andric static void skipWhitespace(const char *&First, const char *const End) { 46481ad6265SDimitry Andric for (;;) { 46581ad6265SDimitry Andric assert(First <= End); 46681ad6265SDimitry Andric skipOverSpaces(First, End); 46781ad6265SDimitry Andric 46881ad6265SDimitry Andric if (End - First < 2) 46981ad6265SDimitry Andric return; 47081ad6265SDimitry Andric 47181ad6265SDimitry Andric if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 47281ad6265SDimitry Andric skipNewline(++First, End); 47381ad6265SDimitry Andric continue; 47481ad6265SDimitry Andric } 47581ad6265SDimitry Andric 47681ad6265SDimitry Andric // Check for a non-comment character. 47781ad6265SDimitry Andric if (First[0] != '/') 47881ad6265SDimitry Andric return; 47981ad6265SDimitry Andric 48081ad6265SDimitry Andric // "// ...". 48181ad6265SDimitry Andric if (First[1] == '/') { 48281ad6265SDimitry Andric skipLineComment(First, End); 48381ad6265SDimitry Andric return; 48481ad6265SDimitry Andric } 48581ad6265SDimitry Andric 48681ad6265SDimitry Andric // Cannot be a comment. 48781ad6265SDimitry Andric if (First[1] != '*') 48881ad6265SDimitry Andric return; 48981ad6265SDimitry Andric 49081ad6265SDimitry Andric // "/*...*/". 
49181ad6265SDimitry Andric skipBlockComment(First, End); 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric } 49481ad6265SDimitry Andric 49581ad6265SDimitry Andric bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, 49681ad6265SDimitry Andric const char *const End) { 49781ad6265SDimitry Andric const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; 49881ad6265SDimitry Andric for (;;) { 49981ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 50081ad6265SDimitry Andric if (Tok.is(tok::eof)) 50181ad6265SDimitry Andric return reportError( 50281ad6265SDimitry Andric DirectiveLoc, 50381ad6265SDimitry Andric diag::err_dep_source_scanner_missing_semi_after_at_import); 50481ad6265SDimitry Andric if (Tok.is(tok::semi)) 50581ad6265SDimitry Andric break; 50681ad6265SDimitry Andric } 50781ad6265SDimitry Andric pushDirective(Kind); 50881ad6265SDimitry Andric skipWhitespace(First, End); 50981ad6265SDimitry Andric if (First == End) 51081ad6265SDimitry Andric return false; 51181ad6265SDimitry Andric if (!isVerticalWhitespace(*First)) 51281ad6265SDimitry Andric return reportError( 51381ad6265SDimitry Andric DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); 51481ad6265SDimitry Andric skipNewline(First, End); 51581ad6265SDimitry Andric return false; 51681ad6265SDimitry Andric } 51781ad6265SDimitry Andric 51881ad6265SDimitry Andric dependency_directives_scan::Token &Scanner::lexToken(const char *&First, 51981ad6265SDimitry Andric const char *const End) { 52081ad6265SDimitry Andric clang::Token Tok; 52181ad6265SDimitry Andric TheLexer.LexFromRawLexer(Tok); 52281ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 52381ad6265SDimitry Andric assert(First <= End); 52481ad6265SDimitry Andric 52581ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 52681ad6265SDimitry Andric CurDirToks.emplace_back(Offset, 
Tok.getLength(), Tok.getKind(), 52781ad6265SDimitry Andric Tok.getFlags()); 52881ad6265SDimitry Andric return CurDirToks.back(); 52981ad6265SDimitry Andric } 53081ad6265SDimitry Andric 53181ad6265SDimitry Andric dependency_directives_scan::Token & 53281ad6265SDimitry Andric Scanner::lexIncludeFilename(const char *&First, const char *const End) { 53381ad6265SDimitry Andric clang::Token Tok; 53481ad6265SDimitry Andric TheLexer.LexIncludeFilename(Tok); 53581ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 53681ad6265SDimitry Andric assert(First <= End); 53781ad6265SDimitry Andric 53881ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 53981ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 54081ad6265SDimitry Andric Tok.getFlags()); 54181ad6265SDimitry Andric return CurDirToks.back(); 54281ad6265SDimitry Andric } 54381ad6265SDimitry Andric 54481ad6265SDimitry Andric void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { 54581ad6265SDimitry Andric while (true) { 54681ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 54781ad6265SDimitry Andric if (Tok.is(tok::eod)) 54881ad6265SDimitry Andric break; 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric } 55181ad6265SDimitry Andric 552*06c3fb27SDimitry Andric StringRef 553*06c3fb27SDimitry Andric Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { 55481ad6265SDimitry Andric bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; 55581ad6265SDimitry Andric if (LLVM_LIKELY(!NeedsCleaning)) 55681ad6265SDimitry Andric return Input.slice(Tok.Offset, Tok.getEnd()); 55781ad6265SDimitry Andric 55881ad6265SDimitry Andric SmallString<64> Spelling; 55981ad6265SDimitry Andric Spelling.resize(Tok.Length); 56081ad6265SDimitry Andric 561*06c3fb27SDimitry Andric // FIXME: C++11 raw string literals need special handling (see 
getSpellingSlow 562*06c3fb27SDimitry Andric // in the Lexer). Currently we cannot see them due to our LangOpts. 563*06c3fb27SDimitry Andric 56481ad6265SDimitry Andric unsigned SpellingLength = 0; 56581ad6265SDimitry Andric const char *BufPtr = Input.begin() + Tok.Offset; 56681ad6265SDimitry Andric const char *AfterIdent = Input.begin() + Tok.getEnd(); 56781ad6265SDimitry Andric while (BufPtr < AfterIdent) { 56881ad6265SDimitry Andric unsigned Size; 56981ad6265SDimitry Andric Spelling[SpellingLength++] = 57081ad6265SDimitry Andric Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); 57181ad6265SDimitry Andric BufPtr += Size; 57281ad6265SDimitry Andric } 57381ad6265SDimitry Andric 57481ad6265SDimitry Andric return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) 57581ad6265SDimitry Andric .first->first(); 57681ad6265SDimitry Andric } 57781ad6265SDimitry Andric 578*06c3fb27SDimitry Andric std::optional<StringRef> 579*06c3fb27SDimitry Andric Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { 580*06c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 581*06c3fb27SDimitry Andric if (Tok.isNot(tok::raw_identifier)) { 582*06c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 583*06c3fb27SDimitry Andric skipLine(First, End); 584*06c3fb27SDimitry Andric return std::nullopt; 585*06c3fb27SDimitry Andric } 586*06c3fb27SDimitry Andric 587*06c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 588*06c3fb27SDimitry Andric } 589*06c3fb27SDimitry Andric 59081ad6265SDimitry Andric StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { 591bdd1243dSDimitry Andric std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); 59281ad6265SDimitry Andric assert(Id && "expected identifier token"); 593bdd1243dSDimitry Andric return *Id; 59481ad6265SDimitry Andric } 59581ad6265SDimitry Andric 59681ad6265SDimitry Andric bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const 
char *&First, 59781ad6265SDimitry Andric const char *const End) { 598bdd1243dSDimitry Andric if (std::optional<StringRef> FoundId = 599bdd1243dSDimitry Andric tryLexIdentifierOrSkipLine(First, End)) { 60081ad6265SDimitry Andric if (*FoundId == Id) 60181ad6265SDimitry Andric return true; 60281ad6265SDimitry Andric skipLine(First, End); 60381ad6265SDimitry Andric } 60481ad6265SDimitry Andric return false; 60581ad6265SDimitry Andric } 60681ad6265SDimitry Andric 607*06c3fb27SDimitry Andric bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 608*06c3fb27SDimitry Andric const char *const End) { 609*06c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 610*06c3fb27SDimitry Andric if (Tok.is(K)) 611*06c3fb27SDimitry Andric return true; 612*06c3fb27SDimitry Andric skipLine(First, End); 613*06c3fb27SDimitry Andric return false; 614*06c3fb27SDimitry Andric } 615*06c3fb27SDimitry Andric 616*06c3fb27SDimitry Andric std::optional<StringRef> 617*06c3fb27SDimitry Andric Scanner::tryLexStringLiteralOrSkipLine(const char *&First, 618*06c3fb27SDimitry Andric const char *const End) { 619*06c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 620*06c3fb27SDimitry Andric if (!tok::isStringLiteral(Tok.Kind)) { 621*06c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 622*06c3fb27SDimitry Andric skipLine(First, End); 623*06c3fb27SDimitry Andric return std::nullopt; 624*06c3fb27SDimitry Andric } 625*06c3fb27SDimitry Andric 626*06c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 627*06c3fb27SDimitry Andric } 628*06c3fb27SDimitry Andric 62981ad6265SDimitry Andric bool Scanner::lexAt(const char *&First, const char *const End) { 63081ad6265SDimitry Andric // Handle "@import". 63181ad6265SDimitry Andric 63281ad6265SDimitry Andric // Lex '@'. 
63381ad6265SDimitry Andric const dependency_directives_scan::Token &AtTok = lexToken(First, End); 63481ad6265SDimitry Andric assert(AtTok.is(tok::at)); 63581ad6265SDimitry Andric (void)AtTok; 63681ad6265SDimitry Andric 63781ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 63881ad6265SDimitry Andric return false; 63981ad6265SDimitry Andric return lexModuleDirectiveBody(decl_at_import, First, End); 64081ad6265SDimitry Andric } 64181ad6265SDimitry Andric 64281ad6265SDimitry Andric bool Scanner::lexModule(const char *&First, const char *const End) { 64381ad6265SDimitry Andric StringRef Id = lexIdentifier(First, End); 64481ad6265SDimitry Andric bool Export = false; 64581ad6265SDimitry Andric if (Id == "export") { 64681ad6265SDimitry Andric Export = true; 647bdd1243dSDimitry Andric std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); 64881ad6265SDimitry Andric if (!NextId) 64981ad6265SDimitry Andric return false; 65081ad6265SDimitry Andric Id = *NextId; 65181ad6265SDimitry Andric } 65281ad6265SDimitry Andric 65381ad6265SDimitry Andric if (Id != "module" && Id != "import") { 65481ad6265SDimitry Andric skipLine(First, End); 65581ad6265SDimitry Andric return false; 65681ad6265SDimitry Andric } 65781ad6265SDimitry Andric 65881ad6265SDimitry Andric skipWhitespace(First, End); 65981ad6265SDimitry Andric 66081ad6265SDimitry Andric // Ignore this as a module directive if the next character can't be part of 66181ad6265SDimitry Andric // an import. 
66281ad6265SDimitry Andric 66381ad6265SDimitry Andric switch (*First) { 66481ad6265SDimitry Andric case ':': 66581ad6265SDimitry Andric case '<': 66681ad6265SDimitry Andric case '"': 66781ad6265SDimitry Andric break; 66881ad6265SDimitry Andric default: 66981ad6265SDimitry Andric if (!isAsciiIdentifierContinue(*First)) { 67081ad6265SDimitry Andric skipLine(First, End); 67181ad6265SDimitry Andric return false; 67281ad6265SDimitry Andric } 67381ad6265SDimitry Andric } 67481ad6265SDimitry Andric 67581ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); 67681ad6265SDimitry Andric 67781ad6265SDimitry Andric DirectiveKind Kind; 67881ad6265SDimitry Andric if (Id == "module") 67981ad6265SDimitry Andric Kind = Export ? cxx_export_module_decl : cxx_module_decl; 68081ad6265SDimitry Andric else 68181ad6265SDimitry Andric Kind = Export ? cxx_export_import_decl : cxx_import_decl; 68281ad6265SDimitry Andric 68381ad6265SDimitry Andric return lexModuleDirectiveBody(Kind, First, End); 68481ad6265SDimitry Andric } 68581ad6265SDimitry Andric 686*06c3fb27SDimitry Andric bool Scanner::lex_Pragma(const char *&First, const char *const End) { 687*06c3fb27SDimitry Andric if (!isNextTokenOrSkipLine(tok::l_paren, First, End)) 688*06c3fb27SDimitry Andric return false; 689*06c3fb27SDimitry Andric 690*06c3fb27SDimitry Andric std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End); 691*06c3fb27SDimitry Andric 692*06c3fb27SDimitry Andric if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End)) 693*06c3fb27SDimitry Andric return false; 694*06c3fb27SDimitry Andric 695*06c3fb27SDimitry Andric SmallString<64> Buffer(*Str); 696*06c3fb27SDimitry Andric prepare_PragmaString(Buffer); 697*06c3fb27SDimitry Andric 698*06c3fb27SDimitry Andric // Use a new scanner instance since the tokens will be inside the allocated 699*06c3fb27SDimitry Andric // string. 
We should already have captured all the relevant tokens in the 700*06c3fb27SDimitry Andric // current scanner. 701*06c3fb27SDimitry Andric SmallVector<dependency_directives_scan::Token> DiscardTokens; 702*06c3fb27SDimitry Andric const char *Begin = Buffer.c_str(); 703*06c3fb27SDimitry Andric Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags, 704*06c3fb27SDimitry Andric InputSourceLoc}; 705*06c3fb27SDimitry Andric 706*06c3fb27SDimitry Andric PragmaScanner.TheLexer.setParsingPreprocessorDirective(true); 707*06c3fb27SDimitry Andric if (PragmaScanner.lexPragma(Begin, Buffer.end())) 708*06c3fb27SDimitry Andric return true; 709*06c3fb27SDimitry Andric 710*06c3fb27SDimitry Andric DirectiveKind K = PragmaScanner.topDirective(); 711*06c3fb27SDimitry Andric if (K == pp_none) { 712*06c3fb27SDimitry Andric skipLine(First, End); 713*06c3fb27SDimitry Andric return false; 714*06c3fb27SDimitry Andric } 715*06c3fb27SDimitry Andric 716*06c3fb27SDimitry Andric assert(Begin == Buffer.end()); 717*06c3fb27SDimitry Andric pushDirective(K); 718*06c3fb27SDimitry Andric return false; 719*06c3fb27SDimitry Andric } 720*06c3fb27SDimitry Andric 72181ad6265SDimitry Andric bool Scanner::lexPragma(const char *&First, const char *const End) { 722bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 72381ad6265SDimitry Andric if (!FoundId) 72481ad6265SDimitry Andric return false; 72581ad6265SDimitry Andric 72681ad6265SDimitry Andric StringRef Id = *FoundId; 72781ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 72881ad6265SDimitry Andric .Case("once", pp_pragma_once) 72981ad6265SDimitry Andric .Case("push_macro", pp_pragma_push_macro) 73081ad6265SDimitry Andric .Case("pop_macro", pp_pragma_pop_macro) 73181ad6265SDimitry Andric .Case("include_alias", pp_pragma_include_alias) 73281ad6265SDimitry Andric .Default(pp_none); 73381ad6265SDimitry Andric if (Kind != pp_none) { 73481ad6265SDimitry Andric 
lexPPDirectiveBody(First, End); 73581ad6265SDimitry Andric pushDirective(Kind); 73681ad6265SDimitry Andric return false; 73781ad6265SDimitry Andric } 73881ad6265SDimitry Andric 73981ad6265SDimitry Andric if (Id != "clang") { 74081ad6265SDimitry Andric skipLine(First, End); 74181ad6265SDimitry Andric return false; 74281ad6265SDimitry Andric } 74381ad6265SDimitry Andric 744*06c3fb27SDimitry Andric FoundId = tryLexIdentifierOrSkipLine(First, End); 745*06c3fb27SDimitry Andric if (!FoundId) 74681ad6265SDimitry Andric return false; 747*06c3fb27SDimitry Andric Id = *FoundId; 748*06c3fb27SDimitry Andric 749*06c3fb27SDimitry Andric // #pragma clang system_header 750*06c3fb27SDimitry Andric if (Id == "system_header") { 751*06c3fb27SDimitry Andric lexPPDirectiveBody(First, End); 752*06c3fb27SDimitry Andric pushDirective(pp_pragma_system_header); 753*06c3fb27SDimitry Andric return false; 754*06c3fb27SDimitry Andric } 755*06c3fb27SDimitry Andric 756*06c3fb27SDimitry Andric if (Id != "module") { 757*06c3fb27SDimitry Andric skipLine(First, End); 758*06c3fb27SDimitry Andric return false; 759*06c3fb27SDimitry Andric } 76081ad6265SDimitry Andric 76181ad6265SDimitry Andric // #pragma clang module. 76281ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 76381ad6265SDimitry Andric return false; 76481ad6265SDimitry Andric 76581ad6265SDimitry Andric // #pragma clang module import. 76681ad6265SDimitry Andric lexPPDirectiveBody(First, End); 76781ad6265SDimitry Andric pushDirective(pp_pragma_import); 76881ad6265SDimitry Andric return false; 76981ad6265SDimitry Andric } 77081ad6265SDimitry Andric 77181ad6265SDimitry Andric bool Scanner::lexEndif(const char *&First, const char *const End) { 77281ad6265SDimitry Andric // Strip out "#else" if it's empty. 77381ad6265SDimitry Andric if (topDirective() == pp_else) 77481ad6265SDimitry Andric popDirective(); 77581ad6265SDimitry Andric 77681ad6265SDimitry Andric // If "#ifdef" is empty, strip it and skip the "#endif". 
77781ad6265SDimitry Andric // 77881ad6265SDimitry Andric // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 77981ad6265SDimitry Andric // we can skip empty `#if` and `#elif` blocks as well after scanning for a 78081ad6265SDimitry Andric // literal __has_include in the condition. Even without that rule we could 78181ad6265SDimitry Andric // drop the tokens if we scan for identifiers in the condition and find none. 78281ad6265SDimitry Andric if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { 78381ad6265SDimitry Andric popDirective(); 78481ad6265SDimitry Andric skipLine(First, End); 78581ad6265SDimitry Andric return false; 78681ad6265SDimitry Andric } 78781ad6265SDimitry Andric 78881ad6265SDimitry Andric return lexDefault(pp_endif, First, End); 78981ad6265SDimitry Andric } 79081ad6265SDimitry Andric 79181ad6265SDimitry Andric bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, 79281ad6265SDimitry Andric const char *const End) { 79381ad6265SDimitry Andric lexPPDirectiveBody(First, End); 79481ad6265SDimitry Andric pushDirective(Kind); 79581ad6265SDimitry Andric return false; 79681ad6265SDimitry Andric } 79781ad6265SDimitry Andric 79881ad6265SDimitry Andric static bool isStartOfRelevantLine(char First) { 79981ad6265SDimitry Andric switch (First) { 80081ad6265SDimitry Andric case '#': 80181ad6265SDimitry Andric case '@': 80281ad6265SDimitry Andric case 'i': 80381ad6265SDimitry Andric case 'e': 80481ad6265SDimitry Andric case 'm': 805*06c3fb27SDimitry Andric case '_': 80681ad6265SDimitry Andric return true; 80781ad6265SDimitry Andric } 80881ad6265SDimitry Andric return false; 80981ad6265SDimitry Andric } 81081ad6265SDimitry Andric 81181ad6265SDimitry Andric bool Scanner::lexPPLine(const char *&First, const char *const End) { 81281ad6265SDimitry Andric assert(First != End); 81381ad6265SDimitry Andric 81481ad6265SDimitry Andric skipWhitespace(First, End); 81581ad6265SDimitry Andric assert(First <= End); 
81681ad6265SDimitry Andric if (First == End) 81781ad6265SDimitry Andric return false; 81881ad6265SDimitry Andric 81981ad6265SDimitry Andric if (!isStartOfRelevantLine(*First)) { 82081ad6265SDimitry Andric skipLine(First, End); 82181ad6265SDimitry Andric assert(First <= End); 82281ad6265SDimitry Andric return false; 82381ad6265SDimitry Andric } 82481ad6265SDimitry Andric 825bdd1243dSDimitry Andric LastTokenPtr = First; 826bdd1243dSDimitry Andric 82781ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); 82881ad6265SDimitry Andric 82981ad6265SDimitry Andric auto ScEx1 = make_scope_exit([&]() { 83081ad6265SDimitry Andric /// Clear Scanner's CurDirToks before returning, in case we didn't push a 83181ad6265SDimitry Andric /// new directive. 83281ad6265SDimitry Andric CurDirToks.clear(); 83381ad6265SDimitry Andric }); 83481ad6265SDimitry Andric 83581ad6265SDimitry Andric // Handle "@import". 83681ad6265SDimitry Andric if (*First == '@') 83781ad6265SDimitry Andric return lexAt(First, End); 83881ad6265SDimitry Andric 83981ad6265SDimitry Andric if (*First == 'i' || *First == 'e' || *First == 'm') 84081ad6265SDimitry Andric return lexModule(First, End); 84181ad6265SDimitry Andric 842*06c3fb27SDimitry Andric if (*First == '_') { 843*06c3fb27SDimitry Andric if (isNextIdentifierOrSkipLine("_Pragma", First, End)) 844*06c3fb27SDimitry Andric return lex_Pragma(First, End); 845*06c3fb27SDimitry Andric return false; 846*06c3fb27SDimitry Andric } 847*06c3fb27SDimitry Andric 84881ad6265SDimitry Andric // Handle preprocessing directives. 84981ad6265SDimitry Andric 85081ad6265SDimitry Andric TheLexer.setParsingPreprocessorDirective(true); 85181ad6265SDimitry Andric auto ScEx2 = make_scope_exit( 85281ad6265SDimitry Andric [&]() { TheLexer.setParsingPreprocessorDirective(false); }); 85381ad6265SDimitry Andric 85481ad6265SDimitry Andric // Lex '#'. 
85581ad6265SDimitry Andric const dependency_directives_scan::Token &HashTok = lexToken(First, End); 856bdd1243dSDimitry Andric if (HashTok.is(tok::hashhash)) { 857bdd1243dSDimitry Andric // A \p tok::hashhash at this location is passed by the preprocessor to the 858bdd1243dSDimitry Andric // parser to interpret, like any other token. So for dependency scanning 859bdd1243dSDimitry Andric // skip it like a normal token not affecting the preprocessor. 860bdd1243dSDimitry Andric skipLine(First, End); 861bdd1243dSDimitry Andric assert(First <= End); 862bdd1243dSDimitry Andric return false; 863bdd1243dSDimitry Andric } 86481ad6265SDimitry Andric assert(HashTok.is(tok::hash)); 86581ad6265SDimitry Andric (void)HashTok; 86681ad6265SDimitry Andric 867bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 86881ad6265SDimitry Andric if (!FoundId) 86981ad6265SDimitry Andric return false; 87081ad6265SDimitry Andric 87181ad6265SDimitry Andric StringRef Id = *FoundId; 87281ad6265SDimitry Andric 87381ad6265SDimitry Andric if (Id == "pragma") 87481ad6265SDimitry Andric return lexPragma(First, End); 87581ad6265SDimitry Andric 87681ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 87781ad6265SDimitry Andric .Case("include", pp_include) 87881ad6265SDimitry Andric .Case("__include_macros", pp___include_macros) 87981ad6265SDimitry Andric .Case("define", pp_define) 88081ad6265SDimitry Andric .Case("undef", pp_undef) 88181ad6265SDimitry Andric .Case("import", pp_import) 88281ad6265SDimitry Andric .Case("include_next", pp_include_next) 88381ad6265SDimitry Andric .Case("if", pp_if) 88481ad6265SDimitry Andric .Case("ifdef", pp_ifdef) 88581ad6265SDimitry Andric .Case("ifndef", pp_ifndef) 88681ad6265SDimitry Andric .Case("elif", pp_elif) 88781ad6265SDimitry Andric .Case("elifdef", pp_elifdef) 88881ad6265SDimitry Andric .Case("elifndef", pp_elifndef) 88981ad6265SDimitry Andric .Case("else", pp_else) 89081ad6265SDimitry Andric 
.Case("endif", pp_endif) 89181ad6265SDimitry Andric .Default(pp_none); 89281ad6265SDimitry Andric if (Kind == pp_none) { 89381ad6265SDimitry Andric skipDirective(Id, First, End); 89481ad6265SDimitry Andric return false; 89581ad6265SDimitry Andric } 89681ad6265SDimitry Andric 89781ad6265SDimitry Andric if (Kind == pp_endif) 89881ad6265SDimitry Andric return lexEndif(First, End); 89981ad6265SDimitry Andric 90081ad6265SDimitry Andric switch (Kind) { 90181ad6265SDimitry Andric case pp_include: 90281ad6265SDimitry Andric case pp___include_macros: 90381ad6265SDimitry Andric case pp_include_next: 90481ad6265SDimitry Andric case pp_import: 90581ad6265SDimitry Andric lexIncludeFilename(First, End); 90681ad6265SDimitry Andric break; 90781ad6265SDimitry Andric default: 90881ad6265SDimitry Andric break; 90981ad6265SDimitry Andric } 91081ad6265SDimitry Andric 91181ad6265SDimitry Andric // Everything else. 91281ad6265SDimitry Andric return lexDefault(Kind, First, End); 91381ad6265SDimitry Andric } 91481ad6265SDimitry Andric 91581ad6265SDimitry Andric static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 91681ad6265SDimitry Andric if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 91781ad6265SDimitry Andric First[2] == '\xbf') 91881ad6265SDimitry Andric First += 3; 91981ad6265SDimitry Andric } 92081ad6265SDimitry Andric 92181ad6265SDimitry Andric bool Scanner::scanImpl(const char *First, const char *const End) { 92281ad6265SDimitry Andric skipUTF8ByteOrderMark(First, End); 92381ad6265SDimitry Andric while (First != End) 92481ad6265SDimitry Andric if (lexPPLine(First, End)) 92581ad6265SDimitry Andric return true; 92681ad6265SDimitry Andric return false; 92781ad6265SDimitry Andric } 92881ad6265SDimitry Andric 92981ad6265SDimitry Andric bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { 93081ad6265SDimitry Andric bool Error = scanImpl(Input.begin(), Input.end()); 93181ad6265SDimitry Andric 93281ad6265SDimitry Andric if (!Error) { 
93381ad6265SDimitry Andric // Add an EOF on success. 934bdd1243dSDimitry Andric if (LastTokenPtr && 935bdd1243dSDimitry Andric (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) 936bdd1243dSDimitry Andric pushDirective(tokens_present_before_eof); 93781ad6265SDimitry Andric pushDirective(pp_eof); 93881ad6265SDimitry Andric } 93981ad6265SDimitry Andric 94081ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens; 94181ad6265SDimitry Andric for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { 94281ad6265SDimitry Andric assert(RemainingTokens.size() >= DirWithToks.NumTokens); 94381ad6265SDimitry Andric Directives.emplace_back(DirWithToks.Kind, 94481ad6265SDimitry Andric RemainingTokens.take_front(DirWithToks.NumTokens)); 94581ad6265SDimitry Andric RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); 94681ad6265SDimitry Andric } 94781ad6265SDimitry Andric assert(RemainingTokens.empty()); 94881ad6265SDimitry Andric 94981ad6265SDimitry Andric return Error; 95081ad6265SDimitry Andric } 95181ad6265SDimitry Andric 95281ad6265SDimitry Andric bool clang::scanSourceForDependencyDirectives( 95381ad6265SDimitry Andric StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 95481ad6265SDimitry Andric SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags, 95581ad6265SDimitry Andric SourceLocation InputSourceLoc) { 95681ad6265SDimitry Andric return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); 95781ad6265SDimitry Andric } 95881ad6265SDimitry Andric 95981ad6265SDimitry Andric void clang::printDependencyDirectivesAsSource( 96081ad6265SDimitry Andric StringRef Source, 96181ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Directive> Directives, 96281ad6265SDimitry Andric llvm::raw_ostream &OS) { 96381ad6265SDimitry Andric // Add a space separator where it is convenient for testing purposes. 
96481ad6265SDimitry Andric auto needsSpaceSeparator = 96581ad6265SDimitry Andric [](tok::TokenKind Prev, 96681ad6265SDimitry Andric const dependency_directives_scan::Token &Tok) -> bool { 96781ad6265SDimitry Andric if (Prev == Tok.Kind) 96881ad6265SDimitry Andric return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 96981ad6265SDimitry Andric tok::r_square); 97081ad6265SDimitry Andric if (Prev == tok::raw_identifier && 97181ad6265SDimitry Andric Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, 97281ad6265SDimitry Andric tok::char_constant, tok::header_name)) 97381ad6265SDimitry Andric return true; 97481ad6265SDimitry Andric if (Prev == tok::r_paren && 97581ad6265SDimitry Andric Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, 97681ad6265SDimitry Andric tok::char_constant, tok::unknown)) 97781ad6265SDimitry Andric return true; 97881ad6265SDimitry Andric if (Prev == tok::comma && 97981ad6265SDimitry Andric Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) 98081ad6265SDimitry Andric return true; 98181ad6265SDimitry Andric return false; 98281ad6265SDimitry Andric }; 98381ad6265SDimitry Andric 98481ad6265SDimitry Andric for (const dependency_directives_scan::Directive &Directive : Directives) { 985bdd1243dSDimitry Andric if (Directive.Kind == tokens_present_before_eof) 986bdd1243dSDimitry Andric OS << "<TokBeforeEOF>"; 987bdd1243dSDimitry Andric std::optional<tok::TokenKind> PrevTokenKind; 98881ad6265SDimitry Andric for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { 98981ad6265SDimitry Andric if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) 99081ad6265SDimitry Andric OS << ' '; 99181ad6265SDimitry Andric PrevTokenKind = Tok.Kind; 99281ad6265SDimitry Andric OS << Source.slice(Tok.Offset, Tok.getEnd()); 99381ad6265SDimitry Andric } 99481ad6265SDimitry Andric } 99581ad6265SDimitry Andric } 996