//===- DependencyDirectivesScanner.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the interface for scanning header and source files to get the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//

#include "clang/Lex/DependencyDirectivesScanner.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include <optional>

using namespace clang;
using namespace clang::dependency_directives_scan;
using namespace llvm;

3281ad6265SDimitry Andric namespace { 3381ad6265SDimitry Andric 3481ad6265SDimitry Andric struct DirectiveWithTokens { 3581ad6265SDimitry Andric DirectiveKind Kind; 3681ad6265SDimitry Andric unsigned NumTokens; 3781ad6265SDimitry Andric 3881ad6265SDimitry Andric DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) 3981ad6265SDimitry Andric : Kind(Kind), NumTokens(NumTokens) {} 4081ad6265SDimitry Andric }; 4181ad6265SDimitry Andric 4281ad6265SDimitry Andric /// Does an efficient "scan" of the sources to detect the presence of 4381ad6265SDimitry Andric /// preprocessor (or module import) directives and collects the raw lexed tokens 4481ad6265SDimitry Andric /// for those directives so that the \p Lexer can "replay" them when the file is 4581ad6265SDimitry Andric /// included. 4681ad6265SDimitry Andric /// 4781ad6265SDimitry Andric /// Note that the behavior of the raw lexer is affected by the language mode, 4881ad6265SDimitry Andric /// while at this point we want to do a scan and collect tokens once, 4981ad6265SDimitry Andric /// irrespective of the language mode that the file will get included in. To 5081ad6265SDimitry Andric /// compensate for that the \p Lexer, while "replaying", will adjust a token 5181ad6265SDimitry Andric /// where appropriate, when it could affect the preprocessor's state. 5281ad6265SDimitry Andric /// For example in a directive like 5381ad6265SDimitry Andric /// 5481ad6265SDimitry Andric /// \code 5581ad6265SDimitry Andric /// #if __has_cpp_attribute(clang::fallthrough) 5681ad6265SDimitry Andric /// \endcode 5781ad6265SDimitry Andric /// 5881ad6265SDimitry Andric /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 5981ad6265SDimitry Andric /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' 6081ad6265SDimitry Andric /// while in C++ mode. 
6181ad6265SDimitry Andric struct Scanner { 6281ad6265SDimitry Andric Scanner(StringRef Input, 6381ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 6481ad6265SDimitry Andric DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) 6581ad6265SDimitry Andric : Input(Input), Tokens(Tokens), Diags(Diags), 6681ad6265SDimitry Andric InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), 6781ad6265SDimitry Andric TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), 6881ad6265SDimitry Andric Input.end()) {} 6981ad6265SDimitry Andric 7081ad6265SDimitry Andric static LangOptions getLangOptsForDepScanning() { 7181ad6265SDimitry Andric LangOptions LangOpts; 7281ad6265SDimitry Andric // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 7381ad6265SDimitry Andric LangOpts.ObjC = true; 7481ad6265SDimitry Andric LangOpts.LineComment = true; 7581ad6265SDimitry Andric return LangOpts; 7681ad6265SDimitry Andric } 7781ad6265SDimitry Andric 7881ad6265SDimitry Andric /// Lex the provided source and emit the directive tokens. 7981ad6265SDimitry Andric /// 8081ad6265SDimitry Andric /// \returns True on error. 8181ad6265SDimitry Andric bool scan(SmallVectorImpl<Directive> &Directives); 8281ad6265SDimitry Andric 8381ad6265SDimitry Andric private: 8481ad6265SDimitry Andric /// Lexes next token and advances \p First and the \p Lexer. 
85*bdd1243dSDimitry Andric [[nodiscard]] dependency_directives_scan::Token & 8681ad6265SDimitry Andric lexToken(const char *&First, const char *const End); 8781ad6265SDimitry Andric 8881ad6265SDimitry Andric dependency_directives_scan::Token &lexIncludeFilename(const char *&First, 8981ad6265SDimitry Andric const char *const End); 9081ad6265SDimitry Andric 91*bdd1243dSDimitry Andric void skipLine(const char *&First, const char *const End); 92*bdd1243dSDimitry Andric void skipDirective(StringRef Name, const char *&First, const char *const End); 93*bdd1243dSDimitry Andric 9481ad6265SDimitry Andric /// Lexes next token and if it is identifier returns its string, otherwise 95*bdd1243dSDimitry Andric /// it skips the current line and returns \p std::nullopt. 9681ad6265SDimitry Andric /// 9781ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 9881ad6265SDimitry Andric /// advance beyond the token. 99*bdd1243dSDimitry Andric [[nodiscard]] std::optional<StringRef> 10081ad6265SDimitry Andric tryLexIdentifierOrSkipLine(const char *&First, const char *const End); 10181ad6265SDimitry Andric 10281ad6265SDimitry Andric /// Used when it is certain that next token is an identifier. 103*bdd1243dSDimitry Andric [[nodiscard]] StringRef lexIdentifier(const char *&First, 10481ad6265SDimitry Andric const char *const End); 10581ad6265SDimitry Andric 10681ad6265SDimitry Andric /// Lexes next token and returns true iff it is an identifier that matches \p 10781ad6265SDimitry Andric /// Id, otherwise it skips the current line and returns false. 10881ad6265SDimitry Andric /// 10981ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 11081ad6265SDimitry Andric /// advance beyond the token. 
111*bdd1243dSDimitry Andric [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, 11281ad6265SDimitry Andric const char *&First, 11381ad6265SDimitry Andric const char *const End); 11481ad6265SDimitry Andric 115*bdd1243dSDimitry Andric [[nodiscard]] bool scanImpl(const char *First, const char *const End); 116*bdd1243dSDimitry Andric [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); 117*bdd1243dSDimitry Andric [[nodiscard]] bool lexAt(const char *&First, const char *const End); 118*bdd1243dSDimitry Andric [[nodiscard]] bool lexModule(const char *&First, const char *const End); 119*bdd1243dSDimitry Andric [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, 12081ad6265SDimitry Andric const char *const End); 121*bdd1243dSDimitry Andric [[nodiscard]] bool lexPragma(const char *&First, const char *const End); 122*bdd1243dSDimitry Andric [[nodiscard]] bool lexEndif(const char *&First, const char *const End); 123*bdd1243dSDimitry Andric [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, 12481ad6265SDimitry Andric const char *const End); 125*bdd1243dSDimitry Andric [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, 12681ad6265SDimitry Andric const char *&First, 12781ad6265SDimitry Andric const char *const End); 12881ad6265SDimitry Andric void lexPPDirectiveBody(const char *&First, const char *const End); 12981ad6265SDimitry Andric 13081ad6265SDimitry Andric DirectiveWithTokens &pushDirective(DirectiveKind Kind) { 13181ad6265SDimitry Andric Tokens.append(CurDirToks); 13281ad6265SDimitry Andric DirsWithToks.emplace_back(Kind, CurDirToks.size()); 13381ad6265SDimitry Andric CurDirToks.clear(); 13481ad6265SDimitry Andric return DirsWithToks.back(); 13581ad6265SDimitry Andric } 13681ad6265SDimitry Andric void popDirective() { 13781ad6265SDimitry Andric Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); 13881ad6265SDimitry Andric } 13981ad6265SDimitry Andric DirectiveKind topDirective() const { 
14081ad6265SDimitry Andric return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; 14181ad6265SDimitry Andric } 14281ad6265SDimitry Andric 14381ad6265SDimitry Andric unsigned getOffsetAt(const char *CurPtr) const { 14481ad6265SDimitry Andric return CurPtr - Input.data(); 14581ad6265SDimitry Andric } 14681ad6265SDimitry Andric 14781ad6265SDimitry Andric /// Reports a diagnostic if the diagnostic engine is provided. Always returns 14881ad6265SDimitry Andric /// true at the end. 14981ad6265SDimitry Andric bool reportError(const char *CurPtr, unsigned Err); 15081ad6265SDimitry Andric 15181ad6265SDimitry Andric StringMap<char> SplitIds; 15281ad6265SDimitry Andric StringRef Input; 15381ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens; 15481ad6265SDimitry Andric DiagnosticsEngine *Diags; 15581ad6265SDimitry Andric SourceLocation InputSourceLoc; 15681ad6265SDimitry Andric 157*bdd1243dSDimitry Andric const char *LastTokenPtr = nullptr; 15881ad6265SDimitry Andric /// Keeps track of the tokens for the currently lexed directive. Once a 15981ad6265SDimitry Andric /// directive is fully lexed and "committed" then the tokens get appended to 16081ad6265SDimitry Andric /// \p Tokens and \p CurDirToks is cleared for the next directive. 16181ad6265SDimitry Andric SmallVector<dependency_directives_scan::Token, 32> CurDirToks; 16281ad6265SDimitry Andric /// The directives that were lexed along with the number of tokens that each 16381ad6265SDimitry Andric /// directive contains. The tokens of all the directives are kept in \p Tokens 16481ad6265SDimitry Andric /// vector, in the same order as the directives order in \p DirsWithToks. 
16581ad6265SDimitry Andric SmallVector<DirectiveWithTokens, 64> DirsWithToks; 16681ad6265SDimitry Andric LangOptions LangOpts; 16781ad6265SDimitry Andric Lexer TheLexer; 16881ad6265SDimitry Andric }; 16981ad6265SDimitry Andric 17081ad6265SDimitry Andric } // end anonymous namespace 17181ad6265SDimitry Andric 17281ad6265SDimitry Andric bool Scanner::reportError(const char *CurPtr, unsigned Err) { 17381ad6265SDimitry Andric if (!Diags) 17481ad6265SDimitry Andric return true; 17581ad6265SDimitry Andric assert(CurPtr >= Input.data() && "invalid buffer ptr"); 17681ad6265SDimitry Andric Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); 17781ad6265SDimitry Andric return true; 17881ad6265SDimitry Andric } 17981ad6265SDimitry Andric 18081ad6265SDimitry Andric static void skipOverSpaces(const char *&First, const char *const End) { 18181ad6265SDimitry Andric while (First != End && isHorizontalWhitespace(*First)) 18281ad6265SDimitry Andric ++First; 18381ad6265SDimitry Andric } 18481ad6265SDimitry Andric 185*bdd1243dSDimitry Andric [[nodiscard]] static bool isRawStringLiteral(const char *First, 18681ad6265SDimitry Andric const char *Current) { 18781ad6265SDimitry Andric assert(First <= Current); 18881ad6265SDimitry Andric 18981ad6265SDimitry Andric // Check if we can even back up. 19081ad6265SDimitry Andric if (*Current != '"' || First == Current) 19181ad6265SDimitry Andric return false; 19281ad6265SDimitry Andric 19381ad6265SDimitry Andric // Check for an "R". 19481ad6265SDimitry Andric --Current; 19581ad6265SDimitry Andric if (*Current != 'R') 19681ad6265SDimitry Andric return false; 19781ad6265SDimitry Andric if (First == Current || !isAsciiIdentifierContinue(*--Current)) 19881ad6265SDimitry Andric return true; 19981ad6265SDimitry Andric 20081ad6265SDimitry Andric // Check for a prefix of "u", "U", or "L". 
20181ad6265SDimitry Andric if (*Current == 'u' || *Current == 'U' || *Current == 'L') 20281ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric // Check for a prefix of "u8". 20581ad6265SDimitry Andric if (*Current != '8' || First == Current || *Current-- != 'u') 20681ad6265SDimitry Andric return false; 20781ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 20881ad6265SDimitry Andric } 20981ad6265SDimitry Andric 21081ad6265SDimitry Andric static void skipRawString(const char *&First, const char *const End) { 21181ad6265SDimitry Andric assert(First[0] == '"'); 21281ad6265SDimitry Andric assert(First[-1] == 'R'); 21381ad6265SDimitry Andric 21481ad6265SDimitry Andric const char *Last = ++First; 21581ad6265SDimitry Andric while (Last != End && *Last != '(') 21681ad6265SDimitry Andric ++Last; 21781ad6265SDimitry Andric if (Last == End) { 21881ad6265SDimitry Andric First = Last; // Hit the end... just give up. 21981ad6265SDimitry Andric return; 22081ad6265SDimitry Andric } 22181ad6265SDimitry Andric 22281ad6265SDimitry Andric StringRef Terminator(First, Last - First); 22381ad6265SDimitry Andric for (;;) { 22481ad6265SDimitry Andric // Move First to just past the next ")". 22581ad6265SDimitry Andric First = Last; 22681ad6265SDimitry Andric while (First != End && *First != ')') 22781ad6265SDimitry Andric ++First; 22881ad6265SDimitry Andric if (First == End) 22981ad6265SDimitry Andric return; 23081ad6265SDimitry Andric ++First; 23181ad6265SDimitry Andric 23281ad6265SDimitry Andric // Look ahead for the terminator sequence. 
23381ad6265SDimitry Andric Last = First; 23481ad6265SDimitry Andric while (Last != End && size_t(Last - First) < Terminator.size() && 23581ad6265SDimitry Andric Terminator[Last - First] == *Last) 23681ad6265SDimitry Andric ++Last; 23781ad6265SDimitry Andric 23881ad6265SDimitry Andric // Check if we hit it (or the end of the file). 23981ad6265SDimitry Andric if (Last == End) { 24081ad6265SDimitry Andric First = Last; 24181ad6265SDimitry Andric return; 24281ad6265SDimitry Andric } 24381ad6265SDimitry Andric if (size_t(Last - First) < Terminator.size()) 24481ad6265SDimitry Andric continue; 24581ad6265SDimitry Andric if (*Last != '"') 24681ad6265SDimitry Andric continue; 24781ad6265SDimitry Andric First = Last + 1; 24881ad6265SDimitry Andric return; 24981ad6265SDimitry Andric } 25081ad6265SDimitry Andric } 25181ad6265SDimitry Andric 25281ad6265SDimitry Andric // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 25381ad6265SDimitry Andric static unsigned isEOL(const char *First, const char *const End) { 25481ad6265SDimitry Andric if (First == End) 25581ad6265SDimitry Andric return 0; 25681ad6265SDimitry Andric if (End - First > 1 && isVerticalWhitespace(First[0]) && 25781ad6265SDimitry Andric isVerticalWhitespace(First[1]) && First[0] != First[1]) 25881ad6265SDimitry Andric return 2; 25981ad6265SDimitry Andric return !!isVerticalWhitespace(First[0]); 26081ad6265SDimitry Andric } 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric static void skipString(const char *&First, const char *const End) { 26381ad6265SDimitry Andric assert(*First == '\'' || *First == '"' || *First == '<'); 26481ad6265SDimitry Andric const char Terminator = *First == '<' ? '>' : *First; 26581ad6265SDimitry Andric for (++First; First != End && *First != Terminator; ++First) { 26681ad6265SDimitry Andric // String and character literals don't extend past the end of the line. 
26781ad6265SDimitry Andric if (isVerticalWhitespace(*First)) 26881ad6265SDimitry Andric return; 26981ad6265SDimitry Andric if (*First != '\\') 27081ad6265SDimitry Andric continue; 27181ad6265SDimitry Andric // Skip past backslash to the next character. This ensures that the 27281ad6265SDimitry Andric // character right after it is skipped as well, which matters if it's 27381ad6265SDimitry Andric // the terminator. 27481ad6265SDimitry Andric if (++First == End) 27581ad6265SDimitry Andric return; 27681ad6265SDimitry Andric if (!isWhitespace(*First)) 27781ad6265SDimitry Andric continue; 27881ad6265SDimitry Andric // Whitespace after the backslash might indicate a line continuation. 27981ad6265SDimitry Andric const char *FirstAfterBackslashPastSpace = First; 28081ad6265SDimitry Andric skipOverSpaces(FirstAfterBackslashPastSpace, End); 28181ad6265SDimitry Andric if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 28281ad6265SDimitry Andric // Advance the character pointer to the next line for the next 28381ad6265SDimitry Andric // iteration. 28481ad6265SDimitry Andric First = FirstAfterBackslashPastSpace + NLSize - 1; 28581ad6265SDimitry Andric } 28681ad6265SDimitry Andric } 28781ad6265SDimitry Andric if (First != End) 28881ad6265SDimitry Andric ++First; // Finish off the string. 
28981ad6265SDimitry Andric } 29081ad6265SDimitry Andric 29181ad6265SDimitry Andric // Returns the length of the skipped newline 29281ad6265SDimitry Andric static unsigned skipNewline(const char *&First, const char *End) { 29381ad6265SDimitry Andric if (First == End) 29481ad6265SDimitry Andric return 0; 29581ad6265SDimitry Andric assert(isVerticalWhitespace(*First)); 29681ad6265SDimitry Andric unsigned Len = isEOL(First, End); 29781ad6265SDimitry Andric assert(Len && "expected newline"); 29881ad6265SDimitry Andric First += Len; 29981ad6265SDimitry Andric return Len; 30081ad6265SDimitry Andric } 30181ad6265SDimitry Andric 30281ad6265SDimitry Andric static bool wasLineContinuation(const char *First, unsigned EOLLen) { 30381ad6265SDimitry Andric return *(First - (int)EOLLen - 1) == '\\'; 30481ad6265SDimitry Andric } 30581ad6265SDimitry Andric 30681ad6265SDimitry Andric static void skipToNewlineRaw(const char *&First, const char *const End) { 30781ad6265SDimitry Andric for (;;) { 30881ad6265SDimitry Andric if (First == End) 30981ad6265SDimitry Andric return; 31081ad6265SDimitry Andric 31181ad6265SDimitry Andric unsigned Len = isEOL(First, End); 31281ad6265SDimitry Andric if (Len) 31381ad6265SDimitry Andric return; 31481ad6265SDimitry Andric 31581ad6265SDimitry Andric do { 31681ad6265SDimitry Andric if (++First == End) 31781ad6265SDimitry Andric return; 31881ad6265SDimitry Andric Len = isEOL(First, End); 31981ad6265SDimitry Andric } while (!Len); 32081ad6265SDimitry Andric 32181ad6265SDimitry Andric if (First[-1] != '\\') 32281ad6265SDimitry Andric return; 32381ad6265SDimitry Andric 32481ad6265SDimitry Andric First += Len; 32581ad6265SDimitry Andric // Keep skipping lines... 
32681ad6265SDimitry Andric } 32781ad6265SDimitry Andric } 32881ad6265SDimitry Andric 32981ad6265SDimitry Andric static void skipLineComment(const char *&First, const char *const End) { 33081ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '/'); 33181ad6265SDimitry Andric First += 2; 33281ad6265SDimitry Andric skipToNewlineRaw(First, End); 33381ad6265SDimitry Andric } 33481ad6265SDimitry Andric 33581ad6265SDimitry Andric static void skipBlockComment(const char *&First, const char *const End) { 33681ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '*'); 33781ad6265SDimitry Andric if (End - First < 4) { 33881ad6265SDimitry Andric First = End; 33981ad6265SDimitry Andric return; 34081ad6265SDimitry Andric } 34181ad6265SDimitry Andric for (First += 3; First != End; ++First) 34281ad6265SDimitry Andric if (First[-1] == '*' && First[0] == '/') { 34381ad6265SDimitry Andric ++First; 34481ad6265SDimitry Andric return; 34581ad6265SDimitry Andric } 34681ad6265SDimitry Andric } 34781ad6265SDimitry Andric 34881ad6265SDimitry Andric /// \returns True if the current single quotation mark character is a C++ 14 34981ad6265SDimitry Andric /// digit separator. 35081ad6265SDimitry Andric static bool isQuoteCppDigitSeparator(const char *const Start, 35181ad6265SDimitry Andric const char *const Cur, 35281ad6265SDimitry Andric const char *const End) { 35381ad6265SDimitry Andric assert(*Cur == '\'' && "expected quotation character"); 35481ad6265SDimitry Andric // skipLine called in places where we don't expect a valid number 35581ad6265SDimitry Andric // body before `start` on the same line, so always return false at the start. 35681ad6265SDimitry Andric if (Start == Cur) 35781ad6265SDimitry Andric return false; 35881ad6265SDimitry Andric // The previous character must be a valid PP number character. 35981ad6265SDimitry Andric // Make sure that the L, u, U, u8 prefixes don't get marked as a 36081ad6265SDimitry Andric // separator though. 
36181ad6265SDimitry Andric char Prev = *(Cur - 1); 36281ad6265SDimitry Andric if (Prev == 'L' || Prev == 'U' || Prev == 'u') 36381ad6265SDimitry Andric return false; 36481ad6265SDimitry Andric if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 36581ad6265SDimitry Andric return false; 36681ad6265SDimitry Andric if (!isPreprocessingNumberBody(Prev)) 36781ad6265SDimitry Andric return false; 36881ad6265SDimitry Andric // The next character should be a valid identifier body character. 36981ad6265SDimitry Andric return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); 37081ad6265SDimitry Andric } 37181ad6265SDimitry Andric 372*bdd1243dSDimitry Andric void Scanner::skipLine(const char *&First, const char *const End) { 37381ad6265SDimitry Andric for (;;) { 37481ad6265SDimitry Andric assert(First <= End); 37581ad6265SDimitry Andric if (First == End) 37681ad6265SDimitry Andric return; 37781ad6265SDimitry Andric 37881ad6265SDimitry Andric if (isVerticalWhitespace(*First)) { 37981ad6265SDimitry Andric skipNewline(First, End); 38081ad6265SDimitry Andric return; 38181ad6265SDimitry Andric } 38281ad6265SDimitry Andric const char *Start = First; 38381ad6265SDimitry Andric while (First != End && !isVerticalWhitespace(*First)) { 38481ad6265SDimitry Andric // Iterate over strings correctly to avoid comments and newlines. 38581ad6265SDimitry Andric if (*First == '"' || 38681ad6265SDimitry Andric (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 387*bdd1243dSDimitry Andric LastTokenPtr = First; 38881ad6265SDimitry Andric if (isRawStringLiteral(Start, First)) 38981ad6265SDimitry Andric skipRawString(First, End); 39081ad6265SDimitry Andric else 39181ad6265SDimitry Andric skipString(First, End); 39281ad6265SDimitry Andric continue; 39381ad6265SDimitry Andric } 39481ad6265SDimitry Andric 39581ad6265SDimitry Andric // Iterate over comments correctly. 
39681ad6265SDimitry Andric if (*First != '/' || End - First < 2) { 397*bdd1243dSDimitry Andric LastTokenPtr = First; 39881ad6265SDimitry Andric ++First; 39981ad6265SDimitry Andric continue; 40081ad6265SDimitry Andric } 40181ad6265SDimitry Andric 40281ad6265SDimitry Andric if (First[1] == '/') { 40381ad6265SDimitry Andric // "//...". 40481ad6265SDimitry Andric skipLineComment(First, End); 40581ad6265SDimitry Andric continue; 40681ad6265SDimitry Andric } 40781ad6265SDimitry Andric 40881ad6265SDimitry Andric if (First[1] != '*') { 409*bdd1243dSDimitry Andric LastTokenPtr = First; 41081ad6265SDimitry Andric ++First; 41181ad6265SDimitry Andric continue; 41281ad6265SDimitry Andric } 41381ad6265SDimitry Andric 41481ad6265SDimitry Andric // "/*...*/". 41581ad6265SDimitry Andric skipBlockComment(First, End); 41681ad6265SDimitry Andric } 41781ad6265SDimitry Andric if (First == End) 41881ad6265SDimitry Andric return; 41981ad6265SDimitry Andric 42081ad6265SDimitry Andric // Skip over the newline. 42181ad6265SDimitry Andric unsigned Len = skipNewline(First, End); 42281ad6265SDimitry Andric if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 42381ad6265SDimitry Andric break; 42481ad6265SDimitry Andric } 42581ad6265SDimitry Andric } 42681ad6265SDimitry Andric 427*bdd1243dSDimitry Andric void Scanner::skipDirective(StringRef Name, const char *&First, 42881ad6265SDimitry Andric const char *const End) { 42981ad6265SDimitry Andric if (llvm::StringSwitch<bool>(Name) 43081ad6265SDimitry Andric .Case("warning", true) 43181ad6265SDimitry Andric .Case("error", true) 43281ad6265SDimitry Andric .Default(false)) 43381ad6265SDimitry Andric // Do not process quotes or comments. 
43481ad6265SDimitry Andric skipToNewlineRaw(First, End); 43581ad6265SDimitry Andric else 43681ad6265SDimitry Andric skipLine(First, End); 43781ad6265SDimitry Andric } 43881ad6265SDimitry Andric 43981ad6265SDimitry Andric static void skipWhitespace(const char *&First, const char *const End) { 44081ad6265SDimitry Andric for (;;) { 44181ad6265SDimitry Andric assert(First <= End); 44281ad6265SDimitry Andric skipOverSpaces(First, End); 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric if (End - First < 2) 44581ad6265SDimitry Andric return; 44681ad6265SDimitry Andric 44781ad6265SDimitry Andric if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 44881ad6265SDimitry Andric skipNewline(++First, End); 44981ad6265SDimitry Andric continue; 45081ad6265SDimitry Andric } 45181ad6265SDimitry Andric 45281ad6265SDimitry Andric // Check for a non-comment character. 45381ad6265SDimitry Andric if (First[0] != '/') 45481ad6265SDimitry Andric return; 45581ad6265SDimitry Andric 45681ad6265SDimitry Andric // "// ...". 45781ad6265SDimitry Andric if (First[1] == '/') { 45881ad6265SDimitry Andric skipLineComment(First, End); 45981ad6265SDimitry Andric return; 46081ad6265SDimitry Andric } 46181ad6265SDimitry Andric 46281ad6265SDimitry Andric // Cannot be a comment. 46381ad6265SDimitry Andric if (First[1] != '*') 46481ad6265SDimitry Andric return; 46581ad6265SDimitry Andric 46681ad6265SDimitry Andric // "/*...*/". 
46781ad6265SDimitry Andric skipBlockComment(First, End); 46881ad6265SDimitry Andric } 46981ad6265SDimitry Andric } 47081ad6265SDimitry Andric 47181ad6265SDimitry Andric bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, 47281ad6265SDimitry Andric const char *const End) { 47381ad6265SDimitry Andric const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; 47481ad6265SDimitry Andric for (;;) { 47581ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 47681ad6265SDimitry Andric if (Tok.is(tok::eof)) 47781ad6265SDimitry Andric return reportError( 47881ad6265SDimitry Andric DirectiveLoc, 47981ad6265SDimitry Andric diag::err_dep_source_scanner_missing_semi_after_at_import); 48081ad6265SDimitry Andric if (Tok.is(tok::semi)) 48181ad6265SDimitry Andric break; 48281ad6265SDimitry Andric } 48381ad6265SDimitry Andric pushDirective(Kind); 48481ad6265SDimitry Andric skipWhitespace(First, End); 48581ad6265SDimitry Andric if (First == End) 48681ad6265SDimitry Andric return false; 48781ad6265SDimitry Andric if (!isVerticalWhitespace(*First)) 48881ad6265SDimitry Andric return reportError( 48981ad6265SDimitry Andric DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); 49081ad6265SDimitry Andric skipNewline(First, End); 49181ad6265SDimitry Andric return false; 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric 49481ad6265SDimitry Andric dependency_directives_scan::Token &Scanner::lexToken(const char *&First, 49581ad6265SDimitry Andric const char *const End) { 49681ad6265SDimitry Andric clang::Token Tok; 49781ad6265SDimitry Andric TheLexer.LexFromRawLexer(Tok); 49881ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 49981ad6265SDimitry Andric assert(First <= End); 50081ad6265SDimitry Andric 50181ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 50281ad6265SDimitry Andric CurDirToks.emplace_back(Offset, 
Tok.getLength(), Tok.getKind(), 50381ad6265SDimitry Andric Tok.getFlags()); 50481ad6265SDimitry Andric return CurDirToks.back(); 50581ad6265SDimitry Andric } 50681ad6265SDimitry Andric 50781ad6265SDimitry Andric dependency_directives_scan::Token & 50881ad6265SDimitry Andric Scanner::lexIncludeFilename(const char *&First, const char *const End) { 50981ad6265SDimitry Andric clang::Token Tok; 51081ad6265SDimitry Andric TheLexer.LexIncludeFilename(Tok); 51181ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 51281ad6265SDimitry Andric assert(First <= End); 51381ad6265SDimitry Andric 51481ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 51581ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 51681ad6265SDimitry Andric Tok.getFlags()); 51781ad6265SDimitry Andric return CurDirToks.back(); 51881ad6265SDimitry Andric } 51981ad6265SDimitry Andric 52081ad6265SDimitry Andric void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { 52181ad6265SDimitry Andric while (true) { 52281ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 52381ad6265SDimitry Andric if (Tok.is(tok::eod)) 52481ad6265SDimitry Andric break; 52581ad6265SDimitry Andric } 52681ad6265SDimitry Andric } 52781ad6265SDimitry Andric 528*bdd1243dSDimitry Andric [[nodiscard]] std::optional<StringRef> 52981ad6265SDimitry Andric Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { 53081ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 53181ad6265SDimitry Andric if (Tok.isNot(tok::raw_identifier)) { 53281ad6265SDimitry Andric if (!Tok.is(tok::eod)) 53381ad6265SDimitry Andric skipLine(First, End); 534*bdd1243dSDimitry Andric return std::nullopt; 53581ad6265SDimitry Andric } 53681ad6265SDimitry Andric 53781ad6265SDimitry Andric bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; 
53881ad6265SDimitry Andric if (LLVM_LIKELY(!NeedsCleaning)) 53981ad6265SDimitry Andric return Input.slice(Tok.Offset, Tok.getEnd()); 54081ad6265SDimitry Andric 54181ad6265SDimitry Andric SmallString<64> Spelling; 54281ad6265SDimitry Andric Spelling.resize(Tok.Length); 54381ad6265SDimitry Andric 54481ad6265SDimitry Andric unsigned SpellingLength = 0; 54581ad6265SDimitry Andric const char *BufPtr = Input.begin() + Tok.Offset; 54681ad6265SDimitry Andric const char *AfterIdent = Input.begin() + Tok.getEnd(); 54781ad6265SDimitry Andric while (BufPtr < AfterIdent) { 54881ad6265SDimitry Andric unsigned Size; 54981ad6265SDimitry Andric Spelling[SpellingLength++] = 55081ad6265SDimitry Andric Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts); 55181ad6265SDimitry Andric BufPtr += Size; 55281ad6265SDimitry Andric } 55381ad6265SDimitry Andric 55481ad6265SDimitry Andric return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) 55581ad6265SDimitry Andric .first->first(); 55681ad6265SDimitry Andric } 55781ad6265SDimitry Andric 55881ad6265SDimitry Andric StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { 559*bdd1243dSDimitry Andric std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); 56081ad6265SDimitry Andric assert(Id && "expected identifier token"); 561*bdd1243dSDimitry Andric return *Id; 56281ad6265SDimitry Andric } 56381ad6265SDimitry Andric 56481ad6265SDimitry Andric bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, 56581ad6265SDimitry Andric const char *const End) { 566*bdd1243dSDimitry Andric if (std::optional<StringRef> FoundId = 567*bdd1243dSDimitry Andric tryLexIdentifierOrSkipLine(First, End)) { 56881ad6265SDimitry Andric if (*FoundId == Id) 56981ad6265SDimitry Andric return true; 57081ad6265SDimitry Andric skipLine(First, End); 57181ad6265SDimitry Andric } 57281ad6265SDimitry Andric return false; 57381ad6265SDimitry Andric } 57481ad6265SDimitry Andric 57581ad6265SDimitry Andric 
bool Scanner::lexAt(const char *&First, const char *const End) { 57681ad6265SDimitry Andric // Handle "@import". 57781ad6265SDimitry Andric 57881ad6265SDimitry Andric // Lex '@'. 57981ad6265SDimitry Andric const dependency_directives_scan::Token &AtTok = lexToken(First, End); 58081ad6265SDimitry Andric assert(AtTok.is(tok::at)); 58181ad6265SDimitry Andric (void)AtTok; 58281ad6265SDimitry Andric 58381ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 58481ad6265SDimitry Andric return false; 58581ad6265SDimitry Andric return lexModuleDirectiveBody(decl_at_import, First, End); 58681ad6265SDimitry Andric } 58781ad6265SDimitry Andric 58881ad6265SDimitry Andric bool Scanner::lexModule(const char *&First, const char *const End) { 58981ad6265SDimitry Andric StringRef Id = lexIdentifier(First, End); 59081ad6265SDimitry Andric bool Export = false; 59181ad6265SDimitry Andric if (Id == "export") { 59281ad6265SDimitry Andric Export = true; 593*bdd1243dSDimitry Andric std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); 59481ad6265SDimitry Andric if (!NextId) 59581ad6265SDimitry Andric return false; 59681ad6265SDimitry Andric Id = *NextId; 59781ad6265SDimitry Andric } 59881ad6265SDimitry Andric 59981ad6265SDimitry Andric if (Id != "module" && Id != "import") { 60081ad6265SDimitry Andric skipLine(First, End); 60181ad6265SDimitry Andric return false; 60281ad6265SDimitry Andric } 60381ad6265SDimitry Andric 60481ad6265SDimitry Andric skipWhitespace(First, End); 60581ad6265SDimitry Andric 60681ad6265SDimitry Andric // Ignore this as a module directive if the next character can't be part of 60781ad6265SDimitry Andric // an import. 
60881ad6265SDimitry Andric 60981ad6265SDimitry Andric switch (*First) { 61081ad6265SDimitry Andric case ':': 61181ad6265SDimitry Andric case '<': 61281ad6265SDimitry Andric case '"': 61381ad6265SDimitry Andric break; 61481ad6265SDimitry Andric default: 61581ad6265SDimitry Andric if (!isAsciiIdentifierContinue(*First)) { 61681ad6265SDimitry Andric skipLine(First, End); 61781ad6265SDimitry Andric return false; 61881ad6265SDimitry Andric } 61981ad6265SDimitry Andric } 62081ad6265SDimitry Andric 62181ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); 62281ad6265SDimitry Andric 62381ad6265SDimitry Andric DirectiveKind Kind; 62481ad6265SDimitry Andric if (Id == "module") 62581ad6265SDimitry Andric Kind = Export ? cxx_export_module_decl : cxx_module_decl; 62681ad6265SDimitry Andric else 62781ad6265SDimitry Andric Kind = Export ? cxx_export_import_decl : cxx_import_decl; 62881ad6265SDimitry Andric 62981ad6265SDimitry Andric return lexModuleDirectiveBody(Kind, First, End); 63081ad6265SDimitry Andric } 63181ad6265SDimitry Andric 63281ad6265SDimitry Andric bool Scanner::lexPragma(const char *&First, const char *const End) { 633*bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 63481ad6265SDimitry Andric if (!FoundId) 63581ad6265SDimitry Andric return false; 63681ad6265SDimitry Andric 63781ad6265SDimitry Andric StringRef Id = *FoundId; 63881ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 63981ad6265SDimitry Andric .Case("once", pp_pragma_once) 64081ad6265SDimitry Andric .Case("push_macro", pp_pragma_push_macro) 64181ad6265SDimitry Andric .Case("pop_macro", pp_pragma_pop_macro) 64281ad6265SDimitry Andric .Case("include_alias", pp_pragma_include_alias) 64381ad6265SDimitry Andric .Default(pp_none); 64481ad6265SDimitry Andric if (Kind != pp_none) { 64581ad6265SDimitry Andric lexPPDirectiveBody(First, End); 64681ad6265SDimitry Andric pushDirective(Kind); 64781ad6265SDimitry 
Andric return false; 64881ad6265SDimitry Andric } 64981ad6265SDimitry Andric 65081ad6265SDimitry Andric if (Id != "clang") { 65181ad6265SDimitry Andric skipLine(First, End); 65281ad6265SDimitry Andric return false; 65381ad6265SDimitry Andric } 65481ad6265SDimitry Andric 65581ad6265SDimitry Andric // #pragma clang. 65681ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("module", First, End)) 65781ad6265SDimitry Andric return false; 65881ad6265SDimitry Andric 65981ad6265SDimitry Andric // #pragma clang module. 66081ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 66181ad6265SDimitry Andric return false; 66281ad6265SDimitry Andric 66381ad6265SDimitry Andric // #pragma clang module import. 66481ad6265SDimitry Andric lexPPDirectiveBody(First, End); 66581ad6265SDimitry Andric pushDirective(pp_pragma_import); 66681ad6265SDimitry Andric return false; 66781ad6265SDimitry Andric } 66881ad6265SDimitry Andric 66981ad6265SDimitry Andric bool Scanner::lexEndif(const char *&First, const char *const End) { 67081ad6265SDimitry Andric // Strip out "#else" if it's empty. 67181ad6265SDimitry Andric if (topDirective() == pp_else) 67281ad6265SDimitry Andric popDirective(); 67381ad6265SDimitry Andric 67481ad6265SDimitry Andric // If "#ifdef" is empty, strip it and skip the "#endif". 67581ad6265SDimitry Andric // 67681ad6265SDimitry Andric // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 67781ad6265SDimitry Andric // we can skip empty `#if` and `#elif` blocks as well after scanning for a 67881ad6265SDimitry Andric // literal __has_include in the condition. Even without that rule we could 67981ad6265SDimitry Andric // drop the tokens if we scan for identifiers in the condition and find none. 
68081ad6265SDimitry Andric if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { 68181ad6265SDimitry Andric popDirective(); 68281ad6265SDimitry Andric skipLine(First, End); 68381ad6265SDimitry Andric return false; 68481ad6265SDimitry Andric } 68581ad6265SDimitry Andric 68681ad6265SDimitry Andric return lexDefault(pp_endif, First, End); 68781ad6265SDimitry Andric } 68881ad6265SDimitry Andric 68981ad6265SDimitry Andric bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, 69081ad6265SDimitry Andric const char *const End) { 69181ad6265SDimitry Andric lexPPDirectiveBody(First, End); 69281ad6265SDimitry Andric pushDirective(Kind); 69381ad6265SDimitry Andric return false; 69481ad6265SDimitry Andric } 69581ad6265SDimitry Andric 69681ad6265SDimitry Andric static bool isStartOfRelevantLine(char First) { 69781ad6265SDimitry Andric switch (First) { 69881ad6265SDimitry Andric case '#': 69981ad6265SDimitry Andric case '@': 70081ad6265SDimitry Andric case 'i': 70181ad6265SDimitry Andric case 'e': 70281ad6265SDimitry Andric case 'm': 70381ad6265SDimitry Andric return true; 70481ad6265SDimitry Andric } 70581ad6265SDimitry Andric return false; 70681ad6265SDimitry Andric } 70781ad6265SDimitry Andric 70881ad6265SDimitry Andric bool Scanner::lexPPLine(const char *&First, const char *const End) { 70981ad6265SDimitry Andric assert(First != End); 71081ad6265SDimitry Andric 71181ad6265SDimitry Andric skipWhitespace(First, End); 71281ad6265SDimitry Andric assert(First <= End); 71381ad6265SDimitry Andric if (First == End) 71481ad6265SDimitry Andric return false; 71581ad6265SDimitry Andric 71681ad6265SDimitry Andric if (!isStartOfRelevantLine(*First)) { 71781ad6265SDimitry Andric skipLine(First, End); 71881ad6265SDimitry Andric assert(First <= End); 71981ad6265SDimitry Andric return false; 72081ad6265SDimitry Andric } 72181ad6265SDimitry Andric 722*bdd1243dSDimitry Andric LastTokenPtr = First; 723*bdd1243dSDimitry Andric 72481ad6265SDimitry Andric 
TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); 72581ad6265SDimitry Andric 72681ad6265SDimitry Andric auto ScEx1 = make_scope_exit([&]() { 72781ad6265SDimitry Andric /// Clear Scanner's CurDirToks before returning, in case we didn't push a 72881ad6265SDimitry Andric /// new directive. 72981ad6265SDimitry Andric CurDirToks.clear(); 73081ad6265SDimitry Andric }); 73181ad6265SDimitry Andric 73281ad6265SDimitry Andric // Handle "@import". 73381ad6265SDimitry Andric if (*First == '@') 73481ad6265SDimitry Andric return lexAt(First, End); 73581ad6265SDimitry Andric 73681ad6265SDimitry Andric if (*First == 'i' || *First == 'e' || *First == 'm') 73781ad6265SDimitry Andric return lexModule(First, End); 73881ad6265SDimitry Andric 73981ad6265SDimitry Andric // Handle preprocessing directives. 74081ad6265SDimitry Andric 74181ad6265SDimitry Andric TheLexer.setParsingPreprocessorDirective(true); 74281ad6265SDimitry Andric auto ScEx2 = make_scope_exit( 74381ad6265SDimitry Andric [&]() { TheLexer.setParsingPreprocessorDirective(false); }); 74481ad6265SDimitry Andric 74581ad6265SDimitry Andric // Lex '#'. 74681ad6265SDimitry Andric const dependency_directives_scan::Token &HashTok = lexToken(First, End); 747*bdd1243dSDimitry Andric if (HashTok.is(tok::hashhash)) { 748*bdd1243dSDimitry Andric // A \p tok::hashhash at this location is passed by the preprocessor to the 749*bdd1243dSDimitry Andric // parser to interpret, like any other token. So for dependency scanning 750*bdd1243dSDimitry Andric // skip it like a normal token not affecting the preprocessor. 
751*bdd1243dSDimitry Andric skipLine(First, End); 752*bdd1243dSDimitry Andric assert(First <= End); 753*bdd1243dSDimitry Andric return false; 754*bdd1243dSDimitry Andric } 75581ad6265SDimitry Andric assert(HashTok.is(tok::hash)); 75681ad6265SDimitry Andric (void)HashTok; 75781ad6265SDimitry Andric 758*bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 75981ad6265SDimitry Andric if (!FoundId) 76081ad6265SDimitry Andric return false; 76181ad6265SDimitry Andric 76281ad6265SDimitry Andric StringRef Id = *FoundId; 76381ad6265SDimitry Andric 76481ad6265SDimitry Andric if (Id == "pragma") 76581ad6265SDimitry Andric return lexPragma(First, End); 76681ad6265SDimitry Andric 76781ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 76881ad6265SDimitry Andric .Case("include", pp_include) 76981ad6265SDimitry Andric .Case("__include_macros", pp___include_macros) 77081ad6265SDimitry Andric .Case("define", pp_define) 77181ad6265SDimitry Andric .Case("undef", pp_undef) 77281ad6265SDimitry Andric .Case("import", pp_import) 77381ad6265SDimitry Andric .Case("include_next", pp_include_next) 77481ad6265SDimitry Andric .Case("if", pp_if) 77581ad6265SDimitry Andric .Case("ifdef", pp_ifdef) 77681ad6265SDimitry Andric .Case("ifndef", pp_ifndef) 77781ad6265SDimitry Andric .Case("elif", pp_elif) 77881ad6265SDimitry Andric .Case("elifdef", pp_elifdef) 77981ad6265SDimitry Andric .Case("elifndef", pp_elifndef) 78081ad6265SDimitry Andric .Case("else", pp_else) 78181ad6265SDimitry Andric .Case("endif", pp_endif) 78281ad6265SDimitry Andric .Default(pp_none); 78381ad6265SDimitry Andric if (Kind == pp_none) { 78481ad6265SDimitry Andric skipDirective(Id, First, End); 78581ad6265SDimitry Andric return false; 78681ad6265SDimitry Andric } 78781ad6265SDimitry Andric 78881ad6265SDimitry Andric if (Kind == pp_endif) 78981ad6265SDimitry Andric return lexEndif(First, End); 79081ad6265SDimitry Andric 79181ad6265SDimitry Andric switch (Kind) { 
79281ad6265SDimitry Andric case pp_include: 79381ad6265SDimitry Andric case pp___include_macros: 79481ad6265SDimitry Andric case pp_include_next: 79581ad6265SDimitry Andric case pp_import: 79681ad6265SDimitry Andric lexIncludeFilename(First, End); 79781ad6265SDimitry Andric break; 79881ad6265SDimitry Andric default: 79981ad6265SDimitry Andric break; 80081ad6265SDimitry Andric } 80181ad6265SDimitry Andric 80281ad6265SDimitry Andric // Everything else. 80381ad6265SDimitry Andric return lexDefault(Kind, First, End); 80481ad6265SDimitry Andric } 80581ad6265SDimitry Andric 80681ad6265SDimitry Andric static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 80781ad6265SDimitry Andric if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 80881ad6265SDimitry Andric First[2] == '\xbf') 80981ad6265SDimitry Andric First += 3; 81081ad6265SDimitry Andric } 81181ad6265SDimitry Andric 81281ad6265SDimitry Andric bool Scanner::scanImpl(const char *First, const char *const End) { 81381ad6265SDimitry Andric skipUTF8ByteOrderMark(First, End); 81481ad6265SDimitry Andric while (First != End) 81581ad6265SDimitry Andric if (lexPPLine(First, End)) 81681ad6265SDimitry Andric return true; 81781ad6265SDimitry Andric return false; 81881ad6265SDimitry Andric } 81981ad6265SDimitry Andric 82081ad6265SDimitry Andric bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { 82181ad6265SDimitry Andric bool Error = scanImpl(Input.begin(), Input.end()); 82281ad6265SDimitry Andric 82381ad6265SDimitry Andric if (!Error) { 82481ad6265SDimitry Andric // Add an EOF on success. 
825*bdd1243dSDimitry Andric if (LastTokenPtr && 826*bdd1243dSDimitry Andric (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) 827*bdd1243dSDimitry Andric pushDirective(tokens_present_before_eof); 82881ad6265SDimitry Andric pushDirective(pp_eof); 82981ad6265SDimitry Andric } 83081ad6265SDimitry Andric 83181ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens; 83281ad6265SDimitry Andric for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { 83381ad6265SDimitry Andric assert(RemainingTokens.size() >= DirWithToks.NumTokens); 83481ad6265SDimitry Andric Directives.emplace_back(DirWithToks.Kind, 83581ad6265SDimitry Andric RemainingTokens.take_front(DirWithToks.NumTokens)); 83681ad6265SDimitry Andric RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); 83781ad6265SDimitry Andric } 83881ad6265SDimitry Andric assert(RemainingTokens.empty()); 83981ad6265SDimitry Andric 84081ad6265SDimitry Andric return Error; 84181ad6265SDimitry Andric } 84281ad6265SDimitry Andric 84381ad6265SDimitry Andric bool clang::scanSourceForDependencyDirectives( 84481ad6265SDimitry Andric StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 84581ad6265SDimitry Andric SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags, 84681ad6265SDimitry Andric SourceLocation InputSourceLoc) { 84781ad6265SDimitry Andric return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); 84881ad6265SDimitry Andric } 84981ad6265SDimitry Andric 85081ad6265SDimitry Andric void clang::printDependencyDirectivesAsSource( 85181ad6265SDimitry Andric StringRef Source, 85281ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Directive> Directives, 85381ad6265SDimitry Andric llvm::raw_ostream &OS) { 85481ad6265SDimitry Andric // Add a space separator where it is convenient for testing purposes. 
85581ad6265SDimitry Andric auto needsSpaceSeparator = 85681ad6265SDimitry Andric [](tok::TokenKind Prev, 85781ad6265SDimitry Andric const dependency_directives_scan::Token &Tok) -> bool { 85881ad6265SDimitry Andric if (Prev == Tok.Kind) 85981ad6265SDimitry Andric return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 86081ad6265SDimitry Andric tok::r_square); 86181ad6265SDimitry Andric if (Prev == tok::raw_identifier && 86281ad6265SDimitry Andric Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, 86381ad6265SDimitry Andric tok::char_constant, tok::header_name)) 86481ad6265SDimitry Andric return true; 86581ad6265SDimitry Andric if (Prev == tok::r_paren && 86681ad6265SDimitry Andric Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, 86781ad6265SDimitry Andric tok::char_constant, tok::unknown)) 86881ad6265SDimitry Andric return true; 86981ad6265SDimitry Andric if (Prev == tok::comma && 87081ad6265SDimitry Andric Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) 87181ad6265SDimitry Andric return true; 87281ad6265SDimitry Andric return false; 87381ad6265SDimitry Andric }; 87481ad6265SDimitry Andric 87581ad6265SDimitry Andric for (const dependency_directives_scan::Directive &Directive : Directives) { 876*bdd1243dSDimitry Andric if (Directive.Kind == tokens_present_before_eof) 877*bdd1243dSDimitry Andric OS << "<TokBeforeEOF>"; 878*bdd1243dSDimitry Andric std::optional<tok::TokenKind> PrevTokenKind; 87981ad6265SDimitry Andric for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { 88081ad6265SDimitry Andric if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) 88181ad6265SDimitry Andric OS << ' '; 88281ad6265SDimitry Andric PrevTokenKind = Tok.Kind; 88381ad6265SDimitry Andric OS << Source.slice(Tok.Offset, Tok.getEnd()); 88481ad6265SDimitry Andric } 88581ad6265SDimitry Andric } 88681ad6265SDimitry Andric } 887