181ad6265SDimitry Andric //===- DependencyDirectivesScanner.cpp ------------------------------------===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric /// 981ad6265SDimitry Andric /// \file 1081ad6265SDimitry Andric /// This is the interface for scanning header and source files to get the 1181ad6265SDimitry Andric /// minimum necessary preprocessor directives for evaluating includes. It 1281ad6265SDimitry Andric /// reduces the source down to #define, #include, #import, @import, and any 1381ad6265SDimitry Andric /// conditional preprocessor logic that contains one of those. 1481ad6265SDimitry Andric /// 1581ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1681ad6265SDimitry Andric 1781ad6265SDimitry Andric #include "clang/Lex/DependencyDirectivesScanner.h" 1881ad6265SDimitry Andric #include "clang/Basic/CharInfo.h" 1981ad6265SDimitry Andric #include "clang/Basic/Diagnostic.h" 2081ad6265SDimitry Andric #include "clang/Lex/LexDiagnostic.h" 2181ad6265SDimitry Andric #include "clang/Lex/Lexer.h" 2206c3fb27SDimitry Andric #include "clang/Lex/Pragma.h" 2381ad6265SDimitry Andric #include "llvm/ADT/ScopeExit.h" 2481ad6265SDimitry Andric #include "llvm/ADT/SmallString.h" 2581ad6265SDimitry Andric #include "llvm/ADT/StringMap.h" 2681ad6265SDimitry Andric #include "llvm/ADT/StringSwitch.h" 27bdd1243dSDimitry Andric #include <optional> 2881ad6265SDimitry Andric 2981ad6265SDimitry Andric using namespace clang; 3081ad6265SDimitry Andric using namespace clang::dependency_directives_scan; 3181ad6265SDimitry Andric using namespace llvm; 3281ad6265SDimitry Andric 3381ad6265SDimitry Andric namespace { 3481ad6265SDimitry Andric 3581ad6265SDimitry Andric struct DirectiveWithTokens { 3681ad6265SDimitry Andric DirectiveKind Kind; 3781ad6265SDimitry Andric unsigned NumTokens; 3881ad6265SDimitry Andric 3981ad6265SDimitry Andric DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens) 4081ad6265SDimitry Andric : Kind(Kind), NumTokens(NumTokens) {} 4181ad6265SDimitry Andric }; 4281ad6265SDimitry Andric 4381ad6265SDimitry Andric /// Does an efficient "scan" of the sources to detect the presence of 4481ad6265SDimitry Andric /// preprocessor (or module import) directives and collects the raw lexed tokens 4581ad6265SDimitry Andric /// for those directives so that the \p Lexer can "replay" them when the file is 4681ad6265SDimitry Andric /// included. 4781ad6265SDimitry Andric /// 4881ad6265SDimitry Andric /// Note that the behavior of the raw lexer is affected by the language mode, 4981ad6265SDimitry Andric /// while at this point we want to do a scan and collect tokens once, 5081ad6265SDimitry Andric /// irrespective of the language mode that the file will get included in. To 5181ad6265SDimitry Andric /// compensate for that the \p Lexer, while "replaying", will adjust a token 5281ad6265SDimitry Andric /// where appropriate, when it could affect the preprocessor's state. 5381ad6265SDimitry Andric /// For example in a directive like 5481ad6265SDimitry Andric /// 5581ad6265SDimitry Andric /// \code 5681ad6265SDimitry Andric /// #if __has_cpp_attribute(clang::fallthrough) 5781ad6265SDimitry Andric /// \endcode 5881ad6265SDimitry Andric /// 5981ad6265SDimitry Andric /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2 6081ad6265SDimitry Andric /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon' 6181ad6265SDimitry Andric /// while in C++ mode. 6281ad6265SDimitry Andric struct Scanner { 6381ad6265SDimitry Andric Scanner(StringRef Input, 6481ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 6581ad6265SDimitry Andric DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) 6681ad6265SDimitry Andric : Input(Input), Tokens(Tokens), Diags(Diags), 6781ad6265SDimitry Andric InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()), 6881ad6265SDimitry Andric TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(), 6981ad6265SDimitry Andric Input.end()) {} 7081ad6265SDimitry Andric 7181ad6265SDimitry Andric static LangOptions getLangOptsForDepScanning() { 7281ad6265SDimitry Andric LangOptions LangOpts; 7381ad6265SDimitry Andric // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. 7481ad6265SDimitry Andric LangOpts.ObjC = true; 7581ad6265SDimitry Andric LangOpts.LineComment = true; 76*0fca6ea1SDimitry Andric LangOpts.RawStringLiterals = true; 77*0fca6ea1SDimitry Andric // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"". 7881ad6265SDimitry Andric return LangOpts; 7981ad6265SDimitry Andric } 8081ad6265SDimitry Andric 8181ad6265SDimitry Andric /// Lex the provided source and emit the directive tokens. 8281ad6265SDimitry Andric /// 8381ad6265SDimitry Andric /// \returns True on error. 8481ad6265SDimitry Andric bool scan(SmallVectorImpl<Directive> &Directives); 8581ad6265SDimitry Andric 8681ad6265SDimitry Andric private: 8781ad6265SDimitry Andric /// Lexes next token and advances \p First and the \p Lexer. 88bdd1243dSDimitry Andric [[nodiscard]] dependency_directives_scan::Token & 8981ad6265SDimitry Andric lexToken(const char *&First, const char *const End); 9081ad6265SDimitry Andric 91*0fca6ea1SDimitry Andric [[nodiscard]] dependency_directives_scan::Token & 92*0fca6ea1SDimitry Andric lexIncludeFilename(const char *&First, const char *const End); 9381ad6265SDimitry Andric 94bdd1243dSDimitry Andric void skipLine(const char *&First, const char *const End); 95bdd1243dSDimitry Andric void skipDirective(StringRef Name, const char *&First, const char *const End); 96bdd1243dSDimitry Andric 9706c3fb27SDimitry Andric /// Returns the spelling of a string literal or identifier after performing 9806c3fb27SDimitry Andric /// any processing needed to handle \c clang::Token::NeedsCleaning. 9906c3fb27SDimitry Andric StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok); 10006c3fb27SDimitry Andric 10181ad6265SDimitry Andric /// Lexes next token and if it is identifier returns its string, otherwise 102bdd1243dSDimitry Andric /// it skips the current line and returns \p std::nullopt. 10381ad6265SDimitry Andric /// 10481ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 10581ad6265SDimitry Andric /// advance beyond the token. 106bdd1243dSDimitry Andric [[nodiscard]] std::optional<StringRef> 10781ad6265SDimitry Andric tryLexIdentifierOrSkipLine(const char *&First, const char *const End); 10881ad6265SDimitry Andric 10981ad6265SDimitry Andric /// Used when it is certain that next token is an identifier. 110bdd1243dSDimitry Andric [[nodiscard]] StringRef lexIdentifier(const char *&First, 11181ad6265SDimitry Andric const char *const End); 11281ad6265SDimitry Andric 11381ad6265SDimitry Andric /// Lexes next token and returns true iff it is an identifier that matches \p 11481ad6265SDimitry Andric /// Id, otherwise it skips the current line and returns false. 11581ad6265SDimitry Andric /// 11681ad6265SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 11781ad6265SDimitry Andric /// advance beyond the token. 118bdd1243dSDimitry Andric [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id, 11981ad6265SDimitry Andric const char *&First, 12081ad6265SDimitry Andric const char *const End); 12181ad6265SDimitry Andric 12206c3fb27SDimitry Andric /// Lexes next token and returns true iff it matches the kind \p K. 12306c3fb27SDimitry Andric /// Otherwise it skips the current line and returns false. 12406c3fb27SDimitry Andric /// 12506c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 12606c3fb27SDimitry Andric /// advance beyond the token. 12706c3fb27SDimitry Andric [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 12806c3fb27SDimitry Andric const char *const End); 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric /// Lexes next token and if it is string literal, returns its string. 13106c3fb27SDimitry Andric /// Otherwise, it skips the current line and returns \p std::nullopt. 13206c3fb27SDimitry Andric /// 13306c3fb27SDimitry Andric /// In any case (whatever the token kind) \p First and the \p Lexer will 13406c3fb27SDimitry Andric /// advance beyond the token. 13506c3fb27SDimitry Andric [[nodiscard]] std::optional<StringRef> 13606c3fb27SDimitry Andric tryLexStringLiteralOrSkipLine(const char *&First, const char *const End); 13706c3fb27SDimitry Andric 138bdd1243dSDimitry Andric [[nodiscard]] bool scanImpl(const char *First, const char *const End); 139bdd1243dSDimitry Andric [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); 140bdd1243dSDimitry Andric [[nodiscard]] bool lexAt(const char *&First, const char *const End); 141bdd1243dSDimitry Andric [[nodiscard]] bool lexModule(const char *&First, const char *const End); 142bdd1243dSDimitry Andric [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, 14381ad6265SDimitry Andric const char *const End); 144bdd1243dSDimitry Andric [[nodiscard]] bool lexPragma(const char *&First, const char *const End); 14506c3fb27SDimitry Andric [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End); 146bdd1243dSDimitry Andric [[nodiscard]] bool lexEndif(const char *&First, const char *const End); 147bdd1243dSDimitry Andric [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, 14881ad6265SDimitry Andric const char *const End); 149bdd1243dSDimitry Andric [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind, 15081ad6265SDimitry Andric const char *&First, 15181ad6265SDimitry Andric const char *const End); 15281ad6265SDimitry Andric void lexPPDirectiveBody(const char *&First, const char *const End); 15381ad6265SDimitry Andric 15481ad6265SDimitry Andric DirectiveWithTokens &pushDirective(DirectiveKind Kind) { 15581ad6265SDimitry Andric Tokens.append(CurDirToks); 15681ad6265SDimitry Andric DirsWithToks.emplace_back(Kind, CurDirToks.size()); 15781ad6265SDimitry Andric CurDirToks.clear(); 15881ad6265SDimitry Andric return DirsWithToks.back(); 15981ad6265SDimitry Andric } 16081ad6265SDimitry Andric void popDirective() { 16181ad6265SDimitry Andric Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens); 16281ad6265SDimitry Andric } 16381ad6265SDimitry Andric DirectiveKind topDirective() const { 16481ad6265SDimitry Andric return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind; 16581ad6265SDimitry Andric } 16681ad6265SDimitry Andric 16781ad6265SDimitry Andric unsigned getOffsetAt(const char *CurPtr) const { 16881ad6265SDimitry Andric return CurPtr - Input.data(); 16981ad6265SDimitry Andric } 17081ad6265SDimitry Andric 17181ad6265SDimitry Andric /// Reports a diagnostic if the diagnostic engine is provided. Always returns 17281ad6265SDimitry Andric /// true at the end. 17381ad6265SDimitry Andric bool reportError(const char *CurPtr, unsigned Err); 17481ad6265SDimitry Andric 17581ad6265SDimitry Andric StringMap<char> SplitIds; 17681ad6265SDimitry Andric StringRef Input; 17781ad6265SDimitry Andric SmallVectorImpl<dependency_directives_scan::Token> &Tokens; 17881ad6265SDimitry Andric DiagnosticsEngine *Diags; 17981ad6265SDimitry Andric SourceLocation InputSourceLoc; 18081ad6265SDimitry Andric 181bdd1243dSDimitry Andric const char *LastTokenPtr = nullptr; 18281ad6265SDimitry Andric /// Keeps track of the tokens for the currently lexed directive. Once a 18381ad6265SDimitry Andric /// directive is fully lexed and "committed" then the tokens get appended to 18481ad6265SDimitry Andric /// \p Tokens and \p CurDirToks is cleared for the next directive. 18581ad6265SDimitry Andric SmallVector<dependency_directives_scan::Token, 32> CurDirToks; 18681ad6265SDimitry Andric /// The directives that were lexed along with the number of tokens that each 18781ad6265SDimitry Andric /// directive contains. The tokens of all the directives are kept in \p Tokens 18881ad6265SDimitry Andric /// vector, in the same order as the directives order in \p DirsWithToks. 18981ad6265SDimitry Andric SmallVector<DirectiveWithTokens, 64> DirsWithToks; 19081ad6265SDimitry Andric LangOptions LangOpts; 19181ad6265SDimitry Andric Lexer TheLexer; 19281ad6265SDimitry Andric }; 19381ad6265SDimitry Andric 19481ad6265SDimitry Andric } // end anonymous namespace 19581ad6265SDimitry Andric 19681ad6265SDimitry Andric bool Scanner::reportError(const char *CurPtr, unsigned Err) { 19781ad6265SDimitry Andric if (!Diags) 19881ad6265SDimitry Andric return true; 19981ad6265SDimitry Andric assert(CurPtr >= Input.data() && "invalid buffer ptr"); 20081ad6265SDimitry Andric Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err); 20181ad6265SDimitry Andric return true; 20281ad6265SDimitry Andric } 20381ad6265SDimitry Andric 20481ad6265SDimitry Andric static void skipOverSpaces(const char *&First, const char *const End) { 20581ad6265SDimitry Andric while (First != End && isHorizontalWhitespace(*First)) 20681ad6265SDimitry Andric ++First; 20781ad6265SDimitry Andric } 20881ad6265SDimitry Andric 209bdd1243dSDimitry Andric [[nodiscard]] static bool isRawStringLiteral(const char *First, 21081ad6265SDimitry Andric const char *Current) { 21181ad6265SDimitry Andric assert(First <= Current); 21281ad6265SDimitry Andric 21381ad6265SDimitry Andric // Check if we can even back up. 21481ad6265SDimitry Andric if (*Current != '"' || First == Current) 21581ad6265SDimitry Andric return false; 21681ad6265SDimitry Andric 21781ad6265SDimitry Andric // Check for an "R". 21881ad6265SDimitry Andric --Current; 21981ad6265SDimitry Andric if (*Current != 'R') 22081ad6265SDimitry Andric return false; 22181ad6265SDimitry Andric if (First == Current || !isAsciiIdentifierContinue(*--Current)) 22281ad6265SDimitry Andric return true; 22381ad6265SDimitry Andric 22481ad6265SDimitry Andric // Check for a prefix of "u", "U", or "L". 22581ad6265SDimitry Andric if (*Current == 'u' || *Current == 'U' || *Current == 'L') 22681ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 22781ad6265SDimitry Andric 22881ad6265SDimitry Andric // Check for a prefix of "u8". 22981ad6265SDimitry Andric if (*Current != '8' || First == Current || *Current-- != 'u') 23081ad6265SDimitry Andric return false; 23181ad6265SDimitry Andric return First == Current || !isAsciiIdentifierContinue(*--Current); 23281ad6265SDimitry Andric } 23381ad6265SDimitry Andric 23481ad6265SDimitry Andric static void skipRawString(const char *&First, const char *const End) { 23581ad6265SDimitry Andric assert(First[0] == '"'); 23681ad6265SDimitry Andric assert(First[-1] == 'R'); 23781ad6265SDimitry Andric 23881ad6265SDimitry Andric const char *Last = ++First; 23981ad6265SDimitry Andric while (Last != End && *Last != '(') 24081ad6265SDimitry Andric ++Last; 24181ad6265SDimitry Andric if (Last == End) { 24281ad6265SDimitry Andric First = Last; // Hit the end... just give up. 24381ad6265SDimitry Andric return; 24481ad6265SDimitry Andric } 24581ad6265SDimitry Andric 24681ad6265SDimitry Andric StringRef Terminator(First, Last - First); 24781ad6265SDimitry Andric for (;;) { 24881ad6265SDimitry Andric // Move First to just past the next ")". 24981ad6265SDimitry Andric First = Last; 25081ad6265SDimitry Andric while (First != End && *First != ')') 25181ad6265SDimitry Andric ++First; 25281ad6265SDimitry Andric if (First == End) 25381ad6265SDimitry Andric return; 25481ad6265SDimitry Andric ++First; 25581ad6265SDimitry Andric 25681ad6265SDimitry Andric // Look ahead for the terminator sequence. 25781ad6265SDimitry Andric Last = First; 25881ad6265SDimitry Andric while (Last != End && size_t(Last - First) < Terminator.size() && 25981ad6265SDimitry Andric Terminator[Last - First] == *Last) 26081ad6265SDimitry Andric ++Last; 26181ad6265SDimitry Andric 26281ad6265SDimitry Andric // Check if we hit it (or the end of the file). 26381ad6265SDimitry Andric if (Last == End) { 26481ad6265SDimitry Andric First = Last; 26581ad6265SDimitry Andric return; 26681ad6265SDimitry Andric } 26781ad6265SDimitry Andric if (size_t(Last - First) < Terminator.size()) 26881ad6265SDimitry Andric continue; 26981ad6265SDimitry Andric if (*Last != '"') 27081ad6265SDimitry Andric continue; 27181ad6265SDimitry Andric First = Last + 1; 27281ad6265SDimitry Andric return; 27381ad6265SDimitry Andric } 27481ad6265SDimitry Andric } 27581ad6265SDimitry Andric 27681ad6265SDimitry Andric // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) 27781ad6265SDimitry Andric static unsigned isEOL(const char *First, const char *const End) { 27881ad6265SDimitry Andric if (First == End) 27981ad6265SDimitry Andric return 0; 28081ad6265SDimitry Andric if (End - First > 1 && isVerticalWhitespace(First[0]) && 28181ad6265SDimitry Andric isVerticalWhitespace(First[1]) && First[0] != First[1]) 28281ad6265SDimitry Andric return 2; 28381ad6265SDimitry Andric return !!isVerticalWhitespace(First[0]); 28481ad6265SDimitry Andric } 28581ad6265SDimitry Andric 28681ad6265SDimitry Andric static void skipString(const char *&First, const char *const End) { 28781ad6265SDimitry Andric assert(*First == '\'' || *First == '"' || *First == '<'); 28881ad6265SDimitry Andric const char Terminator = *First == '<' ? '>' : *First; 28981ad6265SDimitry Andric for (++First; First != End && *First != Terminator; ++First) { 29081ad6265SDimitry Andric // String and character literals don't extend past the end of the line. 29181ad6265SDimitry Andric if (isVerticalWhitespace(*First)) 29281ad6265SDimitry Andric return; 29381ad6265SDimitry Andric if (*First != '\\') 29481ad6265SDimitry Andric continue; 29581ad6265SDimitry Andric // Skip past backslash to the next character. This ensures that the 29681ad6265SDimitry Andric // character right after it is skipped as well, which matters if it's 29781ad6265SDimitry Andric // the terminator. 29881ad6265SDimitry Andric if (++First == End) 29981ad6265SDimitry Andric return; 30081ad6265SDimitry Andric if (!isWhitespace(*First)) 30181ad6265SDimitry Andric continue; 30281ad6265SDimitry Andric // Whitespace after the backslash might indicate a line continuation. 30381ad6265SDimitry Andric const char *FirstAfterBackslashPastSpace = First; 30481ad6265SDimitry Andric skipOverSpaces(FirstAfterBackslashPastSpace, End); 30581ad6265SDimitry Andric if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { 30681ad6265SDimitry Andric // Advance the character pointer to the next line for the next 30781ad6265SDimitry Andric // iteration. 30881ad6265SDimitry Andric First = FirstAfterBackslashPastSpace + NLSize - 1; 30981ad6265SDimitry Andric } 31081ad6265SDimitry Andric } 31181ad6265SDimitry Andric if (First != End) 31281ad6265SDimitry Andric ++First; // Finish off the string. 31381ad6265SDimitry Andric } 31481ad6265SDimitry Andric 31581ad6265SDimitry Andric // Returns the length of the skipped newline 31681ad6265SDimitry Andric static unsigned skipNewline(const char *&First, const char *End) { 31781ad6265SDimitry Andric if (First == End) 31881ad6265SDimitry Andric return 0; 31981ad6265SDimitry Andric assert(isVerticalWhitespace(*First)); 32081ad6265SDimitry Andric unsigned Len = isEOL(First, End); 32181ad6265SDimitry Andric assert(Len && "expected newline"); 32281ad6265SDimitry Andric First += Len; 32381ad6265SDimitry Andric return Len; 32481ad6265SDimitry Andric } 32581ad6265SDimitry Andric 32681ad6265SDimitry Andric static bool wasLineContinuation(const char *First, unsigned EOLLen) { 32781ad6265SDimitry Andric return *(First - (int)EOLLen - 1) == '\\'; 32881ad6265SDimitry Andric } 32981ad6265SDimitry Andric 33081ad6265SDimitry Andric static void skipToNewlineRaw(const char *&First, const char *const End) { 33181ad6265SDimitry Andric for (;;) { 33281ad6265SDimitry Andric if (First == End) 33381ad6265SDimitry Andric return; 33481ad6265SDimitry Andric 33581ad6265SDimitry Andric unsigned Len = isEOL(First, End); 33681ad6265SDimitry Andric if (Len) 33781ad6265SDimitry Andric return; 33881ad6265SDimitry Andric 33981ad6265SDimitry Andric do { 34081ad6265SDimitry Andric if (++First == End) 34181ad6265SDimitry Andric return; 34281ad6265SDimitry Andric Len = isEOL(First, End); 34381ad6265SDimitry Andric } while (!Len); 34481ad6265SDimitry Andric 34581ad6265SDimitry Andric if (First[-1] != '\\') 34681ad6265SDimitry Andric return; 34781ad6265SDimitry Andric 34881ad6265SDimitry Andric First += Len; 34981ad6265SDimitry Andric // Keep skipping lines... 35081ad6265SDimitry Andric } 35181ad6265SDimitry Andric } 35281ad6265SDimitry Andric 35381ad6265SDimitry Andric static void skipLineComment(const char *&First, const char *const End) { 35481ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '/'); 35581ad6265SDimitry Andric First += 2; 35681ad6265SDimitry Andric skipToNewlineRaw(First, End); 35781ad6265SDimitry Andric } 35881ad6265SDimitry Andric 35981ad6265SDimitry Andric static void skipBlockComment(const char *&First, const char *const End) { 36081ad6265SDimitry Andric assert(First[0] == '/' && First[1] == '*'); 36181ad6265SDimitry Andric if (End - First < 4) { 36281ad6265SDimitry Andric First = End; 36381ad6265SDimitry Andric return; 36481ad6265SDimitry Andric } 36581ad6265SDimitry Andric for (First += 3; First != End; ++First) 36681ad6265SDimitry Andric if (First[-1] == '*' && First[0] == '/') { 36781ad6265SDimitry Andric ++First; 36881ad6265SDimitry Andric return; 36981ad6265SDimitry Andric } 37081ad6265SDimitry Andric } 37181ad6265SDimitry Andric 37281ad6265SDimitry Andric /// \returns True if the current single quotation mark character is a C++14 37381ad6265SDimitry Andric /// digit separator. 37481ad6265SDimitry Andric static bool isQuoteCppDigitSeparator(const char *const Start, 37581ad6265SDimitry Andric const char *const Cur, 37681ad6265SDimitry Andric const char *const End) { 37781ad6265SDimitry Andric assert(*Cur == '\'' && "expected quotation character"); 37881ad6265SDimitry Andric // skipLine called in places where we don't expect a valid number 37981ad6265SDimitry Andric // body before `start` on the same line, so always return false at the start. 38081ad6265SDimitry Andric if (Start == Cur) 38181ad6265SDimitry Andric return false; 38281ad6265SDimitry Andric // The previous character must be a valid PP number character. 38381ad6265SDimitry Andric // Make sure that the L, u, U, u8 prefixes don't get marked as a 38481ad6265SDimitry Andric // separator though. 38581ad6265SDimitry Andric char Prev = *(Cur - 1); 38681ad6265SDimitry Andric if (Prev == 'L' || Prev == 'U' || Prev == 'u') 38781ad6265SDimitry Andric return false; 38881ad6265SDimitry Andric if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') 38981ad6265SDimitry Andric return false; 39081ad6265SDimitry Andric if (!isPreprocessingNumberBody(Prev)) 39181ad6265SDimitry Andric return false; 39281ad6265SDimitry Andric // The next character should be a valid identifier body character. 39381ad6265SDimitry Andric return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1)); 39481ad6265SDimitry Andric } 39581ad6265SDimitry Andric 396bdd1243dSDimitry Andric void Scanner::skipLine(const char *&First, const char *const End) { 39781ad6265SDimitry Andric for (;;) { 39881ad6265SDimitry Andric assert(First <= End); 39981ad6265SDimitry Andric if (First == End) 40081ad6265SDimitry Andric return; 40181ad6265SDimitry Andric 40281ad6265SDimitry Andric if (isVerticalWhitespace(*First)) { 40381ad6265SDimitry Andric skipNewline(First, End); 40481ad6265SDimitry Andric return; 40581ad6265SDimitry Andric } 40681ad6265SDimitry Andric const char *Start = First; 40781ad6265SDimitry Andric while (First != End && !isVerticalWhitespace(*First)) { 40881ad6265SDimitry Andric // Iterate over strings correctly to avoid comments and newlines. 40981ad6265SDimitry Andric if (*First == '"' || 41081ad6265SDimitry Andric (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { 411bdd1243dSDimitry Andric LastTokenPtr = First; 41281ad6265SDimitry Andric if (isRawStringLiteral(Start, First)) 41381ad6265SDimitry Andric skipRawString(First, End); 41481ad6265SDimitry Andric else 41581ad6265SDimitry Andric skipString(First, End); 41681ad6265SDimitry Andric continue; 41781ad6265SDimitry Andric } 41881ad6265SDimitry Andric 41981ad6265SDimitry Andric // Iterate over comments correctly. 42081ad6265SDimitry Andric if (*First != '/' || End - First < 2) { 421bdd1243dSDimitry Andric LastTokenPtr = First; 42281ad6265SDimitry Andric ++First; 42381ad6265SDimitry Andric continue; 42481ad6265SDimitry Andric } 42581ad6265SDimitry Andric 42681ad6265SDimitry Andric if (First[1] == '/') { 42781ad6265SDimitry Andric // "//...". 42881ad6265SDimitry Andric skipLineComment(First, End); 42981ad6265SDimitry Andric continue; 43081ad6265SDimitry Andric } 43181ad6265SDimitry Andric 43281ad6265SDimitry Andric if (First[1] != '*') { 433bdd1243dSDimitry Andric LastTokenPtr = First; 43481ad6265SDimitry Andric ++First; 43581ad6265SDimitry Andric continue; 43681ad6265SDimitry Andric } 43781ad6265SDimitry Andric 43881ad6265SDimitry Andric // "/*...*/". 43981ad6265SDimitry Andric skipBlockComment(First, End); 44081ad6265SDimitry Andric } 44181ad6265SDimitry Andric if (First == End) 44281ad6265SDimitry Andric return; 44381ad6265SDimitry Andric 44481ad6265SDimitry Andric // Skip over the newline. 44581ad6265SDimitry Andric unsigned Len = skipNewline(First, End); 44681ad6265SDimitry Andric if (!wasLineContinuation(First, Len)) // Continue past line-continuations. 44781ad6265SDimitry Andric break; 44881ad6265SDimitry Andric } 44981ad6265SDimitry Andric } 45081ad6265SDimitry Andric 451bdd1243dSDimitry Andric void Scanner::skipDirective(StringRef Name, const char *&First, 45281ad6265SDimitry Andric const char *const End) { 45381ad6265SDimitry Andric if (llvm::StringSwitch<bool>(Name) 45481ad6265SDimitry Andric .Case("warning", true) 45581ad6265SDimitry Andric .Case("error", true) 45681ad6265SDimitry Andric .Default(false)) 45781ad6265SDimitry Andric // Do not process quotes or comments. 45881ad6265SDimitry Andric skipToNewlineRaw(First, End); 45981ad6265SDimitry Andric else 46081ad6265SDimitry Andric skipLine(First, End); 46181ad6265SDimitry Andric } 46281ad6265SDimitry Andric 46381ad6265SDimitry Andric static void skipWhitespace(const char *&First, const char *const End) { 46481ad6265SDimitry Andric for (;;) { 46581ad6265SDimitry Andric assert(First <= End); 46681ad6265SDimitry Andric skipOverSpaces(First, End); 46781ad6265SDimitry Andric 46881ad6265SDimitry Andric if (End - First < 2) 46981ad6265SDimitry Andric return; 47081ad6265SDimitry Andric 47181ad6265SDimitry Andric if (First[0] == '\\' && isVerticalWhitespace(First[1])) { 47281ad6265SDimitry Andric skipNewline(++First, End); 47381ad6265SDimitry Andric continue; 47481ad6265SDimitry Andric } 47581ad6265SDimitry Andric 47681ad6265SDimitry Andric // Check for a non-comment character. 47781ad6265SDimitry Andric if (First[0] != '/') 47881ad6265SDimitry Andric return; 47981ad6265SDimitry Andric 48081ad6265SDimitry Andric // "// ...". 48181ad6265SDimitry Andric if (First[1] == '/') { 48281ad6265SDimitry Andric skipLineComment(First, End); 48381ad6265SDimitry Andric return; 48481ad6265SDimitry Andric } 48581ad6265SDimitry Andric 48681ad6265SDimitry Andric // Cannot be a comment. 48781ad6265SDimitry Andric if (First[1] != '*') 48881ad6265SDimitry Andric return; 48981ad6265SDimitry Andric 49081ad6265SDimitry Andric // "/*...*/". 49181ad6265SDimitry Andric skipBlockComment(First, End); 49281ad6265SDimitry Andric } 49381ad6265SDimitry Andric } 49481ad6265SDimitry Andric 49581ad6265SDimitry Andric bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, 49681ad6265SDimitry Andric const char *const End) { 49781ad6265SDimitry Andric const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset; 49881ad6265SDimitry Andric for (;;) { 49981ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 50081ad6265SDimitry Andric if (Tok.is(tok::eof)) 50181ad6265SDimitry Andric return reportError( 50281ad6265SDimitry Andric DirectiveLoc, 50381ad6265SDimitry Andric diag::err_dep_source_scanner_missing_semi_after_at_import); 50481ad6265SDimitry Andric if (Tok.is(tok::semi)) 50581ad6265SDimitry Andric break; 50681ad6265SDimitry Andric } 50781ad6265SDimitry Andric pushDirective(Kind); 50881ad6265SDimitry Andric skipWhitespace(First, End); 50981ad6265SDimitry Andric if (First == End) 51081ad6265SDimitry Andric return false; 51181ad6265SDimitry Andric if (!isVerticalWhitespace(*First)) 51281ad6265SDimitry Andric return reportError( 51381ad6265SDimitry Andric DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); 51481ad6265SDimitry Andric skipNewline(First, End); 51581ad6265SDimitry Andric return false; 51681ad6265SDimitry Andric } 51781ad6265SDimitry Andric 51881ad6265SDimitry Andric dependency_directives_scan::Token &Scanner::lexToken(const char *&First, 51981ad6265SDimitry Andric const char *const End) { 52081ad6265SDimitry Andric clang::Token Tok; 52181ad6265SDimitry Andric TheLexer.LexFromRawLexer(Tok); 52281ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 52381ad6265SDimitry Andric assert(First <= End); 52481ad6265SDimitry Andric 52581ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 52681ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 52781ad6265SDimitry Andric Tok.getFlags()); 52881ad6265SDimitry Andric return CurDirToks.back(); 52981ad6265SDimitry Andric } 53081ad6265SDimitry Andric 53181ad6265SDimitry Andric dependency_directives_scan::Token & 53281ad6265SDimitry Andric Scanner::lexIncludeFilename(const char *&First, const char *const End) { 53381ad6265SDimitry Andric clang::Token Tok; 53481ad6265SDimitry Andric TheLexer.LexIncludeFilename(Tok); 53581ad6265SDimitry Andric First = Input.data() + TheLexer.getCurrentBufferOffset(); 53681ad6265SDimitry Andric assert(First <= End); 53781ad6265SDimitry Andric 53881ad6265SDimitry Andric unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength(); 53981ad6265SDimitry Andric CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(), 54081ad6265SDimitry Andric Tok.getFlags()); 54181ad6265SDimitry Andric return CurDirToks.back(); 54281ad6265SDimitry Andric } 54381ad6265SDimitry Andric 54481ad6265SDimitry Andric void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) { 54581ad6265SDimitry Andric while (true) { 54681ad6265SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 547*0fca6ea1SDimitry Andric if (Tok.is(tok::eod) || Tok.is(tok::eof)) 54881ad6265SDimitry Andric break; 54981ad6265SDimitry Andric } 55081ad6265SDimitry Andric } 55181ad6265SDimitry Andric 55206c3fb27SDimitry Andric StringRef 55306c3fb27SDimitry Andric Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { 55481ad6265SDimitry Andric bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; 55581ad6265SDimitry Andric if (LLVM_LIKELY(!NeedsCleaning)) 55681ad6265SDimitry Andric return Input.slice(Tok.Offset, Tok.getEnd()); 55781ad6265SDimitry Andric 55881ad6265SDimitry Andric SmallString<64> Spelling; 55981ad6265SDimitry Andric Spelling.resize(Tok.Length); 56081ad6265SDimitry Andric 56106c3fb27SDimitry Andric // FIXME: C++11 raw string literals need special handling (see getSpellingSlow 56206c3fb27SDimitry Andric // in the Lexer). Currently we cannot see them due to our LangOpts. 56306c3fb27SDimitry Andric 56481ad6265SDimitry Andric unsigned SpellingLength = 0; 56581ad6265SDimitry Andric const char *BufPtr = Input.begin() + Tok.Offset; 56681ad6265SDimitry Andric const char *AfterIdent = Input.begin() + Tok.getEnd(); 56781ad6265SDimitry Andric while (BufPtr < AfterIdent) { 5685f757f3fSDimitry Andric auto [Char, Size] = Lexer::getCharAndSizeNoWarn(BufPtr, LangOpts); 5695f757f3fSDimitry Andric Spelling[SpellingLength++] = Char; 57081ad6265SDimitry Andric BufPtr += Size; 57181ad6265SDimitry Andric } 57281ad6265SDimitry Andric 57381ad6265SDimitry Andric return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0) 57481ad6265SDimitry Andric .first->first(); 57581ad6265SDimitry Andric } 57681ad6265SDimitry Andric 57706c3fb27SDimitry Andric std::optional<StringRef> 57806c3fb27SDimitry Andric Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { 57906c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 58006c3fb27SDimitry Andric if (Tok.isNot(tok::raw_identifier)) { 58106c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 58206c3fb27SDimitry Andric skipLine(First, End); 58306c3fb27SDimitry Andric return std::nullopt; 58406c3fb27SDimitry Andric } 58506c3fb27SDimitry Andric 58606c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 58706c3fb27SDimitry Andric } 58806c3fb27SDimitry Andric 58981ad6265SDimitry Andric StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { 590bdd1243dSDimitry Andric std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End); 59181ad6265SDimitry Andric assert(Id && "expected identifier token"); 592bdd1243dSDimitry Andric return *Id; 59381ad6265SDimitry Andric } 59481ad6265SDimitry Andric 59581ad6265SDimitry Andric bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First, 59681ad6265SDimitry Andric const char *const End) { 597bdd1243dSDimitry Andric if (std::optional<StringRef> FoundId = 598bdd1243dSDimitry Andric tryLexIdentifierOrSkipLine(First, End)) { 59981ad6265SDimitry Andric if (*FoundId == Id) 60081ad6265SDimitry Andric return true; 60181ad6265SDimitry Andric skipLine(First, End); 60281ad6265SDimitry Andric } 60381ad6265SDimitry Andric return false; 60481ad6265SDimitry Andric } 60581ad6265SDimitry Andric 60606c3fb27SDimitry Andric bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, 60706c3fb27SDimitry Andric const char *const End) { 60806c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 60906c3fb27SDimitry Andric if (Tok.is(K)) 61006c3fb27SDimitry Andric return true; 61106c3fb27SDimitry Andric skipLine(First, End); 61206c3fb27SDimitry Andric return false; 61306c3fb27SDimitry Andric } 61406c3fb27SDimitry Andric 61506c3fb27SDimitry Andric std::optional<StringRef> 61606c3fb27SDimitry Andric Scanner::tryLexStringLiteralOrSkipLine(const char *&First, 61706c3fb27SDimitry Andric const char *const End) { 61806c3fb27SDimitry Andric const dependency_directives_scan::Token &Tok = lexToken(First, End); 61906c3fb27SDimitry Andric if (!tok::isStringLiteral(Tok.Kind)) { 62006c3fb27SDimitry Andric if (!Tok.is(tok::eod)) 62106c3fb27SDimitry Andric skipLine(First, End); 62206c3fb27SDimitry Andric return std::nullopt; 62306c3fb27SDimitry Andric } 62406c3fb27SDimitry Andric 62506c3fb27SDimitry Andric return cleanStringIfNeeded(Tok); 62606c3fb27SDimitry Andric } 62706c3fb27SDimitry Andric 62881ad6265SDimitry Andric bool Scanner::lexAt(const char *&First, const char *const End) { 62981ad6265SDimitry Andric // Handle "@import". 63081ad6265SDimitry Andric 63181ad6265SDimitry Andric // Lex '@'. 63281ad6265SDimitry Andric const dependency_directives_scan::Token &AtTok = lexToken(First, End); 63381ad6265SDimitry Andric assert(AtTok.is(tok::at)); 63481ad6265SDimitry Andric (void)AtTok; 63581ad6265SDimitry Andric 63681ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 63781ad6265SDimitry Andric return false; 63881ad6265SDimitry Andric return lexModuleDirectiveBody(decl_at_import, First, End); 63981ad6265SDimitry Andric } 64081ad6265SDimitry Andric 64181ad6265SDimitry Andric bool Scanner::lexModule(const char *&First, const char *const End) { 64281ad6265SDimitry Andric StringRef Id = lexIdentifier(First, End); 64381ad6265SDimitry Andric bool Export = false; 64481ad6265SDimitry Andric if (Id == "export") { 64581ad6265SDimitry Andric Export = true; 646bdd1243dSDimitry Andric std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End); 64781ad6265SDimitry Andric if (!NextId) 64881ad6265SDimitry Andric return false; 64981ad6265SDimitry Andric Id = *NextId; 65081ad6265SDimitry Andric } 65181ad6265SDimitry Andric 65281ad6265SDimitry Andric if (Id != "module" && Id != "import") { 65381ad6265SDimitry Andric skipLine(First, End); 65481ad6265SDimitry Andric return false; 65581ad6265SDimitry Andric } 65681ad6265SDimitry Andric 65781ad6265SDimitry Andric skipWhitespace(First, End); 65881ad6265SDimitry Andric 65981ad6265SDimitry Andric // Ignore this as a module directive if the next character can't be part of 66081ad6265SDimitry Andric // an import. 66181ad6265SDimitry Andric 66281ad6265SDimitry Andric switch (*First) { 663*0fca6ea1SDimitry Andric case ':': { 664*0fca6ea1SDimitry Andric // `module :` is never the start of a valid module declaration. 665*0fca6ea1SDimitry Andric if (Id == "module") { 666*0fca6ea1SDimitry Andric skipLine(First, End); 667*0fca6ea1SDimitry Andric return false; 668*0fca6ea1SDimitry Andric } 669*0fca6ea1SDimitry Andric // `import:(type)name` is a valid ObjC method decl, so check one more token. 670*0fca6ea1SDimitry Andric (void)lexToken(First, End); 671*0fca6ea1SDimitry Andric if (!tryLexIdentifierOrSkipLine(First, End)) 672*0fca6ea1SDimitry Andric return false; 673*0fca6ea1SDimitry Andric break; 674*0fca6ea1SDimitry Andric } 67581ad6265SDimitry Andric case '<': 67681ad6265SDimitry Andric case '"': 67781ad6265SDimitry Andric break; 67881ad6265SDimitry Andric default: 67981ad6265SDimitry Andric if (!isAsciiIdentifierContinue(*First)) { 68081ad6265SDimitry Andric skipLine(First, End); 68181ad6265SDimitry Andric return false; 68281ad6265SDimitry Andric } 68381ad6265SDimitry Andric } 68481ad6265SDimitry Andric 68581ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false); 68681ad6265SDimitry Andric 68781ad6265SDimitry Andric DirectiveKind Kind; 68881ad6265SDimitry Andric if (Id == "module") 68981ad6265SDimitry Andric Kind = Export ? cxx_export_module_decl : cxx_module_decl; 69081ad6265SDimitry Andric else 69181ad6265SDimitry Andric Kind = Export ? cxx_export_import_decl : cxx_import_decl; 69281ad6265SDimitry Andric 69381ad6265SDimitry Andric return lexModuleDirectiveBody(Kind, First, End); 69481ad6265SDimitry Andric } 69581ad6265SDimitry Andric 69606c3fb27SDimitry Andric bool Scanner::lex_Pragma(const char *&First, const char *const End) { 69706c3fb27SDimitry Andric if (!isNextTokenOrSkipLine(tok::l_paren, First, End)) 69806c3fb27SDimitry Andric return false; 69906c3fb27SDimitry Andric 70006c3fb27SDimitry Andric std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End); 70106c3fb27SDimitry Andric 70206c3fb27SDimitry Andric if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End)) 70306c3fb27SDimitry Andric return false; 70406c3fb27SDimitry Andric 70506c3fb27SDimitry Andric SmallString<64> Buffer(*Str); 70606c3fb27SDimitry Andric prepare_PragmaString(Buffer); 70706c3fb27SDimitry Andric 70806c3fb27SDimitry Andric // Use a new scanner instance since the tokens will be inside the allocated 70906c3fb27SDimitry Andric // string. We should already have captured all the relevant tokens in the 71006c3fb27SDimitry Andric // current scanner. 71106c3fb27SDimitry Andric SmallVector<dependency_directives_scan::Token> DiscardTokens; 71206c3fb27SDimitry Andric const char *Begin = Buffer.c_str(); 71306c3fb27SDimitry Andric Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags, 71406c3fb27SDimitry Andric InputSourceLoc}; 71506c3fb27SDimitry Andric 71606c3fb27SDimitry Andric PragmaScanner.TheLexer.setParsingPreprocessorDirective(true); 71706c3fb27SDimitry Andric if (PragmaScanner.lexPragma(Begin, Buffer.end())) 71806c3fb27SDimitry Andric return true; 71906c3fb27SDimitry Andric 72006c3fb27SDimitry Andric DirectiveKind K = PragmaScanner.topDirective(); 72106c3fb27SDimitry Andric if (K == pp_none) { 72206c3fb27SDimitry Andric skipLine(First, End); 72306c3fb27SDimitry Andric return false; 72406c3fb27SDimitry Andric } 72506c3fb27SDimitry Andric 72606c3fb27SDimitry Andric assert(Begin == Buffer.end()); 72706c3fb27SDimitry Andric pushDirective(K); 72806c3fb27SDimitry Andric return false; 72906c3fb27SDimitry Andric } 73006c3fb27SDimitry Andric 73181ad6265SDimitry Andric bool Scanner::lexPragma(const char *&First, const char *const End) { 732bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 73381ad6265SDimitry Andric if (!FoundId) 73481ad6265SDimitry Andric return false; 73581ad6265SDimitry Andric 73681ad6265SDimitry Andric StringRef Id = *FoundId; 73781ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 73881ad6265SDimitry Andric .Case("once", pp_pragma_once) 73981ad6265SDimitry Andric .Case("push_macro", pp_pragma_push_macro) 74081ad6265SDimitry Andric .Case("pop_macro", pp_pragma_pop_macro) 74181ad6265SDimitry Andric .Case("include_alias", pp_pragma_include_alias) 74281ad6265SDimitry Andric .Default(pp_none); 74381ad6265SDimitry Andric if (Kind != pp_none) { 74481ad6265SDimitry Andric lexPPDirectiveBody(First, End); 74581ad6265SDimitry Andric pushDirective(Kind); 74681ad6265SDimitry Andric return false; 74781ad6265SDimitry Andric } 74881ad6265SDimitry Andric 74981ad6265SDimitry Andric if (Id != "clang") { 75081ad6265SDimitry Andric skipLine(First, End); 75181ad6265SDimitry Andric return false; 75281ad6265SDimitry Andric } 75381ad6265SDimitry Andric 75406c3fb27SDimitry Andric FoundId = tryLexIdentifierOrSkipLine(First, End); 75506c3fb27SDimitry Andric if (!FoundId) 75681ad6265SDimitry Andric return false; 75706c3fb27SDimitry Andric Id = *FoundId; 75806c3fb27SDimitry Andric 75906c3fb27SDimitry Andric // #pragma clang system_header 76006c3fb27SDimitry Andric if (Id == "system_header") { 76106c3fb27SDimitry Andric lexPPDirectiveBody(First, End); 76206c3fb27SDimitry Andric pushDirective(pp_pragma_system_header); 76306c3fb27SDimitry Andric return false; 76406c3fb27SDimitry Andric } 76506c3fb27SDimitry Andric 76606c3fb27SDimitry Andric if (Id != "module") { 76706c3fb27SDimitry Andric skipLine(First, End); 76806c3fb27SDimitry Andric return false; 76906c3fb27SDimitry Andric } 77081ad6265SDimitry Andric 77181ad6265SDimitry Andric // #pragma clang module. 77281ad6265SDimitry Andric if (!isNextIdentifierOrSkipLine("import", First, End)) 77381ad6265SDimitry Andric return false; 77481ad6265SDimitry Andric 77581ad6265SDimitry Andric // #pragma clang module import. 77681ad6265SDimitry Andric lexPPDirectiveBody(First, End); 77781ad6265SDimitry Andric pushDirective(pp_pragma_import); 77881ad6265SDimitry Andric return false; 77981ad6265SDimitry Andric } 78081ad6265SDimitry Andric 78181ad6265SDimitry Andric bool Scanner::lexEndif(const char *&First, const char *const End) { 78281ad6265SDimitry Andric // Strip out "#else" if it's empty. 78381ad6265SDimitry Andric if (topDirective() == pp_else) 78481ad6265SDimitry Andric popDirective(); 78581ad6265SDimitry Andric 78681ad6265SDimitry Andric // If "#ifdef" is empty, strip it and skip the "#endif". 78781ad6265SDimitry Andric // 78881ad6265SDimitry Andric // FIXME: Once/if Clang starts disallowing __has_include in macro expansions, 78981ad6265SDimitry Andric // we can skip empty `#if` and `#elif` blocks as well after scanning for a 79081ad6265SDimitry Andric // literal __has_include in the condition. Even without that rule we could 79181ad6265SDimitry Andric // drop the tokens if we scan for identifiers in the condition and find none. 79281ad6265SDimitry Andric if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) { 79381ad6265SDimitry Andric popDirective(); 79481ad6265SDimitry Andric skipLine(First, End); 79581ad6265SDimitry Andric return false; 79681ad6265SDimitry Andric } 79781ad6265SDimitry Andric 79881ad6265SDimitry Andric return lexDefault(pp_endif, First, End); 79981ad6265SDimitry Andric } 80081ad6265SDimitry Andric 80181ad6265SDimitry Andric bool Scanner::lexDefault(DirectiveKind Kind, const char *&First, 80281ad6265SDimitry Andric const char *const End) { 80381ad6265SDimitry Andric lexPPDirectiveBody(First, End); 80481ad6265SDimitry Andric pushDirective(Kind); 80581ad6265SDimitry Andric return false; 80681ad6265SDimitry Andric } 80781ad6265SDimitry Andric 80881ad6265SDimitry Andric static bool isStartOfRelevantLine(char First) { 80981ad6265SDimitry Andric switch (First) { 81081ad6265SDimitry Andric case '#': 81181ad6265SDimitry Andric case '@': 81281ad6265SDimitry Andric case 'i': 81381ad6265SDimitry Andric case 'e': 81481ad6265SDimitry Andric case 'm': 81506c3fb27SDimitry Andric case '_': 81681ad6265SDimitry Andric return true; 81781ad6265SDimitry Andric } 81881ad6265SDimitry Andric return false; 81981ad6265SDimitry Andric } 82081ad6265SDimitry Andric 82181ad6265SDimitry Andric bool Scanner::lexPPLine(const char *&First, const char *const End) { 82281ad6265SDimitry Andric assert(First != End); 82381ad6265SDimitry Andric 82481ad6265SDimitry Andric skipWhitespace(First, End); 82581ad6265SDimitry Andric assert(First <= End); 82681ad6265SDimitry Andric if (First == End) 82781ad6265SDimitry Andric return false; 82881ad6265SDimitry Andric 82981ad6265SDimitry Andric if (!isStartOfRelevantLine(*First)) { 83081ad6265SDimitry Andric skipLine(First, End); 83181ad6265SDimitry Andric assert(First <= End); 83281ad6265SDimitry Andric return false; 83381ad6265SDimitry Andric } 83481ad6265SDimitry Andric 835bdd1243dSDimitry Andric LastTokenPtr = First; 836bdd1243dSDimitry Andric 83781ad6265SDimitry Andric TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true); 83881ad6265SDimitry Andric 83981ad6265SDimitry Andric auto ScEx1 = make_scope_exit([&]() { 84081ad6265SDimitry Andric /// Clear Scanner's CurDirToks before returning, in case we didn't push a 84181ad6265SDimitry Andric /// new directive. 84281ad6265SDimitry Andric CurDirToks.clear(); 84381ad6265SDimitry Andric }); 84481ad6265SDimitry Andric 84581ad6265SDimitry Andric // Handle "@import". 84681ad6265SDimitry Andric if (*First == '@') 84781ad6265SDimitry Andric return lexAt(First, End); 84881ad6265SDimitry Andric 84981ad6265SDimitry Andric if (*First == 'i' || *First == 'e' || *First == 'm') 85081ad6265SDimitry Andric return lexModule(First, End); 85181ad6265SDimitry Andric 85206c3fb27SDimitry Andric if (*First == '_') { 85306c3fb27SDimitry Andric if (isNextIdentifierOrSkipLine("_Pragma", First, End)) 85406c3fb27SDimitry Andric return lex_Pragma(First, End); 85506c3fb27SDimitry Andric return false; 85606c3fb27SDimitry Andric } 85706c3fb27SDimitry Andric 85881ad6265SDimitry Andric // Handle preprocessing directives. 85981ad6265SDimitry Andric 86081ad6265SDimitry Andric TheLexer.setParsingPreprocessorDirective(true); 86181ad6265SDimitry Andric auto ScEx2 = make_scope_exit( 86281ad6265SDimitry Andric [&]() { TheLexer.setParsingPreprocessorDirective(false); }); 86381ad6265SDimitry Andric 86481ad6265SDimitry Andric // Lex '#'. 86581ad6265SDimitry Andric const dependency_directives_scan::Token &HashTok = lexToken(First, End); 866bdd1243dSDimitry Andric if (HashTok.is(tok::hashhash)) { 867bdd1243dSDimitry Andric // A \p tok::hashhash at this location is passed by the preprocessor to the 868bdd1243dSDimitry Andric // parser to interpret, like any other token. So for dependency scanning 869bdd1243dSDimitry Andric // skip it like a normal token not affecting the preprocessor. 870bdd1243dSDimitry Andric skipLine(First, End); 871bdd1243dSDimitry Andric assert(First <= End); 872bdd1243dSDimitry Andric return false; 873bdd1243dSDimitry Andric } 87481ad6265SDimitry Andric assert(HashTok.is(tok::hash)); 87581ad6265SDimitry Andric (void)HashTok; 87681ad6265SDimitry Andric 877bdd1243dSDimitry Andric std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End); 87881ad6265SDimitry Andric if (!FoundId) 87981ad6265SDimitry Andric return false; 88081ad6265SDimitry Andric 88181ad6265SDimitry Andric StringRef Id = *FoundId; 88281ad6265SDimitry Andric 88381ad6265SDimitry Andric if (Id == "pragma") 88481ad6265SDimitry Andric return lexPragma(First, End); 88581ad6265SDimitry Andric 88681ad6265SDimitry Andric auto Kind = llvm::StringSwitch<DirectiveKind>(Id) 88781ad6265SDimitry Andric .Case("include", pp_include) 88881ad6265SDimitry Andric .Case("__include_macros", pp___include_macros) 88981ad6265SDimitry Andric .Case("define", pp_define) 89081ad6265SDimitry Andric .Case("undef", pp_undef) 89181ad6265SDimitry Andric .Case("import", pp_import) 89281ad6265SDimitry Andric .Case("include_next", pp_include_next) 89381ad6265SDimitry Andric .Case("if", pp_if) 89481ad6265SDimitry Andric .Case("ifdef", pp_ifdef) 89581ad6265SDimitry Andric .Case("ifndef", pp_ifndef) 89681ad6265SDimitry Andric .Case("elif", pp_elif) 89781ad6265SDimitry Andric .Case("elifdef", pp_elifdef) 89881ad6265SDimitry Andric .Case("elifndef", pp_elifndef) 89981ad6265SDimitry Andric .Case("else", pp_else) 90081ad6265SDimitry Andric .Case("endif", pp_endif) 90181ad6265SDimitry Andric .Default(pp_none); 90281ad6265SDimitry Andric if (Kind == pp_none) { 90381ad6265SDimitry Andric skipDirective(Id, First, End); 90481ad6265SDimitry Andric return false; 90581ad6265SDimitry Andric } 90681ad6265SDimitry Andric 90781ad6265SDimitry Andric if (Kind == pp_endif) 90881ad6265SDimitry Andric return lexEndif(First, End); 90981ad6265SDimitry Andric 91081ad6265SDimitry Andric switch (Kind) { 91181ad6265SDimitry Andric case pp_include: 91281ad6265SDimitry Andric case pp___include_macros: 91381ad6265SDimitry Andric case pp_include_next: 91481ad6265SDimitry Andric case pp_import: 915*0fca6ea1SDimitry Andric // Ignore missing filenames in include or import directives. 916*0fca6ea1SDimitry Andric if (lexIncludeFilename(First, End).is(tok::eod)) { 917*0fca6ea1SDimitry Andric skipDirective(Id, First, End); 918*0fca6ea1SDimitry Andric return true; 919*0fca6ea1SDimitry Andric } 92081ad6265SDimitry Andric break; 92181ad6265SDimitry Andric default: 92281ad6265SDimitry Andric break; 92381ad6265SDimitry Andric } 92481ad6265SDimitry Andric 92581ad6265SDimitry Andric // Everything else. 92681ad6265SDimitry Andric return lexDefault(Kind, First, End); 92781ad6265SDimitry Andric } 92881ad6265SDimitry Andric 92981ad6265SDimitry Andric static void skipUTF8ByteOrderMark(const char *&First, const char *const End) { 93081ad6265SDimitry Andric if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' && 93181ad6265SDimitry Andric First[2] == '\xbf') 93281ad6265SDimitry Andric First += 3; 93381ad6265SDimitry Andric } 93481ad6265SDimitry Andric 93581ad6265SDimitry Andric bool Scanner::scanImpl(const char *First, const char *const End) { 93681ad6265SDimitry Andric skipUTF8ByteOrderMark(First, End); 93781ad6265SDimitry Andric while (First != End) 93881ad6265SDimitry Andric if (lexPPLine(First, End)) 93981ad6265SDimitry Andric return true; 94081ad6265SDimitry Andric return false; 94181ad6265SDimitry Andric } 94281ad6265SDimitry Andric 94381ad6265SDimitry Andric bool Scanner::scan(SmallVectorImpl<Directive> &Directives) { 94481ad6265SDimitry Andric bool Error = scanImpl(Input.begin(), Input.end()); 94581ad6265SDimitry Andric 94681ad6265SDimitry Andric if (!Error) { 94781ad6265SDimitry Andric // Add an EOF on success. 948bdd1243dSDimitry Andric if (LastTokenPtr && 949bdd1243dSDimitry Andric (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset)) 950bdd1243dSDimitry Andric pushDirective(tokens_present_before_eof); 95181ad6265SDimitry Andric pushDirective(pp_eof); 95281ad6265SDimitry Andric } 95381ad6265SDimitry Andric 95481ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens; 95581ad6265SDimitry Andric for (const DirectiveWithTokens &DirWithToks : DirsWithToks) { 95681ad6265SDimitry Andric assert(RemainingTokens.size() >= DirWithToks.NumTokens); 95781ad6265SDimitry Andric Directives.emplace_back(DirWithToks.Kind, 95881ad6265SDimitry Andric RemainingTokens.take_front(DirWithToks.NumTokens)); 95981ad6265SDimitry Andric RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens); 96081ad6265SDimitry Andric } 96181ad6265SDimitry Andric assert(RemainingTokens.empty()); 96281ad6265SDimitry Andric 96381ad6265SDimitry Andric return Error; 96481ad6265SDimitry Andric } 96581ad6265SDimitry Andric 96681ad6265SDimitry Andric bool clang::scanSourceForDependencyDirectives( 96781ad6265SDimitry Andric StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens, 96881ad6265SDimitry Andric SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags, 96981ad6265SDimitry Andric SourceLocation InputSourceLoc) { 97081ad6265SDimitry Andric return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives); 97181ad6265SDimitry Andric } 97281ad6265SDimitry Andric 97381ad6265SDimitry Andric void clang::printDependencyDirectivesAsSource( 97481ad6265SDimitry Andric StringRef Source, 97581ad6265SDimitry Andric ArrayRef<dependency_directives_scan::Directive> Directives, 97681ad6265SDimitry Andric llvm::raw_ostream &OS) { 97781ad6265SDimitry Andric // Add a space separator where it is convenient for testing purposes. 97881ad6265SDimitry Andric auto needsSpaceSeparator = 97981ad6265SDimitry Andric [](tok::TokenKind Prev, 98081ad6265SDimitry Andric const dependency_directives_scan::Token &Tok) -> bool { 98181ad6265SDimitry Andric if (Prev == Tok.Kind) 98281ad6265SDimitry Andric return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, 98381ad6265SDimitry Andric tok::r_square); 98481ad6265SDimitry Andric if (Prev == tok::raw_identifier && 98581ad6265SDimitry Andric Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal, 98681ad6265SDimitry Andric tok::char_constant, tok::header_name)) 98781ad6265SDimitry Andric return true; 98881ad6265SDimitry Andric if (Prev == tok::r_paren && 98981ad6265SDimitry Andric Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal, 99081ad6265SDimitry Andric tok::char_constant, tok::unknown)) 99181ad6265SDimitry Andric return true; 99281ad6265SDimitry Andric if (Prev == tok::comma && 99381ad6265SDimitry Andric Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less)) 99481ad6265SDimitry Andric return true; 99581ad6265SDimitry Andric return false; 99681ad6265SDimitry Andric }; 99781ad6265SDimitry Andric 99881ad6265SDimitry Andric for (const dependency_directives_scan::Directive &Directive : Directives) { 999bdd1243dSDimitry Andric if (Directive.Kind == tokens_present_before_eof) 1000bdd1243dSDimitry Andric OS << "<TokBeforeEOF>"; 1001bdd1243dSDimitry Andric std::optional<tok::TokenKind> PrevTokenKind; 100281ad6265SDimitry Andric for (const dependency_directives_scan::Token &Tok : Directive.Tokens) { 100381ad6265SDimitry Andric if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok)) 100481ad6265SDimitry Andric OS << ' '; 100581ad6265SDimitry Andric PrevTokenKind = Tok.Kind; 100681ad6265SDimitry Andric OS << Source.slice(Tok.Offset, Tok.getEnd()); 100781ad6265SDimitry Andric } 100881ad6265SDimitry Andric } 100981ad6265SDimitry Andric } 1010