xref: /openbsd-src/gnu/llvm/clang/lib/Lex/DependencyDirectivesScanner.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1*12c85518Srobert //===- DependencyDirectivesScanner.cpp ------------------------------------===//
2*12c85518Srobert //
3*12c85518Srobert // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*12c85518Srobert // See https://llvm.org/LICENSE.txt for license information.
5*12c85518Srobert // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*12c85518Srobert //
7*12c85518Srobert //===----------------------------------------------------------------------===//
8*12c85518Srobert ///
9*12c85518Srobert /// \file
10*12c85518Srobert /// This is the interface for scanning header and source files to get the
11*12c85518Srobert /// minimum necessary preprocessor directives for evaluating includes. It
12*12c85518Srobert /// reduces the source down to #define, #include, #import, @import, and any
13*12c85518Srobert /// conditional preprocessor logic that contains one of those.
14*12c85518Srobert ///
15*12c85518Srobert //===----------------------------------------------------------------------===//
16*12c85518Srobert 
17*12c85518Srobert #include "clang/Lex/DependencyDirectivesScanner.h"
18*12c85518Srobert #include "clang/Basic/CharInfo.h"
19*12c85518Srobert #include "clang/Basic/Diagnostic.h"
20*12c85518Srobert #include "clang/Lex/LexDiagnostic.h"
21*12c85518Srobert #include "clang/Lex/Lexer.h"
22*12c85518Srobert #include "llvm/ADT/ScopeExit.h"
23*12c85518Srobert #include "llvm/ADT/SmallString.h"
24*12c85518Srobert #include "llvm/ADT/StringMap.h"
25*12c85518Srobert #include "llvm/ADT/StringSwitch.h"
26*12c85518Srobert #include <optional>
27*12c85518Srobert 
28*12c85518Srobert using namespace clang;
29*12c85518Srobert using namespace clang::dependency_directives_scan;
30*12c85518Srobert using namespace llvm;
31*12c85518Srobert 
32*12c85518Srobert namespace {
33*12c85518Srobert 
34*12c85518Srobert struct DirectiveWithTokens {
35*12c85518Srobert   DirectiveKind Kind;
36*12c85518Srobert   unsigned NumTokens;
37*12c85518Srobert 
DirectiveWithTokens__anonb4e7e0440111::DirectiveWithTokens38*12c85518Srobert   DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
39*12c85518Srobert       : Kind(Kind), NumTokens(NumTokens) {}
40*12c85518Srobert };
41*12c85518Srobert 
42*12c85518Srobert /// Does an efficient "scan" of the sources to detect the presence of
43*12c85518Srobert /// preprocessor (or module import) directives and collects the raw lexed tokens
44*12c85518Srobert /// for those directives so that the \p Lexer can "replay" them when the file is
45*12c85518Srobert /// included.
46*12c85518Srobert ///
47*12c85518Srobert /// Note that the behavior of the raw lexer is affected by the language mode,
48*12c85518Srobert /// while at this point we want to do a scan and collect tokens once,
49*12c85518Srobert /// irrespective of the language mode that the file will get included in. To
50*12c85518Srobert /// compensate for that the \p Lexer, while "replaying", will adjust a token
51*12c85518Srobert /// where appropriate, when it could affect the preprocessor's state.
52*12c85518Srobert /// For example in a directive like
53*12c85518Srobert ///
54*12c85518Srobert /// \code
55*12c85518Srobert ///   #if __has_cpp_attribute(clang::fallthrough)
56*12c85518Srobert /// \endcode
57*12c85518Srobert ///
58*12c85518Srobert /// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
59*12c85518Srobert /// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
60*12c85518Srobert /// while in C++ mode.
61*12c85518Srobert struct Scanner {
Scanner__anonb4e7e0440111::Scanner62*12c85518Srobert   Scanner(StringRef Input,
63*12c85518Srobert           SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
64*12c85518Srobert           DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
65*12c85518Srobert       : Input(Input), Tokens(Tokens), Diags(Diags),
66*12c85518Srobert         InputSourceLoc(InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
67*12c85518Srobert         TheLexer(InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
68*12c85518Srobert                  Input.end()) {}
69*12c85518Srobert 
getLangOptsForDepScanning__anonb4e7e0440111::Scanner70*12c85518Srobert   static LangOptions getLangOptsForDepScanning() {
71*12c85518Srobert     LangOptions LangOpts;
72*12c85518Srobert     // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
73*12c85518Srobert     LangOpts.ObjC = true;
74*12c85518Srobert     LangOpts.LineComment = true;
75*12c85518Srobert     return LangOpts;
76*12c85518Srobert   }
77*12c85518Srobert 
78*12c85518Srobert   /// Lex the provided source and emit the directive tokens.
79*12c85518Srobert   ///
80*12c85518Srobert   /// \returns True on error.
81*12c85518Srobert   bool scan(SmallVectorImpl<Directive> &Directives);
82*12c85518Srobert 
83*12c85518Srobert private:
84*12c85518Srobert   /// Lexes next token and advances \p First and the \p Lexer.
85*12c85518Srobert   [[nodiscard]] dependency_directives_scan::Token &
86*12c85518Srobert   lexToken(const char *&First, const char *const End);
87*12c85518Srobert 
88*12c85518Srobert   dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
89*12c85518Srobert                                                         const char *const End);
90*12c85518Srobert 
91*12c85518Srobert   void skipLine(const char *&First, const char *const End);
92*12c85518Srobert   void skipDirective(StringRef Name, const char *&First, const char *const End);
93*12c85518Srobert 
94*12c85518Srobert   /// Lexes next token and if it is identifier returns its string, otherwise
95*12c85518Srobert   /// it skips the current line and returns \p std::nullopt.
96*12c85518Srobert   ///
97*12c85518Srobert   /// In any case (whatever the token kind) \p First and the \p Lexer will
98*12c85518Srobert   /// advance beyond the token.
99*12c85518Srobert   [[nodiscard]] std::optional<StringRef>
100*12c85518Srobert   tryLexIdentifierOrSkipLine(const char *&First, const char *const End);
101*12c85518Srobert 
102*12c85518Srobert   /// Used when it is certain that next token is an identifier.
103*12c85518Srobert   [[nodiscard]] StringRef lexIdentifier(const char *&First,
104*12c85518Srobert                                         const char *const End);
105*12c85518Srobert 
106*12c85518Srobert   /// Lexes next token and returns true iff it is an identifier that matches \p
107*12c85518Srobert   /// Id, otherwise it skips the current line and returns false.
108*12c85518Srobert   ///
109*12c85518Srobert   /// In any case (whatever the token kind) \p First and the \p Lexer will
110*12c85518Srobert   /// advance beyond the token.
111*12c85518Srobert   [[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id,
112*12c85518Srobert                                                 const char *&First,
113*12c85518Srobert                                                 const char *const End);
114*12c85518Srobert 
115*12c85518Srobert   [[nodiscard]] bool scanImpl(const char *First, const char *const End);
116*12c85518Srobert   [[nodiscard]] bool lexPPLine(const char *&First, const char *const End);
117*12c85518Srobert   [[nodiscard]] bool lexAt(const char *&First, const char *const End);
118*12c85518Srobert   [[nodiscard]] bool lexModule(const char *&First, const char *const End);
119*12c85518Srobert   [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First,
120*12c85518Srobert                                const char *const End);
121*12c85518Srobert   [[nodiscard]] bool lexPragma(const char *&First, const char *const End);
122*12c85518Srobert   [[nodiscard]] bool lexEndif(const char *&First, const char *const End);
123*12c85518Srobert   [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,
124*12c85518Srobert                                 const char *const End);
125*12c85518Srobert   [[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind,
126*12c85518Srobert                                             const char *&First,
127*12c85518Srobert                                             const char *const End);
128*12c85518Srobert   void lexPPDirectiveBody(const char *&First, const char *const End);
129*12c85518Srobert 
pushDirective__anonb4e7e0440111::Scanner130*12c85518Srobert   DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
131*12c85518Srobert     Tokens.append(CurDirToks);
132*12c85518Srobert     DirsWithToks.emplace_back(Kind, CurDirToks.size());
133*12c85518Srobert     CurDirToks.clear();
134*12c85518Srobert     return DirsWithToks.back();
135*12c85518Srobert   }
popDirective__anonb4e7e0440111::Scanner136*12c85518Srobert   void popDirective() {
137*12c85518Srobert     Tokens.pop_back_n(DirsWithToks.pop_back_val().NumTokens);
138*12c85518Srobert   }
topDirective__anonb4e7e0440111::Scanner139*12c85518Srobert   DirectiveKind topDirective() const {
140*12c85518Srobert     return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
141*12c85518Srobert   }
142*12c85518Srobert 
getOffsetAt__anonb4e7e0440111::Scanner143*12c85518Srobert   unsigned getOffsetAt(const char *CurPtr) const {
144*12c85518Srobert     return CurPtr - Input.data();
145*12c85518Srobert   }
146*12c85518Srobert 
147*12c85518Srobert   /// Reports a diagnostic if the diagnostic engine is provided. Always returns
148*12c85518Srobert   /// true at the end.
149*12c85518Srobert   bool reportError(const char *CurPtr, unsigned Err);
150*12c85518Srobert 
151*12c85518Srobert   StringMap<char> SplitIds;
152*12c85518Srobert   StringRef Input;
153*12c85518Srobert   SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
154*12c85518Srobert   DiagnosticsEngine *Diags;
155*12c85518Srobert   SourceLocation InputSourceLoc;
156*12c85518Srobert 
157*12c85518Srobert   const char *LastTokenPtr = nullptr;
158*12c85518Srobert   /// Keeps track of the tokens for the currently lexed directive. Once a
159*12c85518Srobert   /// directive is fully lexed and "committed" then the tokens get appended to
160*12c85518Srobert   /// \p Tokens and \p CurDirToks is cleared for the next directive.
161*12c85518Srobert   SmallVector<dependency_directives_scan::Token, 32> CurDirToks;
162*12c85518Srobert   /// The directives that were lexed along with the number of tokens that each
163*12c85518Srobert   /// directive contains. The tokens of all the directives are kept in \p Tokens
164*12c85518Srobert   /// vector, in the same order as the directives order in \p DirsWithToks.
165*12c85518Srobert   SmallVector<DirectiveWithTokens, 64> DirsWithToks;
166*12c85518Srobert   LangOptions LangOpts;
167*12c85518Srobert   Lexer TheLexer;
168*12c85518Srobert };
169*12c85518Srobert 
170*12c85518Srobert } // end anonymous namespace
171*12c85518Srobert 
reportError(const char * CurPtr,unsigned Err)172*12c85518Srobert bool Scanner::reportError(const char *CurPtr, unsigned Err) {
173*12c85518Srobert   if (!Diags)
174*12c85518Srobert     return true;
175*12c85518Srobert   assert(CurPtr >= Input.data() && "invalid buffer ptr");
176*12c85518Srobert   Diags->Report(InputSourceLoc.getLocWithOffset(getOffsetAt(CurPtr)), Err);
177*12c85518Srobert   return true;
178*12c85518Srobert }
179*12c85518Srobert 
skipOverSpaces(const char * & First,const char * const End)180*12c85518Srobert static void skipOverSpaces(const char *&First, const char *const End) {
181*12c85518Srobert   while (First != End && isHorizontalWhitespace(*First))
182*12c85518Srobert     ++First;
183*12c85518Srobert }
184*12c85518Srobert 
isRawStringLiteral(const char * First,const char * Current)185*12c85518Srobert [[nodiscard]] static bool isRawStringLiteral(const char *First,
186*12c85518Srobert                                              const char *Current) {
187*12c85518Srobert   assert(First <= Current);
188*12c85518Srobert 
189*12c85518Srobert   // Check if we can even back up.
190*12c85518Srobert   if (*Current != '"' || First == Current)
191*12c85518Srobert     return false;
192*12c85518Srobert 
193*12c85518Srobert   // Check for an "R".
194*12c85518Srobert   --Current;
195*12c85518Srobert   if (*Current != 'R')
196*12c85518Srobert     return false;
197*12c85518Srobert   if (First == Current || !isAsciiIdentifierContinue(*--Current))
198*12c85518Srobert     return true;
199*12c85518Srobert 
200*12c85518Srobert   // Check for a prefix of "u", "U", or "L".
201*12c85518Srobert   if (*Current == 'u' || *Current == 'U' || *Current == 'L')
202*12c85518Srobert     return First == Current || !isAsciiIdentifierContinue(*--Current);
203*12c85518Srobert 
204*12c85518Srobert   // Check for a prefix of "u8".
205*12c85518Srobert   if (*Current != '8' || First == Current || *Current-- != 'u')
206*12c85518Srobert     return false;
207*12c85518Srobert   return First == Current || !isAsciiIdentifierContinue(*--Current);
208*12c85518Srobert }
209*12c85518Srobert 
skipRawString(const char * & First,const char * const End)210*12c85518Srobert static void skipRawString(const char *&First, const char *const End) {
211*12c85518Srobert   assert(First[0] == '"');
212*12c85518Srobert   assert(First[-1] == 'R');
213*12c85518Srobert 
214*12c85518Srobert   const char *Last = ++First;
215*12c85518Srobert   while (Last != End && *Last != '(')
216*12c85518Srobert     ++Last;
217*12c85518Srobert   if (Last == End) {
218*12c85518Srobert     First = Last; // Hit the end... just give up.
219*12c85518Srobert     return;
220*12c85518Srobert   }
221*12c85518Srobert 
222*12c85518Srobert   StringRef Terminator(First, Last - First);
223*12c85518Srobert   for (;;) {
224*12c85518Srobert     // Move First to just past the next ")".
225*12c85518Srobert     First = Last;
226*12c85518Srobert     while (First != End && *First != ')')
227*12c85518Srobert       ++First;
228*12c85518Srobert     if (First == End)
229*12c85518Srobert       return;
230*12c85518Srobert     ++First;
231*12c85518Srobert 
232*12c85518Srobert     // Look ahead for the terminator sequence.
233*12c85518Srobert     Last = First;
234*12c85518Srobert     while (Last != End && size_t(Last - First) < Terminator.size() &&
235*12c85518Srobert            Terminator[Last - First] == *Last)
236*12c85518Srobert       ++Last;
237*12c85518Srobert 
238*12c85518Srobert     // Check if we hit it (or the end of the file).
239*12c85518Srobert     if (Last == End) {
240*12c85518Srobert       First = Last;
241*12c85518Srobert       return;
242*12c85518Srobert     }
243*12c85518Srobert     if (size_t(Last - First) < Terminator.size())
244*12c85518Srobert       continue;
245*12c85518Srobert     if (*Last != '"')
246*12c85518Srobert       continue;
247*12c85518Srobert     First = Last + 1;
248*12c85518Srobert     return;
249*12c85518Srobert   }
250*12c85518Srobert }
251*12c85518Srobert 
252*12c85518Srobert // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
isEOL(const char * First,const char * const End)253*12c85518Srobert static unsigned isEOL(const char *First, const char *const End) {
254*12c85518Srobert   if (First == End)
255*12c85518Srobert     return 0;
256*12c85518Srobert   if (End - First > 1 && isVerticalWhitespace(First[0]) &&
257*12c85518Srobert       isVerticalWhitespace(First[1]) && First[0] != First[1])
258*12c85518Srobert     return 2;
259*12c85518Srobert   return !!isVerticalWhitespace(First[0]);
260*12c85518Srobert }
261*12c85518Srobert 
skipString(const char * & First,const char * const End)262*12c85518Srobert static void skipString(const char *&First, const char *const End) {
263*12c85518Srobert   assert(*First == '\'' || *First == '"' || *First == '<');
264*12c85518Srobert   const char Terminator = *First == '<' ? '>' : *First;
265*12c85518Srobert   for (++First; First != End && *First != Terminator; ++First) {
266*12c85518Srobert     // String and character literals don't extend past the end of the line.
267*12c85518Srobert     if (isVerticalWhitespace(*First))
268*12c85518Srobert       return;
269*12c85518Srobert     if (*First != '\\')
270*12c85518Srobert       continue;
271*12c85518Srobert     // Skip past backslash to the next character. This ensures that the
272*12c85518Srobert     // character right after it is skipped as well, which matters if it's
273*12c85518Srobert     // the terminator.
274*12c85518Srobert     if (++First == End)
275*12c85518Srobert       return;
276*12c85518Srobert     if (!isWhitespace(*First))
277*12c85518Srobert       continue;
278*12c85518Srobert     // Whitespace after the backslash might indicate a line continuation.
279*12c85518Srobert     const char *FirstAfterBackslashPastSpace = First;
280*12c85518Srobert     skipOverSpaces(FirstAfterBackslashPastSpace, End);
281*12c85518Srobert     if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
282*12c85518Srobert       // Advance the character pointer to the next line for the next
283*12c85518Srobert       // iteration.
284*12c85518Srobert       First = FirstAfterBackslashPastSpace + NLSize - 1;
285*12c85518Srobert     }
286*12c85518Srobert   }
287*12c85518Srobert   if (First != End)
288*12c85518Srobert     ++First; // Finish off the string.
289*12c85518Srobert }
290*12c85518Srobert 
291*12c85518Srobert // Returns the length of the skipped newline
skipNewline(const char * & First,const char * End)292*12c85518Srobert static unsigned skipNewline(const char *&First, const char *End) {
293*12c85518Srobert   if (First == End)
294*12c85518Srobert     return 0;
295*12c85518Srobert   assert(isVerticalWhitespace(*First));
296*12c85518Srobert   unsigned Len = isEOL(First, End);
297*12c85518Srobert   assert(Len && "expected newline");
298*12c85518Srobert   First += Len;
299*12c85518Srobert   return Len;
300*12c85518Srobert }
301*12c85518Srobert 
/// Given \p First positioned just past a newline of length \p EOLLen, tells
/// whether the character immediately before that newline is a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  const char *const BeforeNewline = First - static_cast<int>(EOLLen) - 1;
  return *BeforeNewline == '\\';
}
305*12c85518Srobert 
skipToNewlineRaw(const char * & First,const char * const End)306*12c85518Srobert static void skipToNewlineRaw(const char *&First, const char *const End) {
307*12c85518Srobert   for (;;) {
308*12c85518Srobert     if (First == End)
309*12c85518Srobert       return;
310*12c85518Srobert 
311*12c85518Srobert     unsigned Len = isEOL(First, End);
312*12c85518Srobert     if (Len)
313*12c85518Srobert       return;
314*12c85518Srobert 
315*12c85518Srobert     do {
316*12c85518Srobert       if (++First == End)
317*12c85518Srobert         return;
318*12c85518Srobert       Len = isEOL(First, End);
319*12c85518Srobert     } while (!Len);
320*12c85518Srobert 
321*12c85518Srobert     if (First[-1] != '\\')
322*12c85518Srobert       return;
323*12c85518Srobert 
324*12c85518Srobert     First += Len;
325*12c85518Srobert     // Keep skipping lines...
326*12c85518Srobert   }
327*12c85518Srobert }
328*12c85518Srobert 
skipLineComment(const char * & First,const char * const End)329*12c85518Srobert static void skipLineComment(const char *&First, const char *const End) {
330*12c85518Srobert   assert(First[0] == '/' && First[1] == '/');
331*12c85518Srobert   First += 2;
332*12c85518Srobert   skipToNewlineRaw(First, End);
333*12c85518Srobert }
334*12c85518Srobert 
/// Skips a "/*...*/" comment. \p First must point at the opening "/*";
/// afterwards it points just past the closing "*/", or at \p End when the
/// comment is unterminated.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');

  // A complete block comment needs at least the four characters "/**/".
  if (End - First < 4) {
    First = End;
    return;
  }

  // Start at the first position where "*/" could end, so the '*' of the
  // opener is never reused as part of the closer.
  First += 3;
  while (First != End) {
    const bool ClosesHere = First[-1] == '*' && First[0] == '/';
    ++First;
    if (ClosesHere)
      return;
  }
}
347*12c85518Srobert 
348*12c85518Srobert /// \returns True if the current single quotation mark character is a C++ 14
349*12c85518Srobert /// digit separator.
isQuoteCppDigitSeparator(const char * const Start,const char * const Cur,const char * const End)350*12c85518Srobert static bool isQuoteCppDigitSeparator(const char *const Start,
351*12c85518Srobert                                      const char *const Cur,
352*12c85518Srobert                                      const char *const End) {
353*12c85518Srobert   assert(*Cur == '\'' && "expected quotation character");
354*12c85518Srobert   // skipLine called in places where we don't expect a valid number
355*12c85518Srobert   // body before `start` on the same line, so always return false at the start.
356*12c85518Srobert   if (Start == Cur)
357*12c85518Srobert     return false;
358*12c85518Srobert   // The previous character must be a valid PP number character.
359*12c85518Srobert   // Make sure that the L, u, U, u8 prefixes don't get marked as a
360*12c85518Srobert   // separator though.
361*12c85518Srobert   char Prev = *(Cur - 1);
362*12c85518Srobert   if (Prev == 'L' || Prev == 'U' || Prev == 'u')
363*12c85518Srobert     return false;
364*12c85518Srobert   if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
365*12c85518Srobert     return false;
366*12c85518Srobert   if (!isPreprocessingNumberBody(Prev))
367*12c85518Srobert     return false;
368*12c85518Srobert   // The next character should be a valid identifier body character.
369*12c85518Srobert   return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
370*12c85518Srobert }
371*12c85518Srobert 
skipLine(const char * & First,const char * const End)372*12c85518Srobert void Scanner::skipLine(const char *&First, const char *const End) {
373*12c85518Srobert   for (;;) {
374*12c85518Srobert     assert(First <= End);
375*12c85518Srobert     if (First == End)
376*12c85518Srobert       return;
377*12c85518Srobert 
378*12c85518Srobert     if (isVerticalWhitespace(*First)) {
379*12c85518Srobert       skipNewline(First, End);
380*12c85518Srobert       return;
381*12c85518Srobert     }
382*12c85518Srobert     const char *Start = First;
383*12c85518Srobert     while (First != End && !isVerticalWhitespace(*First)) {
384*12c85518Srobert       // Iterate over strings correctly to avoid comments and newlines.
385*12c85518Srobert       if (*First == '"' ||
386*12c85518Srobert           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
387*12c85518Srobert         LastTokenPtr = First;
388*12c85518Srobert         if (isRawStringLiteral(Start, First))
389*12c85518Srobert           skipRawString(First, End);
390*12c85518Srobert         else
391*12c85518Srobert           skipString(First, End);
392*12c85518Srobert         continue;
393*12c85518Srobert       }
394*12c85518Srobert 
395*12c85518Srobert       // Iterate over comments correctly.
396*12c85518Srobert       if (*First != '/' || End - First < 2) {
397*12c85518Srobert         LastTokenPtr = First;
398*12c85518Srobert         ++First;
399*12c85518Srobert         continue;
400*12c85518Srobert       }
401*12c85518Srobert 
402*12c85518Srobert       if (First[1] == '/') {
403*12c85518Srobert         // "//...".
404*12c85518Srobert         skipLineComment(First, End);
405*12c85518Srobert         continue;
406*12c85518Srobert       }
407*12c85518Srobert 
408*12c85518Srobert       if (First[1] != '*') {
409*12c85518Srobert         LastTokenPtr = First;
410*12c85518Srobert         ++First;
411*12c85518Srobert         continue;
412*12c85518Srobert       }
413*12c85518Srobert 
414*12c85518Srobert       // "/*...*/".
415*12c85518Srobert       skipBlockComment(First, End);
416*12c85518Srobert     }
417*12c85518Srobert     if (First == End)
418*12c85518Srobert       return;
419*12c85518Srobert 
420*12c85518Srobert     // Skip over the newline.
421*12c85518Srobert     unsigned Len = skipNewline(First, End);
422*12c85518Srobert     if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
423*12c85518Srobert       break;
424*12c85518Srobert   }
425*12c85518Srobert }
426*12c85518Srobert 
skipDirective(StringRef Name,const char * & First,const char * const End)427*12c85518Srobert void Scanner::skipDirective(StringRef Name, const char *&First,
428*12c85518Srobert                             const char *const End) {
429*12c85518Srobert   if (llvm::StringSwitch<bool>(Name)
430*12c85518Srobert           .Case("warning", true)
431*12c85518Srobert           .Case("error", true)
432*12c85518Srobert           .Default(false))
433*12c85518Srobert     // Do not process quotes or comments.
434*12c85518Srobert     skipToNewlineRaw(First, End);
435*12c85518Srobert   else
436*12c85518Srobert     skipLine(First, End);
437*12c85518Srobert }
438*12c85518Srobert 
skipWhitespace(const char * & First,const char * const End)439*12c85518Srobert static void skipWhitespace(const char *&First, const char *const End) {
440*12c85518Srobert   for (;;) {
441*12c85518Srobert     assert(First <= End);
442*12c85518Srobert     skipOverSpaces(First, End);
443*12c85518Srobert 
444*12c85518Srobert     if (End - First < 2)
445*12c85518Srobert       return;
446*12c85518Srobert 
447*12c85518Srobert     if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
448*12c85518Srobert       skipNewline(++First, End);
449*12c85518Srobert       continue;
450*12c85518Srobert     }
451*12c85518Srobert 
452*12c85518Srobert     // Check for a non-comment character.
453*12c85518Srobert     if (First[0] != '/')
454*12c85518Srobert       return;
455*12c85518Srobert 
456*12c85518Srobert     // "// ...".
457*12c85518Srobert     if (First[1] == '/') {
458*12c85518Srobert       skipLineComment(First, End);
459*12c85518Srobert       return;
460*12c85518Srobert     }
461*12c85518Srobert 
462*12c85518Srobert     // Cannot be a comment.
463*12c85518Srobert     if (First[1] != '*')
464*12c85518Srobert       return;
465*12c85518Srobert 
466*12c85518Srobert     // "/*...*/".
467*12c85518Srobert     skipBlockComment(First, End);
468*12c85518Srobert   }
469*12c85518Srobert }
470*12c85518Srobert 
lexModuleDirectiveBody(DirectiveKind Kind,const char * & First,const char * const End)471*12c85518Srobert bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
472*12c85518Srobert                                      const char *const End) {
473*12c85518Srobert   const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
474*12c85518Srobert   for (;;) {
475*12c85518Srobert     const dependency_directives_scan::Token &Tok = lexToken(First, End);
476*12c85518Srobert     if (Tok.is(tok::eof))
477*12c85518Srobert       return reportError(
478*12c85518Srobert           DirectiveLoc,
479*12c85518Srobert           diag::err_dep_source_scanner_missing_semi_after_at_import);
480*12c85518Srobert     if (Tok.is(tok::semi))
481*12c85518Srobert       break;
482*12c85518Srobert   }
483*12c85518Srobert   pushDirective(Kind);
484*12c85518Srobert   skipWhitespace(First, End);
485*12c85518Srobert   if (First == End)
486*12c85518Srobert     return false;
487*12c85518Srobert   if (!isVerticalWhitespace(*First))
488*12c85518Srobert     return reportError(
489*12c85518Srobert         DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import);
490*12c85518Srobert   skipNewline(First, End);
491*12c85518Srobert   return false;
492*12c85518Srobert }
493*12c85518Srobert 
lexToken(const char * & First,const char * const End)494*12c85518Srobert dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
495*12c85518Srobert                                                      const char *const End) {
496*12c85518Srobert   clang::Token Tok;
497*12c85518Srobert   TheLexer.LexFromRawLexer(Tok);
498*12c85518Srobert   First = Input.data() + TheLexer.getCurrentBufferOffset();
499*12c85518Srobert   assert(First <= End);
500*12c85518Srobert 
501*12c85518Srobert   unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
502*12c85518Srobert   CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
503*12c85518Srobert                           Tok.getFlags());
504*12c85518Srobert   return CurDirToks.back();
505*12c85518Srobert }
506*12c85518Srobert 
507*12c85518Srobert dependency_directives_scan::Token &
lexIncludeFilename(const char * & First,const char * const End)508*12c85518Srobert Scanner::lexIncludeFilename(const char *&First, const char *const End) {
509*12c85518Srobert   clang::Token Tok;
510*12c85518Srobert   TheLexer.LexIncludeFilename(Tok);
511*12c85518Srobert   First = Input.data() + TheLexer.getCurrentBufferOffset();
512*12c85518Srobert   assert(First <= End);
513*12c85518Srobert 
514*12c85518Srobert   unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
515*12c85518Srobert   CurDirToks.emplace_back(Offset, Tok.getLength(), Tok.getKind(),
516*12c85518Srobert                           Tok.getFlags());
517*12c85518Srobert   return CurDirToks.back();
518*12c85518Srobert }
519*12c85518Srobert 
lexPPDirectiveBody(const char * & First,const char * const End)520*12c85518Srobert void Scanner::lexPPDirectiveBody(const char *&First, const char *const End) {
521*12c85518Srobert   while (true) {
522*12c85518Srobert     const dependency_directives_scan::Token &Tok = lexToken(First, End);
523*12c85518Srobert     if (Tok.is(tok::eod))
524*12c85518Srobert       break;
525*12c85518Srobert   }
526*12c85518Srobert }
527*12c85518Srobert 
528*12c85518Srobert [[nodiscard]] std::optional<StringRef>
tryLexIdentifierOrSkipLine(const char * & First,const char * const End)529*12c85518Srobert Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) {
530*12c85518Srobert   const dependency_directives_scan::Token &Tok = lexToken(First, End);
531*12c85518Srobert   if (Tok.isNot(tok::raw_identifier)) {
532*12c85518Srobert     if (!Tok.is(tok::eod))
533*12c85518Srobert       skipLine(First, End);
534*12c85518Srobert     return std::nullopt;
535*12c85518Srobert   }
536*12c85518Srobert 
537*12c85518Srobert   bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
538*12c85518Srobert   if (LLVM_LIKELY(!NeedsCleaning))
539*12c85518Srobert     return Input.slice(Tok.Offset, Tok.getEnd());
540*12c85518Srobert 
541*12c85518Srobert   SmallString<64> Spelling;
542*12c85518Srobert   Spelling.resize(Tok.Length);
543*12c85518Srobert 
544*12c85518Srobert   unsigned SpellingLength = 0;
545*12c85518Srobert   const char *BufPtr = Input.begin() + Tok.Offset;
546*12c85518Srobert   const char *AfterIdent = Input.begin() + Tok.getEnd();
547*12c85518Srobert   while (BufPtr < AfterIdent) {
548*12c85518Srobert     unsigned Size;
549*12c85518Srobert     Spelling[SpellingLength++] =
550*12c85518Srobert         Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
551*12c85518Srobert     BufPtr += Size;
552*12c85518Srobert   }
553*12c85518Srobert 
554*12c85518Srobert   return SplitIds.try_emplace(StringRef(Spelling.begin(), SpellingLength), 0)
555*12c85518Srobert       .first->first();
556*12c85518Srobert }
557*12c85518Srobert 
lexIdentifier(const char * & First,const char * const End)558*12c85518Srobert StringRef Scanner::lexIdentifier(const char *&First, const char *const End) {
559*12c85518Srobert   std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
560*12c85518Srobert   assert(Id && "expected identifier token");
561*12c85518Srobert   return *Id;
562*12c85518Srobert }
563*12c85518Srobert 
isNextIdentifierOrSkipLine(StringRef Id,const char * & First,const char * const End)564*12c85518Srobert bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
565*12c85518Srobert                                          const char *const End) {
566*12c85518Srobert   if (std::optional<StringRef> FoundId =
567*12c85518Srobert           tryLexIdentifierOrSkipLine(First, End)) {
568*12c85518Srobert     if (*FoundId == Id)
569*12c85518Srobert       return true;
570*12c85518Srobert     skipLine(First, End);
571*12c85518Srobert   }
572*12c85518Srobert   return false;
573*12c85518Srobert }
574*12c85518Srobert 
lexAt(const char * & First,const char * const End)575*12c85518Srobert bool Scanner::lexAt(const char *&First, const char *const End) {
576*12c85518Srobert   // Handle "@import".
577*12c85518Srobert 
578*12c85518Srobert   // Lex '@'.
579*12c85518Srobert   const dependency_directives_scan::Token &AtTok = lexToken(First, End);
580*12c85518Srobert   assert(AtTok.is(tok::at));
581*12c85518Srobert   (void)AtTok;
582*12c85518Srobert 
583*12c85518Srobert   if (!isNextIdentifierOrSkipLine("import", First, End))
584*12c85518Srobert     return false;
585*12c85518Srobert   return lexModuleDirectiveBody(decl_at_import, First, End);
586*12c85518Srobert }
587*12c85518Srobert 
lexModule(const char * & First,const char * const End)588*12c85518Srobert bool Scanner::lexModule(const char *&First, const char *const End) {
589*12c85518Srobert   StringRef Id = lexIdentifier(First, End);
590*12c85518Srobert   bool Export = false;
591*12c85518Srobert   if (Id == "export") {
592*12c85518Srobert     Export = true;
593*12c85518Srobert     std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
594*12c85518Srobert     if (!NextId)
595*12c85518Srobert       return false;
596*12c85518Srobert     Id = *NextId;
597*12c85518Srobert   }
598*12c85518Srobert 
599*12c85518Srobert   if (Id != "module" && Id != "import") {
600*12c85518Srobert     skipLine(First, End);
601*12c85518Srobert     return false;
602*12c85518Srobert   }
603*12c85518Srobert 
604*12c85518Srobert   skipWhitespace(First, End);
605*12c85518Srobert 
606*12c85518Srobert   // Ignore this as a module directive if the next character can't be part of
607*12c85518Srobert   // an import.
608*12c85518Srobert 
609*12c85518Srobert   switch (*First) {
610*12c85518Srobert   case ':':
611*12c85518Srobert   case '<':
612*12c85518Srobert   case '"':
613*12c85518Srobert     break;
614*12c85518Srobert   default:
615*12c85518Srobert     if (!isAsciiIdentifierContinue(*First)) {
616*12c85518Srobert       skipLine(First, End);
617*12c85518Srobert       return false;
618*12c85518Srobert     }
619*12c85518Srobert   }
620*12c85518Srobert 
621*12c85518Srobert   TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ false);
622*12c85518Srobert 
623*12c85518Srobert   DirectiveKind Kind;
624*12c85518Srobert   if (Id == "module")
625*12c85518Srobert     Kind = Export ? cxx_export_module_decl : cxx_module_decl;
626*12c85518Srobert   else
627*12c85518Srobert     Kind = Export ? cxx_export_import_decl : cxx_import_decl;
628*12c85518Srobert 
629*12c85518Srobert   return lexModuleDirectiveBody(Kind, First, End);
630*12c85518Srobert }
631*12c85518Srobert 
lexPragma(const char * & First,const char * const End)632*12c85518Srobert bool Scanner::lexPragma(const char *&First, const char *const End) {
633*12c85518Srobert   std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
634*12c85518Srobert   if (!FoundId)
635*12c85518Srobert     return false;
636*12c85518Srobert 
637*12c85518Srobert   StringRef Id = *FoundId;
638*12c85518Srobert   auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
639*12c85518Srobert                   .Case("once", pp_pragma_once)
640*12c85518Srobert                   .Case("push_macro", pp_pragma_push_macro)
641*12c85518Srobert                   .Case("pop_macro", pp_pragma_pop_macro)
642*12c85518Srobert                   .Case("include_alias", pp_pragma_include_alias)
643*12c85518Srobert                   .Default(pp_none);
644*12c85518Srobert   if (Kind != pp_none) {
645*12c85518Srobert     lexPPDirectiveBody(First, End);
646*12c85518Srobert     pushDirective(Kind);
647*12c85518Srobert     return false;
648*12c85518Srobert   }
649*12c85518Srobert 
650*12c85518Srobert   if (Id != "clang") {
651*12c85518Srobert     skipLine(First, End);
652*12c85518Srobert     return false;
653*12c85518Srobert   }
654*12c85518Srobert 
655*12c85518Srobert   // #pragma clang.
656*12c85518Srobert   if (!isNextIdentifierOrSkipLine("module", First, End))
657*12c85518Srobert     return false;
658*12c85518Srobert 
659*12c85518Srobert   // #pragma clang module.
660*12c85518Srobert   if (!isNextIdentifierOrSkipLine("import", First, End))
661*12c85518Srobert     return false;
662*12c85518Srobert 
663*12c85518Srobert   // #pragma clang module import.
664*12c85518Srobert   lexPPDirectiveBody(First, End);
665*12c85518Srobert   pushDirective(pp_pragma_import);
666*12c85518Srobert   return false;
667*12c85518Srobert }
668*12c85518Srobert 
lexEndif(const char * & First,const char * const End)669*12c85518Srobert bool Scanner::lexEndif(const char *&First, const char *const End) {
670*12c85518Srobert   // Strip out "#else" if it's empty.
671*12c85518Srobert   if (topDirective() == pp_else)
672*12c85518Srobert     popDirective();
673*12c85518Srobert 
674*12c85518Srobert   // If "#ifdef" is empty, strip it and skip the "#endif".
675*12c85518Srobert   //
676*12c85518Srobert   // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
677*12c85518Srobert   // we can skip empty `#if` and `#elif` blocks as well after scanning for a
678*12c85518Srobert   // literal __has_include in the condition.  Even without that rule we could
679*12c85518Srobert   // drop the tokens if we scan for identifiers in the condition and find none.
680*12c85518Srobert   if (topDirective() == pp_ifdef || topDirective() == pp_ifndef) {
681*12c85518Srobert     popDirective();
682*12c85518Srobert     skipLine(First, End);
683*12c85518Srobert     return false;
684*12c85518Srobert   }
685*12c85518Srobert 
686*12c85518Srobert   return lexDefault(pp_endif, First, End);
687*12c85518Srobert }
688*12c85518Srobert 
lexDefault(DirectiveKind Kind,const char * & First,const char * const End)689*12c85518Srobert bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
690*12c85518Srobert                          const char *const End) {
691*12c85518Srobert   lexPPDirectiveBody(First, End);
692*12c85518Srobert   pushDirective(Kind);
693*12c85518Srobert   return false;
694*12c85518Srobert }
695*12c85518Srobert 
/// Cheap filter on the first non-whitespace character of a line.
///
/// \returns true for '#', '@', 'i', 'e' and 'm', and false for every other
/// character.
static bool isStartOfRelevantLine(char First) {
  return First == '#' || First == '@' || First == 'i' || First == 'e' ||
         First == 'm';
}
707*12c85518Srobert 
lexPPLine(const char * & First,const char * const End)708*12c85518Srobert bool Scanner::lexPPLine(const char *&First, const char *const End) {
709*12c85518Srobert   assert(First != End);
710*12c85518Srobert 
711*12c85518Srobert   skipWhitespace(First, End);
712*12c85518Srobert   assert(First <= End);
713*12c85518Srobert   if (First == End)
714*12c85518Srobert     return false;
715*12c85518Srobert 
716*12c85518Srobert   if (!isStartOfRelevantLine(*First)) {
717*12c85518Srobert     skipLine(First, End);
718*12c85518Srobert     assert(First <= End);
719*12c85518Srobert     return false;
720*12c85518Srobert   }
721*12c85518Srobert 
722*12c85518Srobert   LastTokenPtr = First;
723*12c85518Srobert 
724*12c85518Srobert   TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
725*12c85518Srobert 
726*12c85518Srobert   auto ScEx1 = make_scope_exit([&]() {
727*12c85518Srobert     /// Clear Scanner's CurDirToks before returning, in case we didn't push a
728*12c85518Srobert     /// new directive.
729*12c85518Srobert     CurDirToks.clear();
730*12c85518Srobert   });
731*12c85518Srobert 
732*12c85518Srobert   // Handle "@import".
733*12c85518Srobert   if (*First == '@')
734*12c85518Srobert     return lexAt(First, End);
735*12c85518Srobert 
736*12c85518Srobert   if (*First == 'i' || *First == 'e' || *First == 'm')
737*12c85518Srobert     return lexModule(First, End);
738*12c85518Srobert 
739*12c85518Srobert   // Handle preprocessing directives.
740*12c85518Srobert 
741*12c85518Srobert   TheLexer.setParsingPreprocessorDirective(true);
742*12c85518Srobert   auto ScEx2 = make_scope_exit(
743*12c85518Srobert       [&]() { TheLexer.setParsingPreprocessorDirective(false); });
744*12c85518Srobert 
745*12c85518Srobert   // Lex '#'.
746*12c85518Srobert   const dependency_directives_scan::Token &HashTok = lexToken(First, End);
747*12c85518Srobert   if (HashTok.is(tok::hashhash)) {
748*12c85518Srobert     // A \p tok::hashhash at this location is passed by the preprocessor to the
749*12c85518Srobert     // parser to interpret, like any other token. So for dependency scanning
750*12c85518Srobert     // skip it like a normal token not affecting the preprocessor.
751*12c85518Srobert     skipLine(First, End);
752*12c85518Srobert     assert(First <= End);
753*12c85518Srobert     return false;
754*12c85518Srobert   }
755*12c85518Srobert   assert(HashTok.is(tok::hash));
756*12c85518Srobert   (void)HashTok;
757*12c85518Srobert 
758*12c85518Srobert   std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
759*12c85518Srobert   if (!FoundId)
760*12c85518Srobert     return false;
761*12c85518Srobert 
762*12c85518Srobert   StringRef Id = *FoundId;
763*12c85518Srobert 
764*12c85518Srobert   if (Id == "pragma")
765*12c85518Srobert     return lexPragma(First, End);
766*12c85518Srobert 
767*12c85518Srobert   auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
768*12c85518Srobert                   .Case("include", pp_include)
769*12c85518Srobert                   .Case("__include_macros", pp___include_macros)
770*12c85518Srobert                   .Case("define", pp_define)
771*12c85518Srobert                   .Case("undef", pp_undef)
772*12c85518Srobert                   .Case("import", pp_import)
773*12c85518Srobert                   .Case("include_next", pp_include_next)
774*12c85518Srobert                   .Case("if", pp_if)
775*12c85518Srobert                   .Case("ifdef", pp_ifdef)
776*12c85518Srobert                   .Case("ifndef", pp_ifndef)
777*12c85518Srobert                   .Case("elif", pp_elif)
778*12c85518Srobert                   .Case("elifdef", pp_elifdef)
779*12c85518Srobert                   .Case("elifndef", pp_elifndef)
780*12c85518Srobert                   .Case("else", pp_else)
781*12c85518Srobert                   .Case("endif", pp_endif)
782*12c85518Srobert                   .Default(pp_none);
783*12c85518Srobert   if (Kind == pp_none) {
784*12c85518Srobert     skipDirective(Id, First, End);
785*12c85518Srobert     return false;
786*12c85518Srobert   }
787*12c85518Srobert 
788*12c85518Srobert   if (Kind == pp_endif)
789*12c85518Srobert     return lexEndif(First, End);
790*12c85518Srobert 
791*12c85518Srobert   switch (Kind) {
792*12c85518Srobert   case pp_include:
793*12c85518Srobert   case pp___include_macros:
794*12c85518Srobert   case pp_include_next:
795*12c85518Srobert   case pp_import:
796*12c85518Srobert     lexIncludeFilename(First, End);
797*12c85518Srobert     break;
798*12c85518Srobert   default:
799*12c85518Srobert     break;
800*12c85518Srobert   }
801*12c85518Srobert 
802*12c85518Srobert   // Everything else.
803*12c85518Srobert   return lexDefault(Kind, First, End);
804*12c85518Srobert }
805*12c85518Srobert 
/// Advance \p First past a leading UTF-8 byte order mark (the bytes EF BB BF)
/// when the range [First, End) begins with one; otherwise leave it untouched.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  // Too short to hold the three-byte mark.
  if ((End - First) < 3)
    return;

  const bool StartsWithBOM =
      First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
  if (StartsWithBOM)
    First += 3;
}
811*12c85518Srobert 
scanImpl(const char * First,const char * const End)812*12c85518Srobert bool Scanner::scanImpl(const char *First, const char *const End) {
813*12c85518Srobert   skipUTF8ByteOrderMark(First, End);
814*12c85518Srobert   while (First != End)
815*12c85518Srobert     if (lexPPLine(First, End))
816*12c85518Srobert       return true;
817*12c85518Srobert   return false;
818*12c85518Srobert }
819*12c85518Srobert 
scan(SmallVectorImpl<Directive> & Directives)820*12c85518Srobert bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
821*12c85518Srobert   bool Error = scanImpl(Input.begin(), Input.end());
822*12c85518Srobert 
823*12c85518Srobert   if (!Error) {
824*12c85518Srobert     // Add an EOF on success.
825*12c85518Srobert     if (LastTokenPtr &&
826*12c85518Srobert         (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
827*12c85518Srobert       pushDirective(tokens_present_before_eof);
828*12c85518Srobert     pushDirective(pp_eof);
829*12c85518Srobert   }
830*12c85518Srobert 
831*12c85518Srobert   ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
832*12c85518Srobert   for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
833*12c85518Srobert     assert(RemainingTokens.size() >= DirWithToks.NumTokens);
834*12c85518Srobert     Directives.emplace_back(DirWithToks.Kind,
835*12c85518Srobert                             RemainingTokens.take_front(DirWithToks.NumTokens));
836*12c85518Srobert     RemainingTokens = RemainingTokens.drop_front(DirWithToks.NumTokens);
837*12c85518Srobert   }
838*12c85518Srobert   assert(RemainingTokens.empty());
839*12c85518Srobert 
840*12c85518Srobert   return Error;
841*12c85518Srobert }
842*12c85518Srobert 
scanSourceForDependencyDirectives(StringRef Input,SmallVectorImpl<dependency_directives_scan::Token> & Tokens,SmallVectorImpl<Directive> & Directives,DiagnosticsEngine * Diags,SourceLocation InputSourceLoc)843*12c85518Srobert bool clang::scanSourceForDependencyDirectives(
844*12c85518Srobert     StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
845*12c85518Srobert     SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
846*12c85518Srobert     SourceLocation InputSourceLoc) {
847*12c85518Srobert   return Scanner(Input, Tokens, Diags, InputSourceLoc).scan(Directives);
848*12c85518Srobert }
849*12c85518Srobert 
printDependencyDirectivesAsSource(StringRef Source,ArrayRef<dependency_directives_scan::Directive> Directives,llvm::raw_ostream & OS)850*12c85518Srobert void clang::printDependencyDirectivesAsSource(
851*12c85518Srobert     StringRef Source,
852*12c85518Srobert     ArrayRef<dependency_directives_scan::Directive> Directives,
853*12c85518Srobert     llvm::raw_ostream &OS) {
854*12c85518Srobert   // Add a space separator where it is convenient for testing purposes.
855*12c85518Srobert   auto needsSpaceSeparator =
856*12c85518Srobert       [](tok::TokenKind Prev,
857*12c85518Srobert          const dependency_directives_scan::Token &Tok) -> bool {
858*12c85518Srobert     if (Prev == Tok.Kind)
859*12c85518Srobert       return !Tok.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
860*12c85518Srobert                           tok::r_square);
861*12c85518Srobert     if (Prev == tok::raw_identifier &&
862*12c85518Srobert         Tok.isOneOf(tok::hash, tok::numeric_constant, tok::string_literal,
863*12c85518Srobert                     tok::char_constant, tok::header_name))
864*12c85518Srobert       return true;
865*12c85518Srobert     if (Prev == tok::r_paren &&
866*12c85518Srobert         Tok.isOneOf(tok::raw_identifier, tok::hash, tok::string_literal,
867*12c85518Srobert                     tok::char_constant, tok::unknown))
868*12c85518Srobert       return true;
869*12c85518Srobert     if (Prev == tok::comma &&
870*12c85518Srobert         Tok.isOneOf(tok::l_paren, tok::string_literal, tok::less))
871*12c85518Srobert       return true;
872*12c85518Srobert     return false;
873*12c85518Srobert   };
874*12c85518Srobert 
875*12c85518Srobert   for (const dependency_directives_scan::Directive &Directive : Directives) {
876*12c85518Srobert     if (Directive.Kind == tokens_present_before_eof)
877*12c85518Srobert       OS << "<TokBeforeEOF>";
878*12c85518Srobert     std::optional<tok::TokenKind> PrevTokenKind;
879*12c85518Srobert     for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
880*12c85518Srobert       if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
881*12c85518Srobert         OS << ' ';
882*12c85518Srobert       PrevTokenKind = Tok.Kind;
883*12c85518Srobert       OS << Source.slice(Tok.Offset, Tok.getEnd());
884*12c85518Srobert     }
885*12c85518Srobert   }
886*12c85518Srobert }
887