xref: /freebsd-src/contrib/llvm-project/llvm/lib/TableGen/TGLexer.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This class represents the Lexer for tablegen files.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
140b57cec5SDimitry Andric #define LLVM_LIB_TABLEGEN_TGLEXER_H
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
170b57cec5SDimitry Andric #include "llvm/ADT/StringSet.h"
180b57cec5SDimitry Andric #include "llvm/Support/DataTypes.h"
190b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h"
200b57cec5SDimitry Andric #include <cassert>
210b57cec5SDimitry Andric #include <memory>
22480093f4SDimitry Andric #include <set>
230b57cec5SDimitry Andric #include <string>
245ffd83dbSDimitry Andric #include <vector>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace llvm {
275ffd83dbSDimitry Andric template <typename T> class ArrayRef;
280b57cec5SDimitry Andric class SourceMgr;
290b57cec5SDimitry Andric class Twine;
300b57cec5SDimitry Andric 
namespace tgtok {
/// Kinds of token the TableGen lexer distinguishes.
///
/// The enumerators are grouped into contiguous runs.  Three of the runs are
/// bracketed by *_FIRST / *_LAST aliases so that membership can be tested
/// with a simple range comparison (see the predicates after the enum), so
/// enumerators must not be moved across group boundaries.
enum TokKind {
  // ---- Markers ----
  Eof,
  Error,

  // ---- Punctuation: tokens that carry no extra information ----
  minus,     // -
  plus,      // +
  l_square,  // [
  r_square,  // ]
  l_brace,   // {
  r_brace,   // }
  l_paren,   // (
  r_paren,   // )
  less,      // <
  greater,   // >
  colon,     // :
  semi,      // ;
  comma,     // ,
  dot,       // .
  equal,     // =
  question,  // ?
  paste,     // #
  dotdotdot, // ...

  // ---- Boolean literals ----
  TrueVal,
  FalseVal,

  // ---- Integer value ----
  IntVal,

  // ---- Binary constant ----
  // Sized according to the number of bits given.
  BinaryIntVal,

  // ---- Preprocessing tokens ----
  // Used internally by the lexer only; Lex() never returns them.
  Ifdef,
  Ifndef,
  Else,
  Endif,
  Define,

  // ---- Reserved keywords ----
  // 'ElseKW' is spelled this way so it does not clash with 'Else' above,
  // which stands for the preprocessor #else.
  Bit,
  Bits,
  Code,
  Dag,
  ElseKW,
  FalseKW,
  Field,
  In,
  Include,
  Int,
  List,
  String,
  Then,
  TrueKW,

  // ---- Object start tokens ----
  OBJECT_START_FIRST,
  Assert = OBJECT_START_FIRST,
  Class,
  Def,
  Defm,
  Defset,
  Deftype,
  Defvar,
  Dump,
  Foreach,
  If,
  Let,
  MultiClass,
  OBJECT_START_LAST = MultiClass,

  // ---- Bang operators ----
  BANG_OPERATOR_FIRST,
  XConcat = BANG_OPERATOR_FIRST,
  XADD,
  XSUB,
  XMUL,
  XDIV,
  XNOT,
  XLOG2,
  XAND,
  XOR,
  XXOR,
  XSRA,
  XSRL,
  XSHL,
  XListConcat,
  XListSplat,
  XStrConcat,
  XInterleave,
  XSubstr,
  XFind,
  XCast,
  XSubst,
  XForEach,
  XFilter,
  XFoldl,
  XHead,
  XTail,
  XSize,
  XEmpty,
  XIf,
  XCond,
  XEq,
  XIsA,
  XDag,
  XNe,
  XLe,
  XLt,
  XGe,
  XGt,
  XSetDagOp,
  XGetDagOp,
  XExists,
  XListRemove,
  XToLower,
  XToUpper,
  XRange,
  XGetDagArg,
  XGetDagName,
  XSetDagArg,
  XSetDagName,
  XRepr,
  BANG_OPERATOR_LAST = XRepr,

  // ---- String valued tokens ----
  STRING_VALUE_FIRST,
  Id = STRING_VALUE_FIRST,
  StrVal,
  VarName,
  CodeFragment,
  STRING_VALUE_LAST = CodeFragment,
};

/// isBangOperator - True when \p Kind lies in the inclusive range
/// [BANG_OPERATOR_FIRST, BANG_OPERATOR_LAST].
static inline bool isBangOperator(tgtok::TokKind Kind) {
  return Kind >= BANG_OPERATOR_FIRST && Kind <= BANG_OPERATOR_LAST;
}

/// isObjectStart - True when \p Kind is a valid first token for a statement,
/// i.e. lies in the inclusive range [OBJECT_START_FIRST, OBJECT_START_LAST].
static inline bool isObjectStart(tgtok::TokKind Kind) {
  return Kind >= OBJECT_START_FIRST && Kind <= OBJECT_START_LAST;
}

/// isStringValue - True when \p Kind is a string valued token, i.e. lies in
/// the inclusive range [STRING_VALUE_FIRST, STRING_VALUE_LAST].
static inline bool isStringValue(tgtok::TokKind Kind) {
  return Kind >= STRING_VALUE_FIRST && Kind <= STRING_VALUE_LAST;
}
} // namespace tgtok
1875f757f3fSDimitry Andric 
1880b57cec5SDimitry Andric /// TGLexer - TableGen Lexer class.
1890b57cec5SDimitry Andric class TGLexer {
1900b57cec5SDimitry Andric   SourceMgr &SrcMgr;
1910b57cec5SDimitry Andric 
192480093f4SDimitry Andric   const char *CurPtr = nullptr;
1930b57cec5SDimitry Andric   StringRef CurBuf;
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   // Information about the current token.
196480093f4SDimitry Andric   const char *TokStart = nullptr;
197480093f4SDimitry Andric   tgtok::TokKind CurCode = tgtok::TokKind::Eof;
198e8d8bef9SDimitry Andric   std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
199e8d8bef9SDimitry Andric   int64_t CurIntVal = 0; // This is valid for IntVal.
2000b57cec5SDimitry Andric 
2010b57cec5SDimitry Andric   /// CurBuffer - This is the current buffer index we're lexing from as managed
2020b57cec5SDimitry Andric   /// by the SourceMgr object.
203480093f4SDimitry Andric   unsigned CurBuffer = 0;
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric public:
206480093f4SDimitry Andric   typedef std::set<std::string> DependenciesSetTy;
207480093f4SDimitry Andric 
2080b57cec5SDimitry Andric private:
2090b57cec5SDimitry Andric   /// Dependencies - This is the list of all included files.
210480093f4SDimitry Andric   DependenciesSetTy Dependencies;
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric public:
2130b57cec5SDimitry Andric   TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric   tgtok::TokKind Lex() {
2160b57cec5SDimitry Andric     return CurCode = LexToken(CurPtr == CurBuf.begin());
2170b57cec5SDimitry Andric   }
2180b57cec5SDimitry Andric 
219480093f4SDimitry Andric   const DependenciesSetTy &getDependencies() const {
2200b57cec5SDimitry Andric     return Dependencies;
2210b57cec5SDimitry Andric   }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   tgtok::TokKind getCode() const { return CurCode; }
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   const std::string &getCurStrVal() const {
2265f757f3fSDimitry Andric     assert(tgtok::isStringValue(CurCode) &&
2270b57cec5SDimitry Andric            "This token doesn't have a string value");
2280b57cec5SDimitry Andric     return CurStrVal;
2290b57cec5SDimitry Andric   }
2300b57cec5SDimitry Andric   int64_t getCurIntVal() const {
2310b57cec5SDimitry Andric     assert(CurCode == tgtok::IntVal && "This token isn't an integer");
2320b57cec5SDimitry Andric     return CurIntVal;
2330b57cec5SDimitry Andric   }
2340b57cec5SDimitry Andric   std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
2350b57cec5SDimitry Andric     assert(CurCode == tgtok::BinaryIntVal &&
2360b57cec5SDimitry Andric            "This token isn't a binary integer");
2370b57cec5SDimitry Andric     return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
2380b57cec5SDimitry Andric   }
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   SMLoc getLoc() const;
241bdd1243dSDimitry Andric   SMRange getLocRange() const;
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric private:
2440b57cec5SDimitry Andric   /// LexToken - Read the next token and return its code.
2450b57cec5SDimitry Andric   tgtok::TokKind LexToken(bool FileOrLineStart = false);
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric   tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
2480b57cec5SDimitry Andric   tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
2490b57cec5SDimitry Andric 
2500b57cec5SDimitry Andric   int getNextChar();
2510b57cec5SDimitry Andric   int peekNextChar(int Index) const;
2520b57cec5SDimitry Andric   void SkipBCPLComment();
2530b57cec5SDimitry Andric   bool SkipCComment();
2540b57cec5SDimitry Andric   tgtok::TokKind LexIdentifier();
2550b57cec5SDimitry Andric   bool LexInclude();
2560b57cec5SDimitry Andric   tgtok::TokKind LexString();
2570b57cec5SDimitry Andric   tgtok::TokKind LexVarName();
2580b57cec5SDimitry Andric   tgtok::TokKind LexNumber();
2590b57cec5SDimitry Andric   tgtok::TokKind LexBracket();
2600b57cec5SDimitry Andric   tgtok::TokKind LexExclaim();
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric   // Process EOF encountered in LexToken().
2630b57cec5SDimitry Andric   // If EOF is met in an include file, then the method will update
2640b57cec5SDimitry Andric   // CurPtr, CurBuf and preprocessing include stack, and return true.
2650b57cec5SDimitry Andric   // If EOF is met in the top-level file, then the method will
2660b57cec5SDimitry Andric   // update and check the preprocessing include stack, and return false.
2670b57cec5SDimitry Andric   bool processEOF();
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   // *** Structures and methods for preprocessing support ***
2700b57cec5SDimitry Andric 
2710b57cec5SDimitry Andric   // A set of macro names that are defined either via command line or
2720b57cec5SDimitry Andric   // by using:
2730b57cec5SDimitry Andric   //     #define NAME
2740b57cec5SDimitry Andric   StringSet<> DefinedMacros;
2750b57cec5SDimitry Andric 
2760b57cec5SDimitry Andric   // Each of #ifdef and #else directives has a descriptor associated
2770b57cec5SDimitry Andric   // with it.
2780b57cec5SDimitry Andric   //
2790b57cec5SDimitry Andric   // An ordered list of preprocessing controls defined by #ifdef/#else
2800b57cec5SDimitry Andric   // directives that are in effect currently is called preprocessing
2810b57cec5SDimitry Andric   // control stack.  It is represented as a vector of PreprocessorControlDesc's.
2820b57cec5SDimitry Andric   //
2830b57cec5SDimitry Andric   // The control stack is updated according to the following rules:
2840b57cec5SDimitry Andric   //
2850b57cec5SDimitry Andric   // For each #ifdef we add an element to the control stack.
2860b57cec5SDimitry Andric   // For each #else we replace the top element with a descriptor
2870b57cec5SDimitry Andric   // with an inverted IsDefined value.
2880b57cec5SDimitry Andric   // For each #endif we pop the top element from the control stack.
2890b57cec5SDimitry Andric   //
2900b57cec5SDimitry Andric   // When CurPtr reaches the current buffer's end, the control stack
2910b57cec5SDimitry Andric   // must be empty, i.e. #ifdef and the corresponding #endif
2920b57cec5SDimitry Andric   // must be located in the same file.
2930b57cec5SDimitry Andric   struct PreprocessorControlDesc {
2940b57cec5SDimitry Andric     // Either tgtok::Ifdef or tgtok::Else.
2950b57cec5SDimitry Andric     tgtok::TokKind Kind;
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric     // True, if the condition for this directive is true, false - otherwise.
2980b57cec5SDimitry Andric     // Examples:
2990b57cec5SDimitry Andric     //     #ifdef NAME       : true, if NAME is defined, false - otherwise.
3000b57cec5SDimitry Andric     //     ...
3010b57cec5SDimitry Andric     //     #else             : false, if NAME is defined, true - otherwise.
3020b57cec5SDimitry Andric     bool IsDefined;
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric     // Pointer into CurBuf to the beginning of the preprocessing directive
3050b57cec5SDimitry Andric     // word, e.g.:
3060b57cec5SDimitry Andric     //     #ifdef NAME
3070b57cec5SDimitry Andric     //      ^ - SrcPos
3080b57cec5SDimitry Andric     SMLoc SrcPos;
3090b57cec5SDimitry Andric   };
3100b57cec5SDimitry Andric 
3110b57cec5SDimitry Andric   // We want to disallow code like this:
3120b57cec5SDimitry Andric   //     file1.td:
3130b57cec5SDimitry Andric   //         #define NAME
3140b57cec5SDimitry Andric   //         #ifdef NAME
3150b57cec5SDimitry Andric   //         include "file2.td"
3160b57cec5SDimitry Andric   //     EOF
3170b57cec5SDimitry Andric   //     file2.td:
3180b57cec5SDimitry Andric   //         #endif
3190b57cec5SDimitry Andric   //     EOF
3200b57cec5SDimitry Andric   //
3210b57cec5SDimitry Andric   // To do this, we clear the preprocessing control stack on entry
3220b57cec5SDimitry Andric   // to each of the included file.  PrepIncludeStack is used to store
3230b57cec5SDimitry Andric   // preprocessing control stacks for the current file and all its
3240b57cec5SDimitry Andric   // parent files.  The back() element is the preprocessing control
3250b57cec5SDimitry Andric   // stack for the current file.
3260b57cec5SDimitry Andric   std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
3270b57cec5SDimitry Andric       PrepIncludeStack;
3280b57cec5SDimitry Andric 
3290b57cec5SDimitry Andric   // Validate that the current preprocessing control stack is empty,
3300b57cec5SDimitry Andric   // since we are about to exit a file, and pop the include stack.
3310b57cec5SDimitry Andric   //
3320b57cec5SDimitry Andric   // If IncludeStackMustBeEmpty is true, the include stack must be empty
3330b57cec5SDimitry Andric   // after the popping, otherwise, the include stack must not be empty
3340b57cec5SDimitry Andric   // after the popping.  Basically, the include stack must be empty
3350b57cec5SDimitry Andric   // only if we exit the "top-level" file (i.e. finish lexing).
3360b57cec5SDimitry Andric   //
3370b57cec5SDimitry Andric   // The method returns false, if the current preprocessing control stack
3380b57cec5SDimitry Andric   // is not empty (e.g. there is an unterminated #ifdef/#else),
3390b57cec5SDimitry Andric   // true - otherwise.
3400b57cec5SDimitry Andric   bool prepExitInclude(bool IncludeStackMustBeEmpty);
3410b57cec5SDimitry Andric 
3420b57cec5SDimitry Andric   // Look ahead for a preprocessing directive starting from CurPtr.  The caller
3430b57cec5SDimitry Andric   // must only call this method, if *(CurPtr - 1) is '#'.  If the method matches
3440b57cec5SDimitry Andric   // a preprocessing directive word followed by a whitespace, then it returns
3450b57cec5SDimitry Andric   // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
3460b57cec5SDimitry Andric   //
3470b57cec5SDimitry Andric   // CurPtr is not adjusted by this method.
3480b57cec5SDimitry Andric   tgtok::TokKind prepIsDirective() const;
3490b57cec5SDimitry Andric 
3500b57cec5SDimitry Andric   // Given a preprocessing token kind, adjusts CurPtr to the end
3510b57cec5SDimitry Andric   // of the preprocessing directive word.  Returns true, unless
3520b57cec5SDimitry Andric   // an unsupported token kind is passed in.
3530b57cec5SDimitry Andric   //
3540b57cec5SDimitry Andric   // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
3550b57cec5SDimitry Andric   // to avoid adjusting CurPtr before we are sure that '#' is followed
3560b57cec5SDimitry Andric   // by a preprocessing directive.  If it is not, then we fall back to
3570b57cec5SDimitry Andric   // tgtok::paste interpretation of '#'.
3580b57cec5SDimitry Andric   bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric   // The main "exit" point from the token parsing to preprocessor.
3610b57cec5SDimitry Andric   //
3620b57cec5SDimitry Andric   // The method is called for CurPtr, when prepIsDirective() returns
3630b57cec5SDimitry Andric   // true.  The first parameter matches the result of prepIsDirective(),
3640b57cec5SDimitry Andric   // denoting the actual preprocessor directive to be processed.
3650b57cec5SDimitry Andric   //
3660b57cec5SDimitry Andric   // If the preprocessing directive disables the tokens processing, e.g.:
3670b57cec5SDimitry Andric   //     #ifdef NAME // NAME is undefined
3680b57cec5SDimitry Andric   // then lexPreprocessor() enters the lines-skipping mode.
3690b57cec5SDimitry Andric   // In this mode, it does not parse any tokens, because the code under
3700b57cec5SDimitry Andric   // the #ifdef may not even be a correct tablegen code.  The preprocessor
3710b57cec5SDimitry Andric   // looks for lines containing other preprocessing directives, which
3720b57cec5SDimitry Andric   // may be prepended with whitespaces and C-style comments.  If the line
3730b57cec5SDimitry Andric   // does not contain a preprocessing directive, it is skipped completely.
3740b57cec5SDimitry Andric   // Otherwise, the preprocessing directive is processed by recursively
3750b57cec5SDimitry Andric   // calling lexPreprocessor().  The processing of the encountered
3760b57cec5SDimitry Andric   // preprocessing directives includes updating preprocessing control stack
3770b57cec5SDimitry Andric   // and adding new macros into DefinedMacros set.
3780b57cec5SDimitry Andric   //
3790b57cec5SDimitry Andric   // The second parameter controls whether lexPreprocessor() is called from
3800b57cec5SDimitry Andric   // LexToken() (true) or recursively from lexPreprocessor() (false).
3810b57cec5SDimitry Andric   //
3820b57cec5SDimitry Andric   // If ReturnNextLiveToken is true, the method returns the next
3830b57cec5SDimitry Andric   // LEX token following the current directive or following the end
3840b57cec5SDimitry Andric   // of the disabled preprocessing region corresponding to this directive.
3850b57cec5SDimitry Andric   // If ReturnNextLiveToken is false, the method returns the first parameter,
3860b57cec5SDimitry Andric   // unless there were errors encountered in the disabled preprocessing
3870b57cec5SDimitry Andric   // region - in this case, it returns tgtok::Error.
3880b57cec5SDimitry Andric   tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
3890b57cec5SDimitry Andric                                  bool ReturnNextLiveToken = true);
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric   // Worker method for lexPreprocessor() to skip lines after some
3920b57cec5SDimitry Andric   // preprocessing directive up to the buffer end or to the directive
3930b57cec5SDimitry Andric   // that re-enables token processing.  The method returns true
3940b57cec5SDimitry Andric   // upon processing the next directive that re-enables tokens
3950b57cec5SDimitry Andric   // processing.  False is returned if an error was encountered.
3960b57cec5SDimitry Andric   //
3970b57cec5SDimitry Andric   // Note that prepSkipRegion() calls lexPreprocessor() to process
3980b57cec5SDimitry Andric   // encountered preprocessing directives.  In this case, the second
3990b57cec5SDimitry Andric   // parameter to lexPreprocessor() is set to false.  Being passed
4000b57cec5SDimitry Andric   // false ReturnNextLiveToken, lexPreprocessor() must never call
4010b57cec5SDimitry Andric   // prepSkipRegion().  We assert this by passing ReturnNextLiveToken
4020b57cec5SDimitry Andric   // to prepSkipRegion() and checking that it is never set to false.
4030b57cec5SDimitry Andric   bool prepSkipRegion(bool MustNeverBeFalse);
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   // Lex name of the macro after either #ifdef or #define.  We could have used
4060b57cec5SDimitry Andric   // LexIdentifier(), but it has special handling of "include" word, which
4070b57cec5SDimitry Andric   // could result in awkward diagnostic errors.  Consider:
4080b57cec5SDimitry Andric   // ----
4090b57cec5SDimitry Andric   // #ifdef include
4100b57cec5SDimitry Andric   // class ...
4110b57cec5SDimitry Andric   // ----
4120b57cec5SDimitry Andric   // LexIdentifier() will engage LexInclude(), which will complain about
4130b57cec5SDimitry Andric   // missing file with name "class".  Instead, prepLexMacroName() will treat
4140b57cec5SDimitry Andric   // "include" as a normal macro name.
4150b57cec5SDimitry Andric   //
4160b57cec5SDimitry Andric   // On entry, CurPtr points to the end of a preprocessing directive word.
4170b57cec5SDimitry Andric   // The method allows for whitespaces between the preprocessing directive
4180b57cec5SDimitry Andric   // and the macro name.  The allowed whitespaces are ' ' and '\t'.
4190b57cec5SDimitry Andric   //
4200b57cec5SDimitry Andric   // If the first non-whitespace symbol after the preprocessing directive
4210b57cec5SDimitry Andric   // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
4220b57cec5SDimitry Andric   // the method updates TokStart to the position of the first non-whitespace
4230b57cec5SDimitry Andric   // symbol, sets CurPtr to the position of the macro name's last symbol,
4240b57cec5SDimitry Andric   // and returns a string reference to the macro name.  Otherwise,
4250b57cec5SDimitry Andric   // TokStart is set to the first non-whitespace symbol after the preprocessing
4260b57cec5SDimitry Andric   // directive, and the method returns an empty string reference.
4270b57cec5SDimitry Andric   //
4280b57cec5SDimitry Andric   // In all cases, TokStart may be used to point to the word following
4290b57cec5SDimitry Andric   // the preprocessing directive.
4300b57cec5SDimitry Andric   StringRef prepLexMacroName();
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric   // Skip any whitespaces starting from CurPtr.  The method is used
4330b57cec5SDimitry Andric   // only in the lines-skipping mode to find the first non-whitespace
4340b57cec5SDimitry Andric   // symbol after or at CurPtr.  Allowed whitespaces are ' ', '\t', '\n'
4350b57cec5SDimitry Andric   // and '\r'.  The method skips C-style comments as well, because
4360b57cec5SDimitry Andric   // it is used to find the beginning of the preprocessing directive.
4370b57cec5SDimitry Andric   // If we do not handle C-style comments the following code would
4380b57cec5SDimitry Andric   // result in incorrect detection of a preprocessing directive:
4390b57cec5SDimitry Andric   //     /*
4400b57cec5SDimitry Andric   //     #ifdef NAME
4410b57cec5SDimitry Andric   //     */
4420b57cec5SDimitry Andric   // As long as we skip C-style comments, the following code is correctly
4430b57cec5SDimitry Andric   // recognized as a preprocessing directive:
4440b57cec5SDimitry Andric   //     /* first line comment
4450b57cec5SDimitry Andric   //        second line comment */ #ifdef NAME
4460b57cec5SDimitry Andric   //
4470b57cec5SDimitry Andric   // The method returns true upon reaching the first non-whitespace symbol
4480b57cec5SDimitry Andric   // or EOF, CurPtr is set to point to this symbol.  The method returns false,
44981ad6265SDimitry Andric   // if an error occurred during skipping of a C-style comment.
4500b57cec5SDimitry Andric   bool prepSkipLineBegin();
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   // Skip any whitespaces or comments after a preprocessing directive.
4530b57cec5SDimitry Andric   // The method returns true upon reaching either end of the line
4540b57cec5SDimitry Andric   // or end of the file.  If there is a multiline C-style comment
4550b57cec5SDimitry Andric   // after the preprocessing directive, the method skips
4560b57cec5SDimitry Andric   // the comment, so the final CurPtr may point to one of the next lines.
45781ad6265SDimitry Andric   // The method returns false, if an error occurred during skipping
4580b57cec5SDimitry Andric   // C- or C++-style comment, or a non-whitespace symbol appears
4590b57cec5SDimitry Andric   // after the preprocessing directive.
4600b57cec5SDimitry Andric   //
4610b57cec5SDimitry Andric   // The method maybe called both during lines-skipping and tokens
4620b57cec5SDimitry Andric   // processing.  It actually verifies that only whitespaces or/and
4630b57cec5SDimitry Andric   // comments follow a preprocessing directive.
4640b57cec5SDimitry Andric   //
4650b57cec5SDimitry Andric   // After the execution of this mehod, CurPtr points either to new line
4660b57cec5SDimitry Andric   // symbol, buffer end or non-whitespace symbol following the preprocesing
4670b57cec5SDimitry Andric   // directive.
4680b57cec5SDimitry Andric   bool prepSkipDirectiveEnd();
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric   // Return true, if the current preprocessor control stack is such that
4710b57cec5SDimitry Andric   // we should allow lexer to process the next token, false - otherwise.
4720b57cec5SDimitry Andric   //
4730b57cec5SDimitry Andric   // In particular, the method returns true, if all the #ifdef/#else
4740b57cec5SDimitry Andric   // controls on the stack have their IsDefined member set to true.
4750b57cec5SDimitry Andric   bool prepIsProcessingEnabled();
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric   // Report an error, if we reach EOF with non-empty preprocessing control
4780b57cec5SDimitry Andric   // stack.  This means there is no matching #endif for the previous
4790b57cec5SDimitry Andric   // #ifdef/#else.
4800b57cec5SDimitry Andric   void prepReportPreprocessorStackError();
4810b57cec5SDimitry Andric };
4820b57cec5SDimitry Andric 
4830b57cec5SDimitry Andric } // end namespace llvm
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric #endif
486