10b57cec5SDimitry Andric //===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This class represents the Lexer for tablegen files. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H 140b57cec5SDimitry Andric #define LLVM_LIB_TABLEGEN_TGLEXER_H 150b57cec5SDimitry Andric 160b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 170b57cec5SDimitry Andric #include "llvm/ADT/StringSet.h" 180b57cec5SDimitry Andric #include "llvm/Support/DataTypes.h" 190b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h" 200b57cec5SDimitry Andric #include <cassert> 210b57cec5SDimitry Andric #include <memory> 22480093f4SDimitry Andric #include <set> 230b57cec5SDimitry Andric #include <string> 245ffd83dbSDimitry Andric #include <vector> 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric namespace llvm { 275ffd83dbSDimitry Andric template <typename T> class ArrayRef; 280b57cec5SDimitry Andric class SourceMgr; 290b57cec5SDimitry Andric class Twine; 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric namespace tgtok { 320b57cec5SDimitry Andric enum TokKind { 330b57cec5SDimitry Andric // Markers 3406c3fb27SDimitry Andric Eof, 3506c3fb27SDimitry Andric Error, 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric // Tokens with no info. 3806c3fb27SDimitry Andric minus, // - 3906c3fb27SDimitry Andric plus, // + 4006c3fb27SDimitry Andric l_square, // [ 4106c3fb27SDimitry Andric r_square, // ] 4206c3fb27SDimitry Andric l_brace, // { 4306c3fb27SDimitry Andric r_brace, // } 4406c3fb27SDimitry Andric l_paren, // ( 4506c3fb27SDimitry Andric r_paren, // ) 4606c3fb27SDimitry Andric less, // < 4706c3fb27SDimitry Andric greater, // > 4806c3fb27SDimitry Andric colon, // : 4906c3fb27SDimitry Andric semi, // ; 5006c3fb27SDimitry Andric comma, // , 5106c3fb27SDimitry Andric dot, // . 5206c3fb27SDimitry Andric equal, // = 5306c3fb27SDimitry Andric question, // ? 540b57cec5SDimitry Andric paste, // # 55e8d8bef9SDimitry Andric dotdotdot, // ... 560b57cec5SDimitry Andric 575f757f3fSDimitry Andric // Boolean literals. 585f757f3fSDimitry Andric TrueVal, 595f757f3fSDimitry Andric FalseVal, 605f757f3fSDimitry Andric 615f757f3fSDimitry Andric // Integer value. 625f757f3fSDimitry Andric IntVal, 635f757f3fSDimitry Andric 645f757f3fSDimitry Andric // Binary constant. Note that these are sized according to the number of 655f757f3fSDimitry Andric // bits given. 665f757f3fSDimitry Andric BinaryIntVal, 675f757f3fSDimitry Andric 685f757f3fSDimitry Andric // Preprocessing tokens for internal usage by the lexer. 695f757f3fSDimitry Andric // They are never returned as a result of Lex(). 705f757f3fSDimitry Andric Ifdef, 715f757f3fSDimitry Andric Ifndef, 725f757f3fSDimitry Andric Else, 735f757f3fSDimitry Andric Endif, 745f757f3fSDimitry Andric Define, 755f757f3fSDimitry Andric 76e8d8bef9SDimitry Andric // Reserved keywords. ('ElseKW' is named to distinguish it from the 77e8d8bef9SDimitry Andric // existing 'Else' that means the preprocessor #else.) 7806c3fb27SDimitry Andric Bit, 7906c3fb27SDimitry Andric Bits, 8006c3fb27SDimitry Andric Code, 8106c3fb27SDimitry Andric Dag, 8206c3fb27SDimitry Andric ElseKW, 8306c3fb27SDimitry Andric FalseKW, 8406c3fb27SDimitry Andric Field, 8506c3fb27SDimitry Andric In, 8606c3fb27SDimitry Andric Include, 8706c3fb27SDimitry Andric Int, 8806c3fb27SDimitry Andric List, 8906c3fb27SDimitry Andric String, 9006c3fb27SDimitry Andric Then, 9106c3fb27SDimitry Andric TrueKW, 920b57cec5SDimitry Andric 935f757f3fSDimitry Andric // Object start tokens. 945f757f3fSDimitry Andric OBJECT_START_FIRST, 955f757f3fSDimitry Andric Assert = OBJECT_START_FIRST, 965f757f3fSDimitry Andric Class, 975f757f3fSDimitry Andric Def, 985f757f3fSDimitry Andric Defm, 995f757f3fSDimitry Andric Defset, 100*0fca6ea1SDimitry Andric Deftype, 1015f757f3fSDimitry Andric Defvar, 1025f757f3fSDimitry Andric Dump, 1035f757f3fSDimitry Andric Foreach, 1045f757f3fSDimitry Andric If, 1055f757f3fSDimitry Andric Let, 1065f757f3fSDimitry Andric MultiClass, 1075f757f3fSDimitry Andric OBJECT_START_LAST = MultiClass, 1085f757f3fSDimitry Andric 109e8d8bef9SDimitry Andric // Bang operators. 1105f757f3fSDimitry Andric BANG_OPERATOR_FIRST, 1115f757f3fSDimitry Andric XConcat = BANG_OPERATOR_FIRST, 11206c3fb27SDimitry Andric XADD, 11306c3fb27SDimitry Andric XSUB, 11406c3fb27SDimitry Andric XMUL, 11506c3fb27SDimitry Andric XDIV, 11606c3fb27SDimitry Andric XNOT, 11706c3fb27SDimitry Andric XLOG2, 11806c3fb27SDimitry Andric XAND, 11906c3fb27SDimitry Andric XOR, 12006c3fb27SDimitry Andric XXOR, 12106c3fb27SDimitry Andric XSRA, 12206c3fb27SDimitry Andric XSRL, 12306c3fb27SDimitry Andric XSHL, 12406c3fb27SDimitry Andric XListConcat, 12506c3fb27SDimitry Andric XListSplat, 12606c3fb27SDimitry Andric XStrConcat, 12706c3fb27SDimitry Andric XInterleave, 12806c3fb27SDimitry Andric XSubstr, 12906c3fb27SDimitry Andric XFind, 13006c3fb27SDimitry Andric XCast, 13106c3fb27SDimitry Andric XSubst, 13206c3fb27SDimitry Andric XForEach, 13306c3fb27SDimitry Andric XFilter, 13406c3fb27SDimitry Andric XFoldl, 13506c3fb27SDimitry Andric XHead, 13606c3fb27SDimitry Andric XTail, 13706c3fb27SDimitry Andric XSize, 13806c3fb27SDimitry Andric XEmpty, 13906c3fb27SDimitry Andric XIf, 14006c3fb27SDimitry Andric XCond, 14106c3fb27SDimitry Andric XEq, 14206c3fb27SDimitry Andric XIsA, 14306c3fb27SDimitry Andric XDag, 14406c3fb27SDimitry Andric XNe, 14506c3fb27SDimitry Andric XLe, 14606c3fb27SDimitry Andric XLt, 14706c3fb27SDimitry Andric XGe, 14806c3fb27SDimitry Andric XGt, 14906c3fb27SDimitry Andric XSetDagOp, 15006c3fb27SDimitry Andric XGetDagOp, 15106c3fb27SDimitry Andric XExists, 15206c3fb27SDimitry Andric XListRemove, 15306c3fb27SDimitry Andric XToLower, 15406c3fb27SDimitry Andric XToUpper, 15506c3fb27SDimitry Andric XRange, 15606c3fb27SDimitry Andric XGetDagArg, 15706c3fb27SDimitry Andric XGetDagName, 15806c3fb27SDimitry Andric XSetDagArg, 15906c3fb27SDimitry Andric XSetDagName, 1605f757f3fSDimitry Andric XRepr, 1615f757f3fSDimitry Andric BANG_OPERATOR_LAST = XRepr, 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric // String valued tokens. 1645f757f3fSDimitry Andric STRING_VALUE_FIRST, 1655f757f3fSDimitry Andric Id = STRING_VALUE_FIRST, 16606c3fb27SDimitry Andric StrVal, 16706c3fb27SDimitry Andric VarName, 16806c3fb27SDimitry Andric CodeFragment, 1695f757f3fSDimitry Andric STRING_VALUE_LAST = CodeFragment, 1700b57cec5SDimitry Andric }; 1715f757f3fSDimitry Andric 1725f757f3fSDimitry Andric /// isBangOperator - Return true if this is a bang operator. 1735f757f3fSDimitry Andric static inline bool isBangOperator(tgtok::TokKind Kind) { 1745f757f3fSDimitry Andric return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1775f757f3fSDimitry Andric /// isObjectStart - Return true if this is a valid first token for a statement. 1785f757f3fSDimitry Andric static inline bool isObjectStart(tgtok::TokKind Kind) { 1795f757f3fSDimitry Andric return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST; 1805f757f3fSDimitry Andric } 1815f757f3fSDimitry Andric 1825f757f3fSDimitry Andric /// isStringValue - Return true if this is a string value. 1835f757f3fSDimitry Andric static inline bool isStringValue(tgtok::TokKind Kind) { 1845f757f3fSDimitry Andric return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST; 1855f757f3fSDimitry Andric } 1865f757f3fSDimitry Andric } // namespace tgtok 1875f757f3fSDimitry Andric 1880b57cec5SDimitry Andric /// TGLexer - TableGen Lexer class. 1890b57cec5SDimitry Andric class TGLexer { 1900b57cec5SDimitry Andric SourceMgr &SrcMgr; 1910b57cec5SDimitry Andric 192480093f4SDimitry Andric const char *CurPtr = nullptr; 1930b57cec5SDimitry Andric StringRef CurBuf; 1940b57cec5SDimitry Andric 1950b57cec5SDimitry Andric // Information about the current token. 196480093f4SDimitry Andric const char *TokStart = nullptr; 197480093f4SDimitry Andric tgtok::TokKind CurCode = tgtok::TokKind::Eof; 198e8d8bef9SDimitry Andric std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment 199e8d8bef9SDimitry Andric int64_t CurIntVal = 0; // This is valid for IntVal. 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric /// CurBuffer - This is the current buffer index we're lexing from as managed 2020b57cec5SDimitry Andric /// by the SourceMgr object. 203480093f4SDimitry Andric unsigned CurBuffer = 0; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric public: 206480093f4SDimitry Andric typedef std::set<std::string> DependenciesSetTy; 207480093f4SDimitry Andric 2080b57cec5SDimitry Andric private: 2090b57cec5SDimitry Andric /// Dependencies - This is the list of all included files. 210480093f4SDimitry Andric DependenciesSetTy Dependencies; 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric public: 2130b57cec5SDimitry Andric TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros); 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric tgtok::TokKind Lex() { 2160b57cec5SDimitry Andric return CurCode = LexToken(CurPtr == CurBuf.begin()); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 219480093f4SDimitry Andric const DependenciesSetTy &getDependencies() const { 2200b57cec5SDimitry Andric return Dependencies; 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric tgtok::TokKind getCode() const { return CurCode; } 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric const std::string &getCurStrVal() const { 2265f757f3fSDimitry Andric assert(tgtok::isStringValue(CurCode) && 2270b57cec5SDimitry Andric "This token doesn't have a string value"); 2280b57cec5SDimitry Andric return CurStrVal; 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric int64_t getCurIntVal() const { 2310b57cec5SDimitry Andric assert(CurCode == tgtok::IntVal && "This token isn't an integer"); 2320b57cec5SDimitry Andric return CurIntVal; 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric std::pair<int64_t, unsigned> getCurBinaryIntVal() const { 2350b57cec5SDimitry Andric assert(CurCode == tgtok::BinaryIntVal && 2360b57cec5SDimitry Andric "This token isn't a binary integer"); 2370b57cec5SDimitry Andric return std::make_pair(CurIntVal, (CurPtr - TokStart)-2); 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric SMLoc getLoc() const; 241bdd1243dSDimitry Andric SMRange getLocRange() const; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric private: 2440b57cec5SDimitry Andric /// LexToken - Read the next token and return its code. 2450b57cec5SDimitry Andric tgtok::TokKind LexToken(bool FileOrLineStart = false); 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg); 2480b57cec5SDimitry Andric tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg); 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric int getNextChar(); 2510b57cec5SDimitry Andric int peekNextChar(int Index) const; 2520b57cec5SDimitry Andric void SkipBCPLComment(); 2530b57cec5SDimitry Andric bool SkipCComment(); 2540b57cec5SDimitry Andric tgtok::TokKind LexIdentifier(); 2550b57cec5SDimitry Andric bool LexInclude(); 2560b57cec5SDimitry Andric tgtok::TokKind LexString(); 2570b57cec5SDimitry Andric tgtok::TokKind LexVarName(); 2580b57cec5SDimitry Andric tgtok::TokKind LexNumber(); 2590b57cec5SDimitry Andric tgtok::TokKind LexBracket(); 2600b57cec5SDimitry Andric tgtok::TokKind LexExclaim(); 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric // Process EOF encountered in LexToken(). 2630b57cec5SDimitry Andric // If EOF is met in an include file, then the method will update 2640b57cec5SDimitry Andric // CurPtr, CurBuf and preprocessing include stack, and return true. 2650b57cec5SDimitry Andric // If EOF is met in the top-level file, then the method will 2660b57cec5SDimitry Andric // update and check the preprocessing include stack, and return false. 2670b57cec5SDimitry Andric bool processEOF(); 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric // *** Structures and methods for preprocessing support *** 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric // A set of macro names that are defined either via command line or 2720b57cec5SDimitry Andric // by using: 2730b57cec5SDimitry Andric // #define NAME 2740b57cec5SDimitry Andric StringSet<> DefinedMacros; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric // Each of #ifdef and #else directives has a descriptor associated 2770b57cec5SDimitry Andric // with it. 2780b57cec5SDimitry Andric // 2790b57cec5SDimitry Andric // An ordered list of preprocessing controls defined by #ifdef/#else 2800b57cec5SDimitry Andric // directives that are in effect currently is called preprocessing 2810b57cec5SDimitry Andric // control stack. It is represented as a vector of PreprocessorControlDesc's. 2820b57cec5SDimitry Andric // 2830b57cec5SDimitry Andric // The control stack is updated according to the following rules: 2840b57cec5SDimitry Andric // 2850b57cec5SDimitry Andric // For each #ifdef we add an element to the control stack. 2860b57cec5SDimitry Andric // For each #else we replace the top element with a descriptor 2870b57cec5SDimitry Andric // with an inverted IsDefined value. 2880b57cec5SDimitry Andric // For each #endif we pop the top element from the control stack. 2890b57cec5SDimitry Andric // 2900b57cec5SDimitry Andric // When CurPtr reaches the current buffer's end, the control stack 2910b57cec5SDimitry Andric // must be empty, i.e. #ifdef and the corresponding #endif 2920b57cec5SDimitry Andric // must be located in the same file. 2930b57cec5SDimitry Andric struct PreprocessorControlDesc { 2940b57cec5SDimitry Andric // Either tgtok::Ifdef or tgtok::Else. 2950b57cec5SDimitry Andric tgtok::TokKind Kind; 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric // True, if the condition for this directive is true, false - otherwise. 2980b57cec5SDimitry Andric // Examples: 2990b57cec5SDimitry Andric // #ifdef NAME : true, if NAME is defined, false - otherwise. 3000b57cec5SDimitry Andric // ... 3010b57cec5SDimitry Andric // #else : false, if NAME is defined, true - otherwise. 3020b57cec5SDimitry Andric bool IsDefined; 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric // Pointer into CurBuf to the beginning of the preprocessing directive 3050b57cec5SDimitry Andric // word, e.g.: 3060b57cec5SDimitry Andric // #ifdef NAME 3070b57cec5SDimitry Andric // ^ - SrcPos 3080b57cec5SDimitry Andric SMLoc SrcPos; 3090b57cec5SDimitry Andric }; 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric // We want to disallow code like this: 3120b57cec5SDimitry Andric // file1.td: 3130b57cec5SDimitry Andric // #define NAME 3140b57cec5SDimitry Andric // #ifdef NAME 3150b57cec5SDimitry Andric // include "file2.td" 3160b57cec5SDimitry Andric // EOF 3170b57cec5SDimitry Andric // file2.td: 3180b57cec5SDimitry Andric // #endif 3190b57cec5SDimitry Andric // EOF 3200b57cec5SDimitry Andric // 3210b57cec5SDimitry Andric // To do this, we clear the preprocessing control stack on entry 3220b57cec5SDimitry Andric // to each of the included file. PrepIncludeStack is used to store 3230b57cec5SDimitry Andric // preprocessing control stacks for the current file and all its 3240b57cec5SDimitry Andric // parent files. The back() element is the preprocessing control 3250b57cec5SDimitry Andric // stack for the current file. 3260b57cec5SDimitry Andric std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>> 3270b57cec5SDimitry Andric PrepIncludeStack; 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric // Validate that the current preprocessing control stack is empty, 3300b57cec5SDimitry Andric // since we are about to exit a file, and pop the include stack. 3310b57cec5SDimitry Andric // 3320b57cec5SDimitry Andric // If IncludeStackMustBeEmpty is true, the include stack must be empty 3330b57cec5SDimitry Andric // after the popping, otherwise, the include stack must not be empty 3340b57cec5SDimitry Andric // after the popping. Basically, the include stack must be empty 3350b57cec5SDimitry Andric // only if we exit the "top-level" file (i.e. finish lexing). 3360b57cec5SDimitry Andric // 3370b57cec5SDimitry Andric // The method returns false, if the current preprocessing control stack 3380b57cec5SDimitry Andric // is not empty (e.g. there is an unterminated #ifdef/#else), 3390b57cec5SDimitry Andric // true - otherwise. 3400b57cec5SDimitry Andric bool prepExitInclude(bool IncludeStackMustBeEmpty); 3410b57cec5SDimitry Andric 3420b57cec5SDimitry Andric // Look ahead for a preprocessing directive starting from CurPtr. The caller 3430b57cec5SDimitry Andric // must only call this method, if *(CurPtr - 1) is '#'. If the method matches 3440b57cec5SDimitry Andric // a preprocessing directive word followed by a whitespace, then it returns 3450b57cec5SDimitry Andric // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define. 3460b57cec5SDimitry Andric // 3470b57cec5SDimitry Andric // CurPtr is not adjusted by this method. 3480b57cec5SDimitry Andric tgtok::TokKind prepIsDirective() const; 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric // Given a preprocessing token kind, adjusts CurPtr to the end 3510b57cec5SDimitry Andric // of the preprocessing directive word. Returns true, unless 3520b57cec5SDimitry Andric // an unsupported token kind is passed in. 3530b57cec5SDimitry Andric // 3540b57cec5SDimitry Andric // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective() 3550b57cec5SDimitry Andric // to avoid adjusting CurPtr before we are sure that '#' is followed 3560b57cec5SDimitry Andric // by a preprocessing directive. If it is not, then we fall back to 3570b57cec5SDimitry Andric // tgtok::paste interpretation of '#'. 3580b57cec5SDimitry Andric bool prepEatPreprocessorDirective(tgtok::TokKind Kind); 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric // The main "exit" point from the token parsing to preprocessor. 3610b57cec5SDimitry Andric // 3620b57cec5SDimitry Andric // The method is called for CurPtr, when prepIsDirective() returns 3630b57cec5SDimitry Andric // true. The first parameter matches the result of prepIsDirective(), 3640b57cec5SDimitry Andric // denoting the actual preprocessor directive to be processed. 3650b57cec5SDimitry Andric // 3660b57cec5SDimitry Andric // If the preprocessing directive disables the tokens processing, e.g.: 3670b57cec5SDimitry Andric // #ifdef NAME // NAME is undefined 3680b57cec5SDimitry Andric // then lexPreprocessor() enters the lines-skipping mode. 3690b57cec5SDimitry Andric // In this mode, it does not parse any tokens, because the code under 3700b57cec5SDimitry Andric // the #ifdef may not even be a correct tablegen code. The preprocessor 3710b57cec5SDimitry Andric // looks for lines containing other preprocessing directives, which 3720b57cec5SDimitry Andric // may be prepended with whitespaces and C-style comments. If the line 3730b57cec5SDimitry Andric // does not contain a preprocessing directive, it is skipped completely. 3740b57cec5SDimitry Andric // Otherwise, the preprocessing directive is processed by recursively 3750b57cec5SDimitry Andric // calling lexPreprocessor(). The processing of the encountered 3760b57cec5SDimitry Andric // preprocessing directives includes updating preprocessing control stack 3770b57cec5SDimitry Andric // and adding new macros into DefinedMacros set. 3780b57cec5SDimitry Andric // 3790b57cec5SDimitry Andric // The second parameter controls whether lexPreprocessor() is called from 3800b57cec5SDimitry Andric // LexToken() (true) or recursively from lexPreprocessor() (false). 3810b57cec5SDimitry Andric // 3820b57cec5SDimitry Andric // If ReturnNextLiveToken is true, the method returns the next 3830b57cec5SDimitry Andric // LEX token following the current directive or following the end 3840b57cec5SDimitry Andric // of the disabled preprocessing region corresponding to this directive. 3850b57cec5SDimitry Andric // If ReturnNextLiveToken is false, the method returns the first parameter, 3860b57cec5SDimitry Andric // unless there were errors encountered in the disabled preprocessing 3870b57cec5SDimitry Andric // region - in this case, it returns tgtok::Error. 3880b57cec5SDimitry Andric tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind, 3890b57cec5SDimitry Andric bool ReturnNextLiveToken = true); 3900b57cec5SDimitry Andric 3910b57cec5SDimitry Andric // Worker method for lexPreprocessor() to skip lines after some 3920b57cec5SDimitry Andric // preprocessing directive up to the buffer end or to the directive 3930b57cec5SDimitry Andric // that re-enables token processing. The method returns true 3940b57cec5SDimitry Andric // upon processing the next directive that re-enables tokens 3950b57cec5SDimitry Andric // processing. False is returned if an error was encountered. 3960b57cec5SDimitry Andric // 3970b57cec5SDimitry Andric // Note that prepSkipRegion() calls lexPreprocessor() to process 3980b57cec5SDimitry Andric // encountered preprocessing directives. In this case, the second 3990b57cec5SDimitry Andric // parameter to lexPreprocessor() is set to false. Being passed 4000b57cec5SDimitry Andric // false ReturnNextLiveToken, lexPreprocessor() must never call 4010b57cec5SDimitry Andric // prepSkipRegion(). We assert this by passing ReturnNextLiveToken 4020b57cec5SDimitry Andric // to prepSkipRegion() and checking that it is never set to false. 4030b57cec5SDimitry Andric bool prepSkipRegion(bool MustNeverBeFalse); 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric // Lex name of the macro after either #ifdef or #define. We could have used 4060b57cec5SDimitry Andric // LexIdentifier(), but it has special handling of "include" word, which 4070b57cec5SDimitry Andric // could result in awkward diagnostic errors. Consider: 4080b57cec5SDimitry Andric // ---- 4090b57cec5SDimitry Andric // #ifdef include 4100b57cec5SDimitry Andric // class ... 4110b57cec5SDimitry Andric // ---- 4120b57cec5SDimitry Andric // LexIdentifier() will engage LexInclude(), which will complain about 4130b57cec5SDimitry Andric // missing file with name "class". Instead, prepLexMacroName() will treat 4140b57cec5SDimitry Andric // "include" as a normal macro name. 4150b57cec5SDimitry Andric // 4160b57cec5SDimitry Andric // On entry, CurPtr points to the end of a preprocessing directive word. 4170b57cec5SDimitry Andric // The method allows for whitespaces between the preprocessing directive 4180b57cec5SDimitry Andric // and the macro name. The allowed whitespaces are ' ' and '\t'. 4190b57cec5SDimitry Andric // 4200b57cec5SDimitry Andric // If the first non-whitespace symbol after the preprocessing directive 4210b57cec5SDimitry Andric // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then 4220b57cec5SDimitry Andric // the method updates TokStart to the position of the first non-whitespace 4230b57cec5SDimitry Andric // symbol, sets CurPtr to the position of the macro name's last symbol, 4240b57cec5SDimitry Andric // and returns a string reference to the macro name. Otherwise, 4250b57cec5SDimitry Andric // TokStart is set to the first non-whitespace symbol after the preprocessing 4260b57cec5SDimitry Andric // directive, and the method returns an empty string reference. 4270b57cec5SDimitry Andric // 4280b57cec5SDimitry Andric // In all cases, TokStart may be used to point to the word following 4290b57cec5SDimitry Andric // the preprocessing directive. 4300b57cec5SDimitry Andric StringRef prepLexMacroName(); 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric // Skip any whitespaces starting from CurPtr. The method is used 4330b57cec5SDimitry Andric // only in the lines-skipping mode to find the first non-whitespace 4340b57cec5SDimitry Andric // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n' 4350b57cec5SDimitry Andric // and '\r'. The method skips C-style comments as well, because 4360b57cec5SDimitry Andric // it is used to find the beginning of the preprocessing directive. 4370b57cec5SDimitry Andric // If we do not handle C-style comments the following code would 4380b57cec5SDimitry Andric // result in incorrect detection of a preprocessing directive: 4390b57cec5SDimitry Andric // /* 4400b57cec5SDimitry Andric // #ifdef NAME 4410b57cec5SDimitry Andric // */ 4420b57cec5SDimitry Andric // As long as we skip C-style comments, the following code is correctly 4430b57cec5SDimitry Andric // recognized as a preprocessing directive: 4440b57cec5SDimitry Andric // /* first line comment 4450b57cec5SDimitry Andric // second line comment */ #ifdef NAME 4460b57cec5SDimitry Andric // 4470b57cec5SDimitry Andric // The method returns true upon reaching the first non-whitespace symbol 4480b57cec5SDimitry Andric // or EOF, CurPtr is set to point to this symbol. The method returns false, 44981ad6265SDimitry Andric // if an error occurred during skipping of a C-style comment. 4500b57cec5SDimitry Andric bool prepSkipLineBegin(); 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric // Skip any whitespaces or comments after a preprocessing directive. 4530b57cec5SDimitry Andric // The method returns true upon reaching either end of the line 4540b57cec5SDimitry Andric // or end of the file. If there is a multiline C-style comment 4550b57cec5SDimitry Andric // after the preprocessing directive, the method skips 4560b57cec5SDimitry Andric // the comment, so the final CurPtr may point to one of the next lines. 45781ad6265SDimitry Andric // The method returns false, if an error occurred during skipping 4580b57cec5SDimitry Andric // C- or C++-style comment, or a non-whitespace symbol appears 4590b57cec5SDimitry Andric // after the preprocessing directive. 4600b57cec5SDimitry Andric // 4610b57cec5SDimitry Andric // The method maybe called both during lines-skipping and tokens 4620b57cec5SDimitry Andric // processing. It actually verifies that only whitespaces or/and 4630b57cec5SDimitry Andric // comments follow a preprocessing directive. 4640b57cec5SDimitry Andric // 4650b57cec5SDimitry Andric // After the execution of this mehod, CurPtr points either to new line 4660b57cec5SDimitry Andric // symbol, buffer end or non-whitespace symbol following the preprocesing 4670b57cec5SDimitry Andric // directive. 4680b57cec5SDimitry Andric bool prepSkipDirectiveEnd(); 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric // Return true, if the current preprocessor control stack is such that 4710b57cec5SDimitry Andric // we should allow lexer to process the next token, false - otherwise. 4720b57cec5SDimitry Andric // 4730b57cec5SDimitry Andric // In particular, the method returns true, if all the #ifdef/#else 4740b57cec5SDimitry Andric // controls on the stack have their IsDefined member set to true. 4750b57cec5SDimitry Andric bool prepIsProcessingEnabled(); 4760b57cec5SDimitry Andric 4770b57cec5SDimitry Andric // Report an error, if we reach EOF with non-empty preprocessing control 4780b57cec5SDimitry Andric // stack. This means there is no matching #endif for the previous 4790b57cec5SDimitry Andric // #ifdef/#else. 4800b57cec5SDimitry Andric void prepReportPreprocessorStackError(); 4810b57cec5SDimitry Andric }; 4820b57cec5SDimitry Andric 4830b57cec5SDimitry Andric } // end namespace llvm 4840b57cec5SDimitry Andric 4850b57cec5SDimitry Andric #endif 486