10b57cec5SDimitry Andric //===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // Implement the Lexer for TableGen. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "TGLexer.h" 145ffd83dbSDimitry Andric #include "llvm/ADT/ArrayRef.h" 150b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h" 160b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 170b57cec5SDimitry Andric #include "llvm/Config/config.h" // for strtoull()/strtoll() define 180b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 190b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 200b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h" 210b57cec5SDimitry Andric #include "llvm/TableGen/Error.h" 220b57cec5SDimitry Andric #include <algorithm> 230b57cec5SDimitry Andric #include <cctype> 240b57cec5SDimitry Andric #include <cerrno> 250b57cec5SDimitry Andric #include <cstdint> 260b57cec5SDimitry Andric #include <cstdio> 270b57cec5SDimitry Andric #include <cstdlib> 280b57cec5SDimitry Andric #include <cstring> 290b57cec5SDimitry Andric 300b57cec5SDimitry Andric using namespace llvm; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric namespace { 330b57cec5SDimitry Andric // A list of supported preprocessing directives with their 340b57cec5SDimitry Andric // internal token kinds and names. 350b57cec5SDimitry Andric struct { 360b57cec5SDimitry Andric tgtok::TokKind Kind; 370b57cec5SDimitry Andric const char *Word; 380b57cec5SDimitry Andric } PreprocessorDirs[] = { 390b57cec5SDimitry Andric { tgtok::Ifdef, "ifdef" }, 400b57cec5SDimitry Andric { tgtok::Ifndef, "ifndef" }, 410b57cec5SDimitry Andric { tgtok::Else, "else" }, 420b57cec5SDimitry Andric { tgtok::Endif, "endif" }, 430b57cec5SDimitry Andric { tgtok::Define, "define" } 440b57cec5SDimitry Andric }; 450b57cec5SDimitry Andric } // end anonymous namespace 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) { 480b57cec5SDimitry Andric CurBuffer = SrcMgr.getMainFileID(); 490b57cec5SDimitry Andric CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 500b57cec5SDimitry Andric CurPtr = CurBuf.begin(); 510b57cec5SDimitry Andric TokStart = nullptr; 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric // Pretend that we enter the "top-level" include file. 540b57cec5SDimitry Andric PrepIncludeStack.push_back( 558bcb0991SDimitry Andric std::make_unique<std::vector<PreprocessorControlDesc>>()); 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // Put all macros defined in the command line into the DefinedMacros set. 5881ad6265SDimitry Andric for (const std::string &MacroName : Macros) 590b57cec5SDimitry Andric DefinedMacros.insert(MacroName); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric SMLoc TGLexer::getLoc() const { 630b57cec5SDimitry Andric return SMLoc::getFromPointer(TokStart); 640b57cec5SDimitry Andric } 650b57cec5SDimitry Andric 66bdd1243dSDimitry Andric SMRange TGLexer::getLocRange() const { 67bdd1243dSDimitry Andric return {getLoc(), SMLoc::getFromPointer(CurPtr)}; 68bdd1243dSDimitry Andric } 69bdd1243dSDimitry Andric 700b57cec5SDimitry Andric /// ReturnError - Set the error to the specified string at the specified 710b57cec5SDimitry Andric /// location. This is defined to always return tgtok::Error. 720b57cec5SDimitry Andric tgtok::TokKind TGLexer::ReturnError(SMLoc Loc, const Twine &Msg) { 730b57cec5SDimitry Andric PrintError(Loc, Msg); 740b57cec5SDimitry Andric return tgtok::Error; 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { 780b57cec5SDimitry Andric return ReturnError(SMLoc::getFromPointer(Loc), Msg); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric bool TGLexer::processEOF() { 820b57cec5SDimitry Andric SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); 830b57cec5SDimitry Andric if (ParentIncludeLoc != SMLoc()) { 840b57cec5SDimitry Andric // If prepExitInclude() detects a problem with the preprocessing 850b57cec5SDimitry Andric // control stack, it will return false. Pretend that we reached 860b57cec5SDimitry Andric // the final EOF and stop lexing more tokens by returning false 870b57cec5SDimitry Andric // to LexToken(). 880b57cec5SDimitry Andric if (!prepExitInclude(false)) 890b57cec5SDimitry Andric return false; 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); 920b57cec5SDimitry Andric CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 930b57cec5SDimitry Andric CurPtr = ParentIncludeLoc.getPointer(); 940b57cec5SDimitry Andric // Make sure TokStart points into the parent file's buffer. 950b57cec5SDimitry Andric // LexToken() assigns to it before calling getNextChar(), 960b57cec5SDimitry Andric // so it is pointing into the included file now. 970b57cec5SDimitry Andric TokStart = CurPtr; 980b57cec5SDimitry Andric return true; 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric // Pretend that we exit the "top-level" include file. 1020b57cec5SDimitry Andric // Note that in case of an error (e.g. control stack imbalance) 1030b57cec5SDimitry Andric // the routine will issue a fatal error. 1040b57cec5SDimitry Andric prepExitInclude(true); 1050b57cec5SDimitry Andric return false; 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric int TGLexer::getNextChar() { 1090b57cec5SDimitry Andric char CurChar = *CurPtr++; 1100b57cec5SDimitry Andric switch (CurChar) { 1110b57cec5SDimitry Andric default: 1120b57cec5SDimitry Andric return (unsigned char)CurChar; 1130b57cec5SDimitry Andric 114fe6060f1SDimitry Andric case 0: { 115fe6060f1SDimitry Andric // A NUL character in the stream is either the end of the current buffer or 116fe6060f1SDimitry Andric // a spurious NUL in the file. Disambiguate that here. 117fe6060f1SDimitry Andric if (CurPtr - 1 == CurBuf.end()) { 118fe6060f1SDimitry Andric --CurPtr; // Arrange for another call to return EOF again. 1190b57cec5SDimitry Andric return EOF; 1200b57cec5SDimitry Andric } 121fe6060f1SDimitry Andric PrintError(getLoc(), 122fe6060f1SDimitry Andric "NUL character is invalid in source; treated as space"); 123fe6060f1SDimitry Andric return ' '; 124fe6060f1SDimitry Andric } 125fe6060f1SDimitry Andric 1260b57cec5SDimitry Andric case '\n': 1270b57cec5SDimitry Andric case '\r': 1280b57cec5SDimitry Andric // Handle the newline character by ignoring it and incrementing the line 1290b57cec5SDimitry Andric // count. However, be careful about 'dos style' files with \n\r in them. 1300b57cec5SDimitry Andric // Only treat a \n\r or \r\n as a single line. 1310b57cec5SDimitry Andric if ((*CurPtr == '\n' || (*CurPtr == '\r')) && 1320b57cec5SDimitry Andric *CurPtr != CurChar) 1330b57cec5SDimitry Andric ++CurPtr; // Eat the two char newline sequence. 1340b57cec5SDimitry Andric return '\n'; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric } 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric int TGLexer::peekNextChar(int Index) const { 1390b57cec5SDimitry Andric return *(CurPtr + Index); 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { 1430b57cec5SDimitry Andric TokStart = CurPtr; 1440b57cec5SDimitry Andric // This always consumes at least one character. 1450b57cec5SDimitry Andric int CurChar = getNextChar(); 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric switch (CurChar) { 1480b57cec5SDimitry Andric default: 1490b57cec5SDimitry Andric // Handle letters: [a-zA-Z_] 1500b57cec5SDimitry Andric if (isalpha(CurChar) || CurChar == '_') 1510b57cec5SDimitry Andric return LexIdentifier(); 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric // Unknown character, emit an error. 1540b57cec5SDimitry Andric return ReturnError(TokStart, "Unexpected character"); 1550b57cec5SDimitry Andric case EOF: 1560b57cec5SDimitry Andric // Lex next token, if we just left an include file. 1570b57cec5SDimitry Andric // Note that leaving an include file means that the next 158e8d8bef9SDimitry Andric // symbol is located at the end of the 'include "..."' 1590b57cec5SDimitry Andric // construct, so LexToken() is called with default 1600b57cec5SDimitry Andric // false parameter. 1610b57cec5SDimitry Andric if (processEOF()) 1620b57cec5SDimitry Andric return LexToken(); 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric // Return EOF denoting the end of lexing. 1650b57cec5SDimitry Andric return tgtok::Eof; 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric case ':': return tgtok::colon; 1680b57cec5SDimitry Andric case ';': return tgtok::semi; 1690b57cec5SDimitry Andric case ',': return tgtok::comma; 1700b57cec5SDimitry Andric case '<': return tgtok::less; 1710b57cec5SDimitry Andric case '>': return tgtok::greater; 1720b57cec5SDimitry Andric case ']': return tgtok::r_square; 1730b57cec5SDimitry Andric case '{': return tgtok::l_brace; 1740b57cec5SDimitry Andric case '}': return tgtok::r_brace; 1750b57cec5SDimitry Andric case '(': return tgtok::l_paren; 1760b57cec5SDimitry Andric case ')': return tgtok::r_paren; 1770b57cec5SDimitry Andric case '=': return tgtok::equal; 1780b57cec5SDimitry Andric case '?': return tgtok::question; 1790b57cec5SDimitry Andric case '#': 1800b57cec5SDimitry Andric if (FileOrLineStart) { 1810b57cec5SDimitry Andric tgtok::TokKind Kind = prepIsDirective(); 1820b57cec5SDimitry Andric if (Kind != tgtok::Error) 1830b57cec5SDimitry Andric return lexPreprocessor(Kind); 1840b57cec5SDimitry Andric } 1850b57cec5SDimitry Andric 1860b57cec5SDimitry Andric return tgtok::paste; 1870b57cec5SDimitry Andric 188e8d8bef9SDimitry Andric // The period is a separate case so we can recognize the "..." 189e8d8bef9SDimitry Andric // range punctuator. 190e8d8bef9SDimitry Andric case '.': 191e8d8bef9SDimitry Andric if (peekNextChar(0) == '.') { 192e8d8bef9SDimitry Andric ++CurPtr; // Eat second dot. 193e8d8bef9SDimitry Andric if (peekNextChar(0) == '.') { 194e8d8bef9SDimitry Andric ++CurPtr; // Eat third dot. 195e8d8bef9SDimitry Andric return tgtok::dotdotdot; 196e8d8bef9SDimitry Andric } 197e8d8bef9SDimitry Andric return ReturnError(TokStart, "Invalid '..' punctuation"); 198e8d8bef9SDimitry Andric } 199e8d8bef9SDimitry Andric return tgtok::dot; 200e8d8bef9SDimitry Andric 2010b57cec5SDimitry Andric case '\r': 2020b57cec5SDimitry Andric PrintFatalError("getNextChar() must never return '\r'"); 2030b57cec5SDimitry Andric return tgtok::Error; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric case ' ': 2060b57cec5SDimitry Andric case '\t': 2070b57cec5SDimitry Andric // Ignore whitespace. 2080b57cec5SDimitry Andric return LexToken(FileOrLineStart); 2090b57cec5SDimitry Andric case '\n': 2100b57cec5SDimitry Andric // Ignore whitespace, and identify the new line. 2110b57cec5SDimitry Andric return LexToken(true); 2120b57cec5SDimitry Andric case '/': 2130b57cec5SDimitry Andric // If this is the start of a // comment, skip until the end of the line or 2140b57cec5SDimitry Andric // the end of the buffer. 2150b57cec5SDimitry Andric if (*CurPtr == '/') 2160b57cec5SDimitry Andric SkipBCPLComment(); 2170b57cec5SDimitry Andric else if (*CurPtr == '*') { 2180b57cec5SDimitry Andric if (SkipCComment()) 2190b57cec5SDimitry Andric return tgtok::Error; 2200b57cec5SDimitry Andric } else // Otherwise, this is an error. 2210b57cec5SDimitry Andric return ReturnError(TokStart, "Unexpected character"); 2220b57cec5SDimitry Andric return LexToken(FileOrLineStart); 2230b57cec5SDimitry Andric case '-': case '+': 2240b57cec5SDimitry Andric case '0': case '1': case '2': case '3': case '4': case '5': case '6': 2250b57cec5SDimitry Andric case '7': case '8': case '9': { 2260b57cec5SDimitry Andric int NextChar = 0; 2270b57cec5SDimitry Andric if (isdigit(CurChar)) { 2280b57cec5SDimitry Andric // Allow identifiers to start with a number if it is followed by 2290b57cec5SDimitry Andric // an identifier. This can happen with paste operations like 2300b57cec5SDimitry Andric // foo#8i. 2310b57cec5SDimitry Andric int i = 0; 2320b57cec5SDimitry Andric do { 2330b57cec5SDimitry Andric NextChar = peekNextChar(i++); 2340b57cec5SDimitry Andric } while (isdigit(NextChar)); 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric if (NextChar == 'x' || NextChar == 'b') { 2370b57cec5SDimitry Andric // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most 2380b57cec5SDimitry Andric // likely a number. 2390b57cec5SDimitry Andric int NextNextChar = peekNextChar(i); 2400b57cec5SDimitry Andric switch (NextNextChar) { 2410b57cec5SDimitry Andric default: 2420b57cec5SDimitry Andric break; 2430b57cec5SDimitry Andric case '0': case '1': 2440b57cec5SDimitry Andric if (NextChar == 'b') 2450b57cec5SDimitry Andric return LexNumber(); 246bdd1243dSDimitry Andric [[fallthrough]]; 2470b57cec5SDimitry Andric case '2': case '3': case '4': case '5': 2480b57cec5SDimitry Andric case '6': case '7': case '8': case '9': 2490b57cec5SDimitry Andric case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 2500b57cec5SDimitry Andric case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 2510b57cec5SDimitry Andric if (NextChar == 'x') 2520b57cec5SDimitry Andric return LexNumber(); 2530b57cec5SDimitry Andric break; 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric } 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric if (isalpha(NextChar) || NextChar == '_') 2590b57cec5SDimitry Andric return LexIdentifier(); 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric return LexNumber(); 2620b57cec5SDimitry Andric } 2630b57cec5SDimitry Andric case '"': return LexString(); 2640b57cec5SDimitry Andric case '$': return LexVarName(); 2650b57cec5SDimitry Andric case '[': return LexBracket(); 2660b57cec5SDimitry Andric case '!': return LexExclaim(); 2670b57cec5SDimitry Andric } 2680b57cec5SDimitry Andric } 2690b57cec5SDimitry Andric 2700b57cec5SDimitry Andric /// LexString - Lex "[^"]*" 2710b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexString() { 2720b57cec5SDimitry Andric const char *StrStart = CurPtr; 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric CurStrVal = ""; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric while (*CurPtr != '"') { 2770b57cec5SDimitry Andric // If we hit the end of the buffer, report an error. 2780b57cec5SDimitry Andric if (*CurPtr == 0 && CurPtr == CurBuf.end()) 2790b57cec5SDimitry Andric return ReturnError(StrStart, "End of file in string literal"); 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric if (*CurPtr == '\n' || *CurPtr == '\r') 2820b57cec5SDimitry Andric return ReturnError(StrStart, "End of line in string literal"); 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric if (*CurPtr != '\\') { 2850b57cec5SDimitry Andric CurStrVal += *CurPtr++; 2860b57cec5SDimitry Andric continue; 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric ++CurPtr; 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric switch (*CurPtr) { 2920b57cec5SDimitry Andric case '\\': case '\'': case '"': 2930b57cec5SDimitry Andric // These turn into their literal character. 2940b57cec5SDimitry Andric CurStrVal += *CurPtr++; 2950b57cec5SDimitry Andric break; 2960b57cec5SDimitry Andric case 't': 2970b57cec5SDimitry Andric CurStrVal += '\t'; 2980b57cec5SDimitry Andric ++CurPtr; 2990b57cec5SDimitry Andric break; 3000b57cec5SDimitry Andric case 'n': 3010b57cec5SDimitry Andric CurStrVal += '\n'; 3020b57cec5SDimitry Andric ++CurPtr; 3030b57cec5SDimitry Andric break; 3040b57cec5SDimitry Andric 3050b57cec5SDimitry Andric case '\n': 3060b57cec5SDimitry Andric case '\r': 3070b57cec5SDimitry Andric return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // If we hit the end of the buffer, report an error. 3100b57cec5SDimitry Andric case '\0': 3110b57cec5SDimitry Andric if (CurPtr == CurBuf.end()) 3120b57cec5SDimitry Andric return ReturnError(StrStart, "End of file in string literal"); 313bdd1243dSDimitry Andric [[fallthrough]]; 3140b57cec5SDimitry Andric default: 3150b57cec5SDimitry Andric return ReturnError(CurPtr, "invalid escape in string literal"); 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric } 3180b57cec5SDimitry Andric 3190b57cec5SDimitry Andric ++CurPtr; 3200b57cec5SDimitry Andric return tgtok::StrVal; 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexVarName() { 3240b57cec5SDimitry Andric if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') 3250b57cec5SDimitry Andric return ReturnError(TokStart, "Invalid variable name"); 3260b57cec5SDimitry Andric 3270b57cec5SDimitry Andric // Otherwise, we're ok, consume the rest of the characters. 3280b57cec5SDimitry Andric const char *VarNameStart = CurPtr++; 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 3310b57cec5SDimitry Andric ++CurPtr; 3320b57cec5SDimitry Andric 3330b57cec5SDimitry Andric CurStrVal.assign(VarNameStart, CurPtr); 3340b57cec5SDimitry Andric return tgtok::VarName; 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexIdentifier() { 3380b57cec5SDimitry Andric // The first letter is [a-zA-Z_]. 3390b57cec5SDimitry Andric const char *IdentStart = TokStart; 3400b57cec5SDimitry Andric 3410b57cec5SDimitry Andric // Match the rest of the identifier regex: [0-9a-zA-Z_]* 3420b57cec5SDimitry Andric while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 3430b57cec5SDimitry Andric ++CurPtr; 3440b57cec5SDimitry Andric 345e8d8bef9SDimitry Andric // Check to see if this identifier is a reserved keyword. 3460b57cec5SDimitry Andric StringRef Str(IdentStart, CurPtr-IdentStart); 3470b57cec5SDimitry Andric 3480b57cec5SDimitry Andric tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str) 3490b57cec5SDimitry Andric .Case("int", tgtok::Int) 3500b57cec5SDimitry Andric .Case("bit", tgtok::Bit) 3510b57cec5SDimitry Andric .Case("bits", tgtok::Bits) 3520b57cec5SDimitry Andric .Case("string", tgtok::String) 3530b57cec5SDimitry Andric .Case("list", tgtok::List) 3540b57cec5SDimitry Andric .Case("code", tgtok::Code) 3550b57cec5SDimitry Andric .Case("dag", tgtok::Dag) 3560b57cec5SDimitry Andric .Case("class", tgtok::Class) 3570b57cec5SDimitry Andric .Case("def", tgtok::Def) 358e8d8bef9SDimitry Andric .Case("true", tgtok::TrueVal) 359e8d8bef9SDimitry Andric .Case("false", tgtok::FalseVal) 3600b57cec5SDimitry Andric .Case("foreach", tgtok::Foreach) 3610b57cec5SDimitry Andric .Case("defm", tgtok::Defm) 3620b57cec5SDimitry Andric .Case("defset", tgtok::Defset) 363*0fca6ea1SDimitry Andric .Case("deftype", tgtok::Deftype) 3640b57cec5SDimitry Andric .Case("multiclass", tgtok::MultiClass) 3650b57cec5SDimitry Andric .Case("field", tgtok::Field) 3660b57cec5SDimitry Andric .Case("let", tgtok::Let) 3670b57cec5SDimitry Andric .Case("in", tgtok::In) 368480093f4SDimitry Andric .Case("defvar", tgtok::Defvar) 369e8d8bef9SDimitry Andric .Case("include", tgtok::Include) 370480093f4SDimitry Andric .Case("if", tgtok::If) 371480093f4SDimitry Andric .Case("then", tgtok::Then) 372480093f4SDimitry Andric .Case("else", tgtok::ElseKW) 373e8d8bef9SDimitry Andric .Case("assert", tgtok::Assert) 3745f757f3fSDimitry Andric .Case("dump", tgtok::Dump) 3750b57cec5SDimitry Andric .Default(tgtok::Id); 3760b57cec5SDimitry Andric 377e8d8bef9SDimitry Andric // A couple of tokens require special processing. 378e8d8bef9SDimitry Andric switch (Kind) { 379e8d8bef9SDimitry Andric case tgtok::Include: 380e8d8bef9SDimitry Andric if (LexInclude()) return tgtok::Error; 381e8d8bef9SDimitry Andric return Lex(); 382e8d8bef9SDimitry Andric case tgtok::Id: 3830b57cec5SDimitry Andric CurStrVal.assign(Str.begin(), Str.end()); 384e8d8bef9SDimitry Andric break; 385e8d8bef9SDimitry Andric default: 386e8d8bef9SDimitry Andric break; 387e8d8bef9SDimitry Andric } 388e8d8bef9SDimitry Andric 3890b57cec5SDimitry Andric return Kind; 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric /// LexInclude - We just read the "include" token. Get the string token that 3930b57cec5SDimitry Andric /// comes next and enter the include. 3940b57cec5SDimitry Andric bool TGLexer::LexInclude() { 3950b57cec5SDimitry Andric // The token after the include must be a string. 3960b57cec5SDimitry Andric tgtok::TokKind Tok = LexToken(); 3970b57cec5SDimitry Andric if (Tok == tgtok::Error) return true; 3980b57cec5SDimitry Andric if (Tok != tgtok::StrVal) { 3990b57cec5SDimitry Andric PrintError(getLoc(), "Expected filename after include"); 4000b57cec5SDimitry Andric return true; 4010b57cec5SDimitry Andric } 4020b57cec5SDimitry Andric 4030b57cec5SDimitry Andric // Get the string. 4040b57cec5SDimitry Andric std::string Filename = CurStrVal; 4050b57cec5SDimitry Andric std::string IncludedFile; 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), 4080b57cec5SDimitry Andric IncludedFile); 4090b57cec5SDimitry Andric if (!CurBuffer) { 4100b57cec5SDimitry Andric PrintError(getLoc(), "Could not find include file '" + Filename + "'"); 4110b57cec5SDimitry Andric return true; 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric 414480093f4SDimitry Andric Dependencies.insert(IncludedFile); 4150b57cec5SDimitry Andric // Save the line number and lex buffer of the includer. 4160b57cec5SDimitry Andric CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); 4170b57cec5SDimitry Andric CurPtr = CurBuf.begin(); 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric PrepIncludeStack.push_back( 4208bcb0991SDimitry Andric std::make_unique<std::vector<PreprocessorControlDesc>>()); 4210b57cec5SDimitry Andric return false; 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 424fe6060f1SDimitry Andric /// SkipBCPLComment - Skip over the comment by finding the next CR or LF. 425fe6060f1SDimitry Andric /// Or we may end up at the end of the buffer. 4260b57cec5SDimitry Andric void TGLexer::SkipBCPLComment() { 4270b57cec5SDimitry Andric ++CurPtr; // skip the second slash. 428fe6060f1SDimitry Andric auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data()); 429fe6060f1SDimitry Andric CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos; 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric /// SkipCComment - This skips C-style /**/ comments. The only difference from C 4330b57cec5SDimitry Andric /// is that we allow nesting. 4340b57cec5SDimitry Andric bool TGLexer::SkipCComment() { 4350b57cec5SDimitry Andric ++CurPtr; // skip the star. 4360b57cec5SDimitry Andric unsigned CommentDepth = 1; 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric while (true) { 4390b57cec5SDimitry Andric int CurChar = getNextChar(); 4400b57cec5SDimitry Andric switch (CurChar) { 4410b57cec5SDimitry Andric case EOF: 4420b57cec5SDimitry Andric PrintError(TokStart, "Unterminated comment!"); 4430b57cec5SDimitry Andric return true; 4440b57cec5SDimitry Andric case '*': 4450b57cec5SDimitry Andric // End of the comment? 4460b57cec5SDimitry Andric if (CurPtr[0] != '/') break; 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric ++CurPtr; // End the */. 4490b57cec5SDimitry Andric if (--CommentDepth == 0) 4500b57cec5SDimitry Andric return false; 4510b57cec5SDimitry Andric break; 4520b57cec5SDimitry Andric case '/': 4530b57cec5SDimitry Andric // Start of a nested comment? 4540b57cec5SDimitry Andric if (CurPtr[0] != '*') break; 4550b57cec5SDimitry Andric ++CurPtr; 4560b57cec5SDimitry Andric ++CommentDepth; 4570b57cec5SDimitry Andric break; 4580b57cec5SDimitry Andric } 4590b57cec5SDimitry Andric } 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric /// LexNumber - Lex: 4630b57cec5SDimitry Andric /// [-+]?[0-9]+ 4640b57cec5SDimitry Andric /// 0x[0-9a-fA-F]+ 4650b57cec5SDimitry Andric /// 0b[01]+ 4660b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexNumber() { 4675f757f3fSDimitry Andric unsigned Base = 0; 4685f757f3fSDimitry Andric const char *NumStart; 4695f757f3fSDimitry Andric 4705f757f3fSDimitry Andric // Check if it's a hex or a binary value. 4710b57cec5SDimitry Andric if (CurPtr[-1] == '0') { 4725f757f3fSDimitry Andric NumStart = CurPtr + 1; 4730b57cec5SDimitry Andric if (CurPtr[0] == 'x') { 4745f757f3fSDimitry Andric Base = 16; 4755f757f3fSDimitry Andric do 4760b57cec5SDimitry Andric ++CurPtr; 4775f757f3fSDimitry Andric while (isxdigit(CurPtr[0])); 4780b57cec5SDimitry Andric } else if (CurPtr[0] == 'b') { 4795f757f3fSDimitry Andric Base = 2; 4805f757f3fSDimitry Andric do 4810b57cec5SDimitry Andric ++CurPtr; 4825f757f3fSDimitry Andric while (CurPtr[0] == '0' || CurPtr[0] == '1'); 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4865f757f3fSDimitry Andric // For a hex or binary value, we always convert it to an unsigned value. 4875f757f3fSDimitry Andric bool IsMinus = false; 4885f757f3fSDimitry Andric 4895f757f3fSDimitry Andric // Check if it's a decimal value. 4905f757f3fSDimitry Andric if (Base == 0) { 4910b57cec5SDimitry Andric // Check for a sign without a digit. 4920b57cec5SDimitry Andric if (!isdigit(CurPtr[0])) { 4930b57cec5SDimitry Andric if (CurPtr[-1] == '-') 4940b57cec5SDimitry Andric return tgtok::minus; 4950b57cec5SDimitry Andric else if (CurPtr[-1] == '+') 4960b57cec5SDimitry Andric return tgtok::plus; 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4995f757f3fSDimitry Andric Base = 10; 5005f757f3fSDimitry Andric NumStart = TokStart; 5015f757f3fSDimitry Andric IsMinus = CurPtr[-1] == '-'; 5025f757f3fSDimitry Andric 5030b57cec5SDimitry Andric while (isdigit(CurPtr[0])) 5040b57cec5SDimitry Andric ++CurPtr; 5055f757f3fSDimitry Andric } 5065f757f3fSDimitry Andric 5075f757f3fSDimitry Andric // Requires at least one digit. 5085f757f3fSDimitry Andric if (CurPtr == NumStart) 5095f757f3fSDimitry Andric return ReturnError(TokStart, "Invalid number"); 5105f757f3fSDimitry Andric 5115f757f3fSDimitry Andric errno = 0; 5125f757f3fSDimitry Andric if (IsMinus) 5135f757f3fSDimitry Andric CurIntVal = strtoll(NumStart, nullptr, Base); 5145f757f3fSDimitry Andric else 5155f757f3fSDimitry Andric CurIntVal = strtoull(NumStart, nullptr, Base); 5165f757f3fSDimitry Andric 5175f757f3fSDimitry Andric if (errno == EINVAL) 5185f757f3fSDimitry Andric return ReturnError(TokStart, "Invalid number"); 5195f757f3fSDimitry Andric if (errno == ERANGE) 5205f757f3fSDimitry Andric return ReturnError(TokStart, "Number out of range"); 5215f757f3fSDimitry Andric 5225f757f3fSDimitry Andric return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal; 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric /// LexBracket - We just read '['. If this is a code block, return it, 5260b57cec5SDimitry Andric /// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' 5270b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexBracket() { 5280b57cec5SDimitry Andric if (CurPtr[0] != '{') 5290b57cec5SDimitry Andric return tgtok::l_square; 5300b57cec5SDimitry Andric ++CurPtr; 5310b57cec5SDimitry Andric const char *CodeStart = CurPtr; 5320b57cec5SDimitry Andric while (true) { 5330b57cec5SDimitry Andric int Char = getNextChar(); 5340b57cec5SDimitry Andric if (Char == EOF) break; 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric if (Char != '}') continue; 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric Char = getNextChar(); 5390b57cec5SDimitry Andric if (Char == EOF) break; 5400b57cec5SDimitry Andric if (Char == ']') { 5410b57cec5SDimitry Andric CurStrVal.assign(CodeStart, CurPtr-2); 5420b57cec5SDimitry Andric return tgtok::CodeFragment; 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric } 5450b57cec5SDimitry Andric 546e8d8bef9SDimitry Andric return ReturnError(CodeStart - 2, "Unterminated code block"); 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric /// LexExclaim - Lex '!' and '![a-zA-Z]+'. 5500b57cec5SDimitry Andric tgtok::TokKind TGLexer::LexExclaim() { 5510b57cec5SDimitry Andric if (!isalpha(*CurPtr)) 5520b57cec5SDimitry Andric return ReturnError(CurPtr - 1, "Invalid \"!operator\""); 5530b57cec5SDimitry Andric 5540b57cec5SDimitry Andric const char *Start = CurPtr++; 5550b57cec5SDimitry Andric while (isalpha(*CurPtr)) 5560b57cec5SDimitry Andric ++CurPtr; 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric // Check to see which operator this is. 5590b57cec5SDimitry Andric tgtok::TokKind Kind = 5600b57cec5SDimitry Andric StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start)) 5610b57cec5SDimitry Andric .Case("eq", tgtok::XEq) 5620b57cec5SDimitry Andric .Case("ne", tgtok::XNe) 5630b57cec5SDimitry Andric .Case("le", tgtok::XLe) 5640b57cec5SDimitry Andric .Case("lt", tgtok::XLt) 5650b57cec5SDimitry Andric .Case("ge", tgtok::XGe) 5660b57cec5SDimitry Andric .Case("gt", tgtok::XGt) 5670b57cec5SDimitry Andric .Case("if", tgtok::XIf) 5680b57cec5SDimitry Andric .Case("cond", tgtok::XCond) 5690b57cec5SDimitry Andric .Case("isa", tgtok::XIsA) 5700b57cec5SDimitry Andric .Case("head", tgtok::XHead) 5710b57cec5SDimitry Andric .Case("tail", tgtok::XTail) 5720b57cec5SDimitry Andric .Case("size", tgtok::XSize) 5730b57cec5SDimitry Andric .Case("con", tgtok::XConcat) 5740b57cec5SDimitry Andric .Case("dag", tgtok::XDag) 5750b57cec5SDimitry Andric .Case("add", tgtok::XADD) 576e8d8bef9SDimitry Andric .Case("sub", tgtok::XSUB) 5770b57cec5SDimitry Andric .Case("mul", tgtok::XMUL) 578bdd1243dSDimitry Andric .Case("div", tgtok::XDIV) 579e8d8bef9SDimitry Andric .Case("not", tgtok::XNOT) 580bdd1243dSDimitry Andric .Case("logtwo", tgtok::XLOG2) 5810b57cec5SDimitry Andric .Case("and", tgtok::XAND) 5820b57cec5SDimitry Andric .Case("or", tgtok::XOR) 583e8d8bef9SDimitry Andric .Case("xor", tgtok::XXOR) 5840b57cec5SDimitry Andric .Case("shl", tgtok::XSHL) 5850b57cec5SDimitry Andric .Case("sra", tgtok::XSRA) 5860b57cec5SDimitry Andric .Case("srl", tgtok::XSRL) 5870b57cec5SDimitry Andric .Case("cast", tgtok::XCast) 5880b57cec5SDimitry Andric .Case("empty", tgtok::XEmpty) 5890b57cec5SDimitry Andric .Case("subst", tgtok::XSubst) 5900b57cec5SDimitry Andric .Case("foldl", tgtok::XFoldl) 5910b57cec5SDimitry Andric .Case("foreach", tgtok::XForEach) 592e8d8bef9SDimitry Andric .Case("filter", tgtok::XFilter) 5930b57cec5SDimitry Andric .Case("listconcat", tgtok::XListConcat) 5940b57cec5SDimitry Andric .Case("listsplat", tgtok::XListSplat) 595bdd1243dSDimitry Andric .Case("listremove", tgtok::XListRemove) 59606c3fb27SDimitry Andric .Case("range", tgtok::XRange) 5970b57cec5SDimitry Andric .Case("strconcat", tgtok::XStrConcat) 598e8d8bef9SDimitry Andric .Case("interleave", tgtok::XInterleave) 599e8d8bef9SDimitry Andric .Case("substr", tgtok::XSubstr) 600fe6060f1SDimitry Andric .Case("find", tgtok::XFind) 601e8d8bef9SDimitry Andric .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. 602e8d8bef9SDimitry Andric .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. 60306c3fb27SDimitry Andric .Case("getdagarg", tgtok::XGetDagArg) 60406c3fb27SDimitry Andric .Case("getdagname", tgtok::XGetDagName) 60506c3fb27SDimitry Andric .Case("setdagarg", tgtok::XSetDagArg) 60606c3fb27SDimitry Andric .Case("setdagname", tgtok::XSetDagName) 60781ad6265SDimitry Andric .Case("exists", tgtok::XExists) 60806c3fb27SDimitry Andric .Case("tolower", tgtok::XToLower) 60906c3fb27SDimitry Andric .Case("toupper", tgtok::XToUpper) 6105f757f3fSDimitry Andric .Case("repr", tgtok::XRepr) 6110b57cec5SDimitry Andric .Default(tgtok::Error); 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); 6140b57cec5SDimitry Andric } 6150b57cec5SDimitry Andric 6160b57cec5SDimitry Andric bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { 6170b57cec5SDimitry Andric // Report an error, if preprocessor control stack for the current 6180b57cec5SDimitry Andric // file is not empty. 6190b57cec5SDimitry Andric if (!PrepIncludeStack.back()->empty()) { 6200b57cec5SDimitry Andric prepReportPreprocessorStackError(); 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric return false; 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric // Pop the preprocessing controls from the include stack. 6260b57cec5SDimitry Andric if (PrepIncludeStack.empty()) { 6270b57cec5SDimitry Andric PrintFatalError("Preprocessor include stack is empty"); 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric 6300b57cec5SDimitry Andric PrepIncludeStack.pop_back(); 6310b57cec5SDimitry Andric 6320b57cec5SDimitry Andric if (IncludeStackMustBeEmpty) { 6330b57cec5SDimitry Andric if (!PrepIncludeStack.empty()) 6340b57cec5SDimitry Andric PrintFatalError("Preprocessor include stack is not empty"); 6350b57cec5SDimitry Andric } else { 6360b57cec5SDimitry Andric if (PrepIncludeStack.empty()) 6370b57cec5SDimitry Andric PrintFatalError("Preprocessor include stack is empty"); 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric return true; 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric tgtok::TokKind TGLexer::prepIsDirective() const { 644fe6060f1SDimitry Andric for (const auto &PD : PreprocessorDirs) { 6450b57cec5SDimitry Andric int NextChar = *CurPtr; 6460b57cec5SDimitry Andric bool Match = true; 6470b57cec5SDimitry Andric unsigned I = 0; 648fe6060f1SDimitry Andric for (; I < strlen(PD.Word); ++I) { 649fe6060f1SDimitry Andric if (NextChar != PD.Word[I]) { 6500b57cec5SDimitry Andric Match = false; 6510b57cec5SDimitry Andric break; 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric NextChar = peekNextChar(I + 1); 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric // Check for whitespace after the directive. If there is no whitespace, 6580b57cec5SDimitry Andric // then we do not recognize it as a preprocessing directive. 6590b57cec5SDimitry Andric if (Match) { 660fe6060f1SDimitry Andric tgtok::TokKind Kind = PD.Kind; 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric // New line and EOF may follow only #else/#endif. It will be reported 6630b57cec5SDimitry Andric // as an error for #ifdef/#define after the call to prepLexMacroName(). 6640b57cec5SDimitry Andric if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF || 6650b57cec5SDimitry Andric NextChar == '\n' || 6660b57cec5SDimitry Andric // It looks like TableGen does not support '\r' as the actual 6670b57cec5SDimitry Andric // carriage return, e.g. getNextChar() treats a single '\r' 6680b57cec5SDimitry Andric // as '\n'. So we do the same here. 6690b57cec5SDimitry Andric NextChar == '\r') 6700b57cec5SDimitry Andric return Kind; 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric // Allow comments after some directives, e.g.: 6730b57cec5SDimitry Andric // #else// OR #else/**/ 6740b57cec5SDimitry Andric // #endif// OR #endif/**/ 6750b57cec5SDimitry Andric // 6760b57cec5SDimitry Andric // Note that we do allow comments after #ifdef/#define here, e.g. 6770b57cec5SDimitry Andric // #ifdef/**/ AND #ifdef// 6780b57cec5SDimitry Andric // #define/**/ AND #define// 6790b57cec5SDimitry Andric // 6800b57cec5SDimitry Andric // These cases will be reported as incorrect after calling 6810b57cec5SDimitry Andric // prepLexMacroName(). We could have supported C-style comments 6820b57cec5SDimitry Andric // after #ifdef/#define, but this would complicate the code 6830b57cec5SDimitry Andric // for little benefit. 6840b57cec5SDimitry Andric if (NextChar == '/') { 6850b57cec5SDimitry Andric NextChar = peekNextChar(I + 1); 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric if (NextChar == '*' || NextChar == '/') 6880b57cec5SDimitry Andric return Kind; 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric // Pretend that we do not recognize the directive. 6910b57cec5SDimitry Andric } 6920b57cec5SDimitry Andric } 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric return tgtok::Error; 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { 6990b57cec5SDimitry Andric TokStart = CurPtr; 7000b57cec5SDimitry Andric 701fe6060f1SDimitry Andric for (const auto &PD : PreprocessorDirs) 702fe6060f1SDimitry Andric if (PD.Kind == Kind) { 7030b57cec5SDimitry Andric // Advance CurPtr to the end of the preprocessing word. 704fe6060f1SDimitry Andric CurPtr += strlen(PD.Word); 7050b57cec5SDimitry Andric return true; 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric 7080b57cec5SDimitry Andric PrintFatalError("Unsupported preprocessing token in " 7090b57cec5SDimitry Andric "prepEatPreprocessorDirective()"); 7100b57cec5SDimitry Andric return false; 7110b57cec5SDimitry Andric } 7120b57cec5SDimitry Andric 7130b57cec5SDimitry Andric tgtok::TokKind TGLexer::lexPreprocessor( 7140b57cec5SDimitry Andric tgtok::TokKind Kind, bool ReturnNextLiveToken) { 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric // We must be looking at a preprocessing directive. Eat it! 7170b57cec5SDimitry Andric if (!prepEatPreprocessorDirective(Kind)) 7180b57cec5SDimitry Andric PrintFatalError("lexPreprocessor() called for unknown " 7190b57cec5SDimitry Andric "preprocessor directive"); 7200b57cec5SDimitry Andric 7210b57cec5SDimitry Andric if (Kind == tgtok::Ifdef || Kind == tgtok::Ifndef) { 7220b57cec5SDimitry Andric StringRef MacroName = prepLexMacroName(); 7230b57cec5SDimitry Andric StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef"; 7240b57cec5SDimitry Andric if (MacroName.empty()) 7250b57cec5SDimitry Andric return ReturnError(TokStart, "Expected macro name after " + IfTokName); 7260b57cec5SDimitry Andric 7270b57cec5SDimitry Andric bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; 7280b57cec5SDimitry Andric 7295f757f3fSDimitry Andric // Canonicalize ifndef's MacroIsDefined to its ifdef equivalent. 7305f757f3fSDimitry Andric if (Kind == tgtok::Ifndef) 7310b57cec5SDimitry Andric MacroIsDefined = !MacroIsDefined; 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric // Regardless of whether we are processing tokens or not, 7340b57cec5SDimitry Andric // we put the #ifdef control on stack. 7355f757f3fSDimitry Andric // Note that MacroIsDefined has been canonicalized against ifdef. 7360b57cec5SDimitry Andric PrepIncludeStack.back()->push_back( 7375f757f3fSDimitry Andric {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric if (!prepSkipDirectiveEnd()) 7400b57cec5SDimitry Andric return ReturnError(CurPtr, "Only comments are supported after " + 7410b57cec5SDimitry Andric IfTokName + " NAME"); 7420b57cec5SDimitry Andric 7430b57cec5SDimitry Andric // If we were not processing tokens before this #ifdef, 7440b57cec5SDimitry Andric // then just return back to the lines skipping code. 7450b57cec5SDimitry Andric if (!ReturnNextLiveToken) 7460b57cec5SDimitry Andric return Kind; 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric // If we were processing tokens before this #ifdef, 7490b57cec5SDimitry Andric // and the macro is defined, then just return the next token. 7500b57cec5SDimitry Andric if (MacroIsDefined) 7510b57cec5SDimitry Andric return LexToken(); 7520b57cec5SDimitry Andric 7530b57cec5SDimitry Andric // We were processing tokens before this #ifdef, and the macro 7540b57cec5SDimitry Andric // is not defined, so we have to start skipping the lines. 7550b57cec5SDimitry Andric // If the skipping is successful, it will return the token following 7560b57cec5SDimitry Andric // either #else or #endif corresponding to this #ifdef. 7570b57cec5SDimitry Andric if (prepSkipRegion(ReturnNextLiveToken)) 7580b57cec5SDimitry Andric return LexToken(); 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric return tgtok::Error; 7610b57cec5SDimitry Andric } else if (Kind == tgtok::Else) { 7620b57cec5SDimitry Andric // Check if this #else is correct before calling prepSkipDirectiveEnd(), 7630b57cec5SDimitry Andric // which will move CurPtr away from the beginning of #else. 7640b57cec5SDimitry Andric if (PrepIncludeStack.back()->empty()) 7650b57cec5SDimitry Andric return ReturnError(TokStart, "#else without #ifdef or #ifndef"); 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back()->back(); 7680b57cec5SDimitry Andric 7690b57cec5SDimitry Andric if (IfdefEntry.Kind != tgtok::Ifdef) { 7700b57cec5SDimitry Andric PrintError(TokStart, "double #else"); 7710b57cec5SDimitry Andric return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); 7720b57cec5SDimitry Andric } 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric // Replace the corresponding #ifdef's control with its negation 7750b57cec5SDimitry Andric // on the control stack. 7760b57cec5SDimitry Andric PrepIncludeStack.back()->pop_back(); 7770b57cec5SDimitry Andric PrepIncludeStack.back()->push_back( 7780b57cec5SDimitry Andric {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); 7790b57cec5SDimitry Andric 7800b57cec5SDimitry Andric if (!prepSkipDirectiveEnd()) 7810b57cec5SDimitry Andric return ReturnError(CurPtr, "Only comments are supported after #else"); 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric // If we were processing tokens before this #else, 7840b57cec5SDimitry Andric // we have to start skipping lines until the matching #endif. 7850b57cec5SDimitry Andric if (ReturnNextLiveToken) { 7860b57cec5SDimitry Andric if (prepSkipRegion(ReturnNextLiveToken)) 7870b57cec5SDimitry Andric return LexToken(); 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric return tgtok::Error; 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric // Return to the lines skipping code. 7930b57cec5SDimitry Andric return Kind; 7940b57cec5SDimitry Andric } else if (Kind == tgtok::Endif) { 7950b57cec5SDimitry Andric // Check if this #endif is correct before calling prepSkipDirectiveEnd(), 7960b57cec5SDimitry Andric // which will move CurPtr away from the beginning of #endif. 7970b57cec5SDimitry Andric if (PrepIncludeStack.back()->empty()) 7980b57cec5SDimitry Andric return ReturnError(TokStart, "#endif without #ifdef"); 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric auto &IfdefOrElseEntry = PrepIncludeStack.back()->back(); 8010b57cec5SDimitry Andric 8020b57cec5SDimitry Andric if (IfdefOrElseEntry.Kind != tgtok::Ifdef && 8030b57cec5SDimitry Andric IfdefOrElseEntry.Kind != tgtok::Else) { 8040b57cec5SDimitry Andric PrintFatalError("Invalid preprocessor control on the stack"); 8050b57cec5SDimitry Andric return tgtok::Error; 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric 8080b57cec5SDimitry Andric if (!prepSkipDirectiveEnd()) 8090b57cec5SDimitry Andric return ReturnError(CurPtr, "Only comments are supported after #endif"); 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric PrepIncludeStack.back()->pop_back(); 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric // If we were processing tokens before this #endif, then 8140b57cec5SDimitry Andric // we should continue it. 8150b57cec5SDimitry Andric if (ReturnNextLiveToken) { 8160b57cec5SDimitry Andric return LexToken(); 8170b57cec5SDimitry Andric } 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric // Return to the lines skipping code. 8200b57cec5SDimitry Andric return Kind; 8210b57cec5SDimitry Andric } else if (Kind == tgtok::Define) { 8220b57cec5SDimitry Andric StringRef MacroName = prepLexMacroName(); 8230b57cec5SDimitry Andric if (MacroName.empty()) 8240b57cec5SDimitry Andric return ReturnError(TokStart, "Expected macro name after #define"); 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric if (!DefinedMacros.insert(MacroName).second) 8270b57cec5SDimitry Andric PrintWarning(getLoc(), 8280b57cec5SDimitry Andric "Duplicate definition of macro: " + Twine(MacroName)); 8290b57cec5SDimitry Andric 8300b57cec5SDimitry Andric if (!prepSkipDirectiveEnd()) 8310b57cec5SDimitry Andric return ReturnError(CurPtr, 8320b57cec5SDimitry Andric "Only comments are supported after #define NAME"); 8330b57cec5SDimitry Andric 8340b57cec5SDimitry Andric if (!ReturnNextLiveToken) { 8350b57cec5SDimitry Andric PrintFatalError("#define must be ignored during the lines skipping"); 8360b57cec5SDimitry Andric return tgtok::Error; 8370b57cec5SDimitry Andric } 8380b57cec5SDimitry Andric 8390b57cec5SDimitry Andric return LexToken(); 8400b57cec5SDimitry Andric } 8410b57cec5SDimitry Andric 8420b57cec5SDimitry Andric PrintFatalError("Preprocessing directive is not supported"); 8430b57cec5SDimitry Andric return tgtok::Error; 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { 8470b57cec5SDimitry Andric if (!MustNeverBeFalse) 8480b57cec5SDimitry Andric PrintFatalError("Invalid recursion."); 8490b57cec5SDimitry Andric 8500b57cec5SDimitry Andric do { 8510b57cec5SDimitry Andric // Skip all symbols to the line end. 852*0fca6ea1SDimitry Andric while (*CurPtr != '\n') 853*0fca6ea1SDimitry Andric ++CurPtr; 8540b57cec5SDimitry Andric 8550b57cec5SDimitry Andric // Find the first non-whitespace symbol in the next line(s). 8560b57cec5SDimitry Andric if (!prepSkipLineBegin()) 8570b57cec5SDimitry Andric return false; 8580b57cec5SDimitry Andric 8590b57cec5SDimitry Andric // If the first non-blank/comment symbol on the line is '#', 8600b57cec5SDimitry Andric // it may be a start of preprocessing directive. 8610b57cec5SDimitry Andric // 8620b57cec5SDimitry Andric // If it is not '#' just go to the next line. 8630b57cec5SDimitry Andric if (*CurPtr == '#') 8640b57cec5SDimitry Andric ++CurPtr; 8650b57cec5SDimitry Andric else 8660b57cec5SDimitry Andric continue; 8670b57cec5SDimitry Andric 8680b57cec5SDimitry Andric tgtok::TokKind Kind = prepIsDirective(); 8690b57cec5SDimitry Andric 8700b57cec5SDimitry Andric // If we did not find a preprocessing directive or it is #define, 8710b57cec5SDimitry Andric // then just skip to the next line. We do not have to do anything 8720b57cec5SDimitry Andric // for #define in the line-skipping mode. 8730b57cec5SDimitry Andric if (Kind == tgtok::Error || Kind == tgtok::Define) 8740b57cec5SDimitry Andric continue; 8750b57cec5SDimitry Andric 8760b57cec5SDimitry Andric tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false); 8770b57cec5SDimitry Andric 8780b57cec5SDimitry Andric // If lexPreprocessor() encountered an error during lexing this 8790b57cec5SDimitry Andric // preprocessor idiom, then return false to the calling lexPreprocessor(). 8800b57cec5SDimitry Andric // This will force tgtok::Error to be returned to the tokens processing. 8810b57cec5SDimitry Andric if (ProcessedKind == tgtok::Error) 8820b57cec5SDimitry Andric return false; 8830b57cec5SDimitry Andric 8840b57cec5SDimitry Andric if (Kind != ProcessedKind) 8850b57cec5SDimitry Andric PrintFatalError("prepIsDirective() and lexPreprocessor() " 8860b57cec5SDimitry Andric "returned different token kinds"); 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric // If this preprocessing directive enables tokens processing, 8890b57cec5SDimitry Andric // then return to the lexPreprocessor() and get to the next token. 8900b57cec5SDimitry Andric // We can move from line-skipping mode to processing tokens only 8910b57cec5SDimitry Andric // due to #else or #endif. 8920b57cec5SDimitry Andric if (prepIsProcessingEnabled()) { 8930b57cec5SDimitry Andric if (Kind != tgtok::Else && Kind != tgtok::Endif) { 8940b57cec5SDimitry Andric PrintFatalError("Tokens processing was enabled by an unexpected " 8950b57cec5SDimitry Andric "preprocessing directive"); 8960b57cec5SDimitry Andric return false; 8970b57cec5SDimitry Andric } 8980b57cec5SDimitry Andric 8990b57cec5SDimitry Andric return true; 9000b57cec5SDimitry Andric } 9010b57cec5SDimitry Andric } while (CurPtr != CurBuf.end()); 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric // We have reached the end of the file, but never left the lines-skipping 9040b57cec5SDimitry Andric // mode. This means there is no matching #endif. 9050b57cec5SDimitry Andric prepReportPreprocessorStackError(); 9060b57cec5SDimitry Andric return false; 9070b57cec5SDimitry Andric } 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric StringRef TGLexer::prepLexMacroName() { 9100b57cec5SDimitry Andric // Skip whitespaces between the preprocessing directive and the macro name. 9110b57cec5SDimitry Andric while (*CurPtr == ' ' || *CurPtr == '\t') 9120b57cec5SDimitry Andric ++CurPtr; 9130b57cec5SDimitry Andric 9140b57cec5SDimitry Andric TokStart = CurPtr; 9150b57cec5SDimitry Andric // Macro names start with [a-zA-Z_]. 9160b57cec5SDimitry Andric if (*CurPtr != '_' && !isalpha(*CurPtr)) 9170b57cec5SDimitry Andric return ""; 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric // Match the rest of the identifier regex: [0-9a-zA-Z_]* 9200b57cec5SDimitry Andric while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') 9210b57cec5SDimitry Andric ++CurPtr; 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric return StringRef(TokStart, CurPtr - TokStart); 9240b57cec5SDimitry Andric } 9250b57cec5SDimitry Andric 9260b57cec5SDimitry Andric bool TGLexer::prepSkipLineBegin() { 9270b57cec5SDimitry Andric while (CurPtr != CurBuf.end()) { 9280b57cec5SDimitry Andric switch (*CurPtr) { 9290b57cec5SDimitry Andric case ' ': 9300b57cec5SDimitry Andric case '\t': 9310b57cec5SDimitry Andric case '\n': 9320b57cec5SDimitry Andric case '\r': 9330b57cec5SDimitry Andric break; 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric case '/': { 9360b57cec5SDimitry Andric int NextChar = peekNextChar(1); 9370b57cec5SDimitry Andric if (NextChar == '*') { 9380b57cec5SDimitry Andric // Skip C-style comment. 9390b57cec5SDimitry Andric // Note that we do not care about skipping the C++-style comments. 9400b57cec5SDimitry Andric // If the line contains "//", it may not contain any processable 9410b57cec5SDimitry Andric // preprocessing directive. Just return CurPtr pointing to 9420b57cec5SDimitry Andric // the first '/' in this case. We also do not care about 9430b57cec5SDimitry Andric // incorrect symbols after the first '/' - we are in lines-skipping 9440b57cec5SDimitry Andric // mode, so incorrect code is allowed to some extent. 9450b57cec5SDimitry Andric 9460b57cec5SDimitry Andric // Set TokStart to the beginning of the comment to enable proper 9470b57cec5SDimitry Andric // diagnostic printing in case of error in SkipCComment(). 9480b57cec5SDimitry Andric TokStart = CurPtr; 9490b57cec5SDimitry Andric 9500b57cec5SDimitry Andric // CurPtr must point to '*' before call to SkipCComment(). 9510b57cec5SDimitry Andric ++CurPtr; 9520b57cec5SDimitry Andric if (SkipCComment()) 9530b57cec5SDimitry Andric return false; 9540b57cec5SDimitry Andric } else { 9550b57cec5SDimitry Andric // CurPtr points to the non-whitespace '/'. 9560b57cec5SDimitry Andric return true; 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric 9590b57cec5SDimitry Andric // We must not increment CurPtr after the comment was lexed. 9600b57cec5SDimitry Andric continue; 9610b57cec5SDimitry Andric } 9620b57cec5SDimitry Andric 9630b57cec5SDimitry Andric default: 9640b57cec5SDimitry Andric return true; 9650b57cec5SDimitry Andric } 9660b57cec5SDimitry Andric 9670b57cec5SDimitry Andric ++CurPtr; 9680b57cec5SDimitry Andric } 9690b57cec5SDimitry Andric 9700b57cec5SDimitry Andric // We have reached the end of the file. Return to the lines skipping 9710b57cec5SDimitry Andric // code, and allow it to handle the EOF as needed. 9720b57cec5SDimitry Andric return true; 9730b57cec5SDimitry Andric } 9740b57cec5SDimitry Andric 9750b57cec5SDimitry Andric bool TGLexer::prepSkipDirectiveEnd() { 9760b57cec5SDimitry Andric while (CurPtr != CurBuf.end()) { 9770b57cec5SDimitry Andric switch (*CurPtr) { 9780b57cec5SDimitry Andric case ' ': 9790b57cec5SDimitry Andric case '\t': 9800b57cec5SDimitry Andric break; 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric case '\n': 9830b57cec5SDimitry Andric case '\r': 9840b57cec5SDimitry Andric return true; 9850b57cec5SDimitry Andric 9860b57cec5SDimitry Andric case '/': { 9870b57cec5SDimitry Andric int NextChar = peekNextChar(1); 9880b57cec5SDimitry Andric if (NextChar == '/') { 9890b57cec5SDimitry Andric // Skip C++-style comment. 9900b57cec5SDimitry Andric // We may just return true now, but let's skip to the line/buffer end 9910b57cec5SDimitry Andric // to simplify the method specification. 9920b57cec5SDimitry Andric ++CurPtr; 9930b57cec5SDimitry Andric SkipBCPLComment(); 9940b57cec5SDimitry Andric } else if (NextChar == '*') { 9950b57cec5SDimitry Andric // When we are skipping C-style comment at the end of a preprocessing 9960b57cec5SDimitry Andric // directive, we can skip several lines. If any meaningful TD token 9970b57cec5SDimitry Andric // follows the end of the C-style comment on the same line, it will 9980b57cec5SDimitry Andric // be considered as an invalid usage of TD token. 9990b57cec5SDimitry Andric // For example, we want to forbid usages like this one: 10000b57cec5SDimitry Andric // #define MACRO class Class {} 10010b57cec5SDimitry Andric // But with C-style comments we also disallow the following: 10020b57cec5SDimitry Andric // #define MACRO /* This macro is used 10030b57cec5SDimitry Andric // to ... */ class Class {} 10040b57cec5SDimitry Andric // One can argue that this should be allowed, but it does not seem 10050b57cec5SDimitry Andric // to be worth of the complication. Moreover, this matches 10060b57cec5SDimitry Andric // the C preprocessor behavior. 10070b57cec5SDimitry Andric 10080b57cec5SDimitry Andric // Set TokStart to the beginning of the comment to enable proper 10090b57cec5SDimitry Andric // diagnostic printer in case of error in SkipCComment(). 10100b57cec5SDimitry Andric TokStart = CurPtr; 10110b57cec5SDimitry Andric ++CurPtr; 10120b57cec5SDimitry Andric if (SkipCComment()) 10130b57cec5SDimitry Andric return false; 10140b57cec5SDimitry Andric } else { 10150b57cec5SDimitry Andric TokStart = CurPtr; 10160b57cec5SDimitry Andric PrintError(CurPtr, "Unexpected character"); 10170b57cec5SDimitry Andric return false; 10180b57cec5SDimitry Andric } 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric // We must not increment CurPtr after the comment was lexed. 10210b57cec5SDimitry Andric continue; 10220b57cec5SDimitry Andric } 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric default: 10250b57cec5SDimitry Andric // Do not allow any non-whitespaces after the directive. 10260b57cec5SDimitry Andric TokStart = CurPtr; 10270b57cec5SDimitry Andric return false; 10280b57cec5SDimitry Andric } 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric ++CurPtr; 10310b57cec5SDimitry Andric } 10320b57cec5SDimitry Andric 10330b57cec5SDimitry Andric return true; 10340b57cec5SDimitry Andric } 10350b57cec5SDimitry Andric 10360b57cec5SDimitry Andric bool TGLexer::prepIsProcessingEnabled() { 10374824e7fdSDimitry Andric for (const PreprocessorControlDesc &I : 10384824e7fdSDimitry Andric llvm::reverse(*PrepIncludeStack.back())) 10394824e7fdSDimitry Andric if (!I.IsDefined) 10400b57cec5SDimitry Andric return false; 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andric return true; 10430b57cec5SDimitry Andric } 10440b57cec5SDimitry Andric 10450b57cec5SDimitry Andric void TGLexer::prepReportPreprocessorStackError() { 10460b57cec5SDimitry Andric if (PrepIncludeStack.back()->empty()) 10470b57cec5SDimitry Andric PrintFatalError("prepReportPreprocessorStackError() called with " 10480b57cec5SDimitry Andric "empty control stack"); 10490b57cec5SDimitry Andric 10500b57cec5SDimitry Andric auto &PrepControl = PrepIncludeStack.back()->back(); 10510b57cec5SDimitry Andric PrintError(CurBuf.end(), "Reached EOF without matching #endif"); 10520b57cec5SDimitry Andric PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); 10530b57cec5SDimitry Andric 10540b57cec5SDimitry Andric TokStart = CurPtr; 10550b57cec5SDimitry Andric } 1056