1*06c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 2*06c3fb27SDimitry Andric // 3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 8*06c3fb27SDimitry Andric /// 9*06c3fb27SDimitry Andric /// \file 10*06c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token 11*06c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream. 12*06c3fb27SDimitry Andric /// 13*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 14*06c3fb27SDimitry Andric 15*06c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 16*06c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 17*06c3fb27SDimitry Andric 18*06c3fb27SDimitry Andric #include "FormatToken.h" 19*06c3fb27SDimitry Andric #include "UnwrappedLineParser.h" 20*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 21*06c3fb27SDimitry Andric #include <cstddef> 22*06c3fb27SDimitry Andric 23*06c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source" 24*06c3fb27SDimitry Andric 25*06c3fb27SDimitry Andric namespace clang { 26*06c3fb27SDimitry Andric namespace format { 27*06c3fb27SDimitry Andric 28*06c3fb27SDimitry Andric // Navigate a token stream. 29*06c3fb27SDimitry Andric // 30*06c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token 31*06c3fb27SDimitry Andric // stream, as well as inserting new tokens. 32*06c3fb27SDimitry Andric class FormatTokenSource { 33*06c3fb27SDimitry Andric public: 34*06c3fb27SDimitry Andric virtual ~FormatTokenSource() {} 35*06c3fb27SDimitry Andric 36*06c3fb27SDimitry Andric // Returns the next token in the token stream. 37*06c3fb27SDimitry Andric virtual FormatToken *getNextToken() = 0; 38*06c3fb27SDimitry Andric 39*06c3fb27SDimitry Andric // Returns the token preceding the token returned by the last call to 40*06c3fb27SDimitry Andric // getNextToken() in the token stream, or nullptr if no such token exists. 41*06c3fb27SDimitry Andric // 42*06c3fb27SDimitry Andric // Must not be called directly at the position directly after insertTokens() 43*06c3fb27SDimitry Andric // is called. 44*06c3fb27SDimitry Andric virtual FormatToken *getPreviousToken() = 0; 45*06c3fb27SDimitry Andric 46*06c3fb27SDimitry Andric // Returns the token that would be returned by the next call to 47*06c3fb27SDimitry Andric // getNextToken(). 48*06c3fb27SDimitry Andric virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 49*06c3fb27SDimitry Andric 50*06c3fb27SDimitry Andric // Returns whether we are at the end of the file. 51*06c3fb27SDimitry Andric // This can be different from whether getNextToken() returned an eof token 52*06c3fb27SDimitry Andric // when the FormatTokenSource is a view on a part of the token stream. 53*06c3fb27SDimitry Andric virtual bool isEOF() = 0; 54*06c3fb27SDimitry Andric 55*06c3fb27SDimitry Andric // Gets the current position in the token stream, to be used by setPosition(). 56*06c3fb27SDimitry Andric // 57*06c3fb27SDimitry Andric // Note that the value of the position is not meaningful, and specifically 58*06c3fb27SDimitry Andric // should not be used to get relative token positions. 59*06c3fb27SDimitry Andric virtual unsigned getPosition() = 0; 60*06c3fb27SDimitry Andric 61*06c3fb27SDimitry Andric // Resets the token stream to the state it was in when getPosition() returned 62*06c3fb27SDimitry Andric // Position, and return the token at that position in the stream. 63*06c3fb27SDimitry Andric virtual FormatToken *setPosition(unsigned Position) = 0; 64*06c3fb27SDimitry Andric 65*06c3fb27SDimitry Andric // Insert the given tokens before the current position. 66*06c3fb27SDimitry Andric // Returns the first token in \c Tokens. 67*06c3fb27SDimitry Andric // The next returned token will be the second token in \c Tokens. 68*06c3fb27SDimitry Andric // Requires the last token in Tokens to be EOF; once the EOF token is reached, 69*06c3fb27SDimitry Andric // the next token will be the last token returned by getNextToken(); 70*06c3fb27SDimitry Andric // 71*06c3fb27SDimitry Andric // For example, given the token sequence 'a1 a2': 72*06c3fb27SDimitry Andric // getNextToken() -> a1 73*06c3fb27SDimitry Andric // insertTokens('b1 b2') -> b1 74*06c3fb27SDimitry Andric // getNextToken() -> b2 75*06c3fb27SDimitry Andric // getNextToken() -> a1 76*06c3fb27SDimitry Andric // getNextToken() -> a2 77*06c3fb27SDimitry Andric virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 78*06c3fb27SDimitry Andric }; 79*06c3fb27SDimitry Andric 80*06c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource { 81*06c3fb27SDimitry Andric public: 82*06c3fb27SDimitry Andric IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 83*06c3fb27SDimitry Andric : Tokens(Tokens), Position(-1) {} 84*06c3fb27SDimitry Andric 85*06c3fb27SDimitry Andric FormatToken *getNextToken() override { 86*06c3fb27SDimitry Andric if (Position >= 0 && isEOF()) { 87*06c3fb27SDimitry Andric LLVM_DEBUG({ 88*06c3fb27SDimitry Andric llvm::dbgs() << "Next "; 89*06c3fb27SDimitry Andric dbgToken(Position); 90*06c3fb27SDimitry Andric }); 91*06c3fb27SDimitry Andric return Tokens[Position]; 92*06c3fb27SDimitry Andric } 93*06c3fb27SDimitry Andric Position = successor(Position); 94*06c3fb27SDimitry Andric LLVM_DEBUG({ 95*06c3fb27SDimitry Andric llvm::dbgs() << "Next "; 96*06c3fb27SDimitry Andric dbgToken(Position); 97*06c3fb27SDimitry Andric }); 98*06c3fb27SDimitry Andric return Tokens[Position]; 99*06c3fb27SDimitry Andric } 100*06c3fb27SDimitry Andric 101*06c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 102*06c3fb27SDimitry Andric assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof)); 103*06c3fb27SDimitry Andric return Position > 0 ? Tokens[Position - 1] : nullptr; 104*06c3fb27SDimitry Andric } 105*06c3fb27SDimitry Andric 106*06c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment = false) override { 107*06c3fb27SDimitry Andric if (isEOF()) 108*06c3fb27SDimitry Andric return Tokens[Position]; 109*06c3fb27SDimitry Andric int Next = successor(Position); 110*06c3fb27SDimitry Andric if (SkipComment) 111*06c3fb27SDimitry Andric while (Tokens[Next]->is(tok::comment)) 112*06c3fb27SDimitry Andric Next = successor(Next); 113*06c3fb27SDimitry Andric LLVM_DEBUG({ 114*06c3fb27SDimitry Andric llvm::dbgs() << "Peeking "; 115*06c3fb27SDimitry Andric dbgToken(Next); 116*06c3fb27SDimitry Andric }); 117*06c3fb27SDimitry Andric return Tokens[Next]; 118*06c3fb27SDimitry Andric } 119*06c3fb27SDimitry Andric 120*06c3fb27SDimitry Andric bool isEOF() override { 121*06c3fb27SDimitry Andric return Position == -1 ? false : Tokens[Position]->is(tok::eof); 122*06c3fb27SDimitry Andric } 123*06c3fb27SDimitry Andric 124*06c3fb27SDimitry Andric unsigned getPosition() override { 125*06c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 126*06c3fb27SDimitry Andric assert(Position >= 0); 127*06c3fb27SDimitry Andric return Position; 128*06c3fb27SDimitry Andric } 129*06c3fb27SDimitry Andric 130*06c3fb27SDimitry Andric FormatToken *setPosition(unsigned P) override { 131*06c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 132*06c3fb27SDimitry Andric Position = P; 133*06c3fb27SDimitry Andric return Tokens[Position]; 134*06c3fb27SDimitry Andric } 135*06c3fb27SDimitry Andric 136*06c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 137*06c3fb27SDimitry Andric assert(Position != -1); 138*06c3fb27SDimitry Andric assert((*New.rbegin())->Tok.is(tok::eof)); 139*06c3fb27SDimitry Andric int Next = Tokens.size(); 140*06c3fb27SDimitry Andric Tokens.append(New.begin(), New.end()); 141*06c3fb27SDimitry Andric LLVM_DEBUG({ 142*06c3fb27SDimitry Andric llvm::dbgs() << "Inserting:\n"; 143*06c3fb27SDimitry Andric for (int I = Next, E = Tokens.size(); I != E; ++I) 144*06c3fb27SDimitry Andric dbgToken(I, " "); 145*06c3fb27SDimitry Andric llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 146*06c3fb27SDimitry Andric << Position << "\n"; 147*06c3fb27SDimitry Andric }); 148*06c3fb27SDimitry Andric Jumps[Tokens.size() - 1] = Position; 149*06c3fb27SDimitry Andric Position = Next; 150*06c3fb27SDimitry Andric LLVM_DEBUG({ 151*06c3fb27SDimitry Andric llvm::dbgs() << "At inserted token "; 152*06c3fb27SDimitry Andric dbgToken(Position); 153*06c3fb27SDimitry Andric }); 154*06c3fb27SDimitry Andric return Tokens[Position]; 155*06c3fb27SDimitry Andric } 156*06c3fb27SDimitry Andric 157*06c3fb27SDimitry Andric void reset() { Position = -1; } 158*06c3fb27SDimitry Andric 159*06c3fb27SDimitry Andric private: 160*06c3fb27SDimitry Andric int successor(int Current) const { 161*06c3fb27SDimitry Andric int Next = Current + 1; 162*06c3fb27SDimitry Andric auto it = Jumps.find(Next); 163*06c3fb27SDimitry Andric if (it != Jumps.end()) { 164*06c3fb27SDimitry Andric Next = it->second; 165*06c3fb27SDimitry Andric assert(!Jumps.contains(Next)); 166*06c3fb27SDimitry Andric } 167*06c3fb27SDimitry Andric return Next; 168*06c3fb27SDimitry Andric } 169*06c3fb27SDimitry Andric 170*06c3fb27SDimitry Andric void dbgToken(int Position, llvm::StringRef Indent = "") { 171*06c3fb27SDimitry Andric FormatToken *Tok = Tokens[Position]; 172*06c3fb27SDimitry Andric llvm::dbgs() << Indent << "[" << Position 173*06c3fb27SDimitry Andric << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 174*06c3fb27SDimitry Andric << ", Macro: " << !!Tok->MacroCtx << "\n"; 175*06c3fb27SDimitry Andric } 176*06c3fb27SDimitry Andric 177*06c3fb27SDimitry Andric SmallVector<FormatToken *> Tokens; 178*06c3fb27SDimitry Andric int Position; 179*06c3fb27SDimitry Andric 180*06c3fb27SDimitry Andric // Maps from position a to position b, so that when we reach a, the token 181*06c3fb27SDimitry Andric // stream continues at position b instead. 182*06c3fb27SDimitry Andric llvm::DenseMap<int, int> Jumps; 183*06c3fb27SDimitry Andric }; 184*06c3fb27SDimitry Andric 185*06c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource { 186*06c3fb27SDimitry Andric public: 187*06c3fb27SDimitry Andric ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 188*06c3fb27SDimitry Andric FormatToken *&ResetToken) 189*06c3fb27SDimitry Andric : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 190*06c3fb27SDimitry Andric PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 191*06c3fb27SDimitry Andric Token(nullptr), PreviousToken(nullptr) { 192*06c3fb27SDimitry Andric FakeEOF.Tok.startToken(); 193*06c3fb27SDimitry Andric FakeEOF.Tok.setKind(tok::eof); 194*06c3fb27SDimitry Andric TokenSource = this; 195*06c3fb27SDimitry Andric Line.Level = 0; 196*06c3fb27SDimitry Andric Line.InPPDirective = true; 197*06c3fb27SDimitry Andric // InMacroBody gets set after the `#define x` part. 198*06c3fb27SDimitry Andric } 199*06c3fb27SDimitry Andric 200*06c3fb27SDimitry Andric ~ScopedMacroState() override { 201*06c3fb27SDimitry Andric TokenSource = PreviousTokenSource; 202*06c3fb27SDimitry Andric ResetToken = Token; 203*06c3fb27SDimitry Andric Line.InPPDirective = false; 204*06c3fb27SDimitry Andric Line.InMacroBody = false; 205*06c3fb27SDimitry Andric Line.Level = PreviousLineLevel; 206*06c3fb27SDimitry Andric } 207*06c3fb27SDimitry Andric 208*06c3fb27SDimitry Andric FormatToken *getNextToken() override { 209*06c3fb27SDimitry Andric // The \c UnwrappedLineParser guards against this by never calling 210*06c3fb27SDimitry Andric // \c getNextToken() after it has encountered the first eof token. 211*06c3fb27SDimitry Andric assert(!eof()); 212*06c3fb27SDimitry Andric PreviousToken = Token; 213*06c3fb27SDimitry Andric Token = PreviousTokenSource->getNextToken(); 214*06c3fb27SDimitry Andric if (eof()) 215*06c3fb27SDimitry Andric return &FakeEOF; 216*06c3fb27SDimitry Andric return Token; 217*06c3fb27SDimitry Andric } 218*06c3fb27SDimitry Andric 219*06c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 220*06c3fb27SDimitry Andric return PreviousTokenSource->getPreviousToken(); 221*06c3fb27SDimitry Andric } 222*06c3fb27SDimitry Andric 223*06c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment) override { 224*06c3fb27SDimitry Andric if (eof()) 225*06c3fb27SDimitry Andric return &FakeEOF; 226*06c3fb27SDimitry Andric return PreviousTokenSource->peekNextToken(SkipComment); 227*06c3fb27SDimitry Andric } 228*06c3fb27SDimitry Andric 229*06c3fb27SDimitry Andric bool isEOF() override { return PreviousTokenSource->isEOF(); } 230*06c3fb27SDimitry Andric 231*06c3fb27SDimitry Andric unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 232*06c3fb27SDimitry Andric 233*06c3fb27SDimitry Andric FormatToken *setPosition(unsigned Position) override { 234*06c3fb27SDimitry Andric PreviousToken = nullptr; 235*06c3fb27SDimitry Andric Token = PreviousTokenSource->setPosition(Position); 236*06c3fb27SDimitry Andric return Token; 237*06c3fb27SDimitry Andric } 238*06c3fb27SDimitry Andric 239*06c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 240*06c3fb27SDimitry Andric llvm_unreachable("Cannot insert tokens while parsing a macro."); 241*06c3fb27SDimitry Andric return nullptr; 242*06c3fb27SDimitry Andric } 243*06c3fb27SDimitry Andric 244*06c3fb27SDimitry Andric private: 245*06c3fb27SDimitry Andric bool eof() { 246*06c3fb27SDimitry Andric return Token && Token->HasUnescapedNewline && 247*06c3fb27SDimitry Andric !continuesLineComment(*Token, PreviousToken, 248*06c3fb27SDimitry Andric /*MinColumnToken=*/PreviousToken); 249*06c3fb27SDimitry Andric } 250*06c3fb27SDimitry Andric 251*06c3fb27SDimitry Andric FormatToken FakeEOF; 252*06c3fb27SDimitry Andric UnwrappedLine &Line; 253*06c3fb27SDimitry Andric FormatTokenSource *&TokenSource; 254*06c3fb27SDimitry Andric FormatToken *&ResetToken; 255*06c3fb27SDimitry Andric unsigned PreviousLineLevel; 256*06c3fb27SDimitry Andric FormatTokenSource *PreviousTokenSource; 257*06c3fb27SDimitry Andric 258*06c3fb27SDimitry Andric FormatToken *Token; 259*06c3fb27SDimitry Andric FormatToken *PreviousToken; 260*06c3fb27SDimitry Andric }; 261*06c3fb27SDimitry Andric 262*06c3fb27SDimitry Andric } // namespace format 263*06c3fb27SDimitry Andric } // namespace clang 264*06c3fb27SDimitry Andric 265*06c3fb27SDimitry Andric #undef DEBUG_TYPE 266*06c3fb27SDimitry Andric 267*06c3fb27SDimitry Andric #endif 268