106c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric /// 906c3fb27SDimitry Andric /// \file 1006c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token 1106c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream. 1206c3fb27SDimitry Andric /// 1306c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1406c3fb27SDimitry Andric 1506c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 1606c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 1706c3fb27SDimitry Andric 1806c3fb27SDimitry Andric #include "UnwrappedLineParser.h" 1906c3fb27SDimitry Andric 2006c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source" 2106c3fb27SDimitry Andric 2206c3fb27SDimitry Andric namespace clang { 2306c3fb27SDimitry Andric namespace format { 2406c3fb27SDimitry Andric 2506c3fb27SDimitry Andric // Navigate a token stream. 2606c3fb27SDimitry Andric // 2706c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token 2806c3fb27SDimitry Andric // stream, as well as inserting new tokens. 2906c3fb27SDimitry Andric class FormatTokenSource { 3006c3fb27SDimitry Andric public: 3106c3fb27SDimitry Andric virtual ~FormatTokenSource() {} 3206c3fb27SDimitry Andric 3306c3fb27SDimitry Andric // Returns the next token in the token stream. 3406c3fb27SDimitry Andric virtual FormatToken *getNextToken() = 0; 3506c3fb27SDimitry Andric 3606c3fb27SDimitry Andric // Returns the token preceding the token returned by the last call to 3706c3fb27SDimitry Andric // getNextToken() in the token stream, or nullptr if no such token exists. 3806c3fb27SDimitry Andric // 3906c3fb27SDimitry Andric // Must not be called directly at the position directly after insertTokens() 4006c3fb27SDimitry Andric // is called. 4106c3fb27SDimitry Andric virtual FormatToken *getPreviousToken() = 0; 4206c3fb27SDimitry Andric 4306c3fb27SDimitry Andric // Returns the token that would be returned by the next call to 4406c3fb27SDimitry Andric // getNextToken(). 4506c3fb27SDimitry Andric virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 4606c3fb27SDimitry Andric 4706c3fb27SDimitry Andric // Returns whether we are at the end of the file. 4806c3fb27SDimitry Andric // This can be different from whether getNextToken() returned an eof token 4906c3fb27SDimitry Andric // when the FormatTokenSource is a view on a part of the token stream. 5006c3fb27SDimitry Andric virtual bool isEOF() = 0; 5106c3fb27SDimitry Andric 5206c3fb27SDimitry Andric // Gets the current position in the token stream, to be used by setPosition(). 5306c3fb27SDimitry Andric // 5406c3fb27SDimitry Andric // Note that the value of the position is not meaningful, and specifically 5506c3fb27SDimitry Andric // should not be used to get relative token positions. 5606c3fb27SDimitry Andric virtual unsigned getPosition() = 0; 5706c3fb27SDimitry Andric 5806c3fb27SDimitry Andric // Resets the token stream to the state it was in when getPosition() returned 5906c3fb27SDimitry Andric // Position, and return the token at that position in the stream. 6006c3fb27SDimitry Andric virtual FormatToken *setPosition(unsigned Position) = 0; 6106c3fb27SDimitry Andric 6206c3fb27SDimitry Andric // Insert the given tokens before the current position. 6306c3fb27SDimitry Andric // Returns the first token in \c Tokens. 6406c3fb27SDimitry Andric // The next returned token will be the second token in \c Tokens. 6506c3fb27SDimitry Andric // Requires the last token in Tokens to be EOF; once the EOF token is reached, 6606c3fb27SDimitry Andric // the next token will be the last token returned by getNextToken(); 6706c3fb27SDimitry Andric // 6806c3fb27SDimitry Andric // For example, given the token sequence 'a1 a2': 6906c3fb27SDimitry Andric // getNextToken() -> a1 7006c3fb27SDimitry Andric // insertTokens('b1 b2') -> b1 7106c3fb27SDimitry Andric // getNextToken() -> b2 7206c3fb27SDimitry Andric // getNextToken() -> a1 7306c3fb27SDimitry Andric // getNextToken() -> a2 7406c3fb27SDimitry Andric virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 75*0fca6ea1SDimitry Andric 76*0fca6ea1SDimitry Andric [[nodiscard]] FormatToken *getNextNonComment() { 77*0fca6ea1SDimitry Andric FormatToken *Tok; 78*0fca6ea1SDimitry Andric do { 79*0fca6ea1SDimitry Andric Tok = getNextToken(); 80*0fca6ea1SDimitry Andric assert(Tok); 81*0fca6ea1SDimitry Andric } while (Tok->is(tok::comment)); 82*0fca6ea1SDimitry Andric return Tok; 83*0fca6ea1SDimitry Andric } 8406c3fb27SDimitry Andric }; 8506c3fb27SDimitry Andric 8606c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource { 8706c3fb27SDimitry Andric public: 8806c3fb27SDimitry Andric IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 8906c3fb27SDimitry Andric : Tokens(Tokens), Position(-1) {} 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric FormatToken *getNextToken() override { 9206c3fb27SDimitry Andric if (Position >= 0 && isEOF()) { 9306c3fb27SDimitry Andric LLVM_DEBUG({ 9406c3fb27SDimitry Andric llvm::dbgs() << "Next "; 9506c3fb27SDimitry Andric dbgToken(Position); 9606c3fb27SDimitry Andric }); 9706c3fb27SDimitry Andric return Tokens[Position]; 9806c3fb27SDimitry Andric } 9906c3fb27SDimitry Andric Position = successor(Position); 10006c3fb27SDimitry Andric LLVM_DEBUG({ 10106c3fb27SDimitry Andric llvm::dbgs() << "Next "; 10206c3fb27SDimitry Andric dbgToken(Position); 10306c3fb27SDimitry Andric }); 10406c3fb27SDimitry Andric return Tokens[Position]; 10506c3fb27SDimitry Andric } 10606c3fb27SDimitry Andric 10706c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 1085f757f3fSDimitry Andric assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); 10906c3fb27SDimitry Andric return Position > 0 ? Tokens[Position - 1] : nullptr; 11006c3fb27SDimitry Andric } 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment = false) override { 11306c3fb27SDimitry Andric if (isEOF()) 11406c3fb27SDimitry Andric return Tokens[Position]; 11506c3fb27SDimitry Andric int Next = successor(Position); 11606c3fb27SDimitry Andric if (SkipComment) 11706c3fb27SDimitry Andric while (Tokens[Next]->is(tok::comment)) 11806c3fb27SDimitry Andric Next = successor(Next); 11906c3fb27SDimitry Andric LLVM_DEBUG({ 12006c3fb27SDimitry Andric llvm::dbgs() << "Peeking "; 12106c3fb27SDimitry Andric dbgToken(Next); 12206c3fb27SDimitry Andric }); 12306c3fb27SDimitry Andric return Tokens[Next]; 12406c3fb27SDimitry Andric } 12506c3fb27SDimitry Andric 12606c3fb27SDimitry Andric bool isEOF() override { 12706c3fb27SDimitry Andric return Position == -1 ? false : Tokens[Position]->is(tok::eof); 12806c3fb27SDimitry Andric } 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric unsigned getPosition() override { 13106c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 13206c3fb27SDimitry Andric assert(Position >= 0); 13306c3fb27SDimitry Andric return Position; 13406c3fb27SDimitry Andric } 13506c3fb27SDimitry Andric 13606c3fb27SDimitry Andric FormatToken *setPosition(unsigned P) override { 13706c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 13806c3fb27SDimitry Andric Position = P; 13906c3fb27SDimitry Andric return Tokens[Position]; 14006c3fb27SDimitry Andric } 14106c3fb27SDimitry Andric 14206c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 14306c3fb27SDimitry Andric assert(Position != -1); 14406c3fb27SDimitry Andric assert((*New.rbegin())->Tok.is(tok::eof)); 14506c3fb27SDimitry Andric int Next = Tokens.size(); 14606c3fb27SDimitry Andric Tokens.append(New.begin(), New.end()); 14706c3fb27SDimitry Andric LLVM_DEBUG({ 14806c3fb27SDimitry Andric llvm::dbgs() << "Inserting:\n"; 14906c3fb27SDimitry Andric for (int I = Next, E = Tokens.size(); I != E; ++I) 15006c3fb27SDimitry Andric dbgToken(I, " "); 15106c3fb27SDimitry Andric llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 15206c3fb27SDimitry Andric << Position << "\n"; 15306c3fb27SDimitry Andric }); 15406c3fb27SDimitry Andric Jumps[Tokens.size() - 1] = Position; 15506c3fb27SDimitry Andric Position = Next; 15606c3fb27SDimitry Andric LLVM_DEBUG({ 15706c3fb27SDimitry Andric llvm::dbgs() << "At inserted token "; 15806c3fb27SDimitry Andric dbgToken(Position); 15906c3fb27SDimitry Andric }); 16006c3fb27SDimitry Andric return Tokens[Position]; 16106c3fb27SDimitry Andric } 16206c3fb27SDimitry Andric 16306c3fb27SDimitry Andric void reset() { Position = -1; } 16406c3fb27SDimitry Andric 16506c3fb27SDimitry Andric private: 16606c3fb27SDimitry Andric int successor(int Current) const { 16706c3fb27SDimitry Andric int Next = Current + 1; 16806c3fb27SDimitry Andric auto it = Jumps.find(Next); 16906c3fb27SDimitry Andric if (it != Jumps.end()) { 17006c3fb27SDimitry Andric Next = it->second; 17106c3fb27SDimitry Andric assert(!Jumps.contains(Next)); 17206c3fb27SDimitry Andric } 17306c3fb27SDimitry Andric return Next; 17406c3fb27SDimitry Andric } 17506c3fb27SDimitry Andric 176*0fca6ea1SDimitry Andric void dbgToken(int Position, StringRef Indent = "") { 17706c3fb27SDimitry Andric FormatToken *Tok = Tokens[Position]; 17806c3fb27SDimitry Andric llvm::dbgs() << Indent << "[" << Position 17906c3fb27SDimitry Andric << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 18006c3fb27SDimitry Andric << ", Macro: " << !!Tok->MacroCtx << "\n"; 18106c3fb27SDimitry Andric } 18206c3fb27SDimitry Andric 18306c3fb27SDimitry Andric SmallVector<FormatToken *> Tokens; 18406c3fb27SDimitry Andric int Position; 18506c3fb27SDimitry Andric 18606c3fb27SDimitry Andric // Maps from position a to position b, so that when we reach a, the token 18706c3fb27SDimitry Andric // stream continues at position b instead. 18806c3fb27SDimitry Andric llvm::DenseMap<int, int> Jumps; 18906c3fb27SDimitry Andric }; 19006c3fb27SDimitry Andric 19106c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource { 19206c3fb27SDimitry Andric public: 19306c3fb27SDimitry Andric ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 19406c3fb27SDimitry Andric FormatToken *&ResetToken) 19506c3fb27SDimitry Andric : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 19606c3fb27SDimitry Andric PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 19706c3fb27SDimitry Andric Token(nullptr), PreviousToken(nullptr) { 19806c3fb27SDimitry Andric FakeEOF.Tok.startToken(); 19906c3fb27SDimitry Andric FakeEOF.Tok.setKind(tok::eof); 20006c3fb27SDimitry Andric TokenSource = this; 20106c3fb27SDimitry Andric Line.Level = 0; 20206c3fb27SDimitry Andric Line.InPPDirective = true; 20306c3fb27SDimitry Andric // InMacroBody gets set after the `#define x` part. 20406c3fb27SDimitry Andric } 20506c3fb27SDimitry Andric 20606c3fb27SDimitry Andric ~ScopedMacroState() override { 20706c3fb27SDimitry Andric TokenSource = PreviousTokenSource; 20806c3fb27SDimitry Andric ResetToken = Token; 20906c3fb27SDimitry Andric Line.InPPDirective = false; 21006c3fb27SDimitry Andric Line.InMacroBody = false; 21106c3fb27SDimitry Andric Line.Level = PreviousLineLevel; 21206c3fb27SDimitry Andric } 21306c3fb27SDimitry Andric 21406c3fb27SDimitry Andric FormatToken *getNextToken() override { 21506c3fb27SDimitry Andric // The \c UnwrappedLineParser guards against this by never calling 21606c3fb27SDimitry Andric // \c getNextToken() after it has encountered the first eof token. 21706c3fb27SDimitry Andric assert(!eof()); 21806c3fb27SDimitry Andric PreviousToken = Token; 21906c3fb27SDimitry Andric Token = PreviousTokenSource->getNextToken(); 22006c3fb27SDimitry Andric if (eof()) 22106c3fb27SDimitry Andric return &FakeEOF; 22206c3fb27SDimitry Andric return Token; 22306c3fb27SDimitry Andric } 22406c3fb27SDimitry Andric 22506c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 22606c3fb27SDimitry Andric return PreviousTokenSource->getPreviousToken(); 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric 22906c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment) override { 23006c3fb27SDimitry Andric if (eof()) 23106c3fb27SDimitry Andric return &FakeEOF; 23206c3fb27SDimitry Andric return PreviousTokenSource->peekNextToken(SkipComment); 23306c3fb27SDimitry Andric } 23406c3fb27SDimitry Andric 23506c3fb27SDimitry Andric bool isEOF() override { return PreviousTokenSource->isEOF(); } 23606c3fb27SDimitry Andric 23706c3fb27SDimitry Andric unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric FormatToken *setPosition(unsigned Position) override { 24006c3fb27SDimitry Andric PreviousToken = nullptr; 24106c3fb27SDimitry Andric Token = PreviousTokenSource->setPosition(Position); 24206c3fb27SDimitry Andric return Token; 24306c3fb27SDimitry Andric } 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 24606c3fb27SDimitry Andric llvm_unreachable("Cannot insert tokens while parsing a macro."); 24706c3fb27SDimitry Andric return nullptr; 24806c3fb27SDimitry Andric } 24906c3fb27SDimitry Andric 25006c3fb27SDimitry Andric private: 25106c3fb27SDimitry Andric bool eof() { 25206c3fb27SDimitry Andric return Token && Token->HasUnescapedNewline && 25306c3fb27SDimitry Andric !continuesLineComment(*Token, PreviousToken, 25406c3fb27SDimitry Andric /*MinColumnToken=*/PreviousToken); 25506c3fb27SDimitry Andric } 25606c3fb27SDimitry Andric 25706c3fb27SDimitry Andric FormatToken FakeEOF; 25806c3fb27SDimitry Andric UnwrappedLine &Line; 25906c3fb27SDimitry Andric FormatTokenSource *&TokenSource; 26006c3fb27SDimitry Andric FormatToken *&ResetToken; 26106c3fb27SDimitry Andric unsigned PreviousLineLevel; 26206c3fb27SDimitry Andric FormatTokenSource *PreviousTokenSource; 26306c3fb27SDimitry Andric 26406c3fb27SDimitry Andric FormatToken *Token; 26506c3fb27SDimitry Andric FormatToken *PreviousToken; 26606c3fb27SDimitry Andric }; 26706c3fb27SDimitry Andric 26806c3fb27SDimitry Andric } // namespace format 26906c3fb27SDimitry Andric } // namespace clang 27006c3fb27SDimitry Andric 27106c3fb27SDimitry Andric #undef DEBUG_TYPE 27206c3fb27SDimitry Andric 27306c3fb27SDimitry Andric #endif 274