106c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric /// 906c3fb27SDimitry Andric /// \file 1006c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token 1106c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream. 1206c3fb27SDimitry Andric /// 1306c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 1406c3fb27SDimitry Andric 1506c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 1606c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 1706c3fb27SDimitry Andric 1806c3fb27SDimitry Andric #include "FormatToken.h" 1906c3fb27SDimitry Andric #include "UnwrappedLineParser.h" 2006c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 2106c3fb27SDimitry Andric #include <cstddef> 2206c3fb27SDimitry Andric 2306c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source" 2406c3fb27SDimitry Andric 2506c3fb27SDimitry Andric namespace clang { 2606c3fb27SDimitry Andric namespace format { 2706c3fb27SDimitry Andric 2806c3fb27SDimitry Andric // Navigate a token stream. 2906c3fb27SDimitry Andric // 3006c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token 3106c3fb27SDimitry Andric // stream, as well as inserting new tokens. 3206c3fb27SDimitry Andric class FormatTokenSource { 3306c3fb27SDimitry Andric public: 3406c3fb27SDimitry Andric virtual ~FormatTokenSource() {} 3506c3fb27SDimitry Andric 3606c3fb27SDimitry Andric // Returns the next token in the token stream. 3706c3fb27SDimitry Andric virtual FormatToken *getNextToken() = 0; 3806c3fb27SDimitry Andric 3906c3fb27SDimitry Andric // Returns the token preceding the token returned by the last call to 4006c3fb27SDimitry Andric // getNextToken() in the token stream, or nullptr if no such token exists. 4106c3fb27SDimitry Andric // 4206c3fb27SDimitry Andric // Must not be called directly at the position directly after insertTokens() 4306c3fb27SDimitry Andric // is called. 4406c3fb27SDimitry Andric virtual FormatToken *getPreviousToken() = 0; 4506c3fb27SDimitry Andric 4606c3fb27SDimitry Andric // Returns the token that would be returned by the next call to 4706c3fb27SDimitry Andric // getNextToken(). 4806c3fb27SDimitry Andric virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 4906c3fb27SDimitry Andric 5006c3fb27SDimitry Andric // Returns whether we are at the end of the file. 5106c3fb27SDimitry Andric // This can be different from whether getNextToken() returned an eof token 5206c3fb27SDimitry Andric // when the FormatTokenSource is a view on a part of the token stream. 5306c3fb27SDimitry Andric virtual bool isEOF() = 0; 5406c3fb27SDimitry Andric 5506c3fb27SDimitry Andric // Gets the current position in the token stream, to be used by setPosition(). 5606c3fb27SDimitry Andric // 5706c3fb27SDimitry Andric // Note that the value of the position is not meaningful, and specifically 5806c3fb27SDimitry Andric // should not be used to get relative token positions. 5906c3fb27SDimitry Andric virtual unsigned getPosition() = 0; 6006c3fb27SDimitry Andric 6106c3fb27SDimitry Andric // Resets the token stream to the state it was in when getPosition() returned 6206c3fb27SDimitry Andric // Position, and return the token at that position in the stream. 6306c3fb27SDimitry Andric virtual FormatToken *setPosition(unsigned Position) = 0; 6406c3fb27SDimitry Andric 6506c3fb27SDimitry Andric // Insert the given tokens before the current position. 6606c3fb27SDimitry Andric // Returns the first token in \c Tokens. 6706c3fb27SDimitry Andric // The next returned token will be the second token in \c Tokens. 6806c3fb27SDimitry Andric // Requires the last token in Tokens to be EOF; once the EOF token is reached, 6906c3fb27SDimitry Andric // the next token will be the last token returned by getNextToken(); 7006c3fb27SDimitry Andric // 7106c3fb27SDimitry Andric // For example, given the token sequence 'a1 a2': 7206c3fb27SDimitry Andric // getNextToken() -> a1 7306c3fb27SDimitry Andric // insertTokens('b1 b2') -> b1 7406c3fb27SDimitry Andric // getNextToken() -> b2 7506c3fb27SDimitry Andric // getNextToken() -> a1 7606c3fb27SDimitry Andric // getNextToken() -> a2 7706c3fb27SDimitry Andric virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 7806c3fb27SDimitry Andric }; 7906c3fb27SDimitry Andric 8006c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource { 8106c3fb27SDimitry Andric public: 8206c3fb27SDimitry Andric IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 8306c3fb27SDimitry Andric : Tokens(Tokens), Position(-1) {} 8406c3fb27SDimitry Andric 8506c3fb27SDimitry Andric FormatToken *getNextToken() override { 8606c3fb27SDimitry Andric if (Position >= 0 && isEOF()) { 8706c3fb27SDimitry Andric LLVM_DEBUG({ 8806c3fb27SDimitry Andric llvm::dbgs() << "Next "; 8906c3fb27SDimitry Andric dbgToken(Position); 9006c3fb27SDimitry Andric }); 9106c3fb27SDimitry Andric return Tokens[Position]; 9206c3fb27SDimitry Andric } 9306c3fb27SDimitry Andric Position = successor(Position); 9406c3fb27SDimitry Andric LLVM_DEBUG({ 9506c3fb27SDimitry Andric llvm::dbgs() << "Next "; 9606c3fb27SDimitry Andric dbgToken(Position); 9706c3fb27SDimitry Andric }); 9806c3fb27SDimitry Andric return Tokens[Position]; 9906c3fb27SDimitry Andric } 10006c3fb27SDimitry Andric 10106c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 102*5f757f3fSDimitry Andric assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); 10306c3fb27SDimitry Andric return Position > 0 ? Tokens[Position - 1] : nullptr; 10406c3fb27SDimitry Andric } 10506c3fb27SDimitry Andric 10606c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment = false) override { 10706c3fb27SDimitry Andric if (isEOF()) 10806c3fb27SDimitry Andric return Tokens[Position]; 10906c3fb27SDimitry Andric int Next = successor(Position); 11006c3fb27SDimitry Andric if (SkipComment) 11106c3fb27SDimitry Andric while (Tokens[Next]->is(tok::comment)) 11206c3fb27SDimitry Andric Next = successor(Next); 11306c3fb27SDimitry Andric LLVM_DEBUG({ 11406c3fb27SDimitry Andric llvm::dbgs() << "Peeking "; 11506c3fb27SDimitry Andric dbgToken(Next); 11606c3fb27SDimitry Andric }); 11706c3fb27SDimitry Andric return Tokens[Next]; 11806c3fb27SDimitry Andric } 11906c3fb27SDimitry Andric 12006c3fb27SDimitry Andric bool isEOF() override { 12106c3fb27SDimitry Andric return Position == -1 ? false : Tokens[Position]->is(tok::eof); 12206c3fb27SDimitry Andric } 12306c3fb27SDimitry Andric 12406c3fb27SDimitry Andric unsigned getPosition() override { 12506c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 12606c3fb27SDimitry Andric assert(Position >= 0); 12706c3fb27SDimitry Andric return Position; 12806c3fb27SDimitry Andric } 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric FormatToken *setPosition(unsigned P) override { 13106c3fb27SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 13206c3fb27SDimitry Andric Position = P; 13306c3fb27SDimitry Andric return Tokens[Position]; 13406c3fb27SDimitry Andric } 13506c3fb27SDimitry Andric 13606c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 13706c3fb27SDimitry Andric assert(Position != -1); 13806c3fb27SDimitry Andric assert((*New.rbegin())->Tok.is(tok::eof)); 13906c3fb27SDimitry Andric int Next = Tokens.size(); 14006c3fb27SDimitry Andric Tokens.append(New.begin(), New.end()); 14106c3fb27SDimitry Andric LLVM_DEBUG({ 14206c3fb27SDimitry Andric llvm::dbgs() << "Inserting:\n"; 14306c3fb27SDimitry Andric for (int I = Next, E = Tokens.size(); I != E; ++I) 14406c3fb27SDimitry Andric dbgToken(I, " "); 14506c3fb27SDimitry Andric llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 14606c3fb27SDimitry Andric << Position << "\n"; 14706c3fb27SDimitry Andric }); 14806c3fb27SDimitry Andric Jumps[Tokens.size() - 1] = Position; 14906c3fb27SDimitry Andric Position = Next; 15006c3fb27SDimitry Andric LLVM_DEBUG({ 15106c3fb27SDimitry Andric llvm::dbgs() << "At inserted token "; 15206c3fb27SDimitry Andric dbgToken(Position); 15306c3fb27SDimitry Andric }); 15406c3fb27SDimitry Andric return Tokens[Position]; 15506c3fb27SDimitry Andric } 15606c3fb27SDimitry Andric 15706c3fb27SDimitry Andric void reset() { Position = -1; } 15806c3fb27SDimitry Andric 15906c3fb27SDimitry Andric private: 16006c3fb27SDimitry Andric int successor(int Current) const { 16106c3fb27SDimitry Andric int Next = Current + 1; 16206c3fb27SDimitry Andric auto it = Jumps.find(Next); 16306c3fb27SDimitry Andric if (it != Jumps.end()) { 16406c3fb27SDimitry Andric Next = it->second; 16506c3fb27SDimitry Andric assert(!Jumps.contains(Next)); 16606c3fb27SDimitry Andric } 16706c3fb27SDimitry Andric return Next; 16806c3fb27SDimitry Andric } 16906c3fb27SDimitry Andric 17006c3fb27SDimitry Andric void dbgToken(int Position, llvm::StringRef Indent = "") { 17106c3fb27SDimitry Andric FormatToken *Tok = Tokens[Position]; 17206c3fb27SDimitry Andric llvm::dbgs() << Indent << "[" << Position 17306c3fb27SDimitry Andric << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 17406c3fb27SDimitry Andric << ", Macro: " << !!Tok->MacroCtx << "\n"; 17506c3fb27SDimitry Andric } 17606c3fb27SDimitry Andric 17706c3fb27SDimitry Andric SmallVector<FormatToken *> Tokens; 17806c3fb27SDimitry Andric int Position; 17906c3fb27SDimitry Andric 18006c3fb27SDimitry Andric // Maps from position a to position b, so that when we reach a, the token 18106c3fb27SDimitry Andric // stream continues at position b instead. 18206c3fb27SDimitry Andric llvm::DenseMap<int, int> Jumps; 18306c3fb27SDimitry Andric }; 18406c3fb27SDimitry Andric 18506c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource { 18606c3fb27SDimitry Andric public: 18706c3fb27SDimitry Andric ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 18806c3fb27SDimitry Andric FormatToken *&ResetToken) 18906c3fb27SDimitry Andric : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 19006c3fb27SDimitry Andric PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 19106c3fb27SDimitry Andric Token(nullptr), PreviousToken(nullptr) { 19206c3fb27SDimitry Andric FakeEOF.Tok.startToken(); 19306c3fb27SDimitry Andric FakeEOF.Tok.setKind(tok::eof); 19406c3fb27SDimitry Andric TokenSource = this; 19506c3fb27SDimitry Andric Line.Level = 0; 19606c3fb27SDimitry Andric Line.InPPDirective = true; 19706c3fb27SDimitry Andric // InMacroBody gets set after the `#define x` part. 19806c3fb27SDimitry Andric } 19906c3fb27SDimitry Andric 20006c3fb27SDimitry Andric ~ScopedMacroState() override { 20106c3fb27SDimitry Andric TokenSource = PreviousTokenSource; 20206c3fb27SDimitry Andric ResetToken = Token; 20306c3fb27SDimitry Andric Line.InPPDirective = false; 20406c3fb27SDimitry Andric Line.InMacroBody = false; 20506c3fb27SDimitry Andric Line.Level = PreviousLineLevel; 20606c3fb27SDimitry Andric } 20706c3fb27SDimitry Andric 20806c3fb27SDimitry Andric FormatToken *getNextToken() override { 20906c3fb27SDimitry Andric // The \c UnwrappedLineParser guards against this by never calling 21006c3fb27SDimitry Andric // \c getNextToken() after it has encountered the first eof token. 21106c3fb27SDimitry Andric assert(!eof()); 21206c3fb27SDimitry Andric PreviousToken = Token; 21306c3fb27SDimitry Andric Token = PreviousTokenSource->getNextToken(); 21406c3fb27SDimitry Andric if (eof()) 21506c3fb27SDimitry Andric return &FakeEOF; 21606c3fb27SDimitry Andric return Token; 21706c3fb27SDimitry Andric } 21806c3fb27SDimitry Andric 21906c3fb27SDimitry Andric FormatToken *getPreviousToken() override { 22006c3fb27SDimitry Andric return PreviousTokenSource->getPreviousToken(); 22106c3fb27SDimitry Andric } 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric FormatToken *peekNextToken(bool SkipComment) override { 22406c3fb27SDimitry Andric if (eof()) 22506c3fb27SDimitry Andric return &FakeEOF; 22606c3fb27SDimitry Andric return PreviousTokenSource->peekNextToken(SkipComment); 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric 22906c3fb27SDimitry Andric bool isEOF() override { return PreviousTokenSource->isEOF(); } 23006c3fb27SDimitry Andric 23106c3fb27SDimitry Andric unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 23206c3fb27SDimitry Andric 23306c3fb27SDimitry Andric FormatToken *setPosition(unsigned Position) override { 23406c3fb27SDimitry Andric PreviousToken = nullptr; 23506c3fb27SDimitry Andric Token = PreviousTokenSource->setPosition(Position); 23606c3fb27SDimitry Andric return Token; 23706c3fb27SDimitry Andric } 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 24006c3fb27SDimitry Andric llvm_unreachable("Cannot insert tokens while parsing a macro."); 24106c3fb27SDimitry Andric return nullptr; 24206c3fb27SDimitry Andric } 24306c3fb27SDimitry Andric 24406c3fb27SDimitry Andric private: 24506c3fb27SDimitry Andric bool eof() { 24606c3fb27SDimitry Andric return Token && Token->HasUnescapedNewline && 24706c3fb27SDimitry Andric !continuesLineComment(*Token, PreviousToken, 24806c3fb27SDimitry Andric /*MinColumnToken=*/PreviousToken); 24906c3fb27SDimitry Andric } 25006c3fb27SDimitry Andric 25106c3fb27SDimitry Andric FormatToken FakeEOF; 25206c3fb27SDimitry Andric UnwrappedLine &Line; 25306c3fb27SDimitry Andric FormatTokenSource *&TokenSource; 25406c3fb27SDimitry Andric FormatToken *&ResetToken; 25506c3fb27SDimitry Andric unsigned PreviousLineLevel; 25606c3fb27SDimitry Andric FormatTokenSource *PreviousTokenSource; 25706c3fb27SDimitry Andric 25806c3fb27SDimitry Andric FormatToken *Token; 25906c3fb27SDimitry Andric FormatToken *PreviousToken; 26006c3fb27SDimitry Andric }; 26106c3fb27SDimitry Andric 26206c3fb27SDimitry Andric } // namespace format 26306c3fb27SDimitry Andric } // namespace clang 26406c3fb27SDimitry Andric 26506c3fb27SDimitry Andric #undef DEBUG_TYPE 26606c3fb27SDimitry Andric 26706c3fb27SDimitry Andric #endif 268