1c3bc61d7SManuel Klimek //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 2c3bc61d7SManuel Klimek // 3c3bc61d7SManuel Klimek // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c3bc61d7SManuel Klimek // See https://llvm.org/LICENSE.txt for license information. 5c3bc61d7SManuel Klimek // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6c3bc61d7SManuel Klimek // 7c3bc61d7SManuel Klimek //===----------------------------------------------------------------------===// 8c3bc61d7SManuel Klimek /// 9c3bc61d7SManuel Klimek /// \file 101995d442SManuel Klimek /// This file defines the \c FormatTokenSource interface, which provides a token 11c3bc61d7SManuel Klimek /// stream as well as the ability to manipulate the token stream. 12c3bc61d7SManuel Klimek /// 13c3bc61d7SManuel Klimek //===----------------------------------------------------------------------===// 14c3bc61d7SManuel Klimek 15c3bc61d7SManuel Klimek #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 16c3bc61d7SManuel Klimek #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 17c3bc61d7SManuel Klimek 18c3bc61d7SManuel Klimek #include "UnwrappedLineParser.h" 19c3bc61d7SManuel Klimek 20c3bc61d7SManuel Klimek #define DEBUG_TYPE "format-token-source" 21c3bc61d7SManuel Klimek 22c3bc61d7SManuel Klimek namespace clang { 23c3bc61d7SManuel Klimek namespace format { 24c3bc61d7SManuel Klimek 251995d442SManuel Klimek // Navigate a token stream. 261995d442SManuel Klimek // 271995d442SManuel Klimek // Enables traversal of a token stream, resetting the position in a token 281995d442SManuel Klimek // stream, as well as inserting new tokens. 29c3bc61d7SManuel Klimek class FormatTokenSource { 30c3bc61d7SManuel Klimek public: ~FormatTokenSource()31c3bc61d7SManuel Klimek virtual ~FormatTokenSource() {} 32c3bc61d7SManuel Klimek 33c3bc61d7SManuel Klimek // Returns the next token in the token stream. 34c3bc61d7SManuel Klimek virtual FormatToken *getNextToken() = 0; 35c3bc61d7SManuel Klimek 36c3bc61d7SManuel Klimek // Returns the token preceding the token returned by the last call to 37c3bc61d7SManuel Klimek // getNextToken() in the token stream, or nullptr if no such token exists. 381995d442SManuel Klimek // 391995d442SManuel Klimek // Must not be called directly at the position directly after insertTokens() 401995d442SManuel Klimek // is called. 41c3bc61d7SManuel Klimek virtual FormatToken *getPreviousToken() = 0; 42c3bc61d7SManuel Klimek 43c3bc61d7SManuel Klimek // Returns the token that would be returned by the next call to 44c3bc61d7SManuel Klimek // getNextToken(). 45c3bc61d7SManuel Klimek virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 46c3bc61d7SManuel Klimek 47c3bc61d7SManuel Klimek // Returns whether we are at the end of the file. 48c3bc61d7SManuel Klimek // This can be different from whether getNextToken() returned an eof token 49c3bc61d7SManuel Klimek // when the FormatTokenSource is a view on a part of the token stream. 50c3bc61d7SManuel Klimek virtual bool isEOF() = 0; 51c3bc61d7SManuel Klimek 52c3bc61d7SManuel Klimek // Gets the current position in the token stream, to be used by setPosition(). 531995d442SManuel Klimek // 541995d442SManuel Klimek // Note that the value of the position is not meaningful, and specifically 551995d442SManuel Klimek // should not be used to get relative token positions. 56c3bc61d7SManuel Klimek virtual unsigned getPosition() = 0; 57c3bc61d7SManuel Klimek 58c3bc61d7SManuel Klimek // Resets the token stream to the state it was in when getPosition() returned 59c3bc61d7SManuel Klimek // Position, and return the token at that position in the stream. 60c3bc61d7SManuel Klimek virtual FormatToken *setPosition(unsigned Position) = 0; 611995d442SManuel Klimek 621995d442SManuel Klimek // Insert the given tokens before the current position. 631995d442SManuel Klimek // Returns the first token in \c Tokens. 641995d442SManuel Klimek // The next returned token will be the second token in \c Tokens. 651995d442SManuel Klimek // Requires the last token in Tokens to be EOF; once the EOF token is reached, 661995d442SManuel Klimek // the next token will be the last token returned by getNextToken(); 671995d442SManuel Klimek // 681995d442SManuel Klimek // For example, given the token sequence 'a1 a2': 691995d442SManuel Klimek // getNextToken() -> a1 701995d442SManuel Klimek // insertTokens('b1 b2') -> b1 711995d442SManuel Klimek // getNextToken() -> b2 721995d442SManuel Klimek // getNextToken() -> a1 731995d442SManuel Klimek // getNextToken() -> a2 741995d442SManuel Klimek virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 75943db678SOwen Pan getNextNonComment()76943db678SOwen Pan [[nodiscard]] FormatToken *getNextNonComment() { 77943db678SOwen Pan FormatToken *Tok; 78943db678SOwen Pan do { 79943db678SOwen Pan Tok = getNextToken(); 80943db678SOwen Pan assert(Tok); 81943db678SOwen Pan } while (Tok->is(tok::comment)); 82943db678SOwen Pan return Tok; 83943db678SOwen Pan } 84c3bc61d7SManuel Klimek }; 85c3bc61d7SManuel Klimek 861995d442SManuel Klimek class IndexedTokenSource : public FormatTokenSource { 87be31f2c1SManuel Klimek public: IndexedTokenSource(ArrayRef<FormatToken * > Tokens)88be31f2c1SManuel Klimek IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 89be31f2c1SManuel Klimek : Tokens(Tokens), Position(-1) {} 90be31f2c1SManuel Klimek getNextToken()91be31f2c1SManuel Klimek FormatToken *getNextToken() override { 92be31f2c1SManuel Klimek if (Position >= 0 && isEOF()) { 93be31f2c1SManuel Klimek LLVM_DEBUG({ 94be31f2c1SManuel Klimek llvm::dbgs() << "Next "; 95be31f2c1SManuel Klimek dbgToken(Position); 96be31f2c1SManuel Klimek }); 97be31f2c1SManuel Klimek return Tokens[Position]; 98be31f2c1SManuel Klimek } 991995d442SManuel Klimek Position = successor(Position); 100be31f2c1SManuel Klimek LLVM_DEBUG({ 101be31f2c1SManuel Klimek llvm::dbgs() << "Next "; 102be31f2c1SManuel Klimek dbgToken(Position); 103be31f2c1SManuel Klimek }); 104be31f2c1SManuel Klimek return Tokens[Position]; 105be31f2c1SManuel Klimek } 106be31f2c1SManuel Klimek getPreviousToken()107be31f2c1SManuel Klimek FormatToken *getPreviousToken() override { 10891c4db00SOwen Pan assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); 109be31f2c1SManuel Klimek return Position > 0 ? Tokens[Position - 1] : nullptr; 110be31f2c1SManuel Klimek } 111be31f2c1SManuel Klimek 11204ed86ffSManuel Klimek FormatToken *peekNextToken(bool SkipComment = false) override { 11304ed86ffSManuel Klimek if (isEOF()) 11404ed86ffSManuel Klimek return Tokens[Position]; 1151995d442SManuel Klimek int Next = successor(Position); 116be31f2c1SManuel Klimek if (SkipComment) 117be31f2c1SManuel Klimek while (Tokens[Next]->is(tok::comment)) 1181995d442SManuel Klimek Next = successor(Next); 119be31f2c1SManuel Klimek LLVM_DEBUG({ 120be31f2c1SManuel Klimek llvm::dbgs() << "Peeking "; 121be31f2c1SManuel Klimek dbgToken(Next); 122be31f2c1SManuel Klimek }); 123be31f2c1SManuel Klimek return Tokens[Next]; 124be31f2c1SManuel Klimek } 125be31f2c1SManuel Klimek isEOF()12604ed86ffSManuel Klimek bool isEOF() override { 12704ed86ffSManuel Klimek return Position == -1 ? false : Tokens[Position]->is(tok::eof); 12804ed86ffSManuel Klimek } 129be31f2c1SManuel Klimek getPosition()130be31f2c1SManuel Klimek unsigned getPosition() override { 131be31f2c1SManuel Klimek LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 132be31f2c1SManuel Klimek assert(Position >= 0); 133be31f2c1SManuel Klimek return Position; 134be31f2c1SManuel Klimek } 135be31f2c1SManuel Klimek setPosition(unsigned P)136be31f2c1SManuel Klimek FormatToken *setPosition(unsigned P) override { 137be31f2c1SManuel Klimek LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 138be31f2c1SManuel Klimek Position = P; 139be31f2c1SManuel Klimek return Tokens[Position]; 140be31f2c1SManuel Klimek } 141be31f2c1SManuel Klimek insertTokens(ArrayRef<FormatToken * > New)1421995d442SManuel Klimek FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 1431995d442SManuel Klimek assert(Position != -1); 1441995d442SManuel Klimek assert((*New.rbegin())->Tok.is(tok::eof)); 1451995d442SManuel Klimek int Next = Tokens.size(); 1461995d442SManuel Klimek Tokens.append(New.begin(), New.end()); 1471995d442SManuel Klimek LLVM_DEBUG({ 1481995d442SManuel Klimek llvm::dbgs() << "Inserting:\n"; 1491995d442SManuel Klimek for (int I = Next, E = Tokens.size(); I != E; ++I) 1501995d442SManuel Klimek dbgToken(I, " "); 1511995d442SManuel Klimek llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 1521995d442SManuel Klimek << Position << "\n"; 1531995d442SManuel Klimek }); 1541995d442SManuel Klimek Jumps[Tokens.size() - 1] = Position; 1551995d442SManuel Klimek Position = Next; 1561995d442SManuel Klimek LLVM_DEBUG({ 1571995d442SManuel Klimek llvm::dbgs() << "At inserted token "; 1581995d442SManuel Klimek dbgToken(Position); 1591995d442SManuel Klimek }); 1601995d442SManuel Klimek return Tokens[Position]; 1611995d442SManuel Klimek } 1621995d442SManuel Klimek reset()163be31f2c1SManuel Klimek void reset() { Position = -1; } 164be31f2c1SManuel Klimek 165be31f2c1SManuel Klimek private: successor(int Current)1661995d442SManuel Klimek int successor(int Current) const { 1671995d442SManuel Klimek int Next = Current + 1; 1681995d442SManuel Klimek auto it = Jumps.find(Next); 1691995d442SManuel Klimek if (it != Jumps.end()) { 1701995d442SManuel Klimek Next = it->second; 1717eaa7b05SKazu Hirata assert(!Jumps.contains(Next)); 1721995d442SManuel Klimek } 1731995d442SManuel Klimek return Next; 1741995d442SManuel Klimek } 1751995d442SManuel Klimek 176*1c58208dSOwen Pan void dbgToken(int Position, StringRef Indent = "") { 177be31f2c1SManuel Klimek FormatToken *Tok = Tokens[Position]; 178be31f2c1SManuel Klimek llvm::dbgs() << Indent << "[" << Position 179be31f2c1SManuel Klimek << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 180be31f2c1SManuel Klimek << ", Macro: " << !!Tok->MacroCtx << "\n"; 181be31f2c1SManuel Klimek } 182be31f2c1SManuel Klimek 1831995d442SManuel Klimek SmallVector<FormatToken *> Tokens; 184be31f2c1SManuel Klimek int Position; 1851995d442SManuel Klimek 1861995d442SManuel Klimek // Maps from position a to position b, so that when we reach a, the token 1871995d442SManuel Klimek // stream continues at position b instead. 1881995d442SManuel Klimek llvm::DenseMap<int, int> Jumps; 189be31f2c1SManuel Klimek }; 190be31f2c1SManuel Klimek 191c3bc61d7SManuel Klimek class ScopedMacroState : public FormatTokenSource { 192c3bc61d7SManuel Klimek public: ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)193c3bc61d7SManuel Klimek ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 194c3bc61d7SManuel Klimek FormatToken *&ResetToken) 195c3bc61d7SManuel Klimek : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 196c3bc61d7SManuel Klimek PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 197c3bc61d7SManuel Klimek Token(nullptr), PreviousToken(nullptr) { 198c3bc61d7SManuel Klimek FakeEOF.Tok.startToken(); 199c3bc61d7SManuel Klimek FakeEOF.Tok.setKind(tok::eof); 200c3bc61d7SManuel Klimek TokenSource = this; 201c3bc61d7SManuel Klimek Line.Level = 0; 202c3bc61d7SManuel Klimek Line.InPPDirective = true; 203c3bc61d7SManuel Klimek // InMacroBody gets set after the `#define x` part. 204c3bc61d7SManuel Klimek } 205c3bc61d7SManuel Klimek ~ScopedMacroState()206c3bc61d7SManuel Klimek ~ScopedMacroState() override { 207c3bc61d7SManuel Klimek TokenSource = PreviousTokenSource; 208c3bc61d7SManuel Klimek ResetToken = Token; 209c3bc61d7SManuel Klimek Line.InPPDirective = false; 210c3bc61d7SManuel Klimek Line.InMacroBody = false; 211c3bc61d7SManuel Klimek Line.Level = PreviousLineLevel; 212c3bc61d7SManuel Klimek } 213c3bc61d7SManuel Klimek getNextToken()214c3bc61d7SManuel Klimek FormatToken *getNextToken() override { 215c3bc61d7SManuel Klimek // The \c UnwrappedLineParser guards against this by never calling 216c3bc61d7SManuel Klimek // \c getNextToken() after it has encountered the first eof token. 217c3bc61d7SManuel Klimek assert(!eof()); 218c3bc61d7SManuel Klimek PreviousToken = Token; 219c3bc61d7SManuel Klimek Token = PreviousTokenSource->getNextToken(); 220c3bc61d7SManuel Klimek if (eof()) 221c3bc61d7SManuel Klimek return &FakeEOF; 222c3bc61d7SManuel Klimek return Token; 223c3bc61d7SManuel Klimek } 224c3bc61d7SManuel Klimek getPreviousToken()225c3bc61d7SManuel Klimek FormatToken *getPreviousToken() override { 226c3bc61d7SManuel Klimek return PreviousTokenSource->getPreviousToken(); 227c3bc61d7SManuel Klimek } 228c3bc61d7SManuel Klimek peekNextToken(bool SkipComment)229c3bc61d7SManuel Klimek FormatToken *peekNextToken(bool SkipComment) override { 230c3bc61d7SManuel Klimek if (eof()) 231c3bc61d7SManuel Klimek return &FakeEOF; 232c3bc61d7SManuel Klimek return PreviousTokenSource->peekNextToken(SkipComment); 233c3bc61d7SManuel Klimek } 234c3bc61d7SManuel Klimek isEOF()235c3bc61d7SManuel Klimek bool isEOF() override { return PreviousTokenSource->isEOF(); } 236c3bc61d7SManuel Klimek getPosition()237c3bc61d7SManuel Klimek unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 238c3bc61d7SManuel Klimek setPosition(unsigned Position)239c3bc61d7SManuel Klimek FormatToken *setPosition(unsigned Position) override { 240c3bc61d7SManuel Klimek PreviousToken = nullptr; 241c3bc61d7SManuel Klimek Token = PreviousTokenSource->setPosition(Position); 242c3bc61d7SManuel Klimek return Token; 243c3bc61d7SManuel Klimek } 244c3bc61d7SManuel Klimek insertTokens(ArrayRef<FormatToken * > Tokens)2451995d442SManuel Klimek FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 2461f818f63SBenjamin Kramer llvm_unreachable("Cannot insert tokens while parsing a macro."); 2478c1f77afSManuel Klimek return nullptr; 2481995d442SManuel Klimek } 2491995d442SManuel Klimek 250c3bc61d7SManuel Klimek private: eof()251c3bc61d7SManuel Klimek bool eof() { 252c3bc61d7SManuel Klimek return Token && Token->HasUnescapedNewline && 253c3bc61d7SManuel Klimek !continuesLineComment(*Token, PreviousToken, 254c3bc61d7SManuel Klimek /*MinColumnToken=*/PreviousToken); 255c3bc61d7SManuel Klimek } 256c3bc61d7SManuel Klimek 257c3bc61d7SManuel Klimek FormatToken FakeEOF; 258c3bc61d7SManuel Klimek UnwrappedLine &Line; 259c3bc61d7SManuel Klimek FormatTokenSource *&TokenSource; 260c3bc61d7SManuel Klimek FormatToken *&ResetToken; 261c3bc61d7SManuel Klimek unsigned PreviousLineLevel; 262c3bc61d7SManuel Klimek FormatTokenSource *PreviousTokenSource; 263c3bc61d7SManuel Klimek 264c3bc61d7SManuel Klimek FormatToken *Token; 265c3bc61d7SManuel Klimek FormatToken *PreviousToken; 266c3bc61d7SManuel Klimek }; 267c3bc61d7SManuel Klimek 268c3bc61d7SManuel Klimek } // namespace format 269c3bc61d7SManuel Klimek } // namespace clang 270c3bc61d7SManuel Klimek 271c3bc61d7SManuel Klimek #undef DEBUG_TYPE 272c3bc61d7SManuel Klimek 273c3bc61d7SManuel Klimek #endif 274