xref: /llvm-project/clang/lib/Format/FormatTokenSource.h (revision 1c58208d899285318c89e069268145c85ec33368)
//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the \c FormatTokenSource interface, which provides a token
/// stream as well as the ability to manipulate the token stream.
///
//===----------------------------------------------------------------------===//
14c3bc61d7SManuel Klimek 
15c3bc61d7SManuel Klimek #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16c3bc61d7SManuel Klimek #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17c3bc61d7SManuel Klimek 
18c3bc61d7SManuel Klimek #include "UnwrappedLineParser.h"
19c3bc61d7SManuel Klimek 
20c3bc61d7SManuel Klimek #define DEBUG_TYPE "format-token-source"
21c3bc61d7SManuel Klimek 
22c3bc61d7SManuel Klimek namespace clang {
23c3bc61d7SManuel Klimek namespace format {
24c3bc61d7SManuel Klimek 
251995d442SManuel Klimek // Navigate a token stream.
261995d442SManuel Klimek //
271995d442SManuel Klimek // Enables traversal of a token stream, resetting the position in a token
281995d442SManuel Klimek // stream, as well as inserting new tokens.
29c3bc61d7SManuel Klimek class FormatTokenSource {
30c3bc61d7SManuel Klimek public:
~FormatTokenSource()31c3bc61d7SManuel Klimek   virtual ~FormatTokenSource() {}
32c3bc61d7SManuel Klimek 
33c3bc61d7SManuel Klimek   // Returns the next token in the token stream.
34c3bc61d7SManuel Klimek   virtual FormatToken *getNextToken() = 0;
35c3bc61d7SManuel Klimek 
36c3bc61d7SManuel Klimek   // Returns the token preceding the token returned by the last call to
37c3bc61d7SManuel Klimek   // getNextToken() in the token stream, or nullptr if no such token exists.
381995d442SManuel Klimek   //
391995d442SManuel Klimek   // Must not be called directly at the position directly after insertTokens()
401995d442SManuel Klimek   // is called.
41c3bc61d7SManuel Klimek   virtual FormatToken *getPreviousToken() = 0;
42c3bc61d7SManuel Klimek 
43c3bc61d7SManuel Klimek   // Returns the token that would be returned by the next call to
44c3bc61d7SManuel Klimek   // getNextToken().
45c3bc61d7SManuel Klimek   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
46c3bc61d7SManuel Klimek 
47c3bc61d7SManuel Klimek   // Returns whether we are at the end of the file.
48c3bc61d7SManuel Klimek   // This can be different from whether getNextToken() returned an eof token
49c3bc61d7SManuel Klimek   // when the FormatTokenSource is a view on a part of the token stream.
50c3bc61d7SManuel Klimek   virtual bool isEOF() = 0;
51c3bc61d7SManuel Klimek 
52c3bc61d7SManuel Klimek   // Gets the current position in the token stream, to be used by setPosition().
531995d442SManuel Klimek   //
541995d442SManuel Klimek   // Note that the value of the position is not meaningful, and specifically
551995d442SManuel Klimek   // should not be used to get relative token positions.
56c3bc61d7SManuel Klimek   virtual unsigned getPosition() = 0;
57c3bc61d7SManuel Klimek 
58c3bc61d7SManuel Klimek   // Resets the token stream to the state it was in when getPosition() returned
59c3bc61d7SManuel Klimek   // Position, and return the token at that position in the stream.
60c3bc61d7SManuel Klimek   virtual FormatToken *setPosition(unsigned Position) = 0;
611995d442SManuel Klimek 
621995d442SManuel Klimek   // Insert the given tokens before the current position.
631995d442SManuel Klimek   // Returns the first token in \c Tokens.
641995d442SManuel Klimek   // The next returned token will be the second token in \c Tokens.
651995d442SManuel Klimek   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
661995d442SManuel Klimek   // the next token will be the last token returned by getNextToken();
671995d442SManuel Klimek   //
681995d442SManuel Klimek   // For example, given the token sequence 'a1 a2':
691995d442SManuel Klimek   // getNextToken() -> a1
701995d442SManuel Klimek   // insertTokens('b1 b2') -> b1
711995d442SManuel Klimek   // getNextToken() -> b2
721995d442SManuel Klimek   // getNextToken() -> a1
731995d442SManuel Klimek   // getNextToken() -> a2
741995d442SManuel Klimek   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
75943db678SOwen Pan 
getNextNonComment()76943db678SOwen Pan   [[nodiscard]] FormatToken *getNextNonComment() {
77943db678SOwen Pan     FormatToken *Tok;
78943db678SOwen Pan     do {
79943db678SOwen Pan       Tok = getNextToken();
80943db678SOwen Pan       assert(Tok);
81943db678SOwen Pan     } while (Tok->is(tok::comment));
82943db678SOwen Pan     return Tok;
83943db678SOwen Pan   }
84c3bc61d7SManuel Klimek };
85c3bc61d7SManuel Klimek 
861995d442SManuel Klimek class IndexedTokenSource : public FormatTokenSource {
87be31f2c1SManuel Klimek public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)88be31f2c1SManuel Klimek   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
89be31f2c1SManuel Klimek       : Tokens(Tokens), Position(-1) {}
90be31f2c1SManuel Klimek 
getNextToken()91be31f2c1SManuel Klimek   FormatToken *getNextToken() override {
92be31f2c1SManuel Klimek     if (Position >= 0 && isEOF()) {
93be31f2c1SManuel Klimek       LLVM_DEBUG({
94be31f2c1SManuel Klimek         llvm::dbgs() << "Next ";
95be31f2c1SManuel Klimek         dbgToken(Position);
96be31f2c1SManuel Klimek       });
97be31f2c1SManuel Klimek       return Tokens[Position];
98be31f2c1SManuel Klimek     }
991995d442SManuel Klimek     Position = successor(Position);
100be31f2c1SManuel Klimek     LLVM_DEBUG({
101be31f2c1SManuel Klimek       llvm::dbgs() << "Next ";
102be31f2c1SManuel Klimek       dbgToken(Position);
103be31f2c1SManuel Klimek     });
104be31f2c1SManuel Klimek     return Tokens[Position];
105be31f2c1SManuel Klimek   }
106be31f2c1SManuel Klimek 
getPreviousToken()107be31f2c1SManuel Klimek   FormatToken *getPreviousToken() override {
10891c4db00SOwen Pan     assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
109be31f2c1SManuel Klimek     return Position > 0 ? Tokens[Position - 1] : nullptr;
110be31f2c1SManuel Klimek   }
111be31f2c1SManuel Klimek 
11204ed86ffSManuel Klimek   FormatToken *peekNextToken(bool SkipComment = false) override {
11304ed86ffSManuel Klimek     if (isEOF())
11404ed86ffSManuel Klimek       return Tokens[Position];
1151995d442SManuel Klimek     int Next = successor(Position);
116be31f2c1SManuel Klimek     if (SkipComment)
117be31f2c1SManuel Klimek       while (Tokens[Next]->is(tok::comment))
1181995d442SManuel Klimek         Next = successor(Next);
119be31f2c1SManuel Klimek     LLVM_DEBUG({
120be31f2c1SManuel Klimek       llvm::dbgs() << "Peeking ";
121be31f2c1SManuel Klimek       dbgToken(Next);
122be31f2c1SManuel Klimek     });
123be31f2c1SManuel Klimek     return Tokens[Next];
124be31f2c1SManuel Klimek   }
125be31f2c1SManuel Klimek 
isEOF()12604ed86ffSManuel Klimek   bool isEOF() override {
12704ed86ffSManuel Klimek     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
12804ed86ffSManuel Klimek   }
129be31f2c1SManuel Klimek 
getPosition()130be31f2c1SManuel Klimek   unsigned getPosition() override {
131be31f2c1SManuel Klimek     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
132be31f2c1SManuel Klimek     assert(Position >= 0);
133be31f2c1SManuel Klimek     return Position;
134be31f2c1SManuel Klimek   }
135be31f2c1SManuel Klimek 
setPosition(unsigned P)136be31f2c1SManuel Klimek   FormatToken *setPosition(unsigned P) override {
137be31f2c1SManuel Klimek     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
138be31f2c1SManuel Klimek     Position = P;
139be31f2c1SManuel Klimek     return Tokens[Position];
140be31f2c1SManuel Klimek   }
141be31f2c1SManuel Klimek 
insertTokens(ArrayRef<FormatToken * > New)1421995d442SManuel Klimek   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
1431995d442SManuel Klimek     assert(Position != -1);
1441995d442SManuel Klimek     assert((*New.rbegin())->Tok.is(tok::eof));
1451995d442SManuel Klimek     int Next = Tokens.size();
1461995d442SManuel Klimek     Tokens.append(New.begin(), New.end());
1471995d442SManuel Klimek     LLVM_DEBUG({
1481995d442SManuel Klimek       llvm::dbgs() << "Inserting:\n";
1491995d442SManuel Klimek       for (int I = Next, E = Tokens.size(); I != E; ++I)
1501995d442SManuel Klimek         dbgToken(I, "  ");
1511995d442SManuel Klimek       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
1521995d442SManuel Klimek                    << Position << "\n";
1531995d442SManuel Klimek     });
1541995d442SManuel Klimek     Jumps[Tokens.size() - 1] = Position;
1551995d442SManuel Klimek     Position = Next;
1561995d442SManuel Klimek     LLVM_DEBUG({
1571995d442SManuel Klimek       llvm::dbgs() << "At inserted token ";
1581995d442SManuel Klimek       dbgToken(Position);
1591995d442SManuel Klimek     });
1601995d442SManuel Klimek     return Tokens[Position];
1611995d442SManuel Klimek   }
1621995d442SManuel Klimek 
reset()163be31f2c1SManuel Klimek   void reset() { Position = -1; }
164be31f2c1SManuel Klimek 
165be31f2c1SManuel Klimek private:
successor(int Current)1661995d442SManuel Klimek   int successor(int Current) const {
1671995d442SManuel Klimek     int Next = Current + 1;
1681995d442SManuel Klimek     auto it = Jumps.find(Next);
1691995d442SManuel Klimek     if (it != Jumps.end()) {
1701995d442SManuel Klimek       Next = it->second;
1717eaa7b05SKazu Hirata       assert(!Jumps.contains(Next));
1721995d442SManuel Klimek     }
1731995d442SManuel Klimek     return Next;
1741995d442SManuel Klimek   }
1751995d442SManuel Klimek 
176*1c58208dSOwen Pan   void dbgToken(int Position, StringRef Indent = "") {
177be31f2c1SManuel Klimek     FormatToken *Tok = Tokens[Position];
178be31f2c1SManuel Klimek     llvm::dbgs() << Indent << "[" << Position
179be31f2c1SManuel Klimek                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
180be31f2c1SManuel Klimek                  << ", Macro: " << !!Tok->MacroCtx << "\n";
181be31f2c1SManuel Klimek   }
182be31f2c1SManuel Klimek 
1831995d442SManuel Klimek   SmallVector<FormatToken *> Tokens;
184be31f2c1SManuel Klimek   int Position;
1851995d442SManuel Klimek 
1861995d442SManuel Klimek   // Maps from position a to position b, so that when we reach a, the token
1871995d442SManuel Klimek   // stream continues at position b instead.
1881995d442SManuel Klimek   llvm::DenseMap<int, int> Jumps;
189be31f2c1SManuel Klimek };
190be31f2c1SManuel Klimek 
191c3bc61d7SManuel Klimek class ScopedMacroState : public FormatTokenSource {
192c3bc61d7SManuel Klimek public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)193c3bc61d7SManuel Klimek   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
194c3bc61d7SManuel Klimek                    FormatToken *&ResetToken)
195c3bc61d7SManuel Klimek       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
196c3bc61d7SManuel Klimek         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
197c3bc61d7SManuel Klimek         Token(nullptr), PreviousToken(nullptr) {
198c3bc61d7SManuel Klimek     FakeEOF.Tok.startToken();
199c3bc61d7SManuel Klimek     FakeEOF.Tok.setKind(tok::eof);
200c3bc61d7SManuel Klimek     TokenSource = this;
201c3bc61d7SManuel Klimek     Line.Level = 0;
202c3bc61d7SManuel Klimek     Line.InPPDirective = true;
203c3bc61d7SManuel Klimek     // InMacroBody gets set after the `#define x` part.
204c3bc61d7SManuel Klimek   }
205c3bc61d7SManuel Klimek 
~ScopedMacroState()206c3bc61d7SManuel Klimek   ~ScopedMacroState() override {
207c3bc61d7SManuel Klimek     TokenSource = PreviousTokenSource;
208c3bc61d7SManuel Klimek     ResetToken = Token;
209c3bc61d7SManuel Klimek     Line.InPPDirective = false;
210c3bc61d7SManuel Klimek     Line.InMacroBody = false;
211c3bc61d7SManuel Klimek     Line.Level = PreviousLineLevel;
212c3bc61d7SManuel Klimek   }
213c3bc61d7SManuel Klimek 
getNextToken()214c3bc61d7SManuel Klimek   FormatToken *getNextToken() override {
215c3bc61d7SManuel Klimek     // The \c UnwrappedLineParser guards against this by never calling
216c3bc61d7SManuel Klimek     // \c getNextToken() after it has encountered the first eof token.
217c3bc61d7SManuel Klimek     assert(!eof());
218c3bc61d7SManuel Klimek     PreviousToken = Token;
219c3bc61d7SManuel Klimek     Token = PreviousTokenSource->getNextToken();
220c3bc61d7SManuel Klimek     if (eof())
221c3bc61d7SManuel Klimek       return &FakeEOF;
222c3bc61d7SManuel Klimek     return Token;
223c3bc61d7SManuel Klimek   }
224c3bc61d7SManuel Klimek 
getPreviousToken()225c3bc61d7SManuel Klimek   FormatToken *getPreviousToken() override {
226c3bc61d7SManuel Klimek     return PreviousTokenSource->getPreviousToken();
227c3bc61d7SManuel Klimek   }
228c3bc61d7SManuel Klimek 
peekNextToken(bool SkipComment)229c3bc61d7SManuel Klimek   FormatToken *peekNextToken(bool SkipComment) override {
230c3bc61d7SManuel Klimek     if (eof())
231c3bc61d7SManuel Klimek       return &FakeEOF;
232c3bc61d7SManuel Klimek     return PreviousTokenSource->peekNextToken(SkipComment);
233c3bc61d7SManuel Klimek   }
234c3bc61d7SManuel Klimek 
isEOF()235c3bc61d7SManuel Klimek   bool isEOF() override { return PreviousTokenSource->isEOF(); }
236c3bc61d7SManuel Klimek 
getPosition()237c3bc61d7SManuel Klimek   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
238c3bc61d7SManuel Klimek 
setPosition(unsigned Position)239c3bc61d7SManuel Klimek   FormatToken *setPosition(unsigned Position) override {
240c3bc61d7SManuel Klimek     PreviousToken = nullptr;
241c3bc61d7SManuel Klimek     Token = PreviousTokenSource->setPosition(Position);
242c3bc61d7SManuel Klimek     return Token;
243c3bc61d7SManuel Klimek   }
244c3bc61d7SManuel Klimek 
insertTokens(ArrayRef<FormatToken * > Tokens)2451995d442SManuel Klimek   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
2461f818f63SBenjamin Kramer     llvm_unreachable("Cannot insert tokens while parsing a macro.");
2478c1f77afSManuel Klimek     return nullptr;
2481995d442SManuel Klimek   }
2491995d442SManuel Klimek 
250c3bc61d7SManuel Klimek private:
eof()251c3bc61d7SManuel Klimek   bool eof() {
252c3bc61d7SManuel Klimek     return Token && Token->HasUnescapedNewline &&
253c3bc61d7SManuel Klimek            !continuesLineComment(*Token, PreviousToken,
254c3bc61d7SManuel Klimek                                  /*MinColumnToken=*/PreviousToken);
255c3bc61d7SManuel Klimek   }
256c3bc61d7SManuel Klimek 
257c3bc61d7SManuel Klimek   FormatToken FakeEOF;
258c3bc61d7SManuel Klimek   UnwrappedLine &Line;
259c3bc61d7SManuel Klimek   FormatTokenSource *&TokenSource;
260c3bc61d7SManuel Klimek   FormatToken *&ResetToken;
261c3bc61d7SManuel Klimek   unsigned PreviousLineLevel;
262c3bc61d7SManuel Klimek   FormatTokenSource *PreviousTokenSource;
263c3bc61d7SManuel Klimek 
264c3bc61d7SManuel Klimek   FormatToken *Token;
265c3bc61d7SManuel Klimek   FormatToken *PreviousToken;
266c3bc61d7SManuel Klimek };
267c3bc61d7SManuel Klimek 
268c3bc61d7SManuel Klimek } // namespace format
269c3bc61d7SManuel Klimek } // namespace clang
270c3bc61d7SManuel Klimek 
271c3bc61d7SManuel Klimek #undef DEBUG_TYPE
272c3bc61d7SManuel Klimek 
273c3bc61d7SManuel Klimek #endif
274