xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/FormatTokenSource.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
106c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric ///
906c3fb27SDimitry Andric /// \file
1006c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token
1106c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream.
1206c3fb27SDimitry Andric ///
1306c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1406c3fb27SDimitry Andric 
1506c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
1606c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
1706c3fb27SDimitry Andric 
1806c3fb27SDimitry Andric #include "FormatToken.h"
1906c3fb27SDimitry Andric #include "UnwrappedLineParser.h"
2006c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
2106c3fb27SDimitry Andric #include <cstddef>
2206c3fb27SDimitry Andric 
2306c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source"
2406c3fb27SDimitry Andric 
2506c3fb27SDimitry Andric namespace clang {
2606c3fb27SDimitry Andric namespace format {
2706c3fb27SDimitry Andric 
2806c3fb27SDimitry Andric // Navigate a token stream.
2906c3fb27SDimitry Andric //
3006c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token
3106c3fb27SDimitry Andric // stream, as well as inserting new tokens.
3206c3fb27SDimitry Andric class FormatTokenSource {
3306c3fb27SDimitry Andric public:
3406c3fb27SDimitry Andric   virtual ~FormatTokenSource() {}
3506c3fb27SDimitry Andric 
3606c3fb27SDimitry Andric   // Returns the next token in the token stream.
3706c3fb27SDimitry Andric   virtual FormatToken *getNextToken() = 0;
3806c3fb27SDimitry Andric 
3906c3fb27SDimitry Andric   // Returns the token preceding the token returned by the last call to
4006c3fb27SDimitry Andric   // getNextToken() in the token stream, or nullptr if no such token exists.
4106c3fb27SDimitry Andric   //
4206c3fb27SDimitry Andric   // Must not be called directly at the position directly after insertTokens()
4306c3fb27SDimitry Andric   // is called.
4406c3fb27SDimitry Andric   virtual FormatToken *getPreviousToken() = 0;
4506c3fb27SDimitry Andric 
4606c3fb27SDimitry Andric   // Returns the token that would be returned by the next call to
4706c3fb27SDimitry Andric   // getNextToken().
4806c3fb27SDimitry Andric   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
4906c3fb27SDimitry Andric 
5006c3fb27SDimitry Andric   // Returns whether we are at the end of the file.
5106c3fb27SDimitry Andric   // This can be different from whether getNextToken() returned an eof token
5206c3fb27SDimitry Andric   // when the FormatTokenSource is a view on a part of the token stream.
5306c3fb27SDimitry Andric   virtual bool isEOF() = 0;
5406c3fb27SDimitry Andric 
5506c3fb27SDimitry Andric   // Gets the current position in the token stream, to be used by setPosition().
5606c3fb27SDimitry Andric   //
5706c3fb27SDimitry Andric   // Note that the value of the position is not meaningful, and specifically
5806c3fb27SDimitry Andric   // should not be used to get relative token positions.
5906c3fb27SDimitry Andric   virtual unsigned getPosition() = 0;
6006c3fb27SDimitry Andric 
6106c3fb27SDimitry Andric   // Resets the token stream to the state it was in when getPosition() returned
6206c3fb27SDimitry Andric   // Position, and return the token at that position in the stream.
6306c3fb27SDimitry Andric   virtual FormatToken *setPosition(unsigned Position) = 0;
6406c3fb27SDimitry Andric 
6506c3fb27SDimitry Andric   // Insert the given tokens before the current position.
6606c3fb27SDimitry Andric   // Returns the first token in \c Tokens.
6706c3fb27SDimitry Andric   // The next returned token will be the second token in \c Tokens.
6806c3fb27SDimitry Andric   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
6906c3fb27SDimitry Andric   // the next token will be the last token returned by getNextToken();
7006c3fb27SDimitry Andric   //
7106c3fb27SDimitry Andric   // For example, given the token sequence 'a1 a2':
7206c3fb27SDimitry Andric   // getNextToken() -> a1
7306c3fb27SDimitry Andric   // insertTokens('b1 b2') -> b1
7406c3fb27SDimitry Andric   // getNextToken() -> b2
7506c3fb27SDimitry Andric   // getNextToken() -> a1
7606c3fb27SDimitry Andric   // getNextToken() -> a2
7706c3fb27SDimitry Andric   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
7806c3fb27SDimitry Andric };
7906c3fb27SDimitry Andric 
8006c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource {
8106c3fb27SDimitry Andric public:
8206c3fb27SDimitry Andric   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
8306c3fb27SDimitry Andric       : Tokens(Tokens), Position(-1) {}
8406c3fb27SDimitry Andric 
8506c3fb27SDimitry Andric   FormatToken *getNextToken() override {
8606c3fb27SDimitry Andric     if (Position >= 0 && isEOF()) {
8706c3fb27SDimitry Andric       LLVM_DEBUG({
8806c3fb27SDimitry Andric         llvm::dbgs() << "Next ";
8906c3fb27SDimitry Andric         dbgToken(Position);
9006c3fb27SDimitry Andric       });
9106c3fb27SDimitry Andric       return Tokens[Position];
9206c3fb27SDimitry Andric     }
9306c3fb27SDimitry Andric     Position = successor(Position);
9406c3fb27SDimitry Andric     LLVM_DEBUG({
9506c3fb27SDimitry Andric       llvm::dbgs() << "Next ";
9606c3fb27SDimitry Andric       dbgToken(Position);
9706c3fb27SDimitry Andric     });
9806c3fb27SDimitry Andric     return Tokens[Position];
9906c3fb27SDimitry Andric   }
10006c3fb27SDimitry Andric 
10106c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
102*5f757f3fSDimitry Andric     assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
10306c3fb27SDimitry Andric     return Position > 0 ? Tokens[Position - 1] : nullptr;
10406c3fb27SDimitry Andric   }
10506c3fb27SDimitry Andric 
10606c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment = false) override {
10706c3fb27SDimitry Andric     if (isEOF())
10806c3fb27SDimitry Andric       return Tokens[Position];
10906c3fb27SDimitry Andric     int Next = successor(Position);
11006c3fb27SDimitry Andric     if (SkipComment)
11106c3fb27SDimitry Andric       while (Tokens[Next]->is(tok::comment))
11206c3fb27SDimitry Andric         Next = successor(Next);
11306c3fb27SDimitry Andric     LLVM_DEBUG({
11406c3fb27SDimitry Andric       llvm::dbgs() << "Peeking ";
11506c3fb27SDimitry Andric       dbgToken(Next);
11606c3fb27SDimitry Andric     });
11706c3fb27SDimitry Andric     return Tokens[Next];
11806c3fb27SDimitry Andric   }
11906c3fb27SDimitry Andric 
12006c3fb27SDimitry Andric   bool isEOF() override {
12106c3fb27SDimitry Andric     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
12206c3fb27SDimitry Andric   }
12306c3fb27SDimitry Andric 
12406c3fb27SDimitry Andric   unsigned getPosition() override {
12506c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
12606c3fb27SDimitry Andric     assert(Position >= 0);
12706c3fb27SDimitry Andric     return Position;
12806c3fb27SDimitry Andric   }
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric   FormatToken *setPosition(unsigned P) override {
13106c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
13206c3fb27SDimitry Andric     Position = P;
13306c3fb27SDimitry Andric     return Tokens[Position];
13406c3fb27SDimitry Andric   }
13506c3fb27SDimitry Andric 
13606c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
13706c3fb27SDimitry Andric     assert(Position != -1);
13806c3fb27SDimitry Andric     assert((*New.rbegin())->Tok.is(tok::eof));
13906c3fb27SDimitry Andric     int Next = Tokens.size();
14006c3fb27SDimitry Andric     Tokens.append(New.begin(), New.end());
14106c3fb27SDimitry Andric     LLVM_DEBUG({
14206c3fb27SDimitry Andric       llvm::dbgs() << "Inserting:\n";
14306c3fb27SDimitry Andric       for (int I = Next, E = Tokens.size(); I != E; ++I)
14406c3fb27SDimitry Andric         dbgToken(I, "  ");
14506c3fb27SDimitry Andric       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
14606c3fb27SDimitry Andric                    << Position << "\n";
14706c3fb27SDimitry Andric     });
14806c3fb27SDimitry Andric     Jumps[Tokens.size() - 1] = Position;
14906c3fb27SDimitry Andric     Position = Next;
15006c3fb27SDimitry Andric     LLVM_DEBUG({
15106c3fb27SDimitry Andric       llvm::dbgs() << "At inserted token ";
15206c3fb27SDimitry Andric       dbgToken(Position);
15306c3fb27SDimitry Andric     });
15406c3fb27SDimitry Andric     return Tokens[Position];
15506c3fb27SDimitry Andric   }
15606c3fb27SDimitry Andric 
15706c3fb27SDimitry Andric   void reset() { Position = -1; }
15806c3fb27SDimitry Andric 
15906c3fb27SDimitry Andric private:
16006c3fb27SDimitry Andric   int successor(int Current) const {
16106c3fb27SDimitry Andric     int Next = Current + 1;
16206c3fb27SDimitry Andric     auto it = Jumps.find(Next);
16306c3fb27SDimitry Andric     if (it != Jumps.end()) {
16406c3fb27SDimitry Andric       Next = it->second;
16506c3fb27SDimitry Andric       assert(!Jumps.contains(Next));
16606c3fb27SDimitry Andric     }
16706c3fb27SDimitry Andric     return Next;
16806c3fb27SDimitry Andric   }
16906c3fb27SDimitry Andric 
17006c3fb27SDimitry Andric   void dbgToken(int Position, llvm::StringRef Indent = "") {
17106c3fb27SDimitry Andric     FormatToken *Tok = Tokens[Position];
17206c3fb27SDimitry Andric     llvm::dbgs() << Indent << "[" << Position
17306c3fb27SDimitry Andric                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
17406c3fb27SDimitry Andric                  << ", Macro: " << !!Tok->MacroCtx << "\n";
17506c3fb27SDimitry Andric   }
17606c3fb27SDimitry Andric 
17706c3fb27SDimitry Andric   SmallVector<FormatToken *> Tokens;
17806c3fb27SDimitry Andric   int Position;
17906c3fb27SDimitry Andric 
18006c3fb27SDimitry Andric   // Maps from position a to position b, so that when we reach a, the token
18106c3fb27SDimitry Andric   // stream continues at position b instead.
18206c3fb27SDimitry Andric   llvm::DenseMap<int, int> Jumps;
18306c3fb27SDimitry Andric };
18406c3fb27SDimitry Andric 
18506c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource {
18606c3fb27SDimitry Andric public:
18706c3fb27SDimitry Andric   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
18806c3fb27SDimitry Andric                    FormatToken *&ResetToken)
18906c3fb27SDimitry Andric       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
19006c3fb27SDimitry Andric         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
19106c3fb27SDimitry Andric         Token(nullptr), PreviousToken(nullptr) {
19206c3fb27SDimitry Andric     FakeEOF.Tok.startToken();
19306c3fb27SDimitry Andric     FakeEOF.Tok.setKind(tok::eof);
19406c3fb27SDimitry Andric     TokenSource = this;
19506c3fb27SDimitry Andric     Line.Level = 0;
19606c3fb27SDimitry Andric     Line.InPPDirective = true;
19706c3fb27SDimitry Andric     // InMacroBody gets set after the `#define x` part.
19806c3fb27SDimitry Andric   }
19906c3fb27SDimitry Andric 
20006c3fb27SDimitry Andric   ~ScopedMacroState() override {
20106c3fb27SDimitry Andric     TokenSource = PreviousTokenSource;
20206c3fb27SDimitry Andric     ResetToken = Token;
20306c3fb27SDimitry Andric     Line.InPPDirective = false;
20406c3fb27SDimitry Andric     Line.InMacroBody = false;
20506c3fb27SDimitry Andric     Line.Level = PreviousLineLevel;
20606c3fb27SDimitry Andric   }
20706c3fb27SDimitry Andric 
20806c3fb27SDimitry Andric   FormatToken *getNextToken() override {
20906c3fb27SDimitry Andric     // The \c UnwrappedLineParser guards against this by never calling
21006c3fb27SDimitry Andric     // \c getNextToken() after it has encountered the first eof token.
21106c3fb27SDimitry Andric     assert(!eof());
21206c3fb27SDimitry Andric     PreviousToken = Token;
21306c3fb27SDimitry Andric     Token = PreviousTokenSource->getNextToken();
21406c3fb27SDimitry Andric     if (eof())
21506c3fb27SDimitry Andric       return &FakeEOF;
21606c3fb27SDimitry Andric     return Token;
21706c3fb27SDimitry Andric   }
21806c3fb27SDimitry Andric 
21906c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
22006c3fb27SDimitry Andric     return PreviousTokenSource->getPreviousToken();
22106c3fb27SDimitry Andric   }
22206c3fb27SDimitry Andric 
22306c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment) override {
22406c3fb27SDimitry Andric     if (eof())
22506c3fb27SDimitry Andric       return &FakeEOF;
22606c3fb27SDimitry Andric     return PreviousTokenSource->peekNextToken(SkipComment);
22706c3fb27SDimitry Andric   }
22806c3fb27SDimitry Andric 
22906c3fb27SDimitry Andric   bool isEOF() override { return PreviousTokenSource->isEOF(); }
23006c3fb27SDimitry Andric 
23106c3fb27SDimitry Andric   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
23206c3fb27SDimitry Andric 
23306c3fb27SDimitry Andric   FormatToken *setPosition(unsigned Position) override {
23406c3fb27SDimitry Andric     PreviousToken = nullptr;
23506c3fb27SDimitry Andric     Token = PreviousTokenSource->setPosition(Position);
23606c3fb27SDimitry Andric     return Token;
23706c3fb27SDimitry Andric   }
23806c3fb27SDimitry Andric 
23906c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
24006c3fb27SDimitry Andric     llvm_unreachable("Cannot insert tokens while parsing a macro.");
24106c3fb27SDimitry Andric     return nullptr;
24206c3fb27SDimitry Andric   }
24306c3fb27SDimitry Andric 
24406c3fb27SDimitry Andric private:
24506c3fb27SDimitry Andric   bool eof() {
24606c3fb27SDimitry Andric     return Token && Token->HasUnescapedNewline &&
24706c3fb27SDimitry Andric            !continuesLineComment(*Token, PreviousToken,
24806c3fb27SDimitry Andric                                  /*MinColumnToken=*/PreviousToken);
24906c3fb27SDimitry Andric   }
25006c3fb27SDimitry Andric 
25106c3fb27SDimitry Andric   FormatToken FakeEOF;
25206c3fb27SDimitry Andric   UnwrappedLine &Line;
25306c3fb27SDimitry Andric   FormatTokenSource *&TokenSource;
25406c3fb27SDimitry Andric   FormatToken *&ResetToken;
25506c3fb27SDimitry Andric   unsigned PreviousLineLevel;
25606c3fb27SDimitry Andric   FormatTokenSource *PreviousTokenSource;
25706c3fb27SDimitry Andric 
25806c3fb27SDimitry Andric   FormatToken *Token;
25906c3fb27SDimitry Andric   FormatToken *PreviousToken;
26006c3fb27SDimitry Andric };
26106c3fb27SDimitry Andric 
26206c3fb27SDimitry Andric } // namespace format
26306c3fb27SDimitry Andric } // namespace clang
26406c3fb27SDimitry Andric 
26506c3fb27SDimitry Andric #undef DEBUG_TYPE
26606c3fb27SDimitry Andric 
26706c3fb27SDimitry Andric #endif
268