xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/FormatTokenSource.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric ///
906c3fb27SDimitry Andric /// \file
1006c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token
1106c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream.
1206c3fb27SDimitry Andric ///
1306c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
1406c3fb27SDimitry Andric 
1506c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
1606c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
1706c3fb27SDimitry Andric 
1806c3fb27SDimitry Andric #include "UnwrappedLineParser.h"
1906c3fb27SDimitry Andric 
2006c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source"
2106c3fb27SDimitry Andric 
2206c3fb27SDimitry Andric namespace clang {
2306c3fb27SDimitry Andric namespace format {
2406c3fb27SDimitry Andric 
2506c3fb27SDimitry Andric // Navigate a token stream.
2606c3fb27SDimitry Andric //
2706c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token
2806c3fb27SDimitry Andric // stream, as well as inserting new tokens.
2906c3fb27SDimitry Andric class FormatTokenSource {
3006c3fb27SDimitry Andric public:
3106c3fb27SDimitry Andric   virtual ~FormatTokenSource() {}
3206c3fb27SDimitry Andric 
3306c3fb27SDimitry Andric   // Returns the next token in the token stream.
3406c3fb27SDimitry Andric   virtual FormatToken *getNextToken() = 0;
3506c3fb27SDimitry Andric 
3606c3fb27SDimitry Andric   // Returns the token preceding the token returned by the last call to
3706c3fb27SDimitry Andric   // getNextToken() in the token stream, or nullptr if no such token exists.
3806c3fb27SDimitry Andric   //
3906c3fb27SDimitry Andric   // Must not be called directly at the position directly after insertTokens()
4006c3fb27SDimitry Andric   // is called.
4106c3fb27SDimitry Andric   virtual FormatToken *getPreviousToken() = 0;
4206c3fb27SDimitry Andric 
4306c3fb27SDimitry Andric   // Returns the token that would be returned by the next call to
4406c3fb27SDimitry Andric   // getNextToken().
4506c3fb27SDimitry Andric   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
4606c3fb27SDimitry Andric 
4706c3fb27SDimitry Andric   // Returns whether we are at the end of the file.
4806c3fb27SDimitry Andric   // This can be different from whether getNextToken() returned an eof token
4906c3fb27SDimitry Andric   // when the FormatTokenSource is a view on a part of the token stream.
5006c3fb27SDimitry Andric   virtual bool isEOF() = 0;
5106c3fb27SDimitry Andric 
5206c3fb27SDimitry Andric   // Gets the current position in the token stream, to be used by setPosition().
5306c3fb27SDimitry Andric   //
5406c3fb27SDimitry Andric   // Note that the value of the position is not meaningful, and specifically
5506c3fb27SDimitry Andric   // should not be used to get relative token positions.
5606c3fb27SDimitry Andric   virtual unsigned getPosition() = 0;
5706c3fb27SDimitry Andric 
5806c3fb27SDimitry Andric   // Resets the token stream to the state it was in when getPosition() returned
5906c3fb27SDimitry Andric   // Position, and return the token at that position in the stream.
6006c3fb27SDimitry Andric   virtual FormatToken *setPosition(unsigned Position) = 0;
6106c3fb27SDimitry Andric 
6206c3fb27SDimitry Andric   // Insert the given tokens before the current position.
6306c3fb27SDimitry Andric   // Returns the first token in \c Tokens.
6406c3fb27SDimitry Andric   // The next returned token will be the second token in \c Tokens.
6506c3fb27SDimitry Andric   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
6606c3fb27SDimitry Andric   // the next token will be the last token returned by getNextToken();
6706c3fb27SDimitry Andric   //
6806c3fb27SDimitry Andric   // For example, given the token sequence 'a1 a2':
6906c3fb27SDimitry Andric   // getNextToken() -> a1
7006c3fb27SDimitry Andric   // insertTokens('b1 b2') -> b1
7106c3fb27SDimitry Andric   // getNextToken() -> b2
7206c3fb27SDimitry Andric   // getNextToken() -> a1
7306c3fb27SDimitry Andric   // getNextToken() -> a2
7406c3fb27SDimitry Andric   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
75*0fca6ea1SDimitry Andric 
76*0fca6ea1SDimitry Andric   [[nodiscard]] FormatToken *getNextNonComment() {
77*0fca6ea1SDimitry Andric     FormatToken *Tok;
78*0fca6ea1SDimitry Andric     do {
79*0fca6ea1SDimitry Andric       Tok = getNextToken();
80*0fca6ea1SDimitry Andric       assert(Tok);
81*0fca6ea1SDimitry Andric     } while (Tok->is(tok::comment));
82*0fca6ea1SDimitry Andric     return Tok;
83*0fca6ea1SDimitry Andric   }
8406c3fb27SDimitry Andric };
8506c3fb27SDimitry Andric 
8606c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource {
8706c3fb27SDimitry Andric public:
8806c3fb27SDimitry Andric   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
8906c3fb27SDimitry Andric       : Tokens(Tokens), Position(-1) {}
9006c3fb27SDimitry Andric 
9106c3fb27SDimitry Andric   FormatToken *getNextToken() override {
9206c3fb27SDimitry Andric     if (Position >= 0 && isEOF()) {
9306c3fb27SDimitry Andric       LLVM_DEBUG({
9406c3fb27SDimitry Andric         llvm::dbgs() << "Next ";
9506c3fb27SDimitry Andric         dbgToken(Position);
9606c3fb27SDimitry Andric       });
9706c3fb27SDimitry Andric       return Tokens[Position];
9806c3fb27SDimitry Andric     }
9906c3fb27SDimitry Andric     Position = successor(Position);
10006c3fb27SDimitry Andric     LLVM_DEBUG({
10106c3fb27SDimitry Andric       llvm::dbgs() << "Next ";
10206c3fb27SDimitry Andric       dbgToken(Position);
10306c3fb27SDimitry Andric     });
10406c3fb27SDimitry Andric     return Tokens[Position];
10506c3fb27SDimitry Andric   }
10606c3fb27SDimitry Andric 
10706c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
1085f757f3fSDimitry Andric     assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
10906c3fb27SDimitry Andric     return Position > 0 ? Tokens[Position - 1] : nullptr;
11006c3fb27SDimitry Andric   }
11106c3fb27SDimitry Andric 
11206c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment = false) override {
11306c3fb27SDimitry Andric     if (isEOF())
11406c3fb27SDimitry Andric       return Tokens[Position];
11506c3fb27SDimitry Andric     int Next = successor(Position);
11606c3fb27SDimitry Andric     if (SkipComment)
11706c3fb27SDimitry Andric       while (Tokens[Next]->is(tok::comment))
11806c3fb27SDimitry Andric         Next = successor(Next);
11906c3fb27SDimitry Andric     LLVM_DEBUG({
12006c3fb27SDimitry Andric       llvm::dbgs() << "Peeking ";
12106c3fb27SDimitry Andric       dbgToken(Next);
12206c3fb27SDimitry Andric     });
12306c3fb27SDimitry Andric     return Tokens[Next];
12406c3fb27SDimitry Andric   }
12506c3fb27SDimitry Andric 
12606c3fb27SDimitry Andric   bool isEOF() override {
12706c3fb27SDimitry Andric     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
12806c3fb27SDimitry Andric   }
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric   unsigned getPosition() override {
13106c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
13206c3fb27SDimitry Andric     assert(Position >= 0);
13306c3fb27SDimitry Andric     return Position;
13406c3fb27SDimitry Andric   }
13506c3fb27SDimitry Andric 
13606c3fb27SDimitry Andric   FormatToken *setPosition(unsigned P) override {
13706c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
13806c3fb27SDimitry Andric     Position = P;
13906c3fb27SDimitry Andric     return Tokens[Position];
14006c3fb27SDimitry Andric   }
14106c3fb27SDimitry Andric 
14206c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
14306c3fb27SDimitry Andric     assert(Position != -1);
14406c3fb27SDimitry Andric     assert((*New.rbegin())->Tok.is(tok::eof));
14506c3fb27SDimitry Andric     int Next = Tokens.size();
14606c3fb27SDimitry Andric     Tokens.append(New.begin(), New.end());
14706c3fb27SDimitry Andric     LLVM_DEBUG({
14806c3fb27SDimitry Andric       llvm::dbgs() << "Inserting:\n";
14906c3fb27SDimitry Andric       for (int I = Next, E = Tokens.size(); I != E; ++I)
15006c3fb27SDimitry Andric         dbgToken(I, "  ");
15106c3fb27SDimitry Andric       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
15206c3fb27SDimitry Andric                    << Position << "\n";
15306c3fb27SDimitry Andric     });
15406c3fb27SDimitry Andric     Jumps[Tokens.size() - 1] = Position;
15506c3fb27SDimitry Andric     Position = Next;
15606c3fb27SDimitry Andric     LLVM_DEBUG({
15706c3fb27SDimitry Andric       llvm::dbgs() << "At inserted token ";
15806c3fb27SDimitry Andric       dbgToken(Position);
15906c3fb27SDimitry Andric     });
16006c3fb27SDimitry Andric     return Tokens[Position];
16106c3fb27SDimitry Andric   }
16206c3fb27SDimitry Andric 
16306c3fb27SDimitry Andric   void reset() { Position = -1; }
16406c3fb27SDimitry Andric 
16506c3fb27SDimitry Andric private:
16606c3fb27SDimitry Andric   int successor(int Current) const {
16706c3fb27SDimitry Andric     int Next = Current + 1;
16806c3fb27SDimitry Andric     auto it = Jumps.find(Next);
16906c3fb27SDimitry Andric     if (it != Jumps.end()) {
17006c3fb27SDimitry Andric       Next = it->second;
17106c3fb27SDimitry Andric       assert(!Jumps.contains(Next));
17206c3fb27SDimitry Andric     }
17306c3fb27SDimitry Andric     return Next;
17406c3fb27SDimitry Andric   }
17506c3fb27SDimitry Andric 
176*0fca6ea1SDimitry Andric   void dbgToken(int Position, StringRef Indent = "") {
17706c3fb27SDimitry Andric     FormatToken *Tok = Tokens[Position];
17806c3fb27SDimitry Andric     llvm::dbgs() << Indent << "[" << Position
17906c3fb27SDimitry Andric                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
18006c3fb27SDimitry Andric                  << ", Macro: " << !!Tok->MacroCtx << "\n";
18106c3fb27SDimitry Andric   }
18206c3fb27SDimitry Andric 
18306c3fb27SDimitry Andric   SmallVector<FormatToken *> Tokens;
18406c3fb27SDimitry Andric   int Position;
18506c3fb27SDimitry Andric 
18606c3fb27SDimitry Andric   // Maps from position a to position b, so that when we reach a, the token
18706c3fb27SDimitry Andric   // stream continues at position b instead.
18806c3fb27SDimitry Andric   llvm::DenseMap<int, int> Jumps;
18906c3fb27SDimitry Andric };
19006c3fb27SDimitry Andric 
19106c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource {
19206c3fb27SDimitry Andric public:
19306c3fb27SDimitry Andric   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
19406c3fb27SDimitry Andric                    FormatToken *&ResetToken)
19506c3fb27SDimitry Andric       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
19606c3fb27SDimitry Andric         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
19706c3fb27SDimitry Andric         Token(nullptr), PreviousToken(nullptr) {
19806c3fb27SDimitry Andric     FakeEOF.Tok.startToken();
19906c3fb27SDimitry Andric     FakeEOF.Tok.setKind(tok::eof);
20006c3fb27SDimitry Andric     TokenSource = this;
20106c3fb27SDimitry Andric     Line.Level = 0;
20206c3fb27SDimitry Andric     Line.InPPDirective = true;
20306c3fb27SDimitry Andric     // InMacroBody gets set after the `#define x` part.
20406c3fb27SDimitry Andric   }
20506c3fb27SDimitry Andric 
20606c3fb27SDimitry Andric   ~ScopedMacroState() override {
20706c3fb27SDimitry Andric     TokenSource = PreviousTokenSource;
20806c3fb27SDimitry Andric     ResetToken = Token;
20906c3fb27SDimitry Andric     Line.InPPDirective = false;
21006c3fb27SDimitry Andric     Line.InMacroBody = false;
21106c3fb27SDimitry Andric     Line.Level = PreviousLineLevel;
21206c3fb27SDimitry Andric   }
21306c3fb27SDimitry Andric 
21406c3fb27SDimitry Andric   FormatToken *getNextToken() override {
21506c3fb27SDimitry Andric     // The \c UnwrappedLineParser guards against this by never calling
21606c3fb27SDimitry Andric     // \c getNextToken() after it has encountered the first eof token.
21706c3fb27SDimitry Andric     assert(!eof());
21806c3fb27SDimitry Andric     PreviousToken = Token;
21906c3fb27SDimitry Andric     Token = PreviousTokenSource->getNextToken();
22006c3fb27SDimitry Andric     if (eof())
22106c3fb27SDimitry Andric       return &FakeEOF;
22206c3fb27SDimitry Andric     return Token;
22306c3fb27SDimitry Andric   }
22406c3fb27SDimitry Andric 
22506c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
22606c3fb27SDimitry Andric     return PreviousTokenSource->getPreviousToken();
22706c3fb27SDimitry Andric   }
22806c3fb27SDimitry Andric 
22906c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment) override {
23006c3fb27SDimitry Andric     if (eof())
23106c3fb27SDimitry Andric       return &FakeEOF;
23206c3fb27SDimitry Andric     return PreviousTokenSource->peekNextToken(SkipComment);
23306c3fb27SDimitry Andric   }
23406c3fb27SDimitry Andric 
23506c3fb27SDimitry Andric   bool isEOF() override { return PreviousTokenSource->isEOF(); }
23606c3fb27SDimitry Andric 
23706c3fb27SDimitry Andric   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
23806c3fb27SDimitry Andric 
23906c3fb27SDimitry Andric   FormatToken *setPosition(unsigned Position) override {
24006c3fb27SDimitry Andric     PreviousToken = nullptr;
24106c3fb27SDimitry Andric     Token = PreviousTokenSource->setPosition(Position);
24206c3fb27SDimitry Andric     return Token;
24306c3fb27SDimitry Andric   }
24406c3fb27SDimitry Andric 
24506c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
24606c3fb27SDimitry Andric     llvm_unreachable("Cannot insert tokens while parsing a macro.");
24706c3fb27SDimitry Andric     return nullptr;
24806c3fb27SDimitry Andric   }
24906c3fb27SDimitry Andric 
25006c3fb27SDimitry Andric private:
25106c3fb27SDimitry Andric   bool eof() {
25206c3fb27SDimitry Andric     return Token && Token->HasUnescapedNewline &&
25306c3fb27SDimitry Andric            !continuesLineComment(*Token, PreviousToken,
25406c3fb27SDimitry Andric                                  /*MinColumnToken=*/PreviousToken);
25506c3fb27SDimitry Andric   }
25606c3fb27SDimitry Andric 
25706c3fb27SDimitry Andric   FormatToken FakeEOF;
25806c3fb27SDimitry Andric   UnwrappedLine &Line;
25906c3fb27SDimitry Andric   FormatTokenSource *&TokenSource;
26006c3fb27SDimitry Andric   FormatToken *&ResetToken;
26106c3fb27SDimitry Andric   unsigned PreviousLineLevel;
26206c3fb27SDimitry Andric   FormatTokenSource *PreviousTokenSource;
26306c3fb27SDimitry Andric 
26406c3fb27SDimitry Andric   FormatToken *Token;
26506c3fb27SDimitry Andric   FormatToken *PreviousToken;
26606c3fb27SDimitry Andric };
26706c3fb27SDimitry Andric 
26806c3fb27SDimitry Andric } // namespace format
26906c3fb27SDimitry Andric } // namespace clang
27006c3fb27SDimitry Andric 
27106c3fb27SDimitry Andric #undef DEBUG_TYPE
27206c3fb27SDimitry Andric 
27306c3fb27SDimitry Andric #endif
274