xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/FormatTokenSource.h (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2*06c3fb27SDimitry Andric //
3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*06c3fb27SDimitry Andric //
7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
8*06c3fb27SDimitry Andric ///
9*06c3fb27SDimitry Andric /// \file
10*06c3fb27SDimitry Andric /// This file defines the \c FormatTokenSource interface, which provides a token
11*06c3fb27SDimitry Andric /// stream as well as the ability to manipulate the token stream.
12*06c3fb27SDimitry Andric ///
13*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
14*06c3fb27SDimitry Andric 
15*06c3fb27SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16*06c3fb27SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17*06c3fb27SDimitry Andric 
18*06c3fb27SDimitry Andric #include "FormatToken.h"
19*06c3fb27SDimitry Andric #include "UnwrappedLineParser.h"
20*06c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
21*06c3fb27SDimitry Andric #include <cstddef>
22*06c3fb27SDimitry Andric 
23*06c3fb27SDimitry Andric #define DEBUG_TYPE "format-token-source"
24*06c3fb27SDimitry Andric 
25*06c3fb27SDimitry Andric namespace clang {
26*06c3fb27SDimitry Andric namespace format {
27*06c3fb27SDimitry Andric 
28*06c3fb27SDimitry Andric // Navigate a token stream.
29*06c3fb27SDimitry Andric //
30*06c3fb27SDimitry Andric // Enables traversal of a token stream, resetting the position in a token
31*06c3fb27SDimitry Andric // stream, as well as inserting new tokens.
32*06c3fb27SDimitry Andric class FormatTokenSource {
33*06c3fb27SDimitry Andric public:
34*06c3fb27SDimitry Andric   virtual ~FormatTokenSource() {}
35*06c3fb27SDimitry Andric 
36*06c3fb27SDimitry Andric   // Returns the next token in the token stream.
37*06c3fb27SDimitry Andric   virtual FormatToken *getNextToken() = 0;
38*06c3fb27SDimitry Andric 
39*06c3fb27SDimitry Andric   // Returns the token preceding the token returned by the last call to
40*06c3fb27SDimitry Andric   // getNextToken() in the token stream, or nullptr if no such token exists.
41*06c3fb27SDimitry Andric   //
42*06c3fb27SDimitry Andric   // Must not be called directly at the position directly after insertTokens()
43*06c3fb27SDimitry Andric   // is called.
44*06c3fb27SDimitry Andric   virtual FormatToken *getPreviousToken() = 0;
45*06c3fb27SDimitry Andric 
46*06c3fb27SDimitry Andric   // Returns the token that would be returned by the next call to
47*06c3fb27SDimitry Andric   // getNextToken().
48*06c3fb27SDimitry Andric   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
49*06c3fb27SDimitry Andric 
50*06c3fb27SDimitry Andric   // Returns whether we are at the end of the file.
51*06c3fb27SDimitry Andric   // This can be different from whether getNextToken() returned an eof token
52*06c3fb27SDimitry Andric   // when the FormatTokenSource is a view on a part of the token stream.
53*06c3fb27SDimitry Andric   virtual bool isEOF() = 0;
54*06c3fb27SDimitry Andric 
55*06c3fb27SDimitry Andric   // Gets the current position in the token stream, to be used by setPosition().
56*06c3fb27SDimitry Andric   //
57*06c3fb27SDimitry Andric   // Note that the value of the position is not meaningful, and specifically
58*06c3fb27SDimitry Andric   // should not be used to get relative token positions.
59*06c3fb27SDimitry Andric   virtual unsigned getPosition() = 0;
60*06c3fb27SDimitry Andric 
61*06c3fb27SDimitry Andric   // Resets the token stream to the state it was in when getPosition() returned
62*06c3fb27SDimitry Andric   // Position, and return the token at that position in the stream.
63*06c3fb27SDimitry Andric   virtual FormatToken *setPosition(unsigned Position) = 0;
64*06c3fb27SDimitry Andric 
65*06c3fb27SDimitry Andric   // Insert the given tokens before the current position.
66*06c3fb27SDimitry Andric   // Returns the first token in \c Tokens.
67*06c3fb27SDimitry Andric   // The next returned token will be the second token in \c Tokens.
68*06c3fb27SDimitry Andric   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
69*06c3fb27SDimitry Andric   // the next token will be the last token returned by getNextToken();
70*06c3fb27SDimitry Andric   //
71*06c3fb27SDimitry Andric   // For example, given the token sequence 'a1 a2':
72*06c3fb27SDimitry Andric   // getNextToken() -> a1
73*06c3fb27SDimitry Andric   // insertTokens('b1 b2') -> b1
74*06c3fb27SDimitry Andric   // getNextToken() -> b2
75*06c3fb27SDimitry Andric   // getNextToken() -> a1
76*06c3fb27SDimitry Andric   // getNextToken() -> a2
77*06c3fb27SDimitry Andric   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
78*06c3fb27SDimitry Andric };
79*06c3fb27SDimitry Andric 
80*06c3fb27SDimitry Andric class IndexedTokenSource : public FormatTokenSource {
81*06c3fb27SDimitry Andric public:
82*06c3fb27SDimitry Andric   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
83*06c3fb27SDimitry Andric       : Tokens(Tokens), Position(-1) {}
84*06c3fb27SDimitry Andric 
85*06c3fb27SDimitry Andric   FormatToken *getNextToken() override {
86*06c3fb27SDimitry Andric     if (Position >= 0 && isEOF()) {
87*06c3fb27SDimitry Andric       LLVM_DEBUG({
88*06c3fb27SDimitry Andric         llvm::dbgs() << "Next ";
89*06c3fb27SDimitry Andric         dbgToken(Position);
90*06c3fb27SDimitry Andric       });
91*06c3fb27SDimitry Andric       return Tokens[Position];
92*06c3fb27SDimitry Andric     }
93*06c3fb27SDimitry Andric     Position = successor(Position);
94*06c3fb27SDimitry Andric     LLVM_DEBUG({
95*06c3fb27SDimitry Andric       llvm::dbgs() << "Next ";
96*06c3fb27SDimitry Andric       dbgToken(Position);
97*06c3fb27SDimitry Andric     });
98*06c3fb27SDimitry Andric     return Tokens[Position];
99*06c3fb27SDimitry Andric   }
100*06c3fb27SDimitry Andric 
101*06c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
102*06c3fb27SDimitry Andric     assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof));
103*06c3fb27SDimitry Andric     return Position > 0 ? Tokens[Position - 1] : nullptr;
104*06c3fb27SDimitry Andric   }
105*06c3fb27SDimitry Andric 
106*06c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment = false) override {
107*06c3fb27SDimitry Andric     if (isEOF())
108*06c3fb27SDimitry Andric       return Tokens[Position];
109*06c3fb27SDimitry Andric     int Next = successor(Position);
110*06c3fb27SDimitry Andric     if (SkipComment)
111*06c3fb27SDimitry Andric       while (Tokens[Next]->is(tok::comment))
112*06c3fb27SDimitry Andric         Next = successor(Next);
113*06c3fb27SDimitry Andric     LLVM_DEBUG({
114*06c3fb27SDimitry Andric       llvm::dbgs() << "Peeking ";
115*06c3fb27SDimitry Andric       dbgToken(Next);
116*06c3fb27SDimitry Andric     });
117*06c3fb27SDimitry Andric     return Tokens[Next];
118*06c3fb27SDimitry Andric   }
119*06c3fb27SDimitry Andric 
120*06c3fb27SDimitry Andric   bool isEOF() override {
121*06c3fb27SDimitry Andric     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
122*06c3fb27SDimitry Andric   }
123*06c3fb27SDimitry Andric 
124*06c3fb27SDimitry Andric   unsigned getPosition() override {
125*06c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
126*06c3fb27SDimitry Andric     assert(Position >= 0);
127*06c3fb27SDimitry Andric     return Position;
128*06c3fb27SDimitry Andric   }
129*06c3fb27SDimitry Andric 
130*06c3fb27SDimitry Andric   FormatToken *setPosition(unsigned P) override {
131*06c3fb27SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
132*06c3fb27SDimitry Andric     Position = P;
133*06c3fb27SDimitry Andric     return Tokens[Position];
134*06c3fb27SDimitry Andric   }
135*06c3fb27SDimitry Andric 
136*06c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
137*06c3fb27SDimitry Andric     assert(Position != -1);
138*06c3fb27SDimitry Andric     assert((*New.rbegin())->Tok.is(tok::eof));
139*06c3fb27SDimitry Andric     int Next = Tokens.size();
140*06c3fb27SDimitry Andric     Tokens.append(New.begin(), New.end());
141*06c3fb27SDimitry Andric     LLVM_DEBUG({
142*06c3fb27SDimitry Andric       llvm::dbgs() << "Inserting:\n";
143*06c3fb27SDimitry Andric       for (int I = Next, E = Tokens.size(); I != E; ++I)
144*06c3fb27SDimitry Andric         dbgToken(I, "  ");
145*06c3fb27SDimitry Andric       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
146*06c3fb27SDimitry Andric                    << Position << "\n";
147*06c3fb27SDimitry Andric     });
148*06c3fb27SDimitry Andric     Jumps[Tokens.size() - 1] = Position;
149*06c3fb27SDimitry Andric     Position = Next;
150*06c3fb27SDimitry Andric     LLVM_DEBUG({
151*06c3fb27SDimitry Andric       llvm::dbgs() << "At inserted token ";
152*06c3fb27SDimitry Andric       dbgToken(Position);
153*06c3fb27SDimitry Andric     });
154*06c3fb27SDimitry Andric     return Tokens[Position];
155*06c3fb27SDimitry Andric   }
156*06c3fb27SDimitry Andric 
157*06c3fb27SDimitry Andric   void reset() { Position = -1; }
158*06c3fb27SDimitry Andric 
159*06c3fb27SDimitry Andric private:
160*06c3fb27SDimitry Andric   int successor(int Current) const {
161*06c3fb27SDimitry Andric     int Next = Current + 1;
162*06c3fb27SDimitry Andric     auto it = Jumps.find(Next);
163*06c3fb27SDimitry Andric     if (it != Jumps.end()) {
164*06c3fb27SDimitry Andric       Next = it->second;
165*06c3fb27SDimitry Andric       assert(!Jumps.contains(Next));
166*06c3fb27SDimitry Andric     }
167*06c3fb27SDimitry Andric     return Next;
168*06c3fb27SDimitry Andric   }
169*06c3fb27SDimitry Andric 
170*06c3fb27SDimitry Andric   void dbgToken(int Position, llvm::StringRef Indent = "") {
171*06c3fb27SDimitry Andric     FormatToken *Tok = Tokens[Position];
172*06c3fb27SDimitry Andric     llvm::dbgs() << Indent << "[" << Position
173*06c3fb27SDimitry Andric                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
174*06c3fb27SDimitry Andric                  << ", Macro: " << !!Tok->MacroCtx << "\n";
175*06c3fb27SDimitry Andric   }
176*06c3fb27SDimitry Andric 
177*06c3fb27SDimitry Andric   SmallVector<FormatToken *> Tokens;
178*06c3fb27SDimitry Andric   int Position;
179*06c3fb27SDimitry Andric 
180*06c3fb27SDimitry Andric   // Maps from position a to position b, so that when we reach a, the token
181*06c3fb27SDimitry Andric   // stream continues at position b instead.
182*06c3fb27SDimitry Andric   llvm::DenseMap<int, int> Jumps;
183*06c3fb27SDimitry Andric };
184*06c3fb27SDimitry Andric 
185*06c3fb27SDimitry Andric class ScopedMacroState : public FormatTokenSource {
186*06c3fb27SDimitry Andric public:
187*06c3fb27SDimitry Andric   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
188*06c3fb27SDimitry Andric                    FormatToken *&ResetToken)
189*06c3fb27SDimitry Andric       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
190*06c3fb27SDimitry Andric         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
191*06c3fb27SDimitry Andric         Token(nullptr), PreviousToken(nullptr) {
192*06c3fb27SDimitry Andric     FakeEOF.Tok.startToken();
193*06c3fb27SDimitry Andric     FakeEOF.Tok.setKind(tok::eof);
194*06c3fb27SDimitry Andric     TokenSource = this;
195*06c3fb27SDimitry Andric     Line.Level = 0;
196*06c3fb27SDimitry Andric     Line.InPPDirective = true;
197*06c3fb27SDimitry Andric     // InMacroBody gets set after the `#define x` part.
198*06c3fb27SDimitry Andric   }
199*06c3fb27SDimitry Andric 
200*06c3fb27SDimitry Andric   ~ScopedMacroState() override {
201*06c3fb27SDimitry Andric     TokenSource = PreviousTokenSource;
202*06c3fb27SDimitry Andric     ResetToken = Token;
203*06c3fb27SDimitry Andric     Line.InPPDirective = false;
204*06c3fb27SDimitry Andric     Line.InMacroBody = false;
205*06c3fb27SDimitry Andric     Line.Level = PreviousLineLevel;
206*06c3fb27SDimitry Andric   }
207*06c3fb27SDimitry Andric 
208*06c3fb27SDimitry Andric   FormatToken *getNextToken() override {
209*06c3fb27SDimitry Andric     // The \c UnwrappedLineParser guards against this by never calling
210*06c3fb27SDimitry Andric     // \c getNextToken() after it has encountered the first eof token.
211*06c3fb27SDimitry Andric     assert(!eof());
212*06c3fb27SDimitry Andric     PreviousToken = Token;
213*06c3fb27SDimitry Andric     Token = PreviousTokenSource->getNextToken();
214*06c3fb27SDimitry Andric     if (eof())
215*06c3fb27SDimitry Andric       return &FakeEOF;
216*06c3fb27SDimitry Andric     return Token;
217*06c3fb27SDimitry Andric   }
218*06c3fb27SDimitry Andric 
219*06c3fb27SDimitry Andric   FormatToken *getPreviousToken() override {
220*06c3fb27SDimitry Andric     return PreviousTokenSource->getPreviousToken();
221*06c3fb27SDimitry Andric   }
222*06c3fb27SDimitry Andric 
223*06c3fb27SDimitry Andric   FormatToken *peekNextToken(bool SkipComment) override {
224*06c3fb27SDimitry Andric     if (eof())
225*06c3fb27SDimitry Andric       return &FakeEOF;
226*06c3fb27SDimitry Andric     return PreviousTokenSource->peekNextToken(SkipComment);
227*06c3fb27SDimitry Andric   }
228*06c3fb27SDimitry Andric 
229*06c3fb27SDimitry Andric   bool isEOF() override { return PreviousTokenSource->isEOF(); }
230*06c3fb27SDimitry Andric 
231*06c3fb27SDimitry Andric   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
232*06c3fb27SDimitry Andric 
233*06c3fb27SDimitry Andric   FormatToken *setPosition(unsigned Position) override {
234*06c3fb27SDimitry Andric     PreviousToken = nullptr;
235*06c3fb27SDimitry Andric     Token = PreviousTokenSource->setPosition(Position);
236*06c3fb27SDimitry Andric     return Token;
237*06c3fb27SDimitry Andric   }
238*06c3fb27SDimitry Andric 
239*06c3fb27SDimitry Andric   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
240*06c3fb27SDimitry Andric     llvm_unreachable("Cannot insert tokens while parsing a macro.");
241*06c3fb27SDimitry Andric     return nullptr;
242*06c3fb27SDimitry Andric   }
243*06c3fb27SDimitry Andric 
244*06c3fb27SDimitry Andric private:
245*06c3fb27SDimitry Andric   bool eof() {
246*06c3fb27SDimitry Andric     return Token && Token->HasUnescapedNewline &&
247*06c3fb27SDimitry Andric            !continuesLineComment(*Token, PreviousToken,
248*06c3fb27SDimitry Andric                                  /*MinColumnToken=*/PreviousToken);
249*06c3fb27SDimitry Andric   }
250*06c3fb27SDimitry Andric 
251*06c3fb27SDimitry Andric   FormatToken FakeEOF;
252*06c3fb27SDimitry Andric   UnwrappedLine &Line;
253*06c3fb27SDimitry Andric   FormatTokenSource *&TokenSource;
254*06c3fb27SDimitry Andric   FormatToken *&ResetToken;
255*06c3fb27SDimitry Andric   unsigned PreviousLineLevel;
256*06c3fb27SDimitry Andric   FormatTokenSource *PreviousTokenSource;
257*06c3fb27SDimitry Andric 
258*06c3fb27SDimitry Andric   FormatToken *Token;
259*06c3fb27SDimitry Andric   FormatToken *PreviousToken;
260*06c3fb27SDimitry Andric };
261*06c3fb27SDimitry Andric 
262*06c3fb27SDimitry Andric } // namespace format
263*06c3fb27SDimitry Andric } // namespace clang
264*06c3fb27SDimitry Andric 
265*06c3fb27SDimitry Andric #undef DEBUG_TYPE
266*06c3fb27SDimitry Andric 
267*06c3fb27SDimitry Andric #endif
268