xref: /llvm-project/clang/lib/Format/MacroCallReconstructor.cpp (revision b2082a98175b0e5356f23bf21d3dc5b76edea390)
1d6d0dc1fSManuel Klimek //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2d6d0dc1fSManuel Klimek //
3d6d0dc1fSManuel Klimek //                     The LLVM Compiler Infrastructure
4d6d0dc1fSManuel Klimek //
5d6d0dc1fSManuel Klimek // This file is distributed under the University of Illinois Open Source
6d6d0dc1fSManuel Klimek // License. See LICENSE.TXT for details.
7d6d0dc1fSManuel Klimek //
8d6d0dc1fSManuel Klimek //===----------------------------------------------------------------------===//
9d6d0dc1fSManuel Klimek ///
10d6d0dc1fSManuel Klimek /// \file
11d6d0dc1fSManuel Klimek /// This file contains the implementation of MacroCallReconstructor, which fits
12d6d0dc1fSManuel Klimek /// an reconstructed macro call to a parsed set of UnwrappedLines.
13d6d0dc1fSManuel Klimek ///
14d6d0dc1fSManuel Klimek //===----------------------------------------------------------------------===//
15d6d0dc1fSManuel Klimek 
16d6d0dc1fSManuel Klimek #include "Macros.h"
17d6d0dc1fSManuel Klimek 
18d6d0dc1fSManuel Klimek #include "UnwrappedLineParser.h"
19*b2082a98SOwen Pan #include "clang/Basic/TokenKinds.h"
20d6d0dc1fSManuel Klimek #include "llvm/ADT/DenseSet.h"
21*b2082a98SOwen Pan #include "llvm/Support/Debug.h"
22*b2082a98SOwen Pan #include <cassert>
23d6d0dc1fSManuel Klimek 
24d6d0dc1fSManuel Klimek #define DEBUG_TYPE "format-reconstruct"
25d6d0dc1fSManuel Klimek 
26d6d0dc1fSManuel Klimek namespace clang {
27d6d0dc1fSManuel Klimek namespace format {
28d6d0dc1fSManuel Klimek 
29d6d0dc1fSManuel Klimek // Call \p Call for each token in the unwrapped line given, passing
30d6d0dc1fSManuel Klimek // the token, its parent and whether it is the first token in the line.
31d6d0dc1fSManuel Klimek template <typename T>
forEachToken(const UnwrappedLine & Line,const T & Call,FormatToken * Parent=nullptr)32d6d0dc1fSManuel Klimek void forEachToken(const UnwrappedLine &Line, const T &Call,
33d6d0dc1fSManuel Klimek                   FormatToken *Parent = nullptr) {
34d6d0dc1fSManuel Klimek   bool First = true;
35d6d0dc1fSManuel Klimek   for (const auto &N : Line.Tokens) {
36ddb4450aSr4nt     Call(N.Tok, Parent, First, Line.Level);
37d6d0dc1fSManuel Klimek     First = false;
383d621398Sowenca     for (const auto &Child : N.Children)
39d6d0dc1fSManuel Klimek       forEachToken(Child, Call, N.Tok);
40d6d0dc1fSManuel Klimek   }
41d6d0dc1fSManuel Klimek }
42d6d0dc1fSManuel Klimek 
MacroCallReconstructor(unsigned Level,const llvm::DenseMap<FormatToken *,std::unique_ptr<UnwrappedLine>> & ActiveExpansions)43d6d0dc1fSManuel Klimek MacroCallReconstructor::MacroCallReconstructor(
44d6d0dc1fSManuel Klimek     unsigned Level,
45d6d0dc1fSManuel Klimek     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46d6d0dc1fSManuel Klimek         &ActiveExpansions)
47ddb4450aSr4nt     : Result(Level), IdToReconstructed(ActiveExpansions) {
48d6d0dc1fSManuel Klimek   Result.Tokens.push_back(std::make_unique<LineNode>());
49d6d0dc1fSManuel Klimek   ActiveReconstructedLines.push_back(&Result);
50d6d0dc1fSManuel Klimek }
51d6d0dc1fSManuel Klimek 
addLine(const UnwrappedLine & Line)52d6d0dc1fSManuel Klimek void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53d6d0dc1fSManuel Klimek   assert(State != Finalized);
54d6d0dc1fSManuel Klimek   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55ddb4450aSr4nt   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First,
56ddb4450aSr4nt                          unsigned Level) { add(Token, Parent, First, Level); });
57d6d0dc1fSManuel Klimek   assert(InProgress || finished());
58d6d0dc1fSManuel Klimek }
59d6d0dc1fSManuel Klimek 
takeResult()60d6d0dc1fSManuel Klimek UnwrappedLine MacroCallReconstructor::takeResult() && {
61d6d0dc1fSManuel Klimek   finalize();
623d621398Sowenca   assert(Result.Tokens.size() == 1 &&
633d621398Sowenca          Result.Tokens.front()->Children.size() == 1);
64ddb4450aSr4nt   UnwrappedLine Final = createUnwrappedLine(
65ddb4450aSr4nt       *Result.Tokens.front()->Children.front(), Result.Level);
66d6d0dc1fSManuel Klimek   assert(!Final.Tokens.empty());
67d6d0dc1fSManuel Klimek   return Final;
68d6d0dc1fSManuel Klimek }
69d6d0dc1fSManuel Klimek 
70d6d0dc1fSManuel Klimek // Reconstruct the position of the next \p Token, given its parent \p
71d6d0dc1fSManuel Klimek // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
72d6d0dc1fSManuel Klimek // is the first token in a given unwrapped line.
add(FormatToken * Token,FormatToken * ExpandedParent,bool First,unsigned Level)73d6d0dc1fSManuel Klimek void MacroCallReconstructor::add(FormatToken *Token,
74ddb4450aSr4nt                                  FormatToken *ExpandedParent, bool First,
75ddb4450aSr4nt                                  unsigned Level) {
76d6d0dc1fSManuel Klimek   LLVM_DEBUG(
77d6d0dc1fSManuel Klimek       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78d6d0dc1fSManuel Klimek                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79d6d0dc1fSManuel Klimek                    << ", First: " << First << "\n");
80d6d0dc1fSManuel Klimek   // In order to be able to find the correct parent in the reconstructed token
81d6d0dc1fSManuel Klimek   // stream, we need to continue the last open reconstruction until we find the
82d6d0dc1fSManuel Klimek   // given token if it is part of the reconstructed token stream.
83d6d0dc1fSManuel Klimek   //
84d6d0dc1fSManuel Klimek   // Note that hidden tokens can be part of the reconstructed stream in nested
85d6d0dc1fSManuel Klimek   // macro calls.
86d6d0dc1fSManuel Klimek   // For example, given
87d6d0dc1fSManuel Klimek   //   #define C(x, y) x y
88d6d0dc1fSManuel Klimek   //   #define B(x) {x}
89d6d0dc1fSManuel Klimek   // And the call:
90d6d0dc1fSManuel Klimek   //   C(a, B(b))
91d6d0dc1fSManuel Klimek   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92d6d0dc1fSManuel Klimek   // found in the reconstructed token stream of that expansion level.
93d6d0dc1fSManuel Klimek   // In the expanded token stream
94d6d0dc1fSManuel Klimek   //   a {b}
95d6d0dc1fSManuel Klimek   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96d6d0dc1fSManuel Klimek   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97d6d0dc1fSManuel Klimek   // so we later set the spelled token 'b' as a child of the ','.
98d6d0dc1fSManuel Klimek   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99d6d0dc1fSManuel Klimek       (Token->MacroCtx->Role != MR_Hidden ||
100d6d0dc1fSManuel Klimek        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101f44d28f8SManuel Klimek     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102d6d0dc1fSManuel Klimek       First = true;
103d6d0dc1fSManuel Klimek   }
104d6d0dc1fSManuel Klimek 
105ddb4450aSr4nt   prepareParent(ExpandedParent, First, Level);
106d6d0dc1fSManuel Klimek 
107d6d0dc1fSManuel Klimek   if (Token->MacroCtx) {
108d6d0dc1fSManuel Klimek     // If this token was generated by a macro call, add the reconstructed
109d6d0dc1fSManuel Klimek     // equivalent of the token.
110d6d0dc1fSManuel Klimek     reconstruct(Token);
111d6d0dc1fSManuel Klimek   } else {
112d6d0dc1fSManuel Klimek     // Otherwise, we add it to the current line.
113d6d0dc1fSManuel Klimek     appendToken(Token);
114d6d0dc1fSManuel Klimek   }
115d6d0dc1fSManuel Klimek }
116d6d0dc1fSManuel Klimek 
117d6d0dc1fSManuel Klimek // Adjusts the stack of active reconstructed lines so we're ready to push
118d6d0dc1fSManuel Klimek // tokens. The tokens to be pushed are children of ExpandedParent in the
119d6d0dc1fSManuel Klimek // expanded code.
120d6d0dc1fSManuel Klimek //
121d6d0dc1fSManuel Klimek // This may entail:
122d6d0dc1fSManuel Klimek // - creating a new line, if the parent is on the active line
123d6d0dc1fSManuel Klimek // - popping active lines, if the parent is further up the stack
124d6d0dc1fSManuel Klimek //
125d6d0dc1fSManuel Klimek // Postcondition:
126d6d0dc1fSManuel Klimek // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127d6d0dc1fSManuel Klimek // reconstructed replacement token as a parent (when possible) - that is, the
128d6d0dc1fSManuel Klimek // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129d6d0dc1fSManuel Klimek // is the parent of ActiveReconstructedLines.back() in the reconstructed
130d6d0dc1fSManuel Klimek // unwrapped line.
prepareParent(FormatToken * ExpandedParent,bool NewLine,unsigned Level)131d6d0dc1fSManuel Klimek void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132ddb4450aSr4nt                                            bool NewLine, unsigned Level) {
133d6d0dc1fSManuel Klimek   LLVM_DEBUG({
134d6d0dc1fSManuel Klimek     llvm::dbgs() << "ParentMap:\n";
135d6d0dc1fSManuel Klimek     debugParentMap();
136d6d0dc1fSManuel Klimek   });
137d6d0dc1fSManuel Klimek   // We want to find the parent in the new unwrapped line, where the expanded
138d6d0dc1fSManuel Klimek   // parent might have been replaced during reconstruction.
139d6d0dc1fSManuel Klimek   FormatToken *Parent = getParentInResult(ExpandedParent);
140d6d0dc1fSManuel Klimek   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141d6d0dc1fSManuel Klimek                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142d6d0dc1fSManuel Klimek 
143d6d0dc1fSManuel Klimek   FormatToken *OpenMacroParent = nullptr;
144d6d0dc1fSManuel Klimek   if (!MacroCallStructure.empty()) {
145d6d0dc1fSManuel Klimek     // Inside a macro expansion, it is possible to lose track of the correct
146d6d0dc1fSManuel Klimek     // parent - either because it is already popped, for example because it was
147d6d0dc1fSManuel Klimek     // in a different macro argument (e.g. M({, })), or when we work on invalid
148d6d0dc1fSManuel Klimek     // code.
149d6d0dc1fSManuel Klimek     // Thus, we use the innermost macro call's parent as the parent at which
150d6d0dc1fSManuel Klimek     // we stop; this allows us to stay within the macro expansion and keeps
151d6d0dc1fSManuel Klimek     // any problems confined to the extent of the macro call.
152d6d0dc1fSManuel Klimek     OpenMacroParent =
153d6d0dc1fSManuel Klimek         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154d6d0dc1fSManuel Klimek     LLVM_DEBUG(llvm::dbgs()
155d6d0dc1fSManuel Klimek                << "MacroCallLParen: "
156d6d0dc1fSManuel Klimek                << MacroCallStructure.back().MacroCallLParen->TokenText
157d6d0dc1fSManuel Klimek                << ", OpenMacroParent: "
158d6d0dc1fSManuel Klimek                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159d6d0dc1fSManuel Klimek                << "\n");
160d6d0dc1fSManuel Klimek   }
161d6d0dc1fSManuel Klimek   if (NewLine ||
162d6d0dc1fSManuel Klimek       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163d6d0dc1fSManuel Klimek        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164d6d0dc1fSManuel Klimek     // If we are at the first token in a new line, we want to also
165d6d0dc1fSManuel Klimek     // create a new line in the resulting reconstructed unwrapped line.
166d6d0dc1fSManuel Klimek     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167d6d0dc1fSManuel Klimek            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168d6d0dc1fSManuel Klimek             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169d6d0dc1fSManuel Klimek                 OpenMacroParent)) {
170d6d0dc1fSManuel Klimek       ActiveReconstructedLines.pop_back();
171d6d0dc1fSManuel Klimek       assert(!ActiveReconstructedLines.empty());
172d6d0dc1fSManuel Klimek     }
173d6d0dc1fSManuel Klimek     assert(!ActiveReconstructedLines.empty());
174d6d0dc1fSManuel Klimek     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175ddb4450aSr4nt         std::make_unique<ReconstructedLine>(Level));
176d6d0dc1fSManuel Klimek     ActiveReconstructedLines.push_back(
177d6d0dc1fSManuel Klimek         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178d6d0dc1fSManuel Klimek   } else if (parentLine().Tokens.back()->Tok != Parent) {
179d6d0dc1fSManuel Klimek     // If we're not the first token in a new line, pop lines until we find
180d6d0dc1fSManuel Klimek     // the child of \c Parent in the stack.
181d6d0dc1fSManuel Klimek     while (Parent != parentLine().Tokens.back()->Tok &&
182d6d0dc1fSManuel Klimek            parentLine().Tokens.back()->Tok &&
183d6d0dc1fSManuel Klimek            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184d6d0dc1fSManuel Klimek       ActiveReconstructedLines.pop_back();
185d6d0dc1fSManuel Klimek       assert(!ActiveReconstructedLines.empty());
186d6d0dc1fSManuel Klimek     }
187d6d0dc1fSManuel Klimek   }
188d6d0dc1fSManuel Klimek   assert(!ActiveReconstructedLines.empty());
189d6d0dc1fSManuel Klimek }
190d6d0dc1fSManuel Klimek 
191d6d0dc1fSManuel Klimek // For a given \p Parent in the incoming expanded token stream, find the
192d6d0dc1fSManuel Klimek // corresponding parent in the output.
getParentInResult(FormatToken * Parent)193d6d0dc1fSManuel Klimek FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194d6d0dc1fSManuel Klimek   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195d6d0dc1fSManuel Klimek   if (!Mapped)
196d6d0dc1fSManuel Klimek     return Parent;
1973d621398Sowenca   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198d6d0dc1fSManuel Klimek     Parent = Mapped;
199d6d0dc1fSManuel Klimek   // If we use a different token than the parent in the expanded token stream
200d6d0dc1fSManuel Klimek   // as parent, mark it as a special parent, so the formatting code knows it
201d6d0dc1fSManuel Klimek   // needs to have its children formatted.
202d6d0dc1fSManuel Klimek   Parent->MacroParent = true;
203d6d0dc1fSManuel Klimek   return Parent;
204d6d0dc1fSManuel Klimek }
205d6d0dc1fSManuel Klimek 
206d6d0dc1fSManuel Klimek // Reconstruct a \p Token that was expanded from a macro call.
reconstruct(FormatToken * Token)207d6d0dc1fSManuel Klimek void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208d6d0dc1fSManuel Klimek   assert(Token->MacroCtx);
209d6d0dc1fSManuel Klimek   // A single token can be the only result of a macro call:
210d6d0dc1fSManuel Klimek   // Given: #define ID(x, y) ;
211d6d0dc1fSManuel Klimek   // And the call: ID(<some>, <tokens>)
212d6d0dc1fSManuel Klimek   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213d6d0dc1fSManuel Klimek   if (Token->MacroCtx->StartOfExpansion) {
214d6d0dc1fSManuel Klimek     startReconstruction(Token);
215d6d0dc1fSManuel Klimek     // If the order of tokens in the expanded token stream is not the
216d6d0dc1fSManuel Klimek     // same as the order of tokens in the reconstructed stream, we need
217d6d0dc1fSManuel Klimek     // to reconstruct tokens that arrive later in the stream.
2183d621398Sowenca     if (Token->MacroCtx->Role != MR_Hidden)
219d6d0dc1fSManuel Klimek       reconstructActiveCallUntil(Token);
220d6d0dc1fSManuel Klimek   }
221d6d0dc1fSManuel Klimek   assert(!ActiveExpansions.empty());
222d6d0dc1fSManuel Klimek   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223d6d0dc1fSManuel Klimek     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224d6d0dc1fSManuel Klimek     if (Token->MacroCtx->Role != MR_Hidden) {
225d6d0dc1fSManuel Klimek       // The current token in the reconstructed token stream must be the token
226d6d0dc1fSManuel Klimek       // we're looking for - we either arrive here after startReconstruction,
227d6d0dc1fSManuel Klimek       // which initiates the stream to the first token, or after
228d6d0dc1fSManuel Klimek       // continueReconstructionUntil skipped until the expected token in the
229d6d0dc1fSManuel Klimek       // reconstructed stream at the start of add(...).
230d6d0dc1fSManuel Klimek       assert(ActiveExpansions.back().SpelledI->Tok == Token);
231d6d0dc1fSManuel Klimek       processNextReconstructed();
232d6d0dc1fSManuel Klimek     } else if (!currentLine()->Tokens.empty()) {
233d6d0dc1fSManuel Klimek       // Map all hidden tokens to the last visible token in the output.
234d6d0dc1fSManuel Klimek       // If the hidden token is a parent, we'll use the last visible
235d6d0dc1fSManuel Klimek       // token as the parent of the hidden token's children.
236d6d0dc1fSManuel Klimek       SpelledParentToReconstructedParent[Token] =
237d6d0dc1fSManuel Klimek           currentLine()->Tokens.back()->Tok;
238d6d0dc1fSManuel Klimek     } else {
239d6d0dc1fSManuel Klimek       for (auto I = ActiveReconstructedLines.rbegin(),
240d6d0dc1fSManuel Klimek                 E = ActiveReconstructedLines.rend();
241d6d0dc1fSManuel Klimek            I != E; ++I) {
242d6d0dc1fSManuel Klimek         if (!(*I)->Tokens.empty()) {
243d6d0dc1fSManuel Klimek           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244d6d0dc1fSManuel Klimek           break;
245d6d0dc1fSManuel Klimek         }
246d6d0dc1fSManuel Klimek       }
247d6d0dc1fSManuel Klimek     }
248d6d0dc1fSManuel Klimek   }
249d6d0dc1fSManuel Klimek   if (Token->MacroCtx->EndOfExpansion)
250d6d0dc1fSManuel Klimek     endReconstruction(Token);
251d6d0dc1fSManuel Klimek }
252d6d0dc1fSManuel Klimek 
253d6d0dc1fSManuel Klimek // Given a \p Token that starts an expansion, reconstruct the beginning of the
254d6d0dc1fSManuel Klimek // macro call.
255d6d0dc1fSManuel Klimek // For example, given: #define ID(x) x
256d6d0dc1fSManuel Klimek // And the call: ID(int a)
257d6d0dc1fSManuel Klimek // Reconstructs: ID(
startReconstruction(FormatToken * Token)258d6d0dc1fSManuel Klimek void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259d6d0dc1fSManuel Klimek   assert(Token->MacroCtx);
260d6d0dc1fSManuel Klimek   assert(!Token->MacroCtx->ExpandedFrom.empty());
261d6d0dc1fSManuel Klimek   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262d6d0dc1fSManuel Klimek #ifndef NDEBUG
263d6d0dc1fSManuel Klimek   // Check that the token's reconstruction stack matches our current
264d6d0dc1fSManuel Klimek   // reconstruction stack.
265d6d0dc1fSManuel Klimek   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266d6d0dc1fSManuel Klimek     assert(ActiveExpansions[I].ID ==
267d6d0dc1fSManuel Klimek            Token->MacroCtx
268d6d0dc1fSManuel Klimek                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269d6d0dc1fSManuel Klimek   }
270d6d0dc1fSManuel Klimek #endif
271d6d0dc1fSManuel Klimek   // Start reconstruction for all calls for which this token is the first token
272d6d0dc1fSManuel Klimek   // generated by the call.
273d6d0dc1fSManuel Klimek   // Note that the token's expanded from stack is inside-to-outside, and the
274d6d0dc1fSManuel Klimek   // expansions for which this token is not the first are the outermost ones.
275d6d0dc1fSManuel Klimek   ArrayRef<FormatToken *> StartedMacros =
276a3c248dbSserge-sans-paille       ArrayRef(Token->MacroCtx->ExpandedFrom)
277d6d0dc1fSManuel Klimek           .drop_back(ActiveExpansions.size());
278d6d0dc1fSManuel Klimek   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279d6d0dc1fSManuel Klimek   // We reconstruct macro calls outside-to-inside.
280d6d0dc1fSManuel Klimek   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281d6d0dc1fSManuel Klimek     // We found a macro call to be reconstructed; the next time our
282d6d0dc1fSManuel Klimek     // reconstruction stack is empty we know we finished an reconstruction.
283d6d0dc1fSManuel Klimek #ifndef NDEBUG
284d6d0dc1fSManuel Klimek     State = InProgress;
285d6d0dc1fSManuel Klimek #endif
286d6d0dc1fSManuel Klimek     // Put the reconstructed macro call's token into our reconstruction stack.
287d6d0dc1fSManuel Klimek     auto IU = IdToReconstructed.find(ID);
288d6d0dc1fSManuel Klimek     assert(IU != IdToReconstructed.end());
289d6d0dc1fSManuel Klimek     ActiveExpansions.push_back(
290d6d0dc1fSManuel Klimek         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291d6d0dc1fSManuel Klimek     // Process the macro call's identifier.
292d6d0dc1fSManuel Klimek     processNextReconstructed();
293d6d0dc1fSManuel Klimek     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294d6d0dc1fSManuel Klimek       continue;
295d6d0dc1fSManuel Klimek     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296d6d0dc1fSManuel Klimek       // Process the optional opening parenthesis.
297d6d0dc1fSManuel Klimek       processNextReconstructed();
298d6d0dc1fSManuel Klimek     }
299d6d0dc1fSManuel Klimek   }
300d6d0dc1fSManuel Klimek }
301d6d0dc1fSManuel Klimek 
302d6d0dc1fSManuel Klimek // Add all tokens in the reconstruction stream to the output until we find the
303d6d0dc1fSManuel Klimek // given \p Token.
reconstructActiveCallUntil(FormatToken * Token)304d6d0dc1fSManuel Klimek bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305d6d0dc1fSManuel Klimek   assert(!ActiveExpansions.empty());
306d6d0dc1fSManuel Klimek   bool PassedMacroComma = false;
307d6d0dc1fSManuel Klimek   // FIXME: If Token was already expanded earlier, due to
308d6d0dc1fSManuel Klimek   // a change in order, we will not find it, but need to
309d6d0dc1fSManuel Klimek   // skip it.
310d6d0dc1fSManuel Klimek   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311d6d0dc1fSManuel Klimek          ActiveExpansions.back().SpelledI->Tok != Token) {
312d6d0dc1fSManuel Klimek     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313d6d0dc1fSManuel Klimek   }
314d6d0dc1fSManuel Klimek   return PassedMacroComma;
315d6d0dc1fSManuel Klimek }
316d6d0dc1fSManuel Klimek 
317d6d0dc1fSManuel Klimek // End all reconstructions for which \p Token is the final token.
endReconstruction(FormatToken * Token)318d6d0dc1fSManuel Klimek void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319d6d0dc1fSManuel Klimek   assert(Token->MacroCtx &&
320d6d0dc1fSManuel Klimek          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321d6d0dc1fSManuel Klimek   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
3222c3b12b5SBjörn Schäpers     LLVM_DEBUG([&] {
3232c3b12b5SBjörn Schäpers       // Check all remaining tokens but the final closing parenthesis and
3242c3b12b5SBjörn Schäpers       // optional trailing comment were already reconstructed at an inner
3252c3b12b5SBjörn Schäpers       // expansion level.
326d6d0dc1fSManuel Klimek       for (auto T = ActiveExpansions.back().SpelledI;
327d6d0dc1fSManuel Klimek            T != ActiveExpansions.back().SpelledE; ++T) {
328d6d0dc1fSManuel Klimek         FormatToken *Token = T->Tok;
329d6d0dc1fSManuel Klimek         bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
330d6d0dc1fSManuel Klimek                              std::next(T)->Tok->isTrailingComment()) &&
331d6d0dc1fSManuel Klimek                             !Token->MacroCtx && Token->is(tok::r_paren);
332d6d0dc1fSManuel Klimek         bool TrailingComment = Token->isTrailingComment();
333d6d0dc1fSManuel Klimek         bool PreviousLevel =
334d6d0dc1fSManuel Klimek             Token->MacroCtx &&
335d6d0dc1fSManuel Klimek             (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
3363d621398Sowenca         if (!ClosingParen && !TrailingComment && !PreviousLevel)
337d6d0dc1fSManuel Klimek           llvm::dbgs() << "At token: " << Token->TokenText << "\n";
338d6d0dc1fSManuel Klimek         // In addition to the following cases, we can also run into this
339d6d0dc1fSManuel Klimek         // when a macro call had more arguments than expected; in that case,
3402c3b12b5SBjörn Schäpers         // the comma and the remaining tokens in the macro call will
3412c3b12b5SBjörn Schäpers         // potentially end up in the line when we finish the expansion.
342d6d0dc1fSManuel Klimek         // FIXME: Add the information which arguments are unused, and assert
343d6d0dc1fSManuel Klimek         // one of the cases below plus reconstructed macro argument tokens.
344d6d0dc1fSManuel Klimek         // assert(ClosingParen || TrailingComment || PreviousLevel);
345d6d0dc1fSManuel Klimek       }
3462c3b12b5SBjörn Schäpers     }());
347d6d0dc1fSManuel Klimek     // Handle the remaining open tokens:
348d6d0dc1fSManuel Klimek     // - expand the closing parenthesis, if it exists, including an optional
349d6d0dc1fSManuel Klimek     //   trailing comment
350d6d0dc1fSManuel Klimek     // - handle tokens that were already reconstructed at an inner expansion
351d6d0dc1fSManuel Klimek     //   level
352d6d0dc1fSManuel Klimek     // - handle tokens when a macro call had more than the expected number of
353d6d0dc1fSManuel Klimek     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354d6d0dc1fSManuel Klimek     //   up with the sequence ", b, c)" being open at the end of the
355d6d0dc1fSManuel Klimek     //   reconstruction; we want to gracefully handle that case
356d6d0dc1fSManuel Klimek     //
357d6d0dc1fSManuel Klimek     // FIXME: See the above debug-check for what we will need to do to be
358d6d0dc1fSManuel Klimek     // able to assert this.
359d6d0dc1fSManuel Klimek     for (auto T = ActiveExpansions.back().SpelledI;
360d6d0dc1fSManuel Klimek          T != ActiveExpansions.back().SpelledE; ++T) {
361d6d0dc1fSManuel Klimek       processNextReconstructed();
362d6d0dc1fSManuel Klimek     }
363d6d0dc1fSManuel Klimek     ActiveExpansions.pop_back();
364d6d0dc1fSManuel Klimek   }
365d6d0dc1fSManuel Klimek }
366d6d0dc1fSManuel Klimek 
debugParentMap() const367d6d0dc1fSManuel Klimek void MacroCallReconstructor::debugParentMap() const {
368d6d0dc1fSManuel Klimek   llvm::DenseSet<FormatToken *> Values;
369d6d0dc1fSManuel Klimek   for (const auto &P : SpelledParentToReconstructedParent)
370d6d0dc1fSManuel Klimek     Values.insert(P.second);
371d6d0dc1fSManuel Klimek 
372d6d0dc1fSManuel Klimek   for (const auto &P : SpelledParentToReconstructedParent) {
373d6d0dc1fSManuel Klimek     if (Values.contains(P.first))
374d6d0dc1fSManuel Klimek       continue;
375d6d0dc1fSManuel Klimek     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
376d6d0dc1fSManuel Klimek     for (auto I = SpelledParentToReconstructedParent.find(P.first),
377d6d0dc1fSManuel Klimek               E = SpelledParentToReconstructedParent.end();
378d6d0dc1fSManuel Klimek          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
379d6d0dc1fSManuel Klimek       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
380d6d0dc1fSManuel Klimek     }
381d6d0dc1fSManuel Klimek     llvm::dbgs() << "\n";
382d6d0dc1fSManuel Klimek   }
383d6d0dc1fSManuel Klimek }
384d6d0dc1fSManuel Klimek 
385d6d0dc1fSManuel Klimek // If visible, add the next token of the reconstructed token sequence to the
386d6d0dc1fSManuel Klimek // output. Returns whether reconstruction passed a comma that is part of a
387d6d0dc1fSManuel Klimek // macro call.
processNextReconstructed()388d6d0dc1fSManuel Klimek bool MacroCallReconstructor::processNextReconstructed() {
389d6d0dc1fSManuel Klimek   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
390d6d0dc1fSManuel Klimek   ++ActiveExpansions.back().SpelledI;
391d6d0dc1fSManuel Klimek   if (Token->MacroCtx) {
392d6d0dc1fSManuel Klimek     // Skip tokens that are not part of the macro call.
3933d621398Sowenca     if (Token->MacroCtx->Role == MR_Hidden)
394d6d0dc1fSManuel Klimek       return false;
395d6d0dc1fSManuel Klimek     // Skip tokens we already expanded during an inner reconstruction.
396d6d0dc1fSManuel Klimek     // For example, given: #define ID(x) {x}
397d6d0dc1fSManuel Klimek     // And the call: ID(ID(f))
398d6d0dc1fSManuel Klimek     // We get two reconstructions:
399d6d0dc1fSManuel Klimek     // ID(f) -> {f}
400d6d0dc1fSManuel Klimek     // ID({f}) -> {{f}}
401d6d0dc1fSManuel Klimek     // We reconstruct f during the first reconstruction, and skip it during the
402d6d0dc1fSManuel Klimek     // second reconstruction.
4033d621398Sowenca     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
404d6d0dc1fSManuel Klimek       return false;
405d6d0dc1fSManuel Klimek   }
406d6d0dc1fSManuel Klimek   // Tokens that do not have a macro context are tokens in that are part of the
407d6d0dc1fSManuel Klimek   // macro call that have not taken part in expansion.
408d6d0dc1fSManuel Klimek   if (!Token->MacroCtx) {
409d6d0dc1fSManuel Klimek     // Put the parentheses and commas of a macro call into the same line;
410d6d0dc1fSManuel Klimek     // if the arguments produce new unwrapped lines, they will become children
411d6d0dc1fSManuel Klimek     // of the corresponding opening parenthesis or comma tokens in the
412d6d0dc1fSManuel Klimek     // reconstructed call.
413d6d0dc1fSManuel Klimek     if (Token->is(tok::l_paren)) {
414d6d0dc1fSManuel Klimek       MacroCallStructure.push_back(MacroCallState(
415d6d0dc1fSManuel Klimek           currentLine(), parentLine().Tokens.back()->Tok, Token));
416d6d0dc1fSManuel Klimek       // All tokens that are children of the previous line's last token in the
417d6d0dc1fSManuel Klimek       // reconstructed token stream will now be children of the l_paren token.
418d6d0dc1fSManuel Klimek       // For example, for the line containing the macro calls:
419d6d0dc1fSManuel Klimek       //   auto x = ID({ID(2)});
420d6d0dc1fSManuel Klimek       // We will build up a map <null> -> ( -> ( with the first and second
421d6d0dc1fSManuel Klimek       // l_paren of the macro call respectively. New lines that come in with a
422d6d0dc1fSManuel Klimek       // <null> parent will then become children of the l_paren token of the
423d6d0dc1fSManuel Klimek       // currently innermost macro call.
424d6d0dc1fSManuel Klimek       SpelledParentToReconstructedParent[MacroCallStructure.back()
425d6d0dc1fSManuel Klimek                                              .ParentLastToken] = Token;
426d6d0dc1fSManuel Klimek       appendToken(Token);
427ddb4450aSr4nt       prepareParent(Token, /*NewLine=*/true,
428ddb4450aSr4nt                     MacroCallStructure.back().Line->Level);
429d6d0dc1fSManuel Klimek       Token->MacroParent = true;
430d6d0dc1fSManuel Klimek       return false;
431d6d0dc1fSManuel Klimek     }
432d6d0dc1fSManuel Klimek     if (!MacroCallStructure.empty()) {
433d6d0dc1fSManuel Klimek       if (Token->is(tok::comma)) {
434d6d0dc1fSManuel Klimek         // Make new lines inside the next argument children of the comma token.
435d6d0dc1fSManuel Klimek         SpelledParentToReconstructedParent
436d6d0dc1fSManuel Klimek             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
437d6d0dc1fSManuel Klimek         Token->MacroParent = true;
438d6d0dc1fSManuel Klimek         appendToken(Token, MacroCallStructure.back().Line);
439ddb4450aSr4nt         prepareParent(Token, /*NewLine=*/true,
440ddb4450aSr4nt                       MacroCallStructure.back().Line->Level);
441d6d0dc1fSManuel Klimek         return true;
442d6d0dc1fSManuel Klimek       }
443d6d0dc1fSManuel Klimek       if (Token->is(tok::r_paren)) {
444d6d0dc1fSManuel Klimek         appendToken(Token, MacroCallStructure.back().Line);
445d6d0dc1fSManuel Klimek         SpelledParentToReconstructedParent.erase(
446d6d0dc1fSManuel Klimek             MacroCallStructure.back().ParentLastToken);
447d6d0dc1fSManuel Klimek         MacroCallStructure.pop_back();
448d6d0dc1fSManuel Klimek         return false;
449d6d0dc1fSManuel Klimek       }
450d6d0dc1fSManuel Klimek     }
451d6d0dc1fSManuel Klimek   }
452d6d0dc1fSManuel Klimek   // Note that any tokens that are tagged with MR_None have been passed as
453d6d0dc1fSManuel Klimek   // arguments to the macro that have not been expanded, for example:
454d6d0dc1fSManuel Klimek   // Given: #define ID(X) x
455d6d0dc1fSManuel Klimek   // When calling: ID(a, b)
456d6d0dc1fSManuel Klimek   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
457d6d0dc1fSManuel Klimek   // Given that erroring out in this case would be disruptive, we continue
458d6d0dc1fSManuel Klimek   // pushing the (unformatted) token.
459d6d0dc1fSManuel Klimek   // FIXME: This can lead to unfortunate formatting decisions - give the user
460d6d0dc1fSManuel Klimek   // a hint that their macro definition is broken.
461d6d0dc1fSManuel Klimek   appendToken(Token);
462d6d0dc1fSManuel Klimek   return false;
463d6d0dc1fSManuel Klimek }
464d6d0dc1fSManuel Klimek 
finalize()465d6d0dc1fSManuel Klimek void MacroCallReconstructor::finalize() {
466d6d0dc1fSManuel Klimek #ifndef NDEBUG
467d6d0dc1fSManuel Klimek   assert(State != Finalized && finished());
468d6d0dc1fSManuel Klimek   State = Finalized;
469d6d0dc1fSManuel Klimek #endif
470d6d0dc1fSManuel Klimek 
471d6d0dc1fSManuel Klimek   // We created corresponding unwrapped lines for each incoming line as children
472d6d0dc1fSManuel Klimek   // the the toplevel null token.
473d6d0dc1fSManuel Klimek   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
474d6d0dc1fSManuel Klimek   LLVM_DEBUG({
475d6d0dc1fSManuel Klimek     llvm::dbgs() << "Finalizing reconstructed lines:\n";
476d6d0dc1fSManuel Klimek     debug(Result, 0);
477d6d0dc1fSManuel Klimek   });
478d6d0dc1fSManuel Klimek 
479d6d0dc1fSManuel Klimek   // The first line becomes the top level line in the resulting unwrapped line.
480d6d0dc1fSManuel Klimek   LineNode &Top = *Result.Tokens.front();
481d6d0dc1fSManuel Klimek   auto *I = Top.Children.begin();
482d6d0dc1fSManuel Klimek   // Every subsequent line will become a child of the last token in the previous
483d6d0dc1fSManuel Klimek   // line, which is the token prior to the first token in the line.
484d6d0dc1fSManuel Klimek   LineNode *Last = (*I)->Tokens.back().get();
485d6d0dc1fSManuel Klimek   ++I;
486d6d0dc1fSManuel Klimek   for (auto *E = Top.Children.end(); I != E; ++I) {
487d6d0dc1fSManuel Klimek     assert(Last->Children.empty());
488d6d0dc1fSManuel Klimek     Last->Children.push_back(std::move(*I));
489d6d0dc1fSManuel Klimek 
490d6d0dc1fSManuel Klimek     // Mark the previous line's last token as generated by a macro expansion
491d6d0dc1fSManuel Klimek     // so the formatting algorithm can take that into account.
492d6d0dc1fSManuel Klimek     Last->Tok->MacroParent = true;
493d6d0dc1fSManuel Klimek 
494d6d0dc1fSManuel Klimek     Last = Last->Children.back()->Tokens.back().get();
495d6d0dc1fSManuel Klimek   }
496d6d0dc1fSManuel Klimek   Top.Children.resize(1);
497d6d0dc1fSManuel Klimek }
498d6d0dc1fSManuel Klimek 
appendToken(FormatToken * Token,ReconstructedLine * L)499f44d28f8SManuel Klimek void MacroCallReconstructor::appendToken(FormatToken *Token,
500f44d28f8SManuel Klimek                                          ReconstructedLine *L) {
501d6d0dc1fSManuel Klimek   L = L ? L : currentLine();
502d6d0dc1fSManuel Klimek   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
503d6d0dc1fSManuel Klimek   L->Tokens.push_back(std::make_unique<LineNode>(Token));
504d6d0dc1fSManuel Klimek }
505d6d0dc1fSManuel Klimek 
506f44d28f8SManuel Klimek UnwrappedLine
createUnwrappedLine(const ReconstructedLine & Line,int Level)507f44d28f8SManuel Klimek MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
508d6d0dc1fSManuel Klimek                                             int Level) {
509d6d0dc1fSManuel Klimek   UnwrappedLine Result;
510d6d0dc1fSManuel Klimek   Result.Level = Level;
511d6d0dc1fSManuel Klimek   for (const auto &N : Line.Tokens) {
512d6d0dc1fSManuel Klimek     Result.Tokens.push_back(N->Tok);
513d6d0dc1fSManuel Klimek     UnwrappedLineNode &Current = Result.Tokens.back();
514ddb4450aSr4nt     auto NumChildren =
515ddb4450aSr4nt         std::count_if(N->Children.begin(), N->Children.end(),
516ddb4450aSr4nt                       [](const auto &Child) { return !Child->Tokens.empty(); });
517ddb4450aSr4nt     if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
518ddb4450aSr4nt       // If we only have one child, and the child is due to a macro expansion
519ddb4450aSr4nt       // (either attached to a left parenthesis or comma), merge the child into
520ddb4450aSr4nt       // the current line to prevent forced breaks for macro arguments.
521ddb4450aSr4nt       auto *Child = std::find_if(
522ddb4450aSr4nt           N->Children.begin(), N->Children.end(),
523ddb4450aSr4nt           [](const auto &Child) { return !Child->Tokens.empty(); });
524ddb4450aSr4nt       auto Line = createUnwrappedLine(**Child, Level);
525ddb4450aSr4nt       Result.Tokens.splice(Result.Tokens.end(), Line.Tokens);
526ddb4450aSr4nt     } else if (NumChildren > 0) {
527ddb4450aSr4nt       // When there are multiple children with different indent, make sure that
528ddb4450aSr4nt       // we indent them:
529ddb4450aSr4nt       // 1. One level below the current line's level.
530ddb4450aSr4nt       // 2. At the correct level relative to each other.
531ddb4450aSr4nt       unsigned MinChildLevel =
532ddb4450aSr4nt           std::min_element(N->Children.begin(), N->Children.end(),
533ddb4450aSr4nt                            [](const auto &E1, const auto &E2) {
534ddb4450aSr4nt                              return E1->Level < E2->Level;
535ddb4450aSr4nt                            })
536ddb4450aSr4nt               ->get()
537ddb4450aSr4nt               ->Level;
538d6d0dc1fSManuel Klimek       for (const auto &Child : N->Children) {
539d6d0dc1fSManuel Klimek         if (Child->Tokens.empty())
540d6d0dc1fSManuel Klimek           continue;
541ddb4450aSr4nt         Current.Children.push_back(createUnwrappedLine(
542ddb4450aSr4nt             *Child, Level + 1 + (Child->Level - MinChildLevel)));
543d6d0dc1fSManuel Klimek       }
544d6d0dc1fSManuel Klimek     }
545d6d0dc1fSManuel Klimek   }
546d6d0dc1fSManuel Klimek   return Result;
547d6d0dc1fSManuel Klimek }
548d6d0dc1fSManuel Klimek 
debug(const ReconstructedLine & Line,int Level)549f44d28f8SManuel Klimek void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
550d6d0dc1fSManuel Klimek   for (int i = 0; i < Level; ++i)
551d6d0dc1fSManuel Klimek     llvm::dbgs() << " ";
552d6d0dc1fSManuel Klimek   for (const auto &N : Line.Tokens) {
553d6d0dc1fSManuel Klimek     if (!N)
554d6d0dc1fSManuel Klimek       continue;
555d6d0dc1fSManuel Klimek     if (N->Tok)
556d6d0dc1fSManuel Klimek       llvm::dbgs() << N->Tok->TokenText << " ";
557d6d0dc1fSManuel Klimek     for (const auto &Child : N->Children) {
558d6d0dc1fSManuel Klimek       llvm::dbgs() << "\n";
559d6d0dc1fSManuel Klimek       debug(*Child, Level + 1);
560d6d0dc1fSManuel Klimek       for (int i = 0; i < Level; ++i)
561d6d0dc1fSManuel Klimek         llvm::dbgs() << " ";
562d6d0dc1fSManuel Klimek     }
563d6d0dc1fSManuel Klimek   }
564d6d0dc1fSManuel Klimek   llvm::dbgs() << "\n";
565d6d0dc1fSManuel Klimek }
566d6d0dc1fSManuel Klimek 
567f44d28f8SManuel Klimek MacroCallReconstructor::ReconstructedLine &
parentLine()568f44d28f8SManuel Klimek MacroCallReconstructor::parentLine() {
569d6d0dc1fSManuel Klimek   return **std::prev(std::prev(ActiveReconstructedLines.end()));
570d6d0dc1fSManuel Klimek }
571d6d0dc1fSManuel Klimek 
572f44d28f8SManuel Klimek MacroCallReconstructor::ReconstructedLine *
currentLine()573f44d28f8SManuel Klimek MacroCallReconstructor::currentLine() {
574d6d0dc1fSManuel Klimek   return ActiveReconstructedLines.back();
575d6d0dc1fSManuel Klimek }
576d6d0dc1fSManuel Klimek 
MacroCallState(MacroCallReconstructor::ReconstructedLine * Line,FormatToken * ParentLastToken,FormatToken * MacroCallLParen)577d6d0dc1fSManuel Klimek MacroCallReconstructor::MacroCallState::MacroCallState(
578f44d28f8SManuel Klimek     MacroCallReconstructor::ReconstructedLine *Line,
579f44d28f8SManuel Klimek     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
580d6d0dc1fSManuel Klimek     : Line(Line), ParentLastToken(ParentLastToken),
581d6d0dc1fSManuel Klimek       MacroCallLParen(MacroCallLParen) {
582d6d0dc1fSManuel Klimek   LLVM_DEBUG(
583d6d0dc1fSManuel Klimek       llvm::dbgs() << "ParentLastToken: "
584d6d0dc1fSManuel Klimek                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
585d6d0dc1fSManuel Klimek                    << "\n");
586d6d0dc1fSManuel Klimek 
587d6d0dc1fSManuel Klimek   assert(MacroCallLParen->is(tok::l_paren));
588d6d0dc1fSManuel Klimek }
589d6d0dc1fSManuel Klimek 
590d6d0dc1fSManuel Klimek } // namespace format
591d6d0dc1fSManuel Klimek } // namespace clang
592