xref: /openbsd-src/gnu/llvm/clang/lib/Format/MacroCallReconstructor.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1*12c85518Srobert //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2*12c85518Srobert //
3*12c85518Srobert //                     The LLVM Compiler Infrastructure
4*12c85518Srobert //
5*12c85518Srobert // This file is distributed under the University of Illinois Open Source
6*12c85518Srobert // License. See LICENSE.TXT for details.
7*12c85518Srobert //
8*12c85518Srobert //===----------------------------------------------------------------------===//
9*12c85518Srobert ///
10*12c85518Srobert /// \file
11*12c85518Srobert /// This file contains the implementation of MacroCallReconstructor, which fits
12*12c85518Srobert /// an reconstructed macro call to a parsed set of UnwrappedLines.
13*12c85518Srobert ///
14*12c85518Srobert //===----------------------------------------------------------------------===//
15*12c85518Srobert 
16*12c85518Srobert #include "Macros.h"
17*12c85518Srobert 
18*12c85518Srobert #include "UnwrappedLineParser.h"
19*12c85518Srobert #include "clang/Basic/TokenKinds.h"
20*12c85518Srobert #include "llvm/ADT/DenseSet.h"
21*12c85518Srobert #include "llvm/Support/Debug.h"
22*12c85518Srobert #include <cassert>
23*12c85518Srobert 
24*12c85518Srobert #define DEBUG_TYPE "format-reconstruct"
25*12c85518Srobert 
26*12c85518Srobert namespace clang {
27*12c85518Srobert namespace format {
28*12c85518Srobert 
29*12c85518Srobert // Call \p Call for each token in the unwrapped line given, passing
30*12c85518Srobert // the token, its parent and whether it is the first token in the line.
31*12c85518Srobert template <typename T>
forEachToken(const UnwrappedLine & Line,const T & Call,FormatToken * Parent=nullptr)32*12c85518Srobert void forEachToken(const UnwrappedLine &Line, const T &Call,
33*12c85518Srobert                   FormatToken *Parent = nullptr) {
34*12c85518Srobert   bool First = true;
35*12c85518Srobert   for (const auto &N : Line.Tokens) {
36*12c85518Srobert     Call(N.Tok, Parent, First);
37*12c85518Srobert     First = false;
38*12c85518Srobert     for (const auto &Child : N.Children)
39*12c85518Srobert       forEachToken(Child, Call, N.Tok);
40*12c85518Srobert   }
41*12c85518Srobert }
42*12c85518Srobert 
MacroCallReconstructor(unsigned Level,const llvm::DenseMap<FormatToken *,std::unique_ptr<UnwrappedLine>> & ActiveExpansions)43*12c85518Srobert MacroCallReconstructor::MacroCallReconstructor(
44*12c85518Srobert     unsigned Level,
45*12c85518Srobert     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46*12c85518Srobert         &ActiveExpansions)
47*12c85518Srobert     : Level(Level), IdToReconstructed(ActiveExpansions) {
48*12c85518Srobert   Result.Tokens.push_back(std::make_unique<LineNode>());
49*12c85518Srobert   ActiveReconstructedLines.push_back(&Result);
50*12c85518Srobert }
51*12c85518Srobert 
addLine(const UnwrappedLine & Line)52*12c85518Srobert void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53*12c85518Srobert   assert(State != Finalized);
54*12c85518Srobert   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55*12c85518Srobert   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56*12c85518Srobert     add(Token, Parent, First);
57*12c85518Srobert   });
58*12c85518Srobert   assert(InProgress || finished());
59*12c85518Srobert }
60*12c85518Srobert 
takeResult()61*12c85518Srobert UnwrappedLine MacroCallReconstructor::takeResult() && {
62*12c85518Srobert   finalize();
63*12c85518Srobert   assert(Result.Tokens.size() == 1 &&
64*12c85518Srobert          Result.Tokens.front()->Children.size() == 1);
65*12c85518Srobert   UnwrappedLine Final =
66*12c85518Srobert       createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67*12c85518Srobert   assert(!Final.Tokens.empty());
68*12c85518Srobert   return Final;
69*12c85518Srobert }
70*12c85518Srobert 
71*12c85518Srobert // Reconstruct the position of the next \p Token, given its parent \p
72*12c85518Srobert // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73*12c85518Srobert // is the first token in a given unwrapped line.
add(FormatToken * Token,FormatToken * ExpandedParent,bool First)74*12c85518Srobert void MacroCallReconstructor::add(FormatToken *Token,
75*12c85518Srobert                                  FormatToken *ExpandedParent, bool First) {
76*12c85518Srobert   LLVM_DEBUG(
77*12c85518Srobert       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78*12c85518Srobert                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79*12c85518Srobert                    << ", First: " << First << "\n");
80*12c85518Srobert   // In order to be able to find the correct parent in the reconstructed token
81*12c85518Srobert   // stream, we need to continue the last open reconstruction until we find the
82*12c85518Srobert   // given token if it is part of the reconstructed token stream.
83*12c85518Srobert   //
84*12c85518Srobert   // Note that hidden tokens can be part of the reconstructed stream in nested
85*12c85518Srobert   // macro calls.
86*12c85518Srobert   // For example, given
87*12c85518Srobert   //   #define C(x, y) x y
88*12c85518Srobert   //   #define B(x) {x}
89*12c85518Srobert   // And the call:
90*12c85518Srobert   //   C(a, B(b))
91*12c85518Srobert   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92*12c85518Srobert   // found in the reconstructed token stream of that expansion level.
93*12c85518Srobert   // In the expanded token stream
94*12c85518Srobert   //   a {b}
95*12c85518Srobert   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96*12c85518Srobert   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97*12c85518Srobert   // so we later set the spelled token 'b' as a child of the ','.
98*12c85518Srobert   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99*12c85518Srobert       (Token->MacroCtx->Role != MR_Hidden ||
100*12c85518Srobert        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101*12c85518Srobert     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102*12c85518Srobert       First = true;
103*12c85518Srobert   }
104*12c85518Srobert 
105*12c85518Srobert   prepareParent(ExpandedParent, First);
106*12c85518Srobert 
107*12c85518Srobert   if (Token->MacroCtx) {
108*12c85518Srobert     // If this token was generated by a macro call, add the reconstructed
109*12c85518Srobert     // equivalent of the token.
110*12c85518Srobert     reconstruct(Token);
111*12c85518Srobert   } else {
112*12c85518Srobert     // Otherwise, we add it to the current line.
113*12c85518Srobert     appendToken(Token);
114*12c85518Srobert   }
115*12c85518Srobert }
116*12c85518Srobert 
117*12c85518Srobert // Adjusts the stack of active reconstructed lines so we're ready to push
118*12c85518Srobert // tokens. The tokens to be pushed are children of ExpandedParent in the
119*12c85518Srobert // expanded code.
120*12c85518Srobert //
121*12c85518Srobert // This may entail:
122*12c85518Srobert // - creating a new line, if the parent is on the active line
123*12c85518Srobert // - popping active lines, if the parent is further up the stack
124*12c85518Srobert //
125*12c85518Srobert // Postcondition:
126*12c85518Srobert // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127*12c85518Srobert // reconstructed replacement token as a parent (when possible) - that is, the
128*12c85518Srobert // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129*12c85518Srobert // is the parent of ActiveReconstructedLines.back() in the reconstructed
130*12c85518Srobert // unwrapped line.
prepareParent(FormatToken * ExpandedParent,bool NewLine)131*12c85518Srobert void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132*12c85518Srobert                                            bool NewLine) {
133*12c85518Srobert   LLVM_DEBUG({
134*12c85518Srobert     llvm::dbgs() << "ParentMap:\n";
135*12c85518Srobert     debugParentMap();
136*12c85518Srobert   });
137*12c85518Srobert   // We want to find the parent in the new unwrapped line, where the expanded
138*12c85518Srobert   // parent might have been replaced during reconstruction.
139*12c85518Srobert   FormatToken *Parent = getParentInResult(ExpandedParent);
140*12c85518Srobert   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141*12c85518Srobert                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142*12c85518Srobert 
143*12c85518Srobert   FormatToken *OpenMacroParent = nullptr;
144*12c85518Srobert   if (!MacroCallStructure.empty()) {
145*12c85518Srobert     // Inside a macro expansion, it is possible to lose track of the correct
146*12c85518Srobert     // parent - either because it is already popped, for example because it was
147*12c85518Srobert     // in a different macro argument (e.g. M({, })), or when we work on invalid
148*12c85518Srobert     // code.
149*12c85518Srobert     // Thus, we use the innermost macro call's parent as the parent at which
150*12c85518Srobert     // we stop; this allows us to stay within the macro expansion and keeps
151*12c85518Srobert     // any problems confined to the extent of the macro call.
152*12c85518Srobert     OpenMacroParent =
153*12c85518Srobert         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154*12c85518Srobert     LLVM_DEBUG(llvm::dbgs()
155*12c85518Srobert                << "MacroCallLParen: "
156*12c85518Srobert                << MacroCallStructure.back().MacroCallLParen->TokenText
157*12c85518Srobert                << ", OpenMacroParent: "
158*12c85518Srobert                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159*12c85518Srobert                << "\n");
160*12c85518Srobert   }
161*12c85518Srobert   if (NewLine ||
162*12c85518Srobert       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163*12c85518Srobert        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164*12c85518Srobert     // If we are at the first token in a new line, we want to also
165*12c85518Srobert     // create a new line in the resulting reconstructed unwrapped line.
166*12c85518Srobert     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167*12c85518Srobert            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168*12c85518Srobert             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169*12c85518Srobert                 OpenMacroParent)) {
170*12c85518Srobert       ActiveReconstructedLines.pop_back();
171*12c85518Srobert       assert(!ActiveReconstructedLines.empty());
172*12c85518Srobert     }
173*12c85518Srobert     assert(!ActiveReconstructedLines.empty());
174*12c85518Srobert     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175*12c85518Srobert         std::make_unique<ReconstructedLine>());
176*12c85518Srobert     ActiveReconstructedLines.push_back(
177*12c85518Srobert         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178*12c85518Srobert   } else if (parentLine().Tokens.back()->Tok != Parent) {
179*12c85518Srobert     // If we're not the first token in a new line, pop lines until we find
180*12c85518Srobert     // the child of \c Parent in the stack.
181*12c85518Srobert     while (Parent != parentLine().Tokens.back()->Tok &&
182*12c85518Srobert            parentLine().Tokens.back()->Tok &&
183*12c85518Srobert            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184*12c85518Srobert       ActiveReconstructedLines.pop_back();
185*12c85518Srobert       assert(!ActiveReconstructedLines.empty());
186*12c85518Srobert     }
187*12c85518Srobert   }
188*12c85518Srobert   assert(!ActiveReconstructedLines.empty());
189*12c85518Srobert }
190*12c85518Srobert 
191*12c85518Srobert // For a given \p Parent in the incoming expanded token stream, find the
192*12c85518Srobert // corresponding parent in the output.
getParentInResult(FormatToken * Parent)193*12c85518Srobert FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194*12c85518Srobert   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195*12c85518Srobert   if (!Mapped)
196*12c85518Srobert     return Parent;
197*12c85518Srobert   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198*12c85518Srobert     Parent = Mapped;
199*12c85518Srobert   // If we use a different token than the parent in the expanded token stream
200*12c85518Srobert   // as parent, mark it as a special parent, so the formatting code knows it
201*12c85518Srobert   // needs to have its children formatted.
202*12c85518Srobert   Parent->MacroParent = true;
203*12c85518Srobert   return Parent;
204*12c85518Srobert }
205*12c85518Srobert 
206*12c85518Srobert // Reconstruct a \p Token that was expanded from a macro call.
reconstruct(FormatToken * Token)207*12c85518Srobert void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208*12c85518Srobert   assert(Token->MacroCtx);
209*12c85518Srobert   // A single token can be the only result of a macro call:
210*12c85518Srobert   // Given: #define ID(x, y) ;
211*12c85518Srobert   // And the call: ID(<some>, <tokens>)
212*12c85518Srobert   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213*12c85518Srobert   if (Token->MacroCtx->StartOfExpansion) {
214*12c85518Srobert     startReconstruction(Token);
215*12c85518Srobert     // If the order of tokens in the expanded token stream is not the
216*12c85518Srobert     // same as the order of tokens in the reconstructed stream, we need
217*12c85518Srobert     // to reconstruct tokens that arrive later in the stream.
218*12c85518Srobert     if (Token->MacroCtx->Role != MR_Hidden)
219*12c85518Srobert       reconstructActiveCallUntil(Token);
220*12c85518Srobert   }
221*12c85518Srobert   assert(!ActiveExpansions.empty());
222*12c85518Srobert   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223*12c85518Srobert     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224*12c85518Srobert     if (Token->MacroCtx->Role != MR_Hidden) {
225*12c85518Srobert       // The current token in the reconstructed token stream must be the token
226*12c85518Srobert       // we're looking for - we either arrive here after startReconstruction,
227*12c85518Srobert       // which initiates the stream to the first token, or after
228*12c85518Srobert       // continueReconstructionUntil skipped until the expected token in the
229*12c85518Srobert       // reconstructed stream at the start of add(...).
230*12c85518Srobert       assert(ActiveExpansions.back().SpelledI->Tok == Token);
231*12c85518Srobert       processNextReconstructed();
232*12c85518Srobert     } else if (!currentLine()->Tokens.empty()) {
233*12c85518Srobert       // Map all hidden tokens to the last visible token in the output.
234*12c85518Srobert       // If the hidden token is a parent, we'll use the last visible
235*12c85518Srobert       // token as the parent of the hidden token's children.
236*12c85518Srobert       SpelledParentToReconstructedParent[Token] =
237*12c85518Srobert           currentLine()->Tokens.back()->Tok;
238*12c85518Srobert     } else {
239*12c85518Srobert       for (auto I = ActiveReconstructedLines.rbegin(),
240*12c85518Srobert                 E = ActiveReconstructedLines.rend();
241*12c85518Srobert            I != E; ++I) {
242*12c85518Srobert         if (!(*I)->Tokens.empty()) {
243*12c85518Srobert           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244*12c85518Srobert           break;
245*12c85518Srobert         }
246*12c85518Srobert       }
247*12c85518Srobert     }
248*12c85518Srobert   }
249*12c85518Srobert   if (Token->MacroCtx->EndOfExpansion)
250*12c85518Srobert     endReconstruction(Token);
251*12c85518Srobert }
252*12c85518Srobert 
253*12c85518Srobert // Given a \p Token that starts an expansion, reconstruct the beginning of the
254*12c85518Srobert // macro call.
255*12c85518Srobert // For example, given: #define ID(x) x
256*12c85518Srobert // And the call: ID(int a)
257*12c85518Srobert // Reconstructs: ID(
startReconstruction(FormatToken * Token)258*12c85518Srobert void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259*12c85518Srobert   assert(Token->MacroCtx);
260*12c85518Srobert   assert(!Token->MacroCtx->ExpandedFrom.empty());
261*12c85518Srobert   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262*12c85518Srobert #ifndef NDEBUG
263*12c85518Srobert   // Check that the token's reconstruction stack matches our current
264*12c85518Srobert   // reconstruction stack.
265*12c85518Srobert   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266*12c85518Srobert     assert(ActiveExpansions[I].ID ==
267*12c85518Srobert            Token->MacroCtx
268*12c85518Srobert                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269*12c85518Srobert   }
270*12c85518Srobert #endif
271*12c85518Srobert   // Start reconstruction for all calls for which this token is the first token
272*12c85518Srobert   // generated by the call.
273*12c85518Srobert   // Note that the token's expanded from stack is inside-to-outside, and the
274*12c85518Srobert   // expansions for which this token is not the first are the outermost ones.
275*12c85518Srobert   ArrayRef<FormatToken *> StartedMacros =
276*12c85518Srobert       ArrayRef(Token->MacroCtx->ExpandedFrom)
277*12c85518Srobert           .drop_back(ActiveExpansions.size());
278*12c85518Srobert   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279*12c85518Srobert   // We reconstruct macro calls outside-to-inside.
280*12c85518Srobert   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281*12c85518Srobert     // We found a macro call to be reconstructed; the next time our
282*12c85518Srobert     // reconstruction stack is empty we know we finished an reconstruction.
283*12c85518Srobert #ifndef NDEBUG
284*12c85518Srobert     State = InProgress;
285*12c85518Srobert #endif
286*12c85518Srobert     // Put the reconstructed macro call's token into our reconstruction stack.
287*12c85518Srobert     auto IU = IdToReconstructed.find(ID);
288*12c85518Srobert     assert(IU != IdToReconstructed.end());
289*12c85518Srobert     ActiveExpansions.push_back(
290*12c85518Srobert         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291*12c85518Srobert     // Process the macro call's identifier.
292*12c85518Srobert     processNextReconstructed();
293*12c85518Srobert     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294*12c85518Srobert       continue;
295*12c85518Srobert     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296*12c85518Srobert       // Process the optional opening parenthesis.
297*12c85518Srobert       processNextReconstructed();
298*12c85518Srobert     }
299*12c85518Srobert   }
300*12c85518Srobert }
301*12c85518Srobert 
302*12c85518Srobert // Add all tokens in the reconstruction stream to the output until we find the
303*12c85518Srobert // given \p Token.
reconstructActiveCallUntil(FormatToken * Token)304*12c85518Srobert bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305*12c85518Srobert   assert(!ActiveExpansions.empty());
306*12c85518Srobert   bool PassedMacroComma = false;
307*12c85518Srobert   // FIXME: If Token was already expanded earlier, due to
308*12c85518Srobert   // a change in order, we will not find it, but need to
309*12c85518Srobert   // skip it.
310*12c85518Srobert   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311*12c85518Srobert          ActiveExpansions.back().SpelledI->Tok != Token) {
312*12c85518Srobert     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313*12c85518Srobert   }
314*12c85518Srobert   return PassedMacroComma;
315*12c85518Srobert }
316*12c85518Srobert 
317*12c85518Srobert // End all reconstructions for which \p Token is the final token.
endReconstruction(FormatToken * Token)318*12c85518Srobert void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319*12c85518Srobert   assert(Token->MacroCtx &&
320*12c85518Srobert          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321*12c85518Srobert   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322*12c85518Srobert #ifndef NDEBUG
323*12c85518Srobert     // Check all remaining tokens but the final closing parenthesis and optional
324*12c85518Srobert     // trailing comment were already reconstructed at an inner expansion level.
325*12c85518Srobert     for (auto T = ActiveExpansions.back().SpelledI;
326*12c85518Srobert          T != ActiveExpansions.back().SpelledE; ++T) {
327*12c85518Srobert       FormatToken *Token = T->Tok;
328*12c85518Srobert       bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
329*12c85518Srobert                            std::next(T)->Tok->isTrailingComment()) &&
330*12c85518Srobert                           !Token->MacroCtx && Token->is(tok::r_paren);
331*12c85518Srobert       bool TrailingComment = Token->isTrailingComment();
332*12c85518Srobert       bool PreviousLevel =
333*12c85518Srobert           Token->MacroCtx &&
334*12c85518Srobert           (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
335*12c85518Srobert       if (!ClosingParen && !TrailingComment && !PreviousLevel)
336*12c85518Srobert         llvm::dbgs() << "At token: " << Token->TokenText << "\n";
337*12c85518Srobert       // In addition to the following cases, we can also run into this
338*12c85518Srobert       // when a macro call had more arguments than expected; in that case,
339*12c85518Srobert       // the comma and the remaining tokens in the macro call will potentially
340*12c85518Srobert       // end up in the line when we finish the expansion.
341*12c85518Srobert       // FIXME: Add the information which arguments are unused, and assert
342*12c85518Srobert       // one of the cases below plus reconstructed macro argument tokens.
343*12c85518Srobert       // assert(ClosingParen || TrailingComment || PreviousLevel);
344*12c85518Srobert     }
345*12c85518Srobert #endif
346*12c85518Srobert     // Handle the remaining open tokens:
347*12c85518Srobert     // - expand the closing parenthesis, if it exists, including an optional
348*12c85518Srobert     //   trailing comment
349*12c85518Srobert     // - handle tokens that were already reconstructed at an inner expansion
350*12c85518Srobert     //   level
351*12c85518Srobert     // - handle tokens when a macro call had more than the expected number of
352*12c85518Srobert     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
353*12c85518Srobert     //   up with the sequence ", b, c)" being open at the end of the
354*12c85518Srobert     //   reconstruction; we want to gracefully handle that case
355*12c85518Srobert     //
356*12c85518Srobert     // FIXME: See the above debug-check for what we will need to do to be
357*12c85518Srobert     // able to assert this.
358*12c85518Srobert     for (auto T = ActiveExpansions.back().SpelledI;
359*12c85518Srobert          T != ActiveExpansions.back().SpelledE; ++T) {
360*12c85518Srobert       processNextReconstructed();
361*12c85518Srobert     }
362*12c85518Srobert     ActiveExpansions.pop_back();
363*12c85518Srobert   }
364*12c85518Srobert }
365*12c85518Srobert 
debugParentMap() const366*12c85518Srobert void MacroCallReconstructor::debugParentMap() const {
367*12c85518Srobert   llvm::DenseSet<FormatToken *> Values;
368*12c85518Srobert   for (const auto &P : SpelledParentToReconstructedParent)
369*12c85518Srobert     Values.insert(P.second);
370*12c85518Srobert 
371*12c85518Srobert   for (const auto &P : SpelledParentToReconstructedParent) {
372*12c85518Srobert     if (Values.contains(P.first))
373*12c85518Srobert       continue;
374*12c85518Srobert     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
375*12c85518Srobert     for (auto I = SpelledParentToReconstructedParent.find(P.first),
376*12c85518Srobert               E = SpelledParentToReconstructedParent.end();
377*12c85518Srobert          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
378*12c85518Srobert       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
379*12c85518Srobert     }
380*12c85518Srobert     llvm::dbgs() << "\n";
381*12c85518Srobert   }
382*12c85518Srobert }
383*12c85518Srobert 
384*12c85518Srobert // If visible, add the next token of the reconstructed token sequence to the
385*12c85518Srobert // output. Returns whether reconstruction passed a comma that is part of a
386*12c85518Srobert // macro call.
processNextReconstructed()387*12c85518Srobert bool MacroCallReconstructor::processNextReconstructed() {
388*12c85518Srobert   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
389*12c85518Srobert   ++ActiveExpansions.back().SpelledI;
390*12c85518Srobert   if (Token->MacroCtx) {
391*12c85518Srobert     // Skip tokens that are not part of the macro call.
392*12c85518Srobert     if (Token->MacroCtx->Role == MR_Hidden)
393*12c85518Srobert       return false;
394*12c85518Srobert     // Skip tokens we already expanded during an inner reconstruction.
395*12c85518Srobert     // For example, given: #define ID(x) {x}
396*12c85518Srobert     // And the call: ID(ID(f))
397*12c85518Srobert     // We get two reconstructions:
398*12c85518Srobert     // ID(f) -> {f}
399*12c85518Srobert     // ID({f}) -> {{f}}
400*12c85518Srobert     // We reconstruct f during the first reconstruction, and skip it during the
401*12c85518Srobert     // second reconstruction.
402*12c85518Srobert     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
403*12c85518Srobert       return false;
404*12c85518Srobert   }
405*12c85518Srobert   // Tokens that do not have a macro context are tokens in that are part of the
406*12c85518Srobert   // macro call that have not taken part in expansion.
407*12c85518Srobert   if (!Token->MacroCtx) {
408*12c85518Srobert     // Put the parentheses and commas of a macro call into the same line;
409*12c85518Srobert     // if the arguments produce new unwrapped lines, they will become children
410*12c85518Srobert     // of the corresponding opening parenthesis or comma tokens in the
411*12c85518Srobert     // reconstructed call.
412*12c85518Srobert     if (Token->is(tok::l_paren)) {
413*12c85518Srobert       MacroCallStructure.push_back(MacroCallState(
414*12c85518Srobert           currentLine(), parentLine().Tokens.back()->Tok, Token));
415*12c85518Srobert       // All tokens that are children of the previous line's last token in the
416*12c85518Srobert       // reconstructed token stream will now be children of the l_paren token.
417*12c85518Srobert       // For example, for the line containing the macro calls:
418*12c85518Srobert       //   auto x = ID({ID(2)});
419*12c85518Srobert       // We will build up a map <null> -> ( -> ( with the first and second
420*12c85518Srobert       // l_paren of the macro call respectively. New lines that come in with a
421*12c85518Srobert       // <null> parent will then become children of the l_paren token of the
422*12c85518Srobert       // currently innermost macro call.
423*12c85518Srobert       SpelledParentToReconstructedParent[MacroCallStructure.back()
424*12c85518Srobert                                              .ParentLastToken] = Token;
425*12c85518Srobert       appendToken(Token);
426*12c85518Srobert       prepareParent(Token, /*NewLine=*/true);
427*12c85518Srobert       Token->MacroParent = true;
428*12c85518Srobert       return false;
429*12c85518Srobert     }
430*12c85518Srobert     if (!MacroCallStructure.empty()) {
431*12c85518Srobert       if (Token->is(tok::comma)) {
432*12c85518Srobert         // Make new lines inside the next argument children of the comma token.
433*12c85518Srobert         SpelledParentToReconstructedParent
434*12c85518Srobert             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
435*12c85518Srobert         Token->MacroParent = true;
436*12c85518Srobert         appendToken(Token, MacroCallStructure.back().Line);
437*12c85518Srobert         prepareParent(Token, /*NewLine=*/true);
438*12c85518Srobert         return true;
439*12c85518Srobert       }
440*12c85518Srobert       if (Token->is(tok::r_paren)) {
441*12c85518Srobert         appendToken(Token, MacroCallStructure.back().Line);
442*12c85518Srobert         SpelledParentToReconstructedParent.erase(
443*12c85518Srobert             MacroCallStructure.back().ParentLastToken);
444*12c85518Srobert         MacroCallStructure.pop_back();
445*12c85518Srobert         return false;
446*12c85518Srobert       }
447*12c85518Srobert     }
448*12c85518Srobert   }
449*12c85518Srobert   // Note that any tokens that are tagged with MR_None have been passed as
450*12c85518Srobert   // arguments to the macro that have not been expanded, for example:
451*12c85518Srobert   // Given: #define ID(X) x
452*12c85518Srobert   // When calling: ID(a, b)
453*12c85518Srobert   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
454*12c85518Srobert   // Given that erroring out in this case would be disruptive, we continue
455*12c85518Srobert   // pushing the (unformatted) token.
456*12c85518Srobert   // FIXME: This can lead to unfortunate formatting decisions - give the user
457*12c85518Srobert   // a hint that their macro definition is broken.
458*12c85518Srobert   appendToken(Token);
459*12c85518Srobert   return false;
460*12c85518Srobert }
461*12c85518Srobert 
finalize()462*12c85518Srobert void MacroCallReconstructor::finalize() {
463*12c85518Srobert #ifndef NDEBUG
464*12c85518Srobert   assert(State != Finalized && finished());
465*12c85518Srobert   State = Finalized;
466*12c85518Srobert #endif
467*12c85518Srobert 
468*12c85518Srobert   // We created corresponding unwrapped lines for each incoming line as children
469*12c85518Srobert   // the the toplevel null token.
470*12c85518Srobert   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
471*12c85518Srobert   LLVM_DEBUG({
472*12c85518Srobert     llvm::dbgs() << "Finalizing reconstructed lines:\n";
473*12c85518Srobert     debug(Result, 0);
474*12c85518Srobert   });
475*12c85518Srobert 
476*12c85518Srobert   // The first line becomes the top level line in the resulting unwrapped line.
477*12c85518Srobert   LineNode &Top = *Result.Tokens.front();
478*12c85518Srobert   auto *I = Top.Children.begin();
479*12c85518Srobert   // Every subsequent line will become a child of the last token in the previous
480*12c85518Srobert   // line, which is the token prior to the first token in the line.
481*12c85518Srobert   LineNode *Last = (*I)->Tokens.back().get();
482*12c85518Srobert   ++I;
483*12c85518Srobert   for (auto *E = Top.Children.end(); I != E; ++I) {
484*12c85518Srobert     assert(Last->Children.empty());
485*12c85518Srobert     Last->Children.push_back(std::move(*I));
486*12c85518Srobert 
487*12c85518Srobert     // Mark the previous line's last token as generated by a macro expansion
488*12c85518Srobert     // so the formatting algorithm can take that into account.
489*12c85518Srobert     Last->Tok->MacroParent = true;
490*12c85518Srobert 
491*12c85518Srobert     Last = Last->Children.back()->Tokens.back().get();
492*12c85518Srobert   }
493*12c85518Srobert   Top.Children.resize(1);
494*12c85518Srobert }
495*12c85518Srobert 
appendToken(FormatToken * Token,ReconstructedLine * L)496*12c85518Srobert void MacroCallReconstructor::appendToken(FormatToken *Token,
497*12c85518Srobert                                          ReconstructedLine *L) {
498*12c85518Srobert   L = L ? L : currentLine();
499*12c85518Srobert   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
500*12c85518Srobert   L->Tokens.push_back(std::make_unique<LineNode>(Token));
501*12c85518Srobert }
502*12c85518Srobert 
503*12c85518Srobert UnwrappedLine
createUnwrappedLine(const ReconstructedLine & Line,int Level)504*12c85518Srobert MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
505*12c85518Srobert                                             int Level) {
506*12c85518Srobert   UnwrappedLine Result;
507*12c85518Srobert   Result.Level = Level;
508*12c85518Srobert   for (const auto &N : Line.Tokens) {
509*12c85518Srobert     Result.Tokens.push_back(N->Tok);
510*12c85518Srobert     UnwrappedLineNode &Current = Result.Tokens.back();
511*12c85518Srobert     for (const auto &Child : N->Children) {
512*12c85518Srobert       if (Child->Tokens.empty())
513*12c85518Srobert         continue;
514*12c85518Srobert       Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
515*12c85518Srobert     }
516*12c85518Srobert     if (Current.Children.size() == 1 &&
517*12c85518Srobert         Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
518*12c85518Srobert       Result.Tokens.splice(Result.Tokens.end(),
519*12c85518Srobert                            Current.Children.front().Tokens);
520*12c85518Srobert       Current.Children.clear();
521*12c85518Srobert     }
522*12c85518Srobert   }
523*12c85518Srobert   return Result;
524*12c85518Srobert }
525*12c85518Srobert 
debug(const ReconstructedLine & Line,int Level)526*12c85518Srobert void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
527*12c85518Srobert   for (int i = 0; i < Level; ++i)
528*12c85518Srobert     llvm::dbgs() << " ";
529*12c85518Srobert   for (const auto &N : Line.Tokens) {
530*12c85518Srobert     if (!N)
531*12c85518Srobert       continue;
532*12c85518Srobert     if (N->Tok)
533*12c85518Srobert       llvm::dbgs() << N->Tok->TokenText << " ";
534*12c85518Srobert     for (const auto &Child : N->Children) {
535*12c85518Srobert       llvm::dbgs() << "\n";
536*12c85518Srobert       debug(*Child, Level + 1);
537*12c85518Srobert       for (int i = 0; i < Level; ++i)
538*12c85518Srobert         llvm::dbgs() << " ";
539*12c85518Srobert     }
540*12c85518Srobert   }
541*12c85518Srobert   llvm::dbgs() << "\n";
542*12c85518Srobert }
543*12c85518Srobert 
544*12c85518Srobert MacroCallReconstructor::ReconstructedLine &
parentLine()545*12c85518Srobert MacroCallReconstructor::parentLine() {
546*12c85518Srobert   return **std::prev(std::prev(ActiveReconstructedLines.end()));
547*12c85518Srobert }
548*12c85518Srobert 
549*12c85518Srobert MacroCallReconstructor::ReconstructedLine *
currentLine()550*12c85518Srobert MacroCallReconstructor::currentLine() {
551*12c85518Srobert   return ActiveReconstructedLines.back();
552*12c85518Srobert }
553*12c85518Srobert 
MacroCallState(MacroCallReconstructor::ReconstructedLine * Line,FormatToken * ParentLastToken,FormatToken * MacroCallLParen)554*12c85518Srobert MacroCallReconstructor::MacroCallState::MacroCallState(
555*12c85518Srobert     MacroCallReconstructor::ReconstructedLine *Line,
556*12c85518Srobert     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
557*12c85518Srobert     : Line(Line), ParentLastToken(ParentLastToken),
558*12c85518Srobert       MacroCallLParen(MacroCallLParen) {
559*12c85518Srobert   LLVM_DEBUG(
560*12c85518Srobert       llvm::dbgs() << "ParentLastToken: "
561*12c85518Srobert                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
562*12c85518Srobert                    << "\n");
563*12c85518Srobert 
564*12c85518Srobert   assert(MacroCallLParen->is(tok::l_paren));
565*12c85518Srobert }
566*12c85518Srobert 
567*12c85518Srobert } // namespace format
568*12c85518Srobert } // namespace clang
569