xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/MacroCallReconstructor.cpp (revision 753f127f3ace09432b2baeffd71a308760641a62)
1*753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2*753f127fSDimitry Andric //
3*753f127fSDimitry Andric //                     The LLVM Compiler Infrastructure
4*753f127fSDimitry Andric //
5*753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source
6*753f127fSDimitry Andric // License. See LICENSE.TXT for details.
7*753f127fSDimitry Andric //
8*753f127fSDimitry Andric //===----------------------------------------------------------------------===//
9*753f127fSDimitry Andric ///
10*753f127fSDimitry Andric /// \file
11*753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits
12*753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines.
13*753f127fSDimitry Andric ///
14*753f127fSDimitry Andric //===----------------------------------------------------------------------===//
15*753f127fSDimitry Andric 
16*753f127fSDimitry Andric #include "Macros.h"
17*753f127fSDimitry Andric 
18*753f127fSDimitry Andric #include "UnwrappedLineParser.h"
19*753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h"
20*753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h"
21*753f127fSDimitry Andric #include "llvm/Support/Debug.h"
22*753f127fSDimitry Andric #include <cassert>
23*753f127fSDimitry Andric 
24*753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct"
25*753f127fSDimitry Andric 
26*753f127fSDimitry Andric namespace clang {
27*753f127fSDimitry Andric namespace format {
28*753f127fSDimitry Andric 
29*753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing
30*753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line.
31*753f127fSDimitry Andric template <typename T>
32*753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call,
33*753f127fSDimitry Andric                   FormatToken *Parent = nullptr) {
34*753f127fSDimitry Andric   bool First = true;
35*753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
36*753f127fSDimitry Andric     Call(N.Tok, Parent, First);
37*753f127fSDimitry Andric     First = false;
38*753f127fSDimitry Andric     for (const auto &Child : N.Children) {
39*753f127fSDimitry Andric       forEachToken(Child, Call, N.Tok);
40*753f127fSDimitry Andric     }
41*753f127fSDimitry Andric   }
42*753f127fSDimitry Andric }
43*753f127fSDimitry Andric 
44*753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor(
45*753f127fSDimitry Andric     unsigned Level,
46*753f127fSDimitry Andric     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
47*753f127fSDimitry Andric         &ActiveExpansions)
48*753f127fSDimitry Andric     : Level(Level), IdToReconstructed(ActiveExpansions) {
49*753f127fSDimitry Andric   Result.Tokens.push_back(std::make_unique<LineNode>());
50*753f127fSDimitry Andric   ActiveReconstructedLines.push_back(&Result);
51*753f127fSDimitry Andric }
52*753f127fSDimitry Andric 
53*753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
54*753f127fSDimitry Andric   assert(State != Finalized);
55*753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
56*753f127fSDimitry Andric   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
57*753f127fSDimitry Andric     add(Token, Parent, First);
58*753f127fSDimitry Andric   });
59*753f127fSDimitry Andric   assert(InProgress || finished());
60*753f127fSDimitry Andric }
61*753f127fSDimitry Andric 
62*753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && {
63*753f127fSDimitry Andric   finalize();
64*753f127fSDimitry Andric   assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1);
65*753f127fSDimitry Andric   UnwrappedLine Final =
66*753f127fSDimitry Andric       createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67*753f127fSDimitry Andric   assert(!Final.Tokens.empty());
68*753f127fSDimitry Andric   return Final;
69*753f127fSDimitry Andric }
70*753f127fSDimitry Andric 
71*753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p
72*753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73*753f127fSDimitry Andric // is the first token in a given unwrapped line.
74*753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token,
75*753f127fSDimitry Andric                                  FormatToken *ExpandedParent, bool First) {
76*753f127fSDimitry Andric   LLVM_DEBUG(
77*753f127fSDimitry Andric       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78*753f127fSDimitry Andric                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79*753f127fSDimitry Andric                    << ", First: " << First << "\n");
80*753f127fSDimitry Andric   // In order to be able to find the correct parent in the reconstructed token
81*753f127fSDimitry Andric   // stream, we need to continue the last open reconstruction until we find the
82*753f127fSDimitry Andric   // given token if it is part of the reconstructed token stream.
83*753f127fSDimitry Andric   //
84*753f127fSDimitry Andric   // Note that hidden tokens can be part of the reconstructed stream in nested
85*753f127fSDimitry Andric   // macro calls.
86*753f127fSDimitry Andric   // For example, given
87*753f127fSDimitry Andric   //   #define C(x, y) x y
88*753f127fSDimitry Andric   //   #define B(x) {x}
89*753f127fSDimitry Andric   // And the call:
90*753f127fSDimitry Andric   //   C(a, B(b))
91*753f127fSDimitry Andric   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92*753f127fSDimitry Andric   // found in the reconstructed token stream of that expansion level.
93*753f127fSDimitry Andric   // In the expanded token stream
94*753f127fSDimitry Andric   //   a {b}
95*753f127fSDimitry Andric   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96*753f127fSDimitry Andric   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97*753f127fSDimitry Andric   // so we later set the spelled token 'b' as a child of the ','.
98*753f127fSDimitry Andric   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99*753f127fSDimitry Andric       (Token->MacroCtx->Role != MR_Hidden ||
100*753f127fSDimitry Andric        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101*753f127fSDimitry Andric     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102*753f127fSDimitry Andric       First = true;
103*753f127fSDimitry Andric   }
104*753f127fSDimitry Andric 
105*753f127fSDimitry Andric   prepareParent(ExpandedParent, First);
106*753f127fSDimitry Andric 
107*753f127fSDimitry Andric   if (Token->MacroCtx) {
108*753f127fSDimitry Andric     // If this token was generated by a macro call, add the reconstructed
109*753f127fSDimitry Andric     // equivalent of the token.
110*753f127fSDimitry Andric     reconstruct(Token);
111*753f127fSDimitry Andric   } else {
112*753f127fSDimitry Andric     // Otherwise, we add it to the current line.
113*753f127fSDimitry Andric     appendToken(Token);
114*753f127fSDimitry Andric   }
115*753f127fSDimitry Andric }
116*753f127fSDimitry Andric 
117*753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push
118*753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the
119*753f127fSDimitry Andric // expanded code.
120*753f127fSDimitry Andric //
121*753f127fSDimitry Andric // This may entail:
122*753f127fSDimitry Andric // - creating a new line, if the parent is on the active line
123*753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack
124*753f127fSDimitry Andric //
125*753f127fSDimitry Andric // Postcondition:
126*753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127*753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the
128*753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129*753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed
130*753f127fSDimitry Andric // unwrapped line.
131*753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132*753f127fSDimitry Andric                                            bool NewLine) {
133*753f127fSDimitry Andric   LLVM_DEBUG({
134*753f127fSDimitry Andric     llvm::dbgs() << "ParentMap:\n";
135*753f127fSDimitry Andric     debugParentMap();
136*753f127fSDimitry Andric   });
137*753f127fSDimitry Andric   // We want to find the parent in the new unwrapped line, where the expanded
138*753f127fSDimitry Andric   // parent might have been replaced during reconstruction.
139*753f127fSDimitry Andric   FormatToken *Parent = getParentInResult(ExpandedParent);
140*753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141*753f127fSDimitry Andric                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142*753f127fSDimitry Andric 
143*753f127fSDimitry Andric   FormatToken *OpenMacroParent = nullptr;
144*753f127fSDimitry Andric   if (!MacroCallStructure.empty()) {
145*753f127fSDimitry Andric     // Inside a macro expansion, it is possible to lose track of the correct
146*753f127fSDimitry Andric     // parent - either because it is already popped, for example because it was
147*753f127fSDimitry Andric     // in a different macro argument (e.g. M({, })), or when we work on invalid
148*753f127fSDimitry Andric     // code.
149*753f127fSDimitry Andric     // Thus, we use the innermost macro call's parent as the parent at which
150*753f127fSDimitry Andric     // we stop; this allows us to stay within the macro expansion and keeps
151*753f127fSDimitry Andric     // any problems confined to the extent of the macro call.
152*753f127fSDimitry Andric     OpenMacroParent =
153*753f127fSDimitry Andric         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154*753f127fSDimitry Andric     LLVM_DEBUG(llvm::dbgs()
155*753f127fSDimitry Andric                << "MacroCallLParen: "
156*753f127fSDimitry Andric                << MacroCallStructure.back().MacroCallLParen->TokenText
157*753f127fSDimitry Andric                << ", OpenMacroParent: "
158*753f127fSDimitry Andric                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159*753f127fSDimitry Andric                << "\n");
160*753f127fSDimitry Andric   }
161*753f127fSDimitry Andric   if (NewLine ||
162*753f127fSDimitry Andric       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163*753f127fSDimitry Andric        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164*753f127fSDimitry Andric     // If we are at the first token in a new line, we want to also
165*753f127fSDimitry Andric     // create a new line in the resulting reconstructed unwrapped line.
166*753f127fSDimitry Andric     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167*753f127fSDimitry Andric            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168*753f127fSDimitry Andric             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169*753f127fSDimitry Andric                 OpenMacroParent)) {
170*753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
171*753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
172*753f127fSDimitry Andric     }
173*753f127fSDimitry Andric     assert(!ActiveReconstructedLines.empty());
174*753f127fSDimitry Andric     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175*753f127fSDimitry Andric         std::make_unique<ReconstructedLine>());
176*753f127fSDimitry Andric     ActiveReconstructedLines.push_back(
177*753f127fSDimitry Andric         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178*753f127fSDimitry Andric   } else if (parentLine().Tokens.back()->Tok != Parent) {
179*753f127fSDimitry Andric     // If we're not the first token in a new line, pop lines until we find
180*753f127fSDimitry Andric     // the child of \c Parent in the stack.
181*753f127fSDimitry Andric     while (Parent != parentLine().Tokens.back()->Tok &&
182*753f127fSDimitry Andric            parentLine().Tokens.back()->Tok &&
183*753f127fSDimitry Andric            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184*753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
185*753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
186*753f127fSDimitry Andric     }
187*753f127fSDimitry Andric   }
188*753f127fSDimitry Andric   assert(!ActiveReconstructedLines.empty());
189*753f127fSDimitry Andric }
190*753f127fSDimitry Andric 
191*753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the
192*753f127fSDimitry Andric // corresponding parent in the output.
193*753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194*753f127fSDimitry Andric   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195*753f127fSDimitry Andric   if (!Mapped)
196*753f127fSDimitry Andric     return Parent;
197*753f127fSDimitry Andric   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) {
198*753f127fSDimitry Andric     Parent = Mapped;
199*753f127fSDimitry Andric   }
200*753f127fSDimitry Andric   // If we use a different token than the parent in the expanded token stream
201*753f127fSDimitry Andric   // as parent, mark it as a special parent, so the formatting code knows it
202*753f127fSDimitry Andric   // needs to have its children formatted.
203*753f127fSDimitry Andric   Parent->MacroParent = true;
204*753f127fSDimitry Andric   return Parent;
205*753f127fSDimitry Andric }
206*753f127fSDimitry Andric 
207*753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call.
208*753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) {
209*753f127fSDimitry Andric   assert(Token->MacroCtx);
210*753f127fSDimitry Andric   // A single token can be the only result of a macro call:
211*753f127fSDimitry Andric   // Given: #define ID(x, y) ;
212*753f127fSDimitry Andric   // And the call: ID(<some>, <tokens>)
213*753f127fSDimitry Andric   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
214*753f127fSDimitry Andric   if (Token->MacroCtx->StartOfExpansion) {
215*753f127fSDimitry Andric     startReconstruction(Token);
216*753f127fSDimitry Andric     // If the order of tokens in the expanded token stream is not the
217*753f127fSDimitry Andric     // same as the order of tokens in the reconstructed stream, we need
218*753f127fSDimitry Andric     // to reconstruct tokens that arrive later in the stream.
219*753f127fSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden) {
220*753f127fSDimitry Andric       reconstructActiveCallUntil(Token);
221*753f127fSDimitry Andric     }
222*753f127fSDimitry Andric   }
223*753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
224*753f127fSDimitry Andric   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
225*753f127fSDimitry Andric     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
226*753f127fSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden) {
227*753f127fSDimitry Andric       // The current token in the reconstructed token stream must be the token
228*753f127fSDimitry Andric       // we're looking for - we either arrive here after startReconstruction,
229*753f127fSDimitry Andric       // which initiates the stream to the first token, or after
230*753f127fSDimitry Andric       // continueReconstructionUntil skipped until the expected token in the
231*753f127fSDimitry Andric       // reconstructed stream at the start of add(...).
232*753f127fSDimitry Andric       assert(ActiveExpansions.back().SpelledI->Tok == Token);
233*753f127fSDimitry Andric       processNextReconstructed();
234*753f127fSDimitry Andric     } else if (!currentLine()->Tokens.empty()) {
235*753f127fSDimitry Andric       // Map all hidden tokens to the last visible token in the output.
236*753f127fSDimitry Andric       // If the hidden token is a parent, we'll use the last visible
237*753f127fSDimitry Andric       // token as the parent of the hidden token's children.
238*753f127fSDimitry Andric       SpelledParentToReconstructedParent[Token] =
239*753f127fSDimitry Andric           currentLine()->Tokens.back()->Tok;
240*753f127fSDimitry Andric     } else {
241*753f127fSDimitry Andric       for (auto I = ActiveReconstructedLines.rbegin(),
242*753f127fSDimitry Andric                 E = ActiveReconstructedLines.rend();
243*753f127fSDimitry Andric            I != E; ++I) {
244*753f127fSDimitry Andric         if (!(*I)->Tokens.empty()) {
245*753f127fSDimitry Andric           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
246*753f127fSDimitry Andric           break;
247*753f127fSDimitry Andric         }
248*753f127fSDimitry Andric       }
249*753f127fSDimitry Andric     }
250*753f127fSDimitry Andric   }
251*753f127fSDimitry Andric   if (Token->MacroCtx->EndOfExpansion)
252*753f127fSDimitry Andric     endReconstruction(Token);
253*753f127fSDimitry Andric }
254*753f127fSDimitry Andric 
255*753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the
256*753f127fSDimitry Andric // macro call.
257*753f127fSDimitry Andric // For example, given: #define ID(x) x
258*753f127fSDimitry Andric // And the call: ID(int a)
259*753f127fSDimitry Andric // Reconstructs: ID(
260*753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
261*753f127fSDimitry Andric   assert(Token->MacroCtx);
262*753f127fSDimitry Andric   assert(!Token->MacroCtx->ExpandedFrom.empty());
263*753f127fSDimitry Andric   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
264*753f127fSDimitry Andric #ifndef NDEBUG
265*753f127fSDimitry Andric   // Check that the token's reconstruction stack matches our current
266*753f127fSDimitry Andric   // reconstruction stack.
267*753f127fSDimitry Andric   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
268*753f127fSDimitry Andric     assert(ActiveExpansions[I].ID ==
269*753f127fSDimitry Andric            Token->MacroCtx
270*753f127fSDimitry Andric                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
271*753f127fSDimitry Andric   }
272*753f127fSDimitry Andric #endif
273*753f127fSDimitry Andric   // Start reconstruction for all calls for which this token is the first token
274*753f127fSDimitry Andric   // generated by the call.
275*753f127fSDimitry Andric   // Note that the token's expanded from stack is inside-to-outside, and the
276*753f127fSDimitry Andric   // expansions for which this token is not the first are the outermost ones.
277*753f127fSDimitry Andric   ArrayRef<FormatToken *> StartedMacros =
278*753f127fSDimitry Andric       makeArrayRef(Token->MacroCtx->ExpandedFrom)
279*753f127fSDimitry Andric           .drop_back(ActiveExpansions.size());
280*753f127fSDimitry Andric   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
281*753f127fSDimitry Andric   // We reconstruct macro calls outside-to-inside.
282*753f127fSDimitry Andric   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
283*753f127fSDimitry Andric     // We found a macro call to be reconstructed; the next time our
284*753f127fSDimitry Andric     // reconstruction stack is empty we know we finished an reconstruction.
285*753f127fSDimitry Andric #ifndef NDEBUG
286*753f127fSDimitry Andric     State = InProgress;
287*753f127fSDimitry Andric #endif
288*753f127fSDimitry Andric     // Put the reconstructed macro call's token into our reconstruction stack.
289*753f127fSDimitry Andric     auto IU = IdToReconstructed.find(ID);
290*753f127fSDimitry Andric     assert(IU != IdToReconstructed.end());
291*753f127fSDimitry Andric     ActiveExpansions.push_back(
292*753f127fSDimitry Andric         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
293*753f127fSDimitry Andric     // Process the macro call's identifier.
294*753f127fSDimitry Andric     processNextReconstructed();
295*753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
296*753f127fSDimitry Andric       continue;
297*753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
298*753f127fSDimitry Andric       // Process the optional opening parenthesis.
299*753f127fSDimitry Andric       processNextReconstructed();
300*753f127fSDimitry Andric     }
301*753f127fSDimitry Andric   }
302*753f127fSDimitry Andric }
303*753f127fSDimitry Andric 
304*753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the
305*753f127fSDimitry Andric // given \p Token.
306*753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
307*753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
308*753f127fSDimitry Andric   bool PassedMacroComma = false;
309*753f127fSDimitry Andric   // FIXME: If Token was already expanded earlier, due to
310*753f127fSDimitry Andric   // a change in order, we will not find it, but need to
311*753f127fSDimitry Andric   // skip it.
312*753f127fSDimitry Andric   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
313*753f127fSDimitry Andric          ActiveExpansions.back().SpelledI->Tok != Token) {
314*753f127fSDimitry Andric     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
315*753f127fSDimitry Andric   }
316*753f127fSDimitry Andric   return PassedMacroComma;
317*753f127fSDimitry Andric }
318*753f127fSDimitry Andric 
319*753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token.
320*753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
321*753f127fSDimitry Andric   assert(Token->MacroCtx &&
322*753f127fSDimitry Andric          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
323*753f127fSDimitry Andric   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
324*753f127fSDimitry Andric #ifndef NDEBUG
325*753f127fSDimitry Andric     // Check all remaining tokens but the final closing parenthesis and optional
326*753f127fSDimitry Andric     // trailing comment were already reconstructed at an inner expansion level.
327*753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
328*753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
329*753f127fSDimitry Andric       FormatToken *Token = T->Tok;
330*753f127fSDimitry Andric       bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
331*753f127fSDimitry Andric                            std::next(T)->Tok->isTrailingComment()) &&
332*753f127fSDimitry Andric                           !Token->MacroCtx && Token->is(tok::r_paren);
333*753f127fSDimitry Andric       bool TrailingComment = Token->isTrailingComment();
334*753f127fSDimitry Andric       bool PreviousLevel =
335*753f127fSDimitry Andric           Token->MacroCtx &&
336*753f127fSDimitry Andric           (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
337*753f127fSDimitry Andric       if (!ClosingParen && !TrailingComment && !PreviousLevel) {
338*753f127fSDimitry Andric         llvm::dbgs() << "At token: " << Token->TokenText << "\n";
339*753f127fSDimitry Andric       }
340*753f127fSDimitry Andric       // In addition to the following cases, we can also run into this
341*753f127fSDimitry Andric       // when a macro call had more arguments than expected; in that case,
342*753f127fSDimitry Andric       // the comma and the remaining tokens in the macro call will potentially
343*753f127fSDimitry Andric       // end up in the line when we finish the expansion.
344*753f127fSDimitry Andric       // FIXME: Add the information which arguments are unused, and assert
345*753f127fSDimitry Andric       // one of the cases below plus reconstructed macro argument tokens.
346*753f127fSDimitry Andric       // assert(ClosingParen || TrailingComment || PreviousLevel);
347*753f127fSDimitry Andric     }
348*753f127fSDimitry Andric #endif
349*753f127fSDimitry Andric     // Handle the remaining open tokens:
350*753f127fSDimitry Andric     // - expand the closing parenthesis, if it exists, including an optional
351*753f127fSDimitry Andric     //   trailing comment
352*753f127fSDimitry Andric     // - handle tokens that were already reconstructed at an inner expansion
353*753f127fSDimitry Andric     //   level
354*753f127fSDimitry Andric     // - handle tokens when a macro call had more than the expected number of
355*753f127fSDimitry Andric     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
356*753f127fSDimitry Andric     //   up with the sequence ", b, c)" being open at the end of the
357*753f127fSDimitry Andric     //   reconstruction; we want to gracefully handle that case
358*753f127fSDimitry Andric     //
359*753f127fSDimitry Andric     // FIXME: See the above debug-check for what we will need to do to be
360*753f127fSDimitry Andric     // able to assert this.
361*753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
362*753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
363*753f127fSDimitry Andric       processNextReconstructed();
364*753f127fSDimitry Andric     }
365*753f127fSDimitry Andric     ActiveExpansions.pop_back();
366*753f127fSDimitry Andric   }
367*753f127fSDimitry Andric }
368*753f127fSDimitry Andric 
369*753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const {
370*753f127fSDimitry Andric   llvm::DenseSet<FormatToken *> Values;
371*753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent)
372*753f127fSDimitry Andric     Values.insert(P.second);
373*753f127fSDimitry Andric 
374*753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent) {
375*753f127fSDimitry Andric     if (Values.contains(P.first))
376*753f127fSDimitry Andric       continue;
377*753f127fSDimitry Andric     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
378*753f127fSDimitry Andric     for (auto I = SpelledParentToReconstructedParent.find(P.first),
379*753f127fSDimitry Andric               E = SpelledParentToReconstructedParent.end();
380*753f127fSDimitry Andric          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
381*753f127fSDimitry Andric       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
382*753f127fSDimitry Andric     }
383*753f127fSDimitry Andric     llvm::dbgs() << "\n";
384*753f127fSDimitry Andric   }
385*753f127fSDimitry Andric }
386*753f127fSDimitry Andric 
387*753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the
388*753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a
389*753f127fSDimitry Andric // macro call.
390*753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() {
391*753f127fSDimitry Andric   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
392*753f127fSDimitry Andric   ++ActiveExpansions.back().SpelledI;
393*753f127fSDimitry Andric   if (Token->MacroCtx) {
394*753f127fSDimitry Andric     // Skip tokens that are not part of the macro call.
395*753f127fSDimitry Andric     if (Token->MacroCtx->Role == MR_Hidden) {
396*753f127fSDimitry Andric       return false;
397*753f127fSDimitry Andric     }
398*753f127fSDimitry Andric     // Skip tokens we already expanded during an inner reconstruction.
399*753f127fSDimitry Andric     // For example, given: #define ID(x) {x}
400*753f127fSDimitry Andric     // And the call: ID(ID(f))
401*753f127fSDimitry Andric     // We get two reconstructions:
402*753f127fSDimitry Andric     // ID(f) -> {f}
403*753f127fSDimitry Andric     // ID({f}) -> {{f}}
404*753f127fSDimitry Andric     // We reconstruct f during the first reconstruction, and skip it during the
405*753f127fSDimitry Andric     // second reconstruction.
406*753f127fSDimitry Andric     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) {
407*753f127fSDimitry Andric       return false;
408*753f127fSDimitry Andric     }
409*753f127fSDimitry Andric   }
410*753f127fSDimitry Andric   // Tokens that do not have a macro context are tokens in that are part of the
411*753f127fSDimitry Andric   // macro call that have not taken part in expansion.
412*753f127fSDimitry Andric   if (!Token->MacroCtx) {
413*753f127fSDimitry Andric     // Put the parentheses and commas of a macro call into the same line;
414*753f127fSDimitry Andric     // if the arguments produce new unwrapped lines, they will become children
415*753f127fSDimitry Andric     // of the corresponding opening parenthesis or comma tokens in the
416*753f127fSDimitry Andric     // reconstructed call.
417*753f127fSDimitry Andric     if (Token->is(tok::l_paren)) {
418*753f127fSDimitry Andric       MacroCallStructure.push_back(MacroCallState(
419*753f127fSDimitry Andric           currentLine(), parentLine().Tokens.back()->Tok, Token));
420*753f127fSDimitry Andric       // All tokens that are children of the previous line's last token in the
421*753f127fSDimitry Andric       // reconstructed token stream will now be children of the l_paren token.
422*753f127fSDimitry Andric       // For example, for the line containing the macro calls:
423*753f127fSDimitry Andric       //   auto x = ID({ID(2)});
424*753f127fSDimitry Andric       // We will build up a map <null> -> ( -> ( with the first and second
425*753f127fSDimitry Andric       // l_paren of the macro call respectively. New lines that come in with a
426*753f127fSDimitry Andric       // <null> parent will then become children of the l_paren token of the
427*753f127fSDimitry Andric       // currently innermost macro call.
428*753f127fSDimitry Andric       SpelledParentToReconstructedParent[MacroCallStructure.back()
429*753f127fSDimitry Andric                                              .ParentLastToken] = Token;
430*753f127fSDimitry Andric       appendToken(Token);
431*753f127fSDimitry Andric       prepareParent(Token, /*NewLine=*/true);
432*753f127fSDimitry Andric       Token->MacroParent = true;
433*753f127fSDimitry Andric       return false;
434*753f127fSDimitry Andric     }
435*753f127fSDimitry Andric     if (!MacroCallStructure.empty()) {
436*753f127fSDimitry Andric       if (Token->is(tok::comma)) {
437*753f127fSDimitry Andric         // Make new lines inside the next argument children of the comma token.
438*753f127fSDimitry Andric         SpelledParentToReconstructedParent
439*753f127fSDimitry Andric             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
440*753f127fSDimitry Andric         Token->MacroParent = true;
441*753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
442*753f127fSDimitry Andric         prepareParent(Token, /*NewLine=*/true);
443*753f127fSDimitry Andric         return true;
444*753f127fSDimitry Andric       }
445*753f127fSDimitry Andric       if (Token->is(tok::r_paren)) {
446*753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
447*753f127fSDimitry Andric         SpelledParentToReconstructedParent.erase(
448*753f127fSDimitry Andric             MacroCallStructure.back().ParentLastToken);
449*753f127fSDimitry Andric         MacroCallStructure.pop_back();
450*753f127fSDimitry Andric         return false;
451*753f127fSDimitry Andric       }
452*753f127fSDimitry Andric     }
453*753f127fSDimitry Andric   }
454*753f127fSDimitry Andric   // Note that any tokens that are tagged with MR_None have been passed as
455*753f127fSDimitry Andric   // arguments to the macro that have not been expanded, for example:
456*753f127fSDimitry Andric   // Given: #define ID(X) x
457*753f127fSDimitry Andric   // When calling: ID(a, b)
458*753f127fSDimitry Andric   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
459*753f127fSDimitry Andric   // Given that erroring out in this case would be disruptive, we continue
460*753f127fSDimitry Andric   // pushing the (unformatted) token.
461*753f127fSDimitry Andric   // FIXME: This can lead to unfortunate formatting decisions - give the user
462*753f127fSDimitry Andric   // a hint that their macro definition is broken.
463*753f127fSDimitry Andric   appendToken(Token);
464*753f127fSDimitry Andric   return false;
465*753f127fSDimitry Andric }
466*753f127fSDimitry Andric 
467*753f127fSDimitry Andric void MacroCallReconstructor::finalize() {
468*753f127fSDimitry Andric #ifndef NDEBUG
469*753f127fSDimitry Andric   assert(State != Finalized && finished());
470*753f127fSDimitry Andric   State = Finalized;
471*753f127fSDimitry Andric #endif
472*753f127fSDimitry Andric 
473*753f127fSDimitry Andric   // We created corresponding unwrapped lines for each incoming line as children
474*753f127fSDimitry Andric   // the the toplevel null token.
475*753f127fSDimitry Andric   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
476*753f127fSDimitry Andric   LLVM_DEBUG({
477*753f127fSDimitry Andric     llvm::dbgs() << "Finalizing reconstructed lines:\n";
478*753f127fSDimitry Andric     debug(Result, 0);
479*753f127fSDimitry Andric   });
480*753f127fSDimitry Andric 
481*753f127fSDimitry Andric   // The first line becomes the top level line in the resulting unwrapped line.
482*753f127fSDimitry Andric   LineNode &Top = *Result.Tokens.front();
483*753f127fSDimitry Andric   auto *I = Top.Children.begin();
484*753f127fSDimitry Andric   // Every subsequent line will become a child of the last token in the previous
485*753f127fSDimitry Andric   // line, which is the token prior to the first token in the line.
486*753f127fSDimitry Andric   LineNode *Last = (*I)->Tokens.back().get();
487*753f127fSDimitry Andric   ++I;
488*753f127fSDimitry Andric   for (auto *E = Top.Children.end(); I != E; ++I) {
489*753f127fSDimitry Andric     assert(Last->Children.empty());
490*753f127fSDimitry Andric     Last->Children.push_back(std::move(*I));
491*753f127fSDimitry Andric 
492*753f127fSDimitry Andric     // Mark the previous line's last token as generated by a macro expansion
493*753f127fSDimitry Andric     // so the formatting algorithm can take that into account.
494*753f127fSDimitry Andric     Last->Tok->MacroParent = true;
495*753f127fSDimitry Andric 
496*753f127fSDimitry Andric     Last = Last->Children.back()->Tokens.back().get();
497*753f127fSDimitry Andric   }
498*753f127fSDimitry Andric   Top.Children.resize(1);
499*753f127fSDimitry Andric }
500*753f127fSDimitry Andric 
501*753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token,
502*753f127fSDimitry Andric                                          ReconstructedLine *L) {
503*753f127fSDimitry Andric   L = L ? L : currentLine();
504*753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
505*753f127fSDimitry Andric   L->Tokens.push_back(std::make_unique<LineNode>(Token));
506*753f127fSDimitry Andric }
507*753f127fSDimitry Andric 
508*753f127fSDimitry Andric UnwrappedLine
509*753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
510*753f127fSDimitry Andric                                             int Level) {
511*753f127fSDimitry Andric   UnwrappedLine Result;
512*753f127fSDimitry Andric   Result.Level = Level;
513*753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
514*753f127fSDimitry Andric     Result.Tokens.push_back(N->Tok);
515*753f127fSDimitry Andric     UnwrappedLineNode &Current = Result.Tokens.back();
516*753f127fSDimitry Andric     for (const auto &Child : N->Children) {
517*753f127fSDimitry Andric       if (Child->Tokens.empty())
518*753f127fSDimitry Andric         continue;
519*753f127fSDimitry Andric       Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
520*753f127fSDimitry Andric     }
521*753f127fSDimitry Andric     if (Current.Children.size() == 1 &&
522*753f127fSDimitry Andric         Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
523*753f127fSDimitry Andric       Result.Tokens.splice(Result.Tokens.end(),
524*753f127fSDimitry Andric                            Current.Children.front().Tokens);
525*753f127fSDimitry Andric       Current.Children.clear();
526*753f127fSDimitry Andric     }
527*753f127fSDimitry Andric   }
528*753f127fSDimitry Andric   return Result;
529*753f127fSDimitry Andric }
530*753f127fSDimitry Andric 
531*753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
532*753f127fSDimitry Andric   for (int i = 0; i < Level; ++i)
533*753f127fSDimitry Andric     llvm::dbgs() << " ";
534*753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
535*753f127fSDimitry Andric     if (!N)
536*753f127fSDimitry Andric       continue;
537*753f127fSDimitry Andric     if (N->Tok)
538*753f127fSDimitry Andric       llvm::dbgs() << N->Tok->TokenText << " ";
539*753f127fSDimitry Andric     for (const auto &Child : N->Children) {
540*753f127fSDimitry Andric       llvm::dbgs() << "\n";
541*753f127fSDimitry Andric       debug(*Child, Level + 1);
542*753f127fSDimitry Andric       for (int i = 0; i < Level; ++i)
543*753f127fSDimitry Andric         llvm::dbgs() << " ";
544*753f127fSDimitry Andric     }
545*753f127fSDimitry Andric   }
546*753f127fSDimitry Andric   llvm::dbgs() << "\n";
547*753f127fSDimitry Andric }
548*753f127fSDimitry Andric 
549*753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine &
550*753f127fSDimitry Andric MacroCallReconstructor::parentLine() {
551*753f127fSDimitry Andric   return **std::prev(std::prev(ActiveReconstructedLines.end()));
552*753f127fSDimitry Andric }
553*753f127fSDimitry Andric 
554*753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *
555*753f127fSDimitry Andric MacroCallReconstructor::currentLine() {
556*753f127fSDimitry Andric   return ActiveReconstructedLines.back();
557*753f127fSDimitry Andric }
558*753f127fSDimitry Andric 
559*753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState(
560*753f127fSDimitry Andric     MacroCallReconstructor::ReconstructedLine *Line,
561*753f127fSDimitry Andric     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
562*753f127fSDimitry Andric     : Line(Line), ParentLastToken(ParentLastToken),
563*753f127fSDimitry Andric       MacroCallLParen(MacroCallLParen) {
564*753f127fSDimitry Andric   LLVM_DEBUG(
565*753f127fSDimitry Andric       llvm::dbgs() << "ParentLastToken: "
566*753f127fSDimitry Andric                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
567*753f127fSDimitry Andric                    << "\n");
568*753f127fSDimitry Andric 
569*753f127fSDimitry Andric   assert(MacroCallLParen->is(tok::l_paren));
570*753f127fSDimitry Andric }
571*753f127fSDimitry Andric 
572*753f127fSDimitry Andric } // namespace format
573*753f127fSDimitry Andric } // namespace clang
574