xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/MacroCallReconstructor.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2753f127fSDimitry Andric //
3753f127fSDimitry Andric //                     The LLVM Compiler Infrastructure
4753f127fSDimitry Andric //
5753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source
6753f127fSDimitry Andric // License. See LICENSE.TXT for details.
7753f127fSDimitry Andric //
8753f127fSDimitry Andric //===----------------------------------------------------------------------===//
9753f127fSDimitry Andric ///
10753f127fSDimitry Andric /// \file
11753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits
12753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines.
13753f127fSDimitry Andric ///
14753f127fSDimitry Andric //===----------------------------------------------------------------------===//
15753f127fSDimitry Andric 
16753f127fSDimitry Andric #include "Macros.h"
17753f127fSDimitry Andric 
18753f127fSDimitry Andric #include "UnwrappedLineParser.h"
19753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h"
20753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h"
21753f127fSDimitry Andric #include "llvm/Support/Debug.h"
22753f127fSDimitry Andric #include <cassert>
23753f127fSDimitry Andric 
24753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct"
25753f127fSDimitry Andric 
26753f127fSDimitry Andric namespace clang {
27753f127fSDimitry Andric namespace format {
28753f127fSDimitry Andric 
29753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing
30753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line.
31753f127fSDimitry Andric template <typename T>
32753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call,
33753f127fSDimitry Andric                   FormatToken *Parent = nullptr) {
34753f127fSDimitry Andric   bool First = true;
35753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
36753f127fSDimitry Andric     Call(N.Tok, Parent, First);
37753f127fSDimitry Andric     First = false;
38*bdd1243dSDimitry Andric     for (const auto &Child : N.Children)
39753f127fSDimitry Andric       forEachToken(Child, Call, N.Tok);
40753f127fSDimitry Andric   }
41753f127fSDimitry Andric }
42753f127fSDimitry Andric 
43753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor(
44753f127fSDimitry Andric     unsigned Level,
45753f127fSDimitry Andric     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46753f127fSDimitry Andric         &ActiveExpansions)
47753f127fSDimitry Andric     : Level(Level), IdToReconstructed(ActiveExpansions) {
48753f127fSDimitry Andric   Result.Tokens.push_back(std::make_unique<LineNode>());
49753f127fSDimitry Andric   ActiveReconstructedLines.push_back(&Result);
50753f127fSDimitry Andric }
51753f127fSDimitry Andric 
52753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53753f127fSDimitry Andric   assert(State != Finalized);
54753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55753f127fSDimitry Andric   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
56753f127fSDimitry Andric     add(Token, Parent, First);
57753f127fSDimitry Andric   });
58753f127fSDimitry Andric   assert(InProgress || finished());
59753f127fSDimitry Andric }
60753f127fSDimitry Andric 
61753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && {
62753f127fSDimitry Andric   finalize();
63*bdd1243dSDimitry Andric   assert(Result.Tokens.size() == 1 &&
64*bdd1243dSDimitry Andric          Result.Tokens.front()->Children.size() == 1);
65753f127fSDimitry Andric   UnwrappedLine Final =
66753f127fSDimitry Andric       createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
67753f127fSDimitry Andric   assert(!Final.Tokens.empty());
68753f127fSDimitry Andric   return Final;
69753f127fSDimitry Andric }
70753f127fSDimitry Andric 
71753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p
72753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
73753f127fSDimitry Andric // is the first token in a given unwrapped line.
74753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token,
75753f127fSDimitry Andric                                  FormatToken *ExpandedParent, bool First) {
76753f127fSDimitry Andric   LLVM_DEBUG(
77753f127fSDimitry Andric       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78753f127fSDimitry Andric                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79753f127fSDimitry Andric                    << ", First: " << First << "\n");
80753f127fSDimitry Andric   // In order to be able to find the correct parent in the reconstructed token
81753f127fSDimitry Andric   // stream, we need to continue the last open reconstruction until we find the
82753f127fSDimitry Andric   // given token if it is part of the reconstructed token stream.
83753f127fSDimitry Andric   //
84753f127fSDimitry Andric   // Note that hidden tokens can be part of the reconstructed stream in nested
85753f127fSDimitry Andric   // macro calls.
86753f127fSDimitry Andric   // For example, given
87753f127fSDimitry Andric   //   #define C(x, y) x y
88753f127fSDimitry Andric   //   #define B(x) {x}
89753f127fSDimitry Andric   // And the call:
90753f127fSDimitry Andric   //   C(a, B(b))
91753f127fSDimitry Andric   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92753f127fSDimitry Andric   // found in the reconstructed token stream of that expansion level.
93753f127fSDimitry Andric   // In the expanded token stream
94753f127fSDimitry Andric   //   a {b}
95753f127fSDimitry Andric   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96753f127fSDimitry Andric   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97753f127fSDimitry Andric   // so we later set the spelled token 'b' as a child of the ','.
98753f127fSDimitry Andric   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99753f127fSDimitry Andric       (Token->MacroCtx->Role != MR_Hidden ||
100753f127fSDimitry Andric        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101753f127fSDimitry Andric     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102753f127fSDimitry Andric       First = true;
103753f127fSDimitry Andric   }
104753f127fSDimitry Andric 
105753f127fSDimitry Andric   prepareParent(ExpandedParent, First);
106753f127fSDimitry Andric 
107753f127fSDimitry Andric   if (Token->MacroCtx) {
108753f127fSDimitry Andric     // If this token was generated by a macro call, add the reconstructed
109753f127fSDimitry Andric     // equivalent of the token.
110753f127fSDimitry Andric     reconstruct(Token);
111753f127fSDimitry Andric   } else {
112753f127fSDimitry Andric     // Otherwise, we add it to the current line.
113753f127fSDimitry Andric     appendToken(Token);
114753f127fSDimitry Andric   }
115753f127fSDimitry Andric }
116753f127fSDimitry Andric 
117753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push
118753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the
119753f127fSDimitry Andric // expanded code.
120753f127fSDimitry Andric //
121753f127fSDimitry Andric // This may entail:
122753f127fSDimitry Andric // - creating a new line, if the parent is on the active line
123753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack
124753f127fSDimitry Andric //
125753f127fSDimitry Andric // Postcondition:
126753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the
128753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed
130753f127fSDimitry Andric // unwrapped line.
131753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132753f127fSDimitry Andric                                            bool NewLine) {
133753f127fSDimitry Andric   LLVM_DEBUG({
134753f127fSDimitry Andric     llvm::dbgs() << "ParentMap:\n";
135753f127fSDimitry Andric     debugParentMap();
136753f127fSDimitry Andric   });
137753f127fSDimitry Andric   // We want to find the parent in the new unwrapped line, where the expanded
138753f127fSDimitry Andric   // parent might have been replaced during reconstruction.
139753f127fSDimitry Andric   FormatToken *Parent = getParentInResult(ExpandedParent);
140753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141753f127fSDimitry Andric                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142753f127fSDimitry Andric 
143753f127fSDimitry Andric   FormatToken *OpenMacroParent = nullptr;
144753f127fSDimitry Andric   if (!MacroCallStructure.empty()) {
145753f127fSDimitry Andric     // Inside a macro expansion, it is possible to lose track of the correct
146753f127fSDimitry Andric     // parent - either because it is already popped, for example because it was
147753f127fSDimitry Andric     // in a different macro argument (e.g. M({, })), or when we work on invalid
148753f127fSDimitry Andric     // code.
149753f127fSDimitry Andric     // Thus, we use the innermost macro call's parent as the parent at which
150753f127fSDimitry Andric     // we stop; this allows us to stay within the macro expansion and keeps
151753f127fSDimitry Andric     // any problems confined to the extent of the macro call.
152753f127fSDimitry Andric     OpenMacroParent =
153753f127fSDimitry Andric         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154753f127fSDimitry Andric     LLVM_DEBUG(llvm::dbgs()
155753f127fSDimitry Andric                << "MacroCallLParen: "
156753f127fSDimitry Andric                << MacroCallStructure.back().MacroCallLParen->TokenText
157753f127fSDimitry Andric                << ", OpenMacroParent: "
158753f127fSDimitry Andric                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159753f127fSDimitry Andric                << "\n");
160753f127fSDimitry Andric   }
161753f127fSDimitry Andric   if (NewLine ||
162753f127fSDimitry Andric       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163753f127fSDimitry Andric        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164753f127fSDimitry Andric     // If we are at the first token in a new line, we want to also
165753f127fSDimitry Andric     // create a new line in the resulting reconstructed unwrapped line.
166753f127fSDimitry Andric     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167753f127fSDimitry Andric            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168753f127fSDimitry Andric             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169753f127fSDimitry Andric                 OpenMacroParent)) {
170753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
171753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
172753f127fSDimitry Andric     }
173753f127fSDimitry Andric     assert(!ActiveReconstructedLines.empty());
174753f127fSDimitry Andric     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175753f127fSDimitry Andric         std::make_unique<ReconstructedLine>());
176753f127fSDimitry Andric     ActiveReconstructedLines.push_back(
177753f127fSDimitry Andric         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178753f127fSDimitry Andric   } else if (parentLine().Tokens.back()->Tok != Parent) {
179753f127fSDimitry Andric     // If we're not the first token in a new line, pop lines until we find
180753f127fSDimitry Andric     // the child of \c Parent in the stack.
181753f127fSDimitry Andric     while (Parent != parentLine().Tokens.back()->Tok &&
182753f127fSDimitry Andric            parentLine().Tokens.back()->Tok &&
183753f127fSDimitry Andric            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
185753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
186753f127fSDimitry Andric     }
187753f127fSDimitry Andric   }
188753f127fSDimitry Andric   assert(!ActiveReconstructedLines.empty());
189753f127fSDimitry Andric }
190753f127fSDimitry Andric 
191753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the
192753f127fSDimitry Andric // corresponding parent in the output.
193753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194753f127fSDimitry Andric   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195753f127fSDimitry Andric   if (!Mapped)
196753f127fSDimitry Andric     return Parent;
197*bdd1243dSDimitry Andric   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198753f127fSDimitry Andric     Parent = Mapped;
199753f127fSDimitry Andric   // If we use a different token than the parent in the expanded token stream
200753f127fSDimitry Andric   // as parent, mark it as a special parent, so the formatting code knows it
201753f127fSDimitry Andric   // needs to have its children formatted.
202753f127fSDimitry Andric   Parent->MacroParent = true;
203753f127fSDimitry Andric   return Parent;
204753f127fSDimitry Andric }
205753f127fSDimitry Andric 
206753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call.
207753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208753f127fSDimitry Andric   assert(Token->MacroCtx);
209753f127fSDimitry Andric   // A single token can be the only result of a macro call:
210753f127fSDimitry Andric   // Given: #define ID(x, y) ;
211753f127fSDimitry Andric   // And the call: ID(<some>, <tokens>)
212753f127fSDimitry Andric   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213753f127fSDimitry Andric   if (Token->MacroCtx->StartOfExpansion) {
214753f127fSDimitry Andric     startReconstruction(Token);
215753f127fSDimitry Andric     // If the order of tokens in the expanded token stream is not the
216753f127fSDimitry Andric     // same as the order of tokens in the reconstructed stream, we need
217753f127fSDimitry Andric     // to reconstruct tokens that arrive later in the stream.
218*bdd1243dSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden)
219753f127fSDimitry Andric       reconstructActiveCallUntil(Token);
220753f127fSDimitry Andric   }
221753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
222753f127fSDimitry Andric   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223753f127fSDimitry Andric     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224753f127fSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden) {
225753f127fSDimitry Andric       // The current token in the reconstructed token stream must be the token
226753f127fSDimitry Andric       // we're looking for - we either arrive here after startReconstruction,
227753f127fSDimitry Andric       // which initiates the stream to the first token, or after
228753f127fSDimitry Andric       // continueReconstructionUntil skipped until the expected token in the
229753f127fSDimitry Andric       // reconstructed stream at the start of add(...).
230753f127fSDimitry Andric       assert(ActiveExpansions.back().SpelledI->Tok == Token);
231753f127fSDimitry Andric       processNextReconstructed();
232753f127fSDimitry Andric     } else if (!currentLine()->Tokens.empty()) {
233753f127fSDimitry Andric       // Map all hidden tokens to the last visible token in the output.
234753f127fSDimitry Andric       // If the hidden token is a parent, we'll use the last visible
235753f127fSDimitry Andric       // token as the parent of the hidden token's children.
236753f127fSDimitry Andric       SpelledParentToReconstructedParent[Token] =
237753f127fSDimitry Andric           currentLine()->Tokens.back()->Tok;
238753f127fSDimitry Andric     } else {
239753f127fSDimitry Andric       for (auto I = ActiveReconstructedLines.rbegin(),
240753f127fSDimitry Andric                 E = ActiveReconstructedLines.rend();
241753f127fSDimitry Andric            I != E; ++I) {
242753f127fSDimitry Andric         if (!(*I)->Tokens.empty()) {
243753f127fSDimitry Andric           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244753f127fSDimitry Andric           break;
245753f127fSDimitry Andric         }
246753f127fSDimitry Andric       }
247753f127fSDimitry Andric     }
248753f127fSDimitry Andric   }
249753f127fSDimitry Andric   if (Token->MacroCtx->EndOfExpansion)
250753f127fSDimitry Andric     endReconstruction(Token);
251753f127fSDimitry Andric }
252753f127fSDimitry Andric 
253753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the
254753f127fSDimitry Andric // macro call.
255753f127fSDimitry Andric // For example, given: #define ID(x) x
256753f127fSDimitry Andric // And the call: ID(int a)
257753f127fSDimitry Andric // Reconstructs: ID(
258753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259753f127fSDimitry Andric   assert(Token->MacroCtx);
260753f127fSDimitry Andric   assert(!Token->MacroCtx->ExpandedFrom.empty());
261753f127fSDimitry Andric   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262753f127fSDimitry Andric #ifndef NDEBUG
263753f127fSDimitry Andric   // Check that the token's reconstruction stack matches our current
264753f127fSDimitry Andric   // reconstruction stack.
265753f127fSDimitry Andric   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266753f127fSDimitry Andric     assert(ActiveExpansions[I].ID ==
267753f127fSDimitry Andric            Token->MacroCtx
268753f127fSDimitry Andric                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269753f127fSDimitry Andric   }
270753f127fSDimitry Andric #endif
271753f127fSDimitry Andric   // Start reconstruction for all calls for which this token is the first token
272753f127fSDimitry Andric   // generated by the call.
273753f127fSDimitry Andric   // Note that the token's expanded from stack is inside-to-outside, and the
274753f127fSDimitry Andric   // expansions for which this token is not the first are the outermost ones.
275753f127fSDimitry Andric   ArrayRef<FormatToken *> StartedMacros =
276*bdd1243dSDimitry Andric       ArrayRef(Token->MacroCtx->ExpandedFrom)
277753f127fSDimitry Andric           .drop_back(ActiveExpansions.size());
278753f127fSDimitry Andric   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279753f127fSDimitry Andric   // We reconstruct macro calls outside-to-inside.
280753f127fSDimitry Andric   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281753f127fSDimitry Andric     // We found a macro call to be reconstructed; the next time our
282753f127fSDimitry Andric     // reconstruction stack is empty we know we finished an reconstruction.
283753f127fSDimitry Andric #ifndef NDEBUG
284753f127fSDimitry Andric     State = InProgress;
285753f127fSDimitry Andric #endif
286753f127fSDimitry Andric     // Put the reconstructed macro call's token into our reconstruction stack.
287753f127fSDimitry Andric     auto IU = IdToReconstructed.find(ID);
288753f127fSDimitry Andric     assert(IU != IdToReconstructed.end());
289753f127fSDimitry Andric     ActiveExpansions.push_back(
290753f127fSDimitry Andric         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291753f127fSDimitry Andric     // Process the macro call's identifier.
292753f127fSDimitry Andric     processNextReconstructed();
293753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294753f127fSDimitry Andric       continue;
295753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296753f127fSDimitry Andric       // Process the optional opening parenthesis.
297753f127fSDimitry Andric       processNextReconstructed();
298753f127fSDimitry Andric     }
299753f127fSDimitry Andric   }
300753f127fSDimitry Andric }
301753f127fSDimitry Andric 
302753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the
303753f127fSDimitry Andric // given \p Token.
304753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
306753f127fSDimitry Andric   bool PassedMacroComma = false;
307753f127fSDimitry Andric   // FIXME: If Token was already expanded earlier, due to
308753f127fSDimitry Andric   // a change in order, we will not find it, but need to
309753f127fSDimitry Andric   // skip it.
310753f127fSDimitry Andric   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311753f127fSDimitry Andric          ActiveExpansions.back().SpelledI->Tok != Token) {
312753f127fSDimitry Andric     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313753f127fSDimitry Andric   }
314753f127fSDimitry Andric   return PassedMacroComma;
315753f127fSDimitry Andric }
316753f127fSDimitry Andric 
317753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token.
318753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319753f127fSDimitry Andric   assert(Token->MacroCtx &&
320753f127fSDimitry Andric          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321753f127fSDimitry Andric   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
322753f127fSDimitry Andric #ifndef NDEBUG
323753f127fSDimitry Andric     // Check all remaining tokens but the final closing parenthesis and optional
324753f127fSDimitry Andric     // trailing comment were already reconstructed at an inner expansion level.
325753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
326753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
327753f127fSDimitry Andric       FormatToken *Token = T->Tok;
328753f127fSDimitry Andric       bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
329753f127fSDimitry Andric                            std::next(T)->Tok->isTrailingComment()) &&
330753f127fSDimitry Andric                           !Token->MacroCtx && Token->is(tok::r_paren);
331753f127fSDimitry Andric       bool TrailingComment = Token->isTrailingComment();
332753f127fSDimitry Andric       bool PreviousLevel =
333753f127fSDimitry Andric           Token->MacroCtx &&
334753f127fSDimitry Andric           (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
335*bdd1243dSDimitry Andric       if (!ClosingParen && !TrailingComment && !PreviousLevel)
336753f127fSDimitry Andric         llvm::dbgs() << "At token: " << Token->TokenText << "\n";
337753f127fSDimitry Andric       // In addition to the following cases, we can also run into this
338753f127fSDimitry Andric       // when a macro call had more arguments than expected; in that case,
339753f127fSDimitry Andric       // the comma and the remaining tokens in the macro call will potentially
340753f127fSDimitry Andric       // end up in the line when we finish the expansion.
341753f127fSDimitry Andric       // FIXME: Add the information which arguments are unused, and assert
342753f127fSDimitry Andric       // one of the cases below plus reconstructed macro argument tokens.
343753f127fSDimitry Andric       // assert(ClosingParen || TrailingComment || PreviousLevel);
344753f127fSDimitry Andric     }
345753f127fSDimitry Andric #endif
346753f127fSDimitry Andric     // Handle the remaining open tokens:
347753f127fSDimitry Andric     // - expand the closing parenthesis, if it exists, including an optional
348753f127fSDimitry Andric     //   trailing comment
349753f127fSDimitry Andric     // - handle tokens that were already reconstructed at an inner expansion
350753f127fSDimitry Andric     //   level
351753f127fSDimitry Andric     // - handle tokens when a macro call had more than the expected number of
352753f127fSDimitry Andric     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
353753f127fSDimitry Andric     //   up with the sequence ", b, c)" being open at the end of the
354753f127fSDimitry Andric     //   reconstruction; we want to gracefully handle that case
355753f127fSDimitry Andric     //
356753f127fSDimitry Andric     // FIXME: See the above debug-check for what we will need to do to be
357753f127fSDimitry Andric     // able to assert this.
358753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
359753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
360753f127fSDimitry Andric       processNextReconstructed();
361753f127fSDimitry Andric     }
362753f127fSDimitry Andric     ActiveExpansions.pop_back();
363753f127fSDimitry Andric   }
364753f127fSDimitry Andric }
365753f127fSDimitry Andric 
366753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const {
367753f127fSDimitry Andric   llvm::DenseSet<FormatToken *> Values;
368753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent)
369753f127fSDimitry Andric     Values.insert(P.second);
370753f127fSDimitry Andric 
371753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent) {
372753f127fSDimitry Andric     if (Values.contains(P.first))
373753f127fSDimitry Andric       continue;
374753f127fSDimitry Andric     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
375753f127fSDimitry Andric     for (auto I = SpelledParentToReconstructedParent.find(P.first),
376753f127fSDimitry Andric               E = SpelledParentToReconstructedParent.end();
377753f127fSDimitry Andric          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
378753f127fSDimitry Andric       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
379753f127fSDimitry Andric     }
380753f127fSDimitry Andric     llvm::dbgs() << "\n";
381753f127fSDimitry Andric   }
382753f127fSDimitry Andric }
383753f127fSDimitry Andric 
384753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the
385753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a
386753f127fSDimitry Andric // macro call.
387753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() {
388753f127fSDimitry Andric   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
389753f127fSDimitry Andric   ++ActiveExpansions.back().SpelledI;
390753f127fSDimitry Andric   if (Token->MacroCtx) {
391753f127fSDimitry Andric     // Skip tokens that are not part of the macro call.
392*bdd1243dSDimitry Andric     if (Token->MacroCtx->Role == MR_Hidden)
393753f127fSDimitry Andric       return false;
394753f127fSDimitry Andric     // Skip tokens we already expanded during an inner reconstruction.
395753f127fSDimitry Andric     // For example, given: #define ID(x) {x}
396753f127fSDimitry Andric     // And the call: ID(ID(f))
397753f127fSDimitry Andric     // We get two reconstructions:
398753f127fSDimitry Andric     // ID(f) -> {f}
399753f127fSDimitry Andric     // ID({f}) -> {{f}}
400753f127fSDimitry Andric     // We reconstruct f during the first reconstruction, and skip it during the
401753f127fSDimitry Andric     // second reconstruction.
402*bdd1243dSDimitry Andric     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
403753f127fSDimitry Andric       return false;
404753f127fSDimitry Andric   }
405753f127fSDimitry Andric   // Tokens that do not have a macro context are tokens in that are part of the
406753f127fSDimitry Andric   // macro call that have not taken part in expansion.
407753f127fSDimitry Andric   if (!Token->MacroCtx) {
408753f127fSDimitry Andric     // Put the parentheses and commas of a macro call into the same line;
409753f127fSDimitry Andric     // if the arguments produce new unwrapped lines, they will become children
410753f127fSDimitry Andric     // of the corresponding opening parenthesis or comma tokens in the
411753f127fSDimitry Andric     // reconstructed call.
412753f127fSDimitry Andric     if (Token->is(tok::l_paren)) {
413753f127fSDimitry Andric       MacroCallStructure.push_back(MacroCallState(
414753f127fSDimitry Andric           currentLine(), parentLine().Tokens.back()->Tok, Token));
415753f127fSDimitry Andric       // All tokens that are children of the previous line's last token in the
416753f127fSDimitry Andric       // reconstructed token stream will now be children of the l_paren token.
417753f127fSDimitry Andric       // For example, for the line containing the macro calls:
418753f127fSDimitry Andric       //   auto x = ID({ID(2)});
419753f127fSDimitry Andric       // We will build up a map <null> -> ( -> ( with the first and second
420753f127fSDimitry Andric       // l_paren of the macro call respectively. New lines that come in with a
421753f127fSDimitry Andric       // <null> parent will then become children of the l_paren token of the
422753f127fSDimitry Andric       // currently innermost macro call.
423753f127fSDimitry Andric       SpelledParentToReconstructedParent[MacroCallStructure.back()
424753f127fSDimitry Andric                                              .ParentLastToken] = Token;
425753f127fSDimitry Andric       appendToken(Token);
426753f127fSDimitry Andric       prepareParent(Token, /*NewLine=*/true);
427753f127fSDimitry Andric       Token->MacroParent = true;
428753f127fSDimitry Andric       return false;
429753f127fSDimitry Andric     }
430753f127fSDimitry Andric     if (!MacroCallStructure.empty()) {
431753f127fSDimitry Andric       if (Token->is(tok::comma)) {
432753f127fSDimitry Andric         // Make new lines inside the next argument children of the comma token.
433753f127fSDimitry Andric         SpelledParentToReconstructedParent
434753f127fSDimitry Andric             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
435753f127fSDimitry Andric         Token->MacroParent = true;
436753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
437753f127fSDimitry Andric         prepareParent(Token, /*NewLine=*/true);
438753f127fSDimitry Andric         return true;
439753f127fSDimitry Andric       }
440753f127fSDimitry Andric       if (Token->is(tok::r_paren)) {
441753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
442753f127fSDimitry Andric         SpelledParentToReconstructedParent.erase(
443753f127fSDimitry Andric             MacroCallStructure.back().ParentLastToken);
444753f127fSDimitry Andric         MacroCallStructure.pop_back();
445753f127fSDimitry Andric         return false;
446753f127fSDimitry Andric       }
447753f127fSDimitry Andric     }
448753f127fSDimitry Andric   }
449753f127fSDimitry Andric   // Note that any tokens that are tagged with MR_None have been passed as
450753f127fSDimitry Andric   // arguments to the macro that have not been expanded, for example:
451753f127fSDimitry Andric   // Given: #define ID(X) x
452753f127fSDimitry Andric   // When calling: ID(a, b)
453753f127fSDimitry Andric   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
454753f127fSDimitry Andric   // Given that erroring out in this case would be disruptive, we continue
455753f127fSDimitry Andric   // pushing the (unformatted) token.
456753f127fSDimitry Andric   // FIXME: This can lead to unfortunate formatting decisions - give the user
457753f127fSDimitry Andric   // a hint that their macro definition is broken.
458753f127fSDimitry Andric   appendToken(Token);
459753f127fSDimitry Andric   return false;
460753f127fSDimitry Andric }
461753f127fSDimitry Andric 
462753f127fSDimitry Andric void MacroCallReconstructor::finalize() {
463753f127fSDimitry Andric #ifndef NDEBUG
464753f127fSDimitry Andric   assert(State != Finalized && finished());
465753f127fSDimitry Andric   State = Finalized;
466753f127fSDimitry Andric #endif
467753f127fSDimitry Andric 
468753f127fSDimitry Andric   // We created corresponding unwrapped lines for each incoming line as children
469753f127fSDimitry Andric   // the the toplevel null token.
470753f127fSDimitry Andric   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
471753f127fSDimitry Andric   LLVM_DEBUG({
472753f127fSDimitry Andric     llvm::dbgs() << "Finalizing reconstructed lines:\n";
473753f127fSDimitry Andric     debug(Result, 0);
474753f127fSDimitry Andric   });
475753f127fSDimitry Andric 
476753f127fSDimitry Andric   // The first line becomes the top level line in the resulting unwrapped line.
477753f127fSDimitry Andric   LineNode &Top = *Result.Tokens.front();
478753f127fSDimitry Andric   auto *I = Top.Children.begin();
479753f127fSDimitry Andric   // Every subsequent line will become a child of the last token in the previous
480753f127fSDimitry Andric   // line, which is the token prior to the first token in the line.
481753f127fSDimitry Andric   LineNode *Last = (*I)->Tokens.back().get();
482753f127fSDimitry Andric   ++I;
483753f127fSDimitry Andric   for (auto *E = Top.Children.end(); I != E; ++I) {
484753f127fSDimitry Andric     assert(Last->Children.empty());
485753f127fSDimitry Andric     Last->Children.push_back(std::move(*I));
486753f127fSDimitry Andric 
487753f127fSDimitry Andric     // Mark the previous line's last token as generated by a macro expansion
488753f127fSDimitry Andric     // so the formatting algorithm can take that into account.
489753f127fSDimitry Andric     Last->Tok->MacroParent = true;
490753f127fSDimitry Andric 
491753f127fSDimitry Andric     Last = Last->Children.back()->Tokens.back().get();
492753f127fSDimitry Andric   }
493753f127fSDimitry Andric   Top.Children.resize(1);
494753f127fSDimitry Andric }
495753f127fSDimitry Andric 
496753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token,
497753f127fSDimitry Andric                                          ReconstructedLine *L) {
498753f127fSDimitry Andric   L = L ? L : currentLine();
499753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
500753f127fSDimitry Andric   L->Tokens.push_back(std::make_unique<LineNode>(Token));
501753f127fSDimitry Andric }
502753f127fSDimitry Andric 
503753f127fSDimitry Andric UnwrappedLine
504753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
505753f127fSDimitry Andric                                             int Level) {
506753f127fSDimitry Andric   UnwrappedLine Result;
507753f127fSDimitry Andric   Result.Level = Level;
508753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
509753f127fSDimitry Andric     Result.Tokens.push_back(N->Tok);
510753f127fSDimitry Andric     UnwrappedLineNode &Current = Result.Tokens.back();
511753f127fSDimitry Andric     for (const auto &Child : N->Children) {
512753f127fSDimitry Andric       if (Child->Tokens.empty())
513753f127fSDimitry Andric         continue;
514753f127fSDimitry Andric       Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
515753f127fSDimitry Andric     }
516753f127fSDimitry Andric     if (Current.Children.size() == 1 &&
517753f127fSDimitry Andric         Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
518753f127fSDimitry Andric       Result.Tokens.splice(Result.Tokens.end(),
519753f127fSDimitry Andric                            Current.Children.front().Tokens);
520753f127fSDimitry Andric       Current.Children.clear();
521753f127fSDimitry Andric     }
522753f127fSDimitry Andric   }
523753f127fSDimitry Andric   return Result;
524753f127fSDimitry Andric }
525753f127fSDimitry Andric 
526753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
527753f127fSDimitry Andric   for (int i = 0; i < Level; ++i)
528753f127fSDimitry Andric     llvm::dbgs() << " ";
529753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
530753f127fSDimitry Andric     if (!N)
531753f127fSDimitry Andric       continue;
532753f127fSDimitry Andric     if (N->Tok)
533753f127fSDimitry Andric       llvm::dbgs() << N->Tok->TokenText << " ";
534753f127fSDimitry Andric     for (const auto &Child : N->Children) {
535753f127fSDimitry Andric       llvm::dbgs() << "\n";
536753f127fSDimitry Andric       debug(*Child, Level + 1);
537753f127fSDimitry Andric       for (int i = 0; i < Level; ++i)
538753f127fSDimitry Andric         llvm::dbgs() << " ";
539753f127fSDimitry Andric     }
540753f127fSDimitry Andric   }
541753f127fSDimitry Andric   llvm::dbgs() << "\n";
542753f127fSDimitry Andric }
543753f127fSDimitry Andric 
544753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine &
545753f127fSDimitry Andric MacroCallReconstructor::parentLine() {
546753f127fSDimitry Andric   return **std::prev(std::prev(ActiveReconstructedLines.end()));
547753f127fSDimitry Andric }
548753f127fSDimitry Andric 
549753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *
550753f127fSDimitry Andric MacroCallReconstructor::currentLine() {
551753f127fSDimitry Andric   return ActiveReconstructedLines.back();
552753f127fSDimitry Andric }
553753f127fSDimitry Andric 
554753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState(
555753f127fSDimitry Andric     MacroCallReconstructor::ReconstructedLine *Line,
556753f127fSDimitry Andric     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
557753f127fSDimitry Andric     : Line(Line), ParentLastToken(ParentLastToken),
558753f127fSDimitry Andric       MacroCallLParen(MacroCallLParen) {
559753f127fSDimitry Andric   LLVM_DEBUG(
560753f127fSDimitry Andric       llvm::dbgs() << "ParentLastToken: "
561753f127fSDimitry Andric                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
562753f127fSDimitry Andric                    << "\n");
563753f127fSDimitry Andric 
564753f127fSDimitry Andric   assert(MacroCallLParen->is(tok::l_paren));
565753f127fSDimitry Andric }
566753f127fSDimitry Andric 
567753f127fSDimitry Andric } // namespace format
568753f127fSDimitry Andric } // namespace clang
569