xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/MacroCallReconstructor.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
2753f127fSDimitry Andric //
3753f127fSDimitry Andric //                     The LLVM Compiler Infrastructure
4753f127fSDimitry Andric //
5753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source
6753f127fSDimitry Andric // License. See LICENSE.TXT for details.
7753f127fSDimitry Andric //
8753f127fSDimitry Andric //===----------------------------------------------------------------------===//
9753f127fSDimitry Andric ///
10753f127fSDimitry Andric /// \file
11753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits
12753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines.
13753f127fSDimitry Andric ///
14753f127fSDimitry Andric //===----------------------------------------------------------------------===//
15753f127fSDimitry Andric 
16753f127fSDimitry Andric #include "Macros.h"
17753f127fSDimitry Andric 
18753f127fSDimitry Andric #include "UnwrappedLineParser.h"
19753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h"
20753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h"
21753f127fSDimitry Andric #include "llvm/Support/Debug.h"
22753f127fSDimitry Andric #include <cassert>
23753f127fSDimitry Andric 
24753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct"
25753f127fSDimitry Andric 
26753f127fSDimitry Andric namespace clang {
27753f127fSDimitry Andric namespace format {
28753f127fSDimitry Andric 
29753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing
30753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line.
31753f127fSDimitry Andric template <typename T>
32753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call,
33753f127fSDimitry Andric                   FormatToken *Parent = nullptr) {
34753f127fSDimitry Andric   bool First = true;
35753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
36*0fca6ea1SDimitry Andric     Call(N.Tok, Parent, First, Line.Level);
37753f127fSDimitry Andric     First = false;
38bdd1243dSDimitry Andric     for (const auto &Child : N.Children)
39753f127fSDimitry Andric       forEachToken(Child, Call, N.Tok);
40753f127fSDimitry Andric   }
41753f127fSDimitry Andric }
42753f127fSDimitry Andric 
43753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor(
44753f127fSDimitry Andric     unsigned Level,
45753f127fSDimitry Andric     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
46753f127fSDimitry Andric         &ActiveExpansions)
47*0fca6ea1SDimitry Andric     : Result(Level), IdToReconstructed(ActiveExpansions) {
48753f127fSDimitry Andric   Result.Tokens.push_back(std::make_unique<LineNode>());
49753f127fSDimitry Andric   ActiveReconstructedLines.push_back(&Result);
50753f127fSDimitry Andric }
51753f127fSDimitry Andric 
52753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
53753f127fSDimitry Andric   assert(State != Finalized);
54753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
55*0fca6ea1SDimitry Andric   forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First,
56*0fca6ea1SDimitry Andric                          unsigned Level) { add(Token, Parent, First, Level); });
57753f127fSDimitry Andric   assert(InProgress || finished());
58753f127fSDimitry Andric }
59753f127fSDimitry Andric 
60753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && {
61753f127fSDimitry Andric   finalize();
62bdd1243dSDimitry Andric   assert(Result.Tokens.size() == 1 &&
63bdd1243dSDimitry Andric          Result.Tokens.front()->Children.size() == 1);
64*0fca6ea1SDimitry Andric   UnwrappedLine Final = createUnwrappedLine(
65*0fca6ea1SDimitry Andric       *Result.Tokens.front()->Children.front(), Result.Level);
66753f127fSDimitry Andric   assert(!Final.Tokens.empty());
67753f127fSDimitry Andric   return Final;
68753f127fSDimitry Andric }
69753f127fSDimitry Andric 
70753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p
71753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
72753f127fSDimitry Andric // is the first token in a given unwrapped line.
73753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token,
74*0fca6ea1SDimitry Andric                                  FormatToken *ExpandedParent, bool First,
75*0fca6ea1SDimitry Andric                                  unsigned Level) {
76753f127fSDimitry Andric   LLVM_DEBUG(
77753f127fSDimitry Andric       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
78753f127fSDimitry Andric                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
79753f127fSDimitry Andric                    << ", First: " << First << "\n");
80753f127fSDimitry Andric   // In order to be able to find the correct parent in the reconstructed token
81753f127fSDimitry Andric   // stream, we need to continue the last open reconstruction until we find the
82753f127fSDimitry Andric   // given token if it is part of the reconstructed token stream.
83753f127fSDimitry Andric   //
84753f127fSDimitry Andric   // Note that hidden tokens can be part of the reconstructed stream in nested
85753f127fSDimitry Andric   // macro calls.
86753f127fSDimitry Andric   // For example, given
87753f127fSDimitry Andric   //   #define C(x, y) x y
88753f127fSDimitry Andric   //   #define B(x) {x}
89753f127fSDimitry Andric   // And the call:
90753f127fSDimitry Andric   //   C(a, B(b))
91753f127fSDimitry Andric   // The outer macro call will be C(a, {b}), and the hidden token '}' can be
92753f127fSDimitry Andric   // found in the reconstructed token stream of that expansion level.
93753f127fSDimitry Andric   // In the expanded token stream
94753f127fSDimitry Andric   //   a {b}
95753f127fSDimitry Andric   // 'b' is a child of '{'. We need to continue the open expansion of the ','
96753f127fSDimitry Andric   // in the call of 'C' in order to correctly set the ',' as the parent of '{',
97753f127fSDimitry Andric   // so we later set the spelled token 'b' as a child of the ','.
98753f127fSDimitry Andric   if (!ActiveExpansions.empty() && Token->MacroCtx &&
99753f127fSDimitry Andric       (Token->MacroCtx->Role != MR_Hidden ||
100753f127fSDimitry Andric        ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
101753f127fSDimitry Andric     if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token))
102753f127fSDimitry Andric       First = true;
103753f127fSDimitry Andric   }
104753f127fSDimitry Andric 
105*0fca6ea1SDimitry Andric   prepareParent(ExpandedParent, First, Level);
106753f127fSDimitry Andric 
107753f127fSDimitry Andric   if (Token->MacroCtx) {
108753f127fSDimitry Andric     // If this token was generated by a macro call, add the reconstructed
109753f127fSDimitry Andric     // equivalent of the token.
110753f127fSDimitry Andric     reconstruct(Token);
111753f127fSDimitry Andric   } else {
112753f127fSDimitry Andric     // Otherwise, we add it to the current line.
113753f127fSDimitry Andric     appendToken(Token);
114753f127fSDimitry Andric   }
115753f127fSDimitry Andric }
116753f127fSDimitry Andric 
117753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push
118753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the
119753f127fSDimitry Andric // expanded code.
120753f127fSDimitry Andric //
121753f127fSDimitry Andric // This may entail:
122753f127fSDimitry Andric // - creating a new line, if the parent is on the active line
123753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack
124753f127fSDimitry Andric //
125753f127fSDimitry Andric // Postcondition:
126753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
127753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the
128753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
129753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed
130753f127fSDimitry Andric // unwrapped line.
131753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
132*0fca6ea1SDimitry Andric                                            bool NewLine, unsigned Level) {
133753f127fSDimitry Andric   LLVM_DEBUG({
134753f127fSDimitry Andric     llvm::dbgs() << "ParentMap:\n";
135753f127fSDimitry Andric     debugParentMap();
136753f127fSDimitry Andric   });
137753f127fSDimitry Andric   // We want to find the parent in the new unwrapped line, where the expanded
138753f127fSDimitry Andric   // parent might have been replaced during reconstruction.
139753f127fSDimitry Andric   FormatToken *Parent = getParentInResult(ExpandedParent);
140753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
141753f127fSDimitry Andric                           << (Parent ? Parent->TokenText : "<null>") << "\n");
142753f127fSDimitry Andric 
143753f127fSDimitry Andric   FormatToken *OpenMacroParent = nullptr;
144753f127fSDimitry Andric   if (!MacroCallStructure.empty()) {
145753f127fSDimitry Andric     // Inside a macro expansion, it is possible to lose track of the correct
146753f127fSDimitry Andric     // parent - either because it is already popped, for example because it was
147753f127fSDimitry Andric     // in a different macro argument (e.g. M({, })), or when we work on invalid
148753f127fSDimitry Andric     // code.
149753f127fSDimitry Andric     // Thus, we use the innermost macro call's parent as the parent at which
150753f127fSDimitry Andric     // we stop; this allows us to stay within the macro expansion and keeps
151753f127fSDimitry Andric     // any problems confined to the extent of the macro call.
152753f127fSDimitry Andric     OpenMacroParent =
153753f127fSDimitry Andric         getParentInResult(MacroCallStructure.back().MacroCallLParen);
154753f127fSDimitry Andric     LLVM_DEBUG(llvm::dbgs()
155753f127fSDimitry Andric                << "MacroCallLParen: "
156753f127fSDimitry Andric                << MacroCallStructure.back().MacroCallLParen->TokenText
157753f127fSDimitry Andric                << ", OpenMacroParent: "
158753f127fSDimitry Andric                << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
159753f127fSDimitry Andric                << "\n");
160753f127fSDimitry Andric   }
161753f127fSDimitry Andric   if (NewLine ||
162753f127fSDimitry Andric       (!ActiveReconstructedLines.back()->Tokens.empty() &&
163753f127fSDimitry Andric        Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
164753f127fSDimitry Andric     // If we are at the first token in a new line, we want to also
165753f127fSDimitry Andric     // create a new line in the resulting reconstructed unwrapped line.
166753f127fSDimitry Andric     while (ActiveReconstructedLines.back()->Tokens.empty() ||
167753f127fSDimitry Andric            (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
168753f127fSDimitry Andric             ActiveReconstructedLines.back()->Tokens.back()->Tok !=
169753f127fSDimitry Andric                 OpenMacroParent)) {
170753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
171753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
172753f127fSDimitry Andric     }
173753f127fSDimitry Andric     assert(!ActiveReconstructedLines.empty());
174753f127fSDimitry Andric     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
175*0fca6ea1SDimitry Andric         std::make_unique<ReconstructedLine>(Level));
176753f127fSDimitry Andric     ActiveReconstructedLines.push_back(
177753f127fSDimitry Andric         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
178753f127fSDimitry Andric   } else if (parentLine().Tokens.back()->Tok != Parent) {
179753f127fSDimitry Andric     // If we're not the first token in a new line, pop lines until we find
180753f127fSDimitry Andric     // the child of \c Parent in the stack.
181753f127fSDimitry Andric     while (Parent != parentLine().Tokens.back()->Tok &&
182753f127fSDimitry Andric            parentLine().Tokens.back()->Tok &&
183753f127fSDimitry Andric            parentLine().Tokens.back()->Tok != OpenMacroParent) {
184753f127fSDimitry Andric       ActiveReconstructedLines.pop_back();
185753f127fSDimitry Andric       assert(!ActiveReconstructedLines.empty());
186753f127fSDimitry Andric     }
187753f127fSDimitry Andric   }
188753f127fSDimitry Andric   assert(!ActiveReconstructedLines.empty());
189753f127fSDimitry Andric }
190753f127fSDimitry Andric 
191753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the
192753f127fSDimitry Andric // corresponding parent in the output.
193753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
194753f127fSDimitry Andric   FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
195753f127fSDimitry Andric   if (!Mapped)
196753f127fSDimitry Andric     return Parent;
197bdd1243dSDimitry Andric   for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent))
198753f127fSDimitry Andric     Parent = Mapped;
199753f127fSDimitry Andric   // If we use a different token than the parent in the expanded token stream
200753f127fSDimitry Andric   // as parent, mark it as a special parent, so the formatting code knows it
201753f127fSDimitry Andric   // needs to have its children formatted.
202753f127fSDimitry Andric   Parent->MacroParent = true;
203753f127fSDimitry Andric   return Parent;
204753f127fSDimitry Andric }
205753f127fSDimitry Andric 
206753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call.
207753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) {
208753f127fSDimitry Andric   assert(Token->MacroCtx);
209753f127fSDimitry Andric   // A single token can be the only result of a macro call:
210753f127fSDimitry Andric   // Given: #define ID(x, y) ;
211753f127fSDimitry Andric   // And the call: ID(<some>, <tokens>)
212753f127fSDimitry Andric   // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
213753f127fSDimitry Andric   if (Token->MacroCtx->StartOfExpansion) {
214753f127fSDimitry Andric     startReconstruction(Token);
215753f127fSDimitry Andric     // If the order of tokens in the expanded token stream is not the
216753f127fSDimitry Andric     // same as the order of tokens in the reconstructed stream, we need
217753f127fSDimitry Andric     // to reconstruct tokens that arrive later in the stream.
218bdd1243dSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden)
219753f127fSDimitry Andric       reconstructActiveCallUntil(Token);
220753f127fSDimitry Andric   }
221753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
222753f127fSDimitry Andric   if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
223753f127fSDimitry Andric     assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
224753f127fSDimitry Andric     if (Token->MacroCtx->Role != MR_Hidden) {
225753f127fSDimitry Andric       // The current token in the reconstructed token stream must be the token
226753f127fSDimitry Andric       // we're looking for - we either arrive here after startReconstruction,
227753f127fSDimitry Andric       // which initiates the stream to the first token, or after
228753f127fSDimitry Andric       // continueReconstructionUntil skipped until the expected token in the
229753f127fSDimitry Andric       // reconstructed stream at the start of add(...).
230753f127fSDimitry Andric       assert(ActiveExpansions.back().SpelledI->Tok == Token);
231753f127fSDimitry Andric       processNextReconstructed();
232753f127fSDimitry Andric     } else if (!currentLine()->Tokens.empty()) {
233753f127fSDimitry Andric       // Map all hidden tokens to the last visible token in the output.
234753f127fSDimitry Andric       // If the hidden token is a parent, we'll use the last visible
235753f127fSDimitry Andric       // token as the parent of the hidden token's children.
236753f127fSDimitry Andric       SpelledParentToReconstructedParent[Token] =
237753f127fSDimitry Andric           currentLine()->Tokens.back()->Tok;
238753f127fSDimitry Andric     } else {
239753f127fSDimitry Andric       for (auto I = ActiveReconstructedLines.rbegin(),
240753f127fSDimitry Andric                 E = ActiveReconstructedLines.rend();
241753f127fSDimitry Andric            I != E; ++I) {
242753f127fSDimitry Andric         if (!(*I)->Tokens.empty()) {
243753f127fSDimitry Andric           SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
244753f127fSDimitry Andric           break;
245753f127fSDimitry Andric         }
246753f127fSDimitry Andric       }
247753f127fSDimitry Andric     }
248753f127fSDimitry Andric   }
249753f127fSDimitry Andric   if (Token->MacroCtx->EndOfExpansion)
250753f127fSDimitry Andric     endReconstruction(Token);
251753f127fSDimitry Andric }
252753f127fSDimitry Andric 
253753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the
254753f127fSDimitry Andric // macro call.
255753f127fSDimitry Andric // For example, given: #define ID(x) x
256753f127fSDimitry Andric // And the call: ID(int a)
257753f127fSDimitry Andric // Reconstructs: ID(
258753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
259753f127fSDimitry Andric   assert(Token->MacroCtx);
260753f127fSDimitry Andric   assert(!Token->MacroCtx->ExpandedFrom.empty());
261753f127fSDimitry Andric   assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
262753f127fSDimitry Andric #ifndef NDEBUG
263753f127fSDimitry Andric   // Check that the token's reconstruction stack matches our current
264753f127fSDimitry Andric   // reconstruction stack.
265753f127fSDimitry Andric   for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
266753f127fSDimitry Andric     assert(ActiveExpansions[I].ID ==
267753f127fSDimitry Andric            Token->MacroCtx
268753f127fSDimitry Andric                ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
269753f127fSDimitry Andric   }
270753f127fSDimitry Andric #endif
271753f127fSDimitry Andric   // Start reconstruction for all calls for which this token is the first token
272753f127fSDimitry Andric   // generated by the call.
273753f127fSDimitry Andric   // Note that the token's expanded from stack is inside-to-outside, and the
274753f127fSDimitry Andric   // expansions for which this token is not the first are the outermost ones.
275753f127fSDimitry Andric   ArrayRef<FormatToken *> StartedMacros =
276bdd1243dSDimitry Andric       ArrayRef(Token->MacroCtx->ExpandedFrom)
277753f127fSDimitry Andric           .drop_back(ActiveExpansions.size());
278753f127fSDimitry Andric   assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
279753f127fSDimitry Andric   // We reconstruct macro calls outside-to-inside.
280753f127fSDimitry Andric   for (FormatToken *ID : llvm::reverse(StartedMacros)) {
281753f127fSDimitry Andric     // We found a macro call to be reconstructed; the next time our
282753f127fSDimitry Andric     // reconstruction stack is empty we know we finished an reconstruction.
283753f127fSDimitry Andric #ifndef NDEBUG
284753f127fSDimitry Andric     State = InProgress;
285753f127fSDimitry Andric #endif
286753f127fSDimitry Andric     // Put the reconstructed macro call's token into our reconstruction stack.
287753f127fSDimitry Andric     auto IU = IdToReconstructed.find(ID);
288753f127fSDimitry Andric     assert(IU != IdToReconstructed.end());
289753f127fSDimitry Andric     ActiveExpansions.push_back(
290753f127fSDimitry Andric         {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
291753f127fSDimitry Andric     // Process the macro call's identifier.
292753f127fSDimitry Andric     processNextReconstructed();
293753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
294753f127fSDimitry Andric       continue;
295753f127fSDimitry Andric     if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
296753f127fSDimitry Andric       // Process the optional opening parenthesis.
297753f127fSDimitry Andric       processNextReconstructed();
298753f127fSDimitry Andric     }
299753f127fSDimitry Andric   }
300753f127fSDimitry Andric }
301753f127fSDimitry Andric 
302753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the
303753f127fSDimitry Andric // given \p Token.
304753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
305753f127fSDimitry Andric   assert(!ActiveExpansions.empty());
306753f127fSDimitry Andric   bool PassedMacroComma = false;
307753f127fSDimitry Andric   // FIXME: If Token was already expanded earlier, due to
308753f127fSDimitry Andric   // a change in order, we will not find it, but need to
309753f127fSDimitry Andric   // skip it.
310753f127fSDimitry Andric   while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
311753f127fSDimitry Andric          ActiveExpansions.back().SpelledI->Tok != Token) {
312753f127fSDimitry Andric     PassedMacroComma = processNextReconstructed() || PassedMacroComma;
313753f127fSDimitry Andric   }
314753f127fSDimitry Andric   return PassedMacroComma;
315753f127fSDimitry Andric }
316753f127fSDimitry Andric 
317753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token.
318753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
319753f127fSDimitry Andric   assert(Token->MacroCtx &&
320753f127fSDimitry Andric          (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
321753f127fSDimitry Andric   for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
3225f757f3fSDimitry Andric     LLVM_DEBUG([&] {
3235f757f3fSDimitry Andric       // Check all remaining tokens but the final closing parenthesis and
3245f757f3fSDimitry Andric       // optional trailing comment were already reconstructed at an inner
3255f757f3fSDimitry Andric       // expansion level.
326753f127fSDimitry Andric       for (auto T = ActiveExpansions.back().SpelledI;
327753f127fSDimitry Andric            T != ActiveExpansions.back().SpelledE; ++T) {
328753f127fSDimitry Andric         FormatToken *Token = T->Tok;
329753f127fSDimitry Andric         bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
330753f127fSDimitry Andric                              std::next(T)->Tok->isTrailingComment()) &&
331753f127fSDimitry Andric                             !Token->MacroCtx && Token->is(tok::r_paren);
332753f127fSDimitry Andric         bool TrailingComment = Token->isTrailingComment();
333753f127fSDimitry Andric         bool PreviousLevel =
334753f127fSDimitry Andric             Token->MacroCtx &&
335753f127fSDimitry Andric             (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
336bdd1243dSDimitry Andric         if (!ClosingParen && !TrailingComment && !PreviousLevel)
337753f127fSDimitry Andric           llvm::dbgs() << "At token: " << Token->TokenText << "\n";
338753f127fSDimitry Andric         // In addition to the following cases, we can also run into this
339753f127fSDimitry Andric         // when a macro call had more arguments than expected; in that case,
3405f757f3fSDimitry Andric         // the comma and the remaining tokens in the macro call will
3415f757f3fSDimitry Andric         // potentially end up in the line when we finish the expansion.
342753f127fSDimitry Andric         // FIXME: Add the information which arguments are unused, and assert
343753f127fSDimitry Andric         // one of the cases below plus reconstructed macro argument tokens.
344753f127fSDimitry Andric         // assert(ClosingParen || TrailingComment || PreviousLevel);
345753f127fSDimitry Andric       }
3465f757f3fSDimitry Andric     }());
347753f127fSDimitry Andric     // Handle the remaining open tokens:
348753f127fSDimitry Andric     // - expand the closing parenthesis, if it exists, including an optional
349753f127fSDimitry Andric     //   trailing comment
350753f127fSDimitry Andric     // - handle tokens that were already reconstructed at an inner expansion
351753f127fSDimitry Andric     //   level
352753f127fSDimitry Andric     // - handle tokens when a macro call had more than the expected number of
353753f127fSDimitry Andric     //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
354753f127fSDimitry Andric     //   up with the sequence ", b, c)" being open at the end of the
355753f127fSDimitry Andric     //   reconstruction; we want to gracefully handle that case
356753f127fSDimitry Andric     //
357753f127fSDimitry Andric     // FIXME: See the above debug-check for what we will need to do to be
358753f127fSDimitry Andric     // able to assert this.
359753f127fSDimitry Andric     for (auto T = ActiveExpansions.back().SpelledI;
360753f127fSDimitry Andric          T != ActiveExpansions.back().SpelledE; ++T) {
361753f127fSDimitry Andric       processNextReconstructed();
362753f127fSDimitry Andric     }
363753f127fSDimitry Andric     ActiveExpansions.pop_back();
364753f127fSDimitry Andric   }
365753f127fSDimitry Andric }
366753f127fSDimitry Andric 
367753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const {
368753f127fSDimitry Andric   llvm::DenseSet<FormatToken *> Values;
369753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent)
370753f127fSDimitry Andric     Values.insert(P.second);
371753f127fSDimitry Andric 
372753f127fSDimitry Andric   for (const auto &P : SpelledParentToReconstructedParent) {
373753f127fSDimitry Andric     if (Values.contains(P.first))
374753f127fSDimitry Andric       continue;
375753f127fSDimitry Andric     llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
376753f127fSDimitry Andric     for (auto I = SpelledParentToReconstructedParent.find(P.first),
377753f127fSDimitry Andric               E = SpelledParentToReconstructedParent.end();
378753f127fSDimitry Andric          I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
379753f127fSDimitry Andric       llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
380753f127fSDimitry Andric     }
381753f127fSDimitry Andric     llvm::dbgs() << "\n";
382753f127fSDimitry Andric   }
383753f127fSDimitry Andric }
384753f127fSDimitry Andric 
385753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the
386753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a
387753f127fSDimitry Andric // macro call.
388753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() {
389753f127fSDimitry Andric   FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
390753f127fSDimitry Andric   ++ActiveExpansions.back().SpelledI;
391753f127fSDimitry Andric   if (Token->MacroCtx) {
392753f127fSDimitry Andric     // Skip tokens that are not part of the macro call.
393bdd1243dSDimitry Andric     if (Token->MacroCtx->Role == MR_Hidden)
394753f127fSDimitry Andric       return false;
395753f127fSDimitry Andric     // Skip tokens we already expanded during an inner reconstruction.
396753f127fSDimitry Andric     // For example, given: #define ID(x) {x}
397753f127fSDimitry Andric     // And the call: ID(ID(f))
398753f127fSDimitry Andric     // We get two reconstructions:
399753f127fSDimitry Andric     // ID(f) -> {f}
400753f127fSDimitry Andric     // ID({f}) -> {{f}}
401753f127fSDimitry Andric     // We reconstruct f during the first reconstruction, and skip it during the
402753f127fSDimitry Andric     // second reconstruction.
403bdd1243dSDimitry Andric     if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size())
404753f127fSDimitry Andric       return false;
405753f127fSDimitry Andric   }
406753f127fSDimitry Andric   // Tokens that do not have a macro context are tokens in that are part of the
407753f127fSDimitry Andric   // macro call that have not taken part in expansion.
408753f127fSDimitry Andric   if (!Token->MacroCtx) {
409753f127fSDimitry Andric     // Put the parentheses and commas of a macro call into the same line;
410753f127fSDimitry Andric     // if the arguments produce new unwrapped lines, they will become children
411753f127fSDimitry Andric     // of the corresponding opening parenthesis or comma tokens in the
412753f127fSDimitry Andric     // reconstructed call.
413753f127fSDimitry Andric     if (Token->is(tok::l_paren)) {
414753f127fSDimitry Andric       MacroCallStructure.push_back(MacroCallState(
415753f127fSDimitry Andric           currentLine(), parentLine().Tokens.back()->Tok, Token));
416753f127fSDimitry Andric       // All tokens that are children of the previous line's last token in the
417753f127fSDimitry Andric       // reconstructed token stream will now be children of the l_paren token.
418753f127fSDimitry Andric       // For example, for the line containing the macro calls:
419753f127fSDimitry Andric       //   auto x = ID({ID(2)});
420753f127fSDimitry Andric       // We will build up a map <null> -> ( -> ( with the first and second
421753f127fSDimitry Andric       // l_paren of the macro call respectively. New lines that come in with a
422753f127fSDimitry Andric       // <null> parent will then become children of the l_paren token of the
423753f127fSDimitry Andric       // currently innermost macro call.
424753f127fSDimitry Andric       SpelledParentToReconstructedParent[MacroCallStructure.back()
425753f127fSDimitry Andric                                              .ParentLastToken] = Token;
426753f127fSDimitry Andric       appendToken(Token);
427*0fca6ea1SDimitry Andric       prepareParent(Token, /*NewLine=*/true,
428*0fca6ea1SDimitry Andric                     MacroCallStructure.back().Line->Level);
429753f127fSDimitry Andric       Token->MacroParent = true;
430753f127fSDimitry Andric       return false;
431753f127fSDimitry Andric     }
432753f127fSDimitry Andric     if (!MacroCallStructure.empty()) {
433753f127fSDimitry Andric       if (Token->is(tok::comma)) {
434753f127fSDimitry Andric         // Make new lines inside the next argument children of the comma token.
435753f127fSDimitry Andric         SpelledParentToReconstructedParent
436753f127fSDimitry Andric             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
437753f127fSDimitry Andric         Token->MacroParent = true;
438753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
439*0fca6ea1SDimitry Andric         prepareParent(Token, /*NewLine=*/true,
440*0fca6ea1SDimitry Andric                       MacroCallStructure.back().Line->Level);
441753f127fSDimitry Andric         return true;
442753f127fSDimitry Andric       }
443753f127fSDimitry Andric       if (Token->is(tok::r_paren)) {
444753f127fSDimitry Andric         appendToken(Token, MacroCallStructure.back().Line);
445753f127fSDimitry Andric         SpelledParentToReconstructedParent.erase(
446753f127fSDimitry Andric             MacroCallStructure.back().ParentLastToken);
447753f127fSDimitry Andric         MacroCallStructure.pop_back();
448753f127fSDimitry Andric         return false;
449753f127fSDimitry Andric       }
450753f127fSDimitry Andric     }
451753f127fSDimitry Andric   }
452753f127fSDimitry Andric   // Note that any tokens that are tagged with MR_None have been passed as
453753f127fSDimitry Andric   // arguments to the macro that have not been expanded, for example:
454753f127fSDimitry Andric   // Given: #define ID(X) x
455753f127fSDimitry Andric   // When calling: ID(a, b)
456753f127fSDimitry Andric   // 'b' will be part of the reconstructed token stream, but tagged MR_None.
457753f127fSDimitry Andric   // Given that erroring out in this case would be disruptive, we continue
458753f127fSDimitry Andric   // pushing the (unformatted) token.
459753f127fSDimitry Andric   // FIXME: This can lead to unfortunate formatting decisions - give the user
460753f127fSDimitry Andric   // a hint that their macro definition is broken.
461753f127fSDimitry Andric   appendToken(Token);
462753f127fSDimitry Andric   return false;
463753f127fSDimitry Andric }
464753f127fSDimitry Andric 
465753f127fSDimitry Andric void MacroCallReconstructor::finalize() {
466753f127fSDimitry Andric #ifndef NDEBUG
467753f127fSDimitry Andric   assert(State != Finalized && finished());
468753f127fSDimitry Andric   State = Finalized;
469753f127fSDimitry Andric #endif
470753f127fSDimitry Andric 
471753f127fSDimitry Andric   // We created corresponding unwrapped lines for each incoming line as children
472753f127fSDimitry Andric   // the the toplevel null token.
473753f127fSDimitry Andric   assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
474753f127fSDimitry Andric   LLVM_DEBUG({
475753f127fSDimitry Andric     llvm::dbgs() << "Finalizing reconstructed lines:\n";
476753f127fSDimitry Andric     debug(Result, 0);
477753f127fSDimitry Andric   });
478753f127fSDimitry Andric 
479753f127fSDimitry Andric   // The first line becomes the top level line in the resulting unwrapped line.
480753f127fSDimitry Andric   LineNode &Top = *Result.Tokens.front();
481753f127fSDimitry Andric   auto *I = Top.Children.begin();
482753f127fSDimitry Andric   // Every subsequent line will become a child of the last token in the previous
483753f127fSDimitry Andric   // line, which is the token prior to the first token in the line.
484753f127fSDimitry Andric   LineNode *Last = (*I)->Tokens.back().get();
485753f127fSDimitry Andric   ++I;
486753f127fSDimitry Andric   for (auto *E = Top.Children.end(); I != E; ++I) {
487753f127fSDimitry Andric     assert(Last->Children.empty());
488753f127fSDimitry Andric     Last->Children.push_back(std::move(*I));
489753f127fSDimitry Andric 
490753f127fSDimitry Andric     // Mark the previous line's last token as generated by a macro expansion
491753f127fSDimitry Andric     // so the formatting algorithm can take that into account.
492753f127fSDimitry Andric     Last->Tok->MacroParent = true;
493753f127fSDimitry Andric 
494753f127fSDimitry Andric     Last = Last->Children.back()->Tokens.back().get();
495753f127fSDimitry Andric   }
496753f127fSDimitry Andric   Top.Children.resize(1);
497753f127fSDimitry Andric }
498753f127fSDimitry Andric 
499753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token,
500753f127fSDimitry Andric                                          ReconstructedLine *L) {
501753f127fSDimitry Andric   L = L ? L : currentLine();
502753f127fSDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
503753f127fSDimitry Andric   L->Tokens.push_back(std::make_unique<LineNode>(Token));
504753f127fSDimitry Andric }
505753f127fSDimitry Andric 
506753f127fSDimitry Andric UnwrappedLine
507753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
508753f127fSDimitry Andric                                             int Level) {
509753f127fSDimitry Andric   UnwrappedLine Result;
510753f127fSDimitry Andric   Result.Level = Level;
511753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
512753f127fSDimitry Andric     Result.Tokens.push_back(N->Tok);
513753f127fSDimitry Andric     UnwrappedLineNode &Current = Result.Tokens.back();
514*0fca6ea1SDimitry Andric     auto NumChildren =
515*0fca6ea1SDimitry Andric         std::count_if(N->Children.begin(), N->Children.end(),
516*0fca6ea1SDimitry Andric                       [](const auto &Child) { return !Child->Tokens.empty(); });
517*0fca6ea1SDimitry Andric     if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
518*0fca6ea1SDimitry Andric       // If we only have one child, and the child is due to a macro expansion
519*0fca6ea1SDimitry Andric       // (either attached to a left parenthesis or comma), merge the child into
520*0fca6ea1SDimitry Andric       // the current line to prevent forced breaks for macro arguments.
521*0fca6ea1SDimitry Andric       auto *Child = std::find_if(
522*0fca6ea1SDimitry Andric           N->Children.begin(), N->Children.end(),
523*0fca6ea1SDimitry Andric           [](const auto &Child) { return !Child->Tokens.empty(); });
524*0fca6ea1SDimitry Andric       auto Line = createUnwrappedLine(**Child, Level);
525*0fca6ea1SDimitry Andric       Result.Tokens.splice(Result.Tokens.end(), Line.Tokens);
526*0fca6ea1SDimitry Andric     } else if (NumChildren > 0) {
527*0fca6ea1SDimitry Andric       // When there are multiple children with different indent, make sure that
528*0fca6ea1SDimitry Andric       // we indent them:
529*0fca6ea1SDimitry Andric       // 1. One level below the current line's level.
530*0fca6ea1SDimitry Andric       // 2. At the correct level relative to each other.
531*0fca6ea1SDimitry Andric       unsigned MinChildLevel =
532*0fca6ea1SDimitry Andric           std::min_element(N->Children.begin(), N->Children.end(),
533*0fca6ea1SDimitry Andric                            [](const auto &E1, const auto &E2) {
534*0fca6ea1SDimitry Andric                              return E1->Level < E2->Level;
535*0fca6ea1SDimitry Andric                            })
536*0fca6ea1SDimitry Andric               ->get()
537*0fca6ea1SDimitry Andric               ->Level;
538753f127fSDimitry Andric       for (const auto &Child : N->Children) {
539753f127fSDimitry Andric         if (Child->Tokens.empty())
540753f127fSDimitry Andric           continue;
541*0fca6ea1SDimitry Andric         Current.Children.push_back(createUnwrappedLine(
542*0fca6ea1SDimitry Andric             *Child, Level + 1 + (Child->Level - MinChildLevel)));
543753f127fSDimitry Andric       }
544753f127fSDimitry Andric     }
545753f127fSDimitry Andric   }
546753f127fSDimitry Andric   return Result;
547753f127fSDimitry Andric }
548753f127fSDimitry Andric 
549753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) {
550753f127fSDimitry Andric   for (int i = 0; i < Level; ++i)
551753f127fSDimitry Andric     llvm::dbgs() << " ";
552753f127fSDimitry Andric   for (const auto &N : Line.Tokens) {
553753f127fSDimitry Andric     if (!N)
554753f127fSDimitry Andric       continue;
555753f127fSDimitry Andric     if (N->Tok)
556753f127fSDimitry Andric       llvm::dbgs() << N->Tok->TokenText << " ";
557753f127fSDimitry Andric     for (const auto &Child : N->Children) {
558753f127fSDimitry Andric       llvm::dbgs() << "\n";
559753f127fSDimitry Andric       debug(*Child, Level + 1);
560753f127fSDimitry Andric       for (int i = 0; i < Level; ++i)
561753f127fSDimitry Andric         llvm::dbgs() << " ";
562753f127fSDimitry Andric     }
563753f127fSDimitry Andric   }
564753f127fSDimitry Andric   llvm::dbgs() << "\n";
565753f127fSDimitry Andric }
566753f127fSDimitry Andric 
567753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine &
568753f127fSDimitry Andric MacroCallReconstructor::parentLine() {
569753f127fSDimitry Andric   return **std::prev(std::prev(ActiveReconstructedLines.end()));
570753f127fSDimitry Andric }
571753f127fSDimitry Andric 
572753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *
573753f127fSDimitry Andric MacroCallReconstructor::currentLine() {
574753f127fSDimitry Andric   return ActiveReconstructedLines.back();
575753f127fSDimitry Andric }
576753f127fSDimitry Andric 
577753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState(
578753f127fSDimitry Andric     MacroCallReconstructor::ReconstructedLine *Line,
579753f127fSDimitry Andric     FormatToken *ParentLastToken, FormatToken *MacroCallLParen)
580753f127fSDimitry Andric     : Line(Line), ParentLastToken(ParentLastToken),
581753f127fSDimitry Andric       MacroCallLParen(MacroCallLParen) {
582753f127fSDimitry Andric   LLVM_DEBUG(
583753f127fSDimitry Andric       llvm::dbgs() << "ParentLastToken: "
584753f127fSDimitry Andric                    << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
585753f127fSDimitry Andric                    << "\n");
586753f127fSDimitry Andric 
587753f127fSDimitry Andric   assert(MacroCallLParen->is(tok::l_paren));
588753f127fSDimitry Andric }
589753f127fSDimitry Andric 
590753f127fSDimitry Andric } // namespace format
591753f127fSDimitry Andric } // namespace clang
592