1*753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===// 2*753f127fSDimitry Andric // 3*753f127fSDimitry Andric // The LLVM Compiler Infrastructure 4*753f127fSDimitry Andric // 5*753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source 6*753f127fSDimitry Andric // License. See LICENSE.TXT for details. 7*753f127fSDimitry Andric // 8*753f127fSDimitry Andric //===----------------------------------------------------------------------===// 9*753f127fSDimitry Andric /// 10*753f127fSDimitry Andric /// \file 11*753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits 12*753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines. 13*753f127fSDimitry Andric /// 14*753f127fSDimitry Andric //===----------------------------------------------------------------------===// 15*753f127fSDimitry Andric 16*753f127fSDimitry Andric #include "Macros.h" 17*753f127fSDimitry Andric 18*753f127fSDimitry Andric #include "UnwrappedLineParser.h" 19*753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h" 20*753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h" 21*753f127fSDimitry Andric #include "llvm/Support/Debug.h" 22*753f127fSDimitry Andric #include <cassert> 23*753f127fSDimitry Andric 24*753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct" 25*753f127fSDimitry Andric 26*753f127fSDimitry Andric namespace clang { 27*753f127fSDimitry Andric namespace format { 28*753f127fSDimitry Andric 29*753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing 30*753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line. 31*753f127fSDimitry Andric template <typename T> 32*753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call, 33*753f127fSDimitry Andric FormatToken *Parent = nullptr) { 34*753f127fSDimitry Andric bool First = true; 35*753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 36*753f127fSDimitry Andric Call(N.Tok, Parent, First); 37*753f127fSDimitry Andric First = false; 38*753f127fSDimitry Andric for (const auto &Child : N.Children) { 39*753f127fSDimitry Andric forEachToken(Child, Call, N.Tok); 40*753f127fSDimitry Andric } 41*753f127fSDimitry Andric } 42*753f127fSDimitry Andric } 43*753f127fSDimitry Andric 44*753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor( 45*753f127fSDimitry Andric unsigned Level, 46*753f127fSDimitry Andric const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> 47*753f127fSDimitry Andric &ActiveExpansions) 48*753f127fSDimitry Andric : Level(Level), IdToReconstructed(ActiveExpansions) { 49*753f127fSDimitry Andric Result.Tokens.push_back(std::make_unique<LineNode>()); 50*753f127fSDimitry Andric ActiveReconstructedLines.push_back(&Result); 51*753f127fSDimitry Andric } 52*753f127fSDimitry Andric 53*753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) { 54*753f127fSDimitry Andric assert(State != Finalized); 55*753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n"); 56*753f127fSDimitry Andric forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) { 57*753f127fSDimitry Andric add(Token, Parent, First); 58*753f127fSDimitry Andric }); 59*753f127fSDimitry Andric assert(InProgress || finished()); 60*753f127fSDimitry Andric } 61*753f127fSDimitry Andric 62*753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && { 63*753f127fSDimitry Andric finalize(); 64*753f127fSDimitry Andric assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1); 65*753f127fSDimitry Andric UnwrappedLine Final = 66*753f127fSDimitry Andric createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level); 67*753f127fSDimitry Andric assert(!Final.Tokens.empty()); 68*753f127fSDimitry Andric return Final; 69*753f127fSDimitry Andric } 70*753f127fSDimitry Andric 71*753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p 72*753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it 73*753f127fSDimitry Andric // is the first token in a given unwrapped line. 74*753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token, 75*753f127fSDimitry Andric FormatToken *ExpandedParent, bool First) { 76*753f127fSDimitry Andric LLVM_DEBUG( 77*753f127fSDimitry Andric llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: " 78*753f127fSDimitry Andric << (ExpandedParent ? ExpandedParent->TokenText : "<null>") 79*753f127fSDimitry Andric << ", First: " << First << "\n"); 80*753f127fSDimitry Andric // In order to be able to find the correct parent in the reconstructed token 81*753f127fSDimitry Andric // stream, we need to continue the last open reconstruction until we find the 82*753f127fSDimitry Andric // given token if it is part of the reconstructed token stream. 83*753f127fSDimitry Andric // 84*753f127fSDimitry Andric // Note that hidden tokens can be part of the reconstructed stream in nested 85*753f127fSDimitry Andric // macro calls. 86*753f127fSDimitry Andric // For example, given 87*753f127fSDimitry Andric // #define C(x, y) x y 88*753f127fSDimitry Andric // #define B(x) {x} 89*753f127fSDimitry Andric // And the call: 90*753f127fSDimitry Andric // C(a, B(b)) 91*753f127fSDimitry Andric // The outer macro call will be C(a, {b}), and the hidden token '}' can be 92*753f127fSDimitry Andric // found in the reconstructed token stream of that expansion level. 93*753f127fSDimitry Andric // In the expanded token stream 94*753f127fSDimitry Andric // a {b} 95*753f127fSDimitry Andric // 'b' is a child of '{'. We need to continue the open expansion of the ',' 96*753f127fSDimitry Andric // in the call of 'C' in order to correctly set the ',' as the parent of '{', 97*753f127fSDimitry Andric // so we later set the spelled token 'b' as a child of the ','. 98*753f127fSDimitry Andric if (!ActiveExpansions.empty() && Token->MacroCtx && 99*753f127fSDimitry Andric (Token->MacroCtx->Role != MR_Hidden || 100*753f127fSDimitry Andric ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) { 101*753f127fSDimitry Andric if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token)) 102*753f127fSDimitry Andric First = true; 103*753f127fSDimitry Andric } 104*753f127fSDimitry Andric 105*753f127fSDimitry Andric prepareParent(ExpandedParent, First); 106*753f127fSDimitry Andric 107*753f127fSDimitry Andric if (Token->MacroCtx) { 108*753f127fSDimitry Andric // If this token was generated by a macro call, add the reconstructed 109*753f127fSDimitry Andric // equivalent of the token. 110*753f127fSDimitry Andric reconstruct(Token); 111*753f127fSDimitry Andric } else { 112*753f127fSDimitry Andric // Otherwise, we add it to the current line. 113*753f127fSDimitry Andric appendToken(Token); 114*753f127fSDimitry Andric } 115*753f127fSDimitry Andric } 116*753f127fSDimitry Andric 117*753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push 118*753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the 119*753f127fSDimitry Andric // expanded code. 120*753f127fSDimitry Andric // 121*753f127fSDimitry Andric // This may entail: 122*753f127fSDimitry Andric // - creating a new line, if the parent is on the active line 123*753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack 124*753f127fSDimitry Andric // 125*753f127fSDimitry Andric // Postcondition: 126*753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its 127*753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the 128*753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2] 129*753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed 130*753f127fSDimitry Andric // unwrapped line. 131*753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent, 132*753f127fSDimitry Andric bool NewLine) { 133*753f127fSDimitry Andric LLVM_DEBUG({ 134*753f127fSDimitry Andric llvm::dbgs() << "ParentMap:\n"; 135*753f127fSDimitry Andric debugParentMap(); 136*753f127fSDimitry Andric }); 137*753f127fSDimitry Andric // We want to find the parent in the new unwrapped line, where the expanded 138*753f127fSDimitry Andric // parent might have been replaced during reconstruction. 139*753f127fSDimitry Andric FormatToken *Parent = getParentInResult(ExpandedParent); 140*753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: " 141*753f127fSDimitry Andric << (Parent ? Parent->TokenText : "<null>") << "\n"); 142*753f127fSDimitry Andric 143*753f127fSDimitry Andric FormatToken *OpenMacroParent = nullptr; 144*753f127fSDimitry Andric if (!MacroCallStructure.empty()) { 145*753f127fSDimitry Andric // Inside a macro expansion, it is possible to lose track of the correct 146*753f127fSDimitry Andric // parent - either because it is already popped, for example because it was 147*753f127fSDimitry Andric // in a different macro argument (e.g. M({, })), or when we work on invalid 148*753f127fSDimitry Andric // code. 149*753f127fSDimitry Andric // Thus, we use the innermost macro call's parent as the parent at which 150*753f127fSDimitry Andric // we stop; this allows us to stay within the macro expansion and keeps 151*753f127fSDimitry Andric // any problems confined to the extent of the macro call. 152*753f127fSDimitry Andric OpenMacroParent = 153*753f127fSDimitry Andric getParentInResult(MacroCallStructure.back().MacroCallLParen); 154*753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() 155*753f127fSDimitry Andric << "MacroCallLParen: " 156*753f127fSDimitry Andric << MacroCallStructure.back().MacroCallLParen->TokenText 157*753f127fSDimitry Andric << ", OpenMacroParent: " 158*753f127fSDimitry Andric << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>") 159*753f127fSDimitry Andric << "\n"); 160*753f127fSDimitry Andric } 161*753f127fSDimitry Andric if (NewLine || 162*753f127fSDimitry Andric (!ActiveReconstructedLines.back()->Tokens.empty() && 163*753f127fSDimitry Andric Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) { 164*753f127fSDimitry Andric // If we are at the first token in a new line, we want to also 165*753f127fSDimitry Andric // create a new line in the resulting reconstructed unwrapped line. 166*753f127fSDimitry Andric while (ActiveReconstructedLines.back()->Tokens.empty() || 167*753f127fSDimitry Andric (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok && 168*753f127fSDimitry Andric ActiveReconstructedLines.back()->Tokens.back()->Tok != 169*753f127fSDimitry Andric OpenMacroParent)) { 170*753f127fSDimitry Andric ActiveReconstructedLines.pop_back(); 171*753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 172*753f127fSDimitry Andric } 173*753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 174*753f127fSDimitry Andric ActiveReconstructedLines.back()->Tokens.back()->Children.push_back( 175*753f127fSDimitry Andric std::make_unique<ReconstructedLine>()); 176*753f127fSDimitry Andric ActiveReconstructedLines.push_back( 177*753f127fSDimitry Andric &*ActiveReconstructedLines.back()->Tokens.back()->Children.back()); 178*753f127fSDimitry Andric } else if (parentLine().Tokens.back()->Tok != Parent) { 179*753f127fSDimitry Andric // If we're not the first token in a new line, pop lines until we find 180*753f127fSDimitry Andric // the child of \c Parent in the stack. 181*753f127fSDimitry Andric while (Parent != parentLine().Tokens.back()->Tok && 182*753f127fSDimitry Andric parentLine().Tokens.back()->Tok && 183*753f127fSDimitry Andric parentLine().Tokens.back()->Tok != OpenMacroParent) { 184*753f127fSDimitry Andric ActiveReconstructedLines.pop_back(); 185*753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 186*753f127fSDimitry Andric } 187*753f127fSDimitry Andric } 188*753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 189*753f127fSDimitry Andric } 190*753f127fSDimitry Andric 191*753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the 192*753f127fSDimitry Andric // corresponding parent in the output. 193*753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) { 194*753f127fSDimitry Andric FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent); 195*753f127fSDimitry Andric if (!Mapped) 196*753f127fSDimitry Andric return Parent; 197*753f127fSDimitry Andric for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) { 198*753f127fSDimitry Andric Parent = Mapped; 199*753f127fSDimitry Andric } 200*753f127fSDimitry Andric // If we use a different token than the parent in the expanded token stream 201*753f127fSDimitry Andric // as parent, mark it as a special parent, so the formatting code knows it 202*753f127fSDimitry Andric // needs to have its children formatted. 203*753f127fSDimitry Andric Parent->MacroParent = true; 204*753f127fSDimitry Andric return Parent; 205*753f127fSDimitry Andric } 206*753f127fSDimitry Andric 207*753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call. 208*753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) { 209*753f127fSDimitry Andric assert(Token->MacroCtx); 210*753f127fSDimitry Andric // A single token can be the only result of a macro call: 211*753f127fSDimitry Andric // Given: #define ID(x, y) ; 212*753f127fSDimitry Andric // And the call: ID(<some>, <tokens>) 213*753f127fSDimitry Andric // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>). 214*753f127fSDimitry Andric if (Token->MacroCtx->StartOfExpansion) { 215*753f127fSDimitry Andric startReconstruction(Token); 216*753f127fSDimitry Andric // If the order of tokens in the expanded token stream is not the 217*753f127fSDimitry Andric // same as the order of tokens in the reconstructed stream, we need 218*753f127fSDimitry Andric // to reconstruct tokens that arrive later in the stream. 219*753f127fSDimitry Andric if (Token->MacroCtx->Role != MR_Hidden) { 220*753f127fSDimitry Andric reconstructActiveCallUntil(Token); 221*753f127fSDimitry Andric } 222*753f127fSDimitry Andric } 223*753f127fSDimitry Andric assert(!ActiveExpansions.empty()); 224*753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) { 225*753f127fSDimitry Andric assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size()); 226*753f127fSDimitry Andric if (Token->MacroCtx->Role != MR_Hidden) { 227*753f127fSDimitry Andric // The current token in the reconstructed token stream must be the token 228*753f127fSDimitry Andric // we're looking for - we either arrive here after startReconstruction, 229*753f127fSDimitry Andric // which initiates the stream to the first token, or after 230*753f127fSDimitry Andric // continueReconstructionUntil skipped until the expected token in the 231*753f127fSDimitry Andric // reconstructed stream at the start of add(...). 232*753f127fSDimitry Andric assert(ActiveExpansions.back().SpelledI->Tok == Token); 233*753f127fSDimitry Andric processNextReconstructed(); 234*753f127fSDimitry Andric } else if (!currentLine()->Tokens.empty()) { 235*753f127fSDimitry Andric // Map all hidden tokens to the last visible token in the output. 236*753f127fSDimitry Andric // If the hidden token is a parent, we'll use the last visible 237*753f127fSDimitry Andric // token as the parent of the hidden token's children. 238*753f127fSDimitry Andric SpelledParentToReconstructedParent[Token] = 239*753f127fSDimitry Andric currentLine()->Tokens.back()->Tok; 240*753f127fSDimitry Andric } else { 241*753f127fSDimitry Andric for (auto I = ActiveReconstructedLines.rbegin(), 242*753f127fSDimitry Andric E = ActiveReconstructedLines.rend(); 243*753f127fSDimitry Andric I != E; ++I) { 244*753f127fSDimitry Andric if (!(*I)->Tokens.empty()) { 245*753f127fSDimitry Andric SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok; 246*753f127fSDimitry Andric break; 247*753f127fSDimitry Andric } 248*753f127fSDimitry Andric } 249*753f127fSDimitry Andric } 250*753f127fSDimitry Andric } 251*753f127fSDimitry Andric if (Token->MacroCtx->EndOfExpansion) 252*753f127fSDimitry Andric endReconstruction(Token); 253*753f127fSDimitry Andric } 254*753f127fSDimitry Andric 255*753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the 256*753f127fSDimitry Andric // macro call. 257*753f127fSDimitry Andric // For example, given: #define ID(x) x 258*753f127fSDimitry Andric // And the call: ID(int a) 259*753f127fSDimitry Andric // Reconstructs: ID( 260*753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) { 261*753f127fSDimitry Andric assert(Token->MacroCtx); 262*753f127fSDimitry Andric assert(!Token->MacroCtx->ExpandedFrom.empty()); 263*753f127fSDimitry Andric assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size()); 264*753f127fSDimitry Andric #ifndef NDEBUG 265*753f127fSDimitry Andric // Check that the token's reconstruction stack matches our current 266*753f127fSDimitry Andric // reconstruction stack. 267*753f127fSDimitry Andric for (size_t I = 0; I < ActiveExpansions.size(); ++I) { 268*753f127fSDimitry Andric assert(ActiveExpansions[I].ID == 269*753f127fSDimitry Andric Token->MacroCtx 270*753f127fSDimitry Andric ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]); 271*753f127fSDimitry Andric } 272*753f127fSDimitry Andric #endif 273*753f127fSDimitry Andric // Start reconstruction for all calls for which this token is the first token 274*753f127fSDimitry Andric // generated by the call. 275*753f127fSDimitry Andric // Note that the token's expanded from stack is inside-to-outside, and the 276*753f127fSDimitry Andric // expansions for which this token is not the first are the outermost ones. 277*753f127fSDimitry Andric ArrayRef<FormatToken *> StartedMacros = 278*753f127fSDimitry Andric makeArrayRef(Token->MacroCtx->ExpandedFrom) 279*753f127fSDimitry Andric .drop_back(ActiveExpansions.size()); 280*753f127fSDimitry Andric assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion); 281*753f127fSDimitry Andric // We reconstruct macro calls outside-to-inside. 282*753f127fSDimitry Andric for (FormatToken *ID : llvm::reverse(StartedMacros)) { 283*753f127fSDimitry Andric // We found a macro call to be reconstructed; the next time our 284*753f127fSDimitry Andric // reconstruction stack is empty we know we finished an reconstruction. 285*753f127fSDimitry Andric #ifndef NDEBUG 286*753f127fSDimitry Andric State = InProgress; 287*753f127fSDimitry Andric #endif 288*753f127fSDimitry Andric // Put the reconstructed macro call's token into our reconstruction stack. 289*753f127fSDimitry Andric auto IU = IdToReconstructed.find(ID); 290*753f127fSDimitry Andric assert(IU != IdToReconstructed.end()); 291*753f127fSDimitry Andric ActiveExpansions.push_back( 292*753f127fSDimitry Andric {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()}); 293*753f127fSDimitry Andric // Process the macro call's identifier. 294*753f127fSDimitry Andric processNextReconstructed(); 295*753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE) 296*753f127fSDimitry Andric continue; 297*753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) { 298*753f127fSDimitry Andric // Process the optional opening parenthesis. 299*753f127fSDimitry Andric processNextReconstructed(); 300*753f127fSDimitry Andric } 301*753f127fSDimitry Andric } 302*753f127fSDimitry Andric } 303*753f127fSDimitry Andric 304*753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the 305*753f127fSDimitry Andric // given \p Token. 306*753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) { 307*753f127fSDimitry Andric assert(!ActiveExpansions.empty()); 308*753f127fSDimitry Andric bool PassedMacroComma = false; 309*753f127fSDimitry Andric // FIXME: If Token was already expanded earlier, due to 310*753f127fSDimitry Andric // a change in order, we will not find it, but need to 311*753f127fSDimitry Andric // skip it. 312*753f127fSDimitry Andric while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE && 313*753f127fSDimitry Andric ActiveExpansions.back().SpelledI->Tok != Token) { 314*753f127fSDimitry Andric PassedMacroComma = processNextReconstructed() || PassedMacroComma; 315*753f127fSDimitry Andric } 316*753f127fSDimitry Andric return PassedMacroComma; 317*753f127fSDimitry Andric } 318*753f127fSDimitry Andric 319*753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token. 320*753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) { 321*753f127fSDimitry Andric assert(Token->MacroCtx && 322*753f127fSDimitry Andric (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion)); 323*753f127fSDimitry Andric for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) { 324*753f127fSDimitry Andric #ifndef NDEBUG 325*753f127fSDimitry Andric // Check all remaining tokens but the final closing parenthesis and optional 326*753f127fSDimitry Andric // trailing comment were already reconstructed at an inner expansion level. 327*753f127fSDimitry Andric for (auto T = ActiveExpansions.back().SpelledI; 328*753f127fSDimitry Andric T != ActiveExpansions.back().SpelledE; ++T) { 329*753f127fSDimitry Andric FormatToken *Token = T->Tok; 330*753f127fSDimitry Andric bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE || 331*753f127fSDimitry Andric std::next(T)->Tok->isTrailingComment()) && 332*753f127fSDimitry Andric !Token->MacroCtx && Token->is(tok::r_paren); 333*753f127fSDimitry Andric bool TrailingComment = Token->isTrailingComment(); 334*753f127fSDimitry Andric bool PreviousLevel = 335*753f127fSDimitry Andric Token->MacroCtx && 336*753f127fSDimitry Andric (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()); 337*753f127fSDimitry Andric if (!ClosingParen && !TrailingComment && !PreviousLevel) { 338*753f127fSDimitry Andric llvm::dbgs() << "At token: " << Token->TokenText << "\n"; 339*753f127fSDimitry Andric } 340*753f127fSDimitry Andric // In addition to the following cases, we can also run into this 341*753f127fSDimitry Andric // when a macro call had more arguments than expected; in that case, 342*753f127fSDimitry Andric // the comma and the remaining tokens in the macro call will potentially 343*753f127fSDimitry Andric // end up in the line when we finish the expansion. 344*753f127fSDimitry Andric // FIXME: Add the information which arguments are unused, and assert 345*753f127fSDimitry Andric // one of the cases below plus reconstructed macro argument tokens. 346*753f127fSDimitry Andric // assert(ClosingParen || TrailingComment || PreviousLevel); 347*753f127fSDimitry Andric } 348*753f127fSDimitry Andric #endif 349*753f127fSDimitry Andric // Handle the remaining open tokens: 350*753f127fSDimitry Andric // - expand the closing parenthesis, if it exists, including an optional 351*753f127fSDimitry Andric // trailing comment 352*753f127fSDimitry Andric // - handle tokens that were already reconstructed at an inner expansion 353*753f127fSDimitry Andric // level 354*753f127fSDimitry Andric // - handle tokens when a macro call had more than the expected number of 355*753f127fSDimitry Andric // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end 356*753f127fSDimitry Andric // up with the sequence ", b, c)" being open at the end of the 357*753f127fSDimitry Andric // reconstruction; we want to gracefully handle that case 358*753f127fSDimitry Andric // 359*753f127fSDimitry Andric // FIXME: See the above debug-check for what we will need to do to be 360*753f127fSDimitry Andric // able to assert this. 361*753f127fSDimitry Andric for (auto T = ActiveExpansions.back().SpelledI; 362*753f127fSDimitry Andric T != ActiveExpansions.back().SpelledE; ++T) { 363*753f127fSDimitry Andric processNextReconstructed(); 364*753f127fSDimitry Andric } 365*753f127fSDimitry Andric ActiveExpansions.pop_back(); 366*753f127fSDimitry Andric } 367*753f127fSDimitry Andric } 368*753f127fSDimitry Andric 369*753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const { 370*753f127fSDimitry Andric llvm::DenseSet<FormatToken *> Values; 371*753f127fSDimitry Andric for (const auto &P : SpelledParentToReconstructedParent) 372*753f127fSDimitry Andric Values.insert(P.second); 373*753f127fSDimitry Andric 374*753f127fSDimitry Andric for (const auto &P : SpelledParentToReconstructedParent) { 375*753f127fSDimitry Andric if (Values.contains(P.first)) 376*753f127fSDimitry Andric continue; 377*753f127fSDimitry Andric llvm::dbgs() << (P.first ? P.first->TokenText : "<null>"); 378*753f127fSDimitry Andric for (auto I = SpelledParentToReconstructedParent.find(P.first), 379*753f127fSDimitry Andric E = SpelledParentToReconstructedParent.end(); 380*753f127fSDimitry Andric I != E; I = SpelledParentToReconstructedParent.find(I->second)) { 381*753f127fSDimitry Andric llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>"); 382*753f127fSDimitry Andric } 383*753f127fSDimitry Andric llvm::dbgs() << "\n"; 384*753f127fSDimitry Andric } 385*753f127fSDimitry Andric } 386*753f127fSDimitry Andric 387*753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the 388*753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a 389*753f127fSDimitry Andric // macro call. 390*753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() { 391*753f127fSDimitry Andric FormatToken *Token = ActiveExpansions.back().SpelledI->Tok; 392*753f127fSDimitry Andric ++ActiveExpansions.back().SpelledI; 393*753f127fSDimitry Andric if (Token->MacroCtx) { 394*753f127fSDimitry Andric // Skip tokens that are not part of the macro call. 395*753f127fSDimitry Andric if (Token->MacroCtx->Role == MR_Hidden) { 396*753f127fSDimitry Andric return false; 397*753f127fSDimitry Andric } 398*753f127fSDimitry Andric // Skip tokens we already expanded during an inner reconstruction. 399*753f127fSDimitry Andric // For example, given: #define ID(x) {x} 400*753f127fSDimitry Andric // And the call: ID(ID(f)) 401*753f127fSDimitry Andric // We get two reconstructions: 402*753f127fSDimitry Andric // ID(f) -> {f} 403*753f127fSDimitry Andric // ID({f}) -> {{f}} 404*753f127fSDimitry Andric // We reconstruct f during the first reconstruction, and skip it during the 405*753f127fSDimitry Andric // second reconstruction. 406*753f127fSDimitry Andric if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) { 407*753f127fSDimitry Andric return false; 408*753f127fSDimitry Andric } 409*753f127fSDimitry Andric } 410*753f127fSDimitry Andric // Tokens that do not have a macro context are tokens in that are part of the 411*753f127fSDimitry Andric // macro call that have not taken part in expansion. 412*753f127fSDimitry Andric if (!Token->MacroCtx) { 413*753f127fSDimitry Andric // Put the parentheses and commas of a macro call into the same line; 414*753f127fSDimitry Andric // if the arguments produce new unwrapped lines, they will become children 415*753f127fSDimitry Andric // of the corresponding opening parenthesis or comma tokens in the 416*753f127fSDimitry Andric // reconstructed call. 417*753f127fSDimitry Andric if (Token->is(tok::l_paren)) { 418*753f127fSDimitry Andric MacroCallStructure.push_back(MacroCallState( 419*753f127fSDimitry Andric currentLine(), parentLine().Tokens.back()->Tok, Token)); 420*753f127fSDimitry Andric // All tokens that are children of the previous line's last token in the 421*753f127fSDimitry Andric // reconstructed token stream will now be children of the l_paren token. 422*753f127fSDimitry Andric // For example, for the line containing the macro calls: 423*753f127fSDimitry Andric // auto x = ID({ID(2)}); 424*753f127fSDimitry Andric // We will build up a map <null> -> ( -> ( with the first and second 425*753f127fSDimitry Andric // l_paren of the macro call respectively. New lines that come in with a 426*753f127fSDimitry Andric // <null> parent will then become children of the l_paren token of the 427*753f127fSDimitry Andric // currently innermost macro call. 428*753f127fSDimitry Andric SpelledParentToReconstructedParent[MacroCallStructure.back() 429*753f127fSDimitry Andric .ParentLastToken] = Token; 430*753f127fSDimitry Andric appendToken(Token); 431*753f127fSDimitry Andric prepareParent(Token, /*NewLine=*/true); 432*753f127fSDimitry Andric Token->MacroParent = true; 433*753f127fSDimitry Andric return false; 434*753f127fSDimitry Andric } 435*753f127fSDimitry Andric if (!MacroCallStructure.empty()) { 436*753f127fSDimitry Andric if (Token->is(tok::comma)) { 437*753f127fSDimitry Andric // Make new lines inside the next argument children of the comma token. 438*753f127fSDimitry Andric SpelledParentToReconstructedParent 439*753f127fSDimitry Andric [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token; 440*753f127fSDimitry Andric Token->MacroParent = true; 441*753f127fSDimitry Andric appendToken(Token, MacroCallStructure.back().Line); 442*753f127fSDimitry Andric prepareParent(Token, /*NewLine=*/true); 443*753f127fSDimitry Andric return true; 444*753f127fSDimitry Andric } 445*753f127fSDimitry Andric if (Token->is(tok::r_paren)) { 446*753f127fSDimitry Andric appendToken(Token, MacroCallStructure.back().Line); 447*753f127fSDimitry Andric SpelledParentToReconstructedParent.erase( 448*753f127fSDimitry Andric MacroCallStructure.back().ParentLastToken); 449*753f127fSDimitry Andric MacroCallStructure.pop_back(); 450*753f127fSDimitry Andric return false; 451*753f127fSDimitry Andric } 452*753f127fSDimitry Andric } 453*753f127fSDimitry Andric } 454*753f127fSDimitry Andric // Note that any tokens that are tagged with MR_None have been passed as 455*753f127fSDimitry Andric // arguments to the macro that have not been expanded, for example: 456*753f127fSDimitry Andric // Given: #define ID(X) x 457*753f127fSDimitry Andric // When calling: ID(a, b) 458*753f127fSDimitry Andric // 'b' will be part of the reconstructed token stream, but tagged MR_None. 459*753f127fSDimitry Andric // Given that erroring out in this case would be disruptive, we continue 460*753f127fSDimitry Andric // pushing the (unformatted) token. 461*753f127fSDimitry Andric // FIXME: This can lead to unfortunate formatting decisions - give the user 462*753f127fSDimitry Andric // a hint that their macro definition is broken. 463*753f127fSDimitry Andric appendToken(Token); 464*753f127fSDimitry Andric return false; 465*753f127fSDimitry Andric } 466*753f127fSDimitry Andric 467*753f127fSDimitry Andric void MacroCallReconstructor::finalize() { 468*753f127fSDimitry Andric #ifndef NDEBUG 469*753f127fSDimitry Andric assert(State != Finalized && finished()); 470*753f127fSDimitry Andric State = Finalized; 471*753f127fSDimitry Andric #endif 472*753f127fSDimitry Andric 473*753f127fSDimitry Andric // We created corresponding unwrapped lines for each incoming line as children 474*753f127fSDimitry Andric // the the toplevel null token. 475*753f127fSDimitry Andric assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty()); 476*753f127fSDimitry Andric LLVM_DEBUG({ 477*753f127fSDimitry Andric llvm::dbgs() << "Finalizing reconstructed lines:\n"; 478*753f127fSDimitry Andric debug(Result, 0); 479*753f127fSDimitry Andric }); 480*753f127fSDimitry Andric 481*753f127fSDimitry Andric // The first line becomes the top level line in the resulting unwrapped line. 482*753f127fSDimitry Andric LineNode &Top = *Result.Tokens.front(); 483*753f127fSDimitry Andric auto *I = Top.Children.begin(); 484*753f127fSDimitry Andric // Every subsequent line will become a child of the last token in the previous 485*753f127fSDimitry Andric // line, which is the token prior to the first token in the line. 486*753f127fSDimitry Andric LineNode *Last = (*I)->Tokens.back().get(); 487*753f127fSDimitry Andric ++I; 488*753f127fSDimitry Andric for (auto *E = Top.Children.end(); I != E; ++I) { 489*753f127fSDimitry Andric assert(Last->Children.empty()); 490*753f127fSDimitry Andric Last->Children.push_back(std::move(*I)); 491*753f127fSDimitry Andric 492*753f127fSDimitry Andric // Mark the previous line's last token as generated by a macro expansion 493*753f127fSDimitry Andric // so the formatting algorithm can take that into account. 494*753f127fSDimitry Andric Last->Tok->MacroParent = true; 495*753f127fSDimitry Andric 496*753f127fSDimitry Andric Last = Last->Children.back()->Tokens.back().get(); 497*753f127fSDimitry Andric } 498*753f127fSDimitry Andric Top.Children.resize(1); 499*753f127fSDimitry Andric } 500*753f127fSDimitry Andric 501*753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token, 502*753f127fSDimitry Andric ReconstructedLine *L) { 503*753f127fSDimitry Andric L = L ? L : currentLine(); 504*753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n"); 505*753f127fSDimitry Andric L->Tokens.push_back(std::make_unique<LineNode>(Token)); 506*753f127fSDimitry Andric } 507*753f127fSDimitry Andric 508*753f127fSDimitry Andric UnwrappedLine 509*753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line, 510*753f127fSDimitry Andric int Level) { 511*753f127fSDimitry Andric UnwrappedLine Result; 512*753f127fSDimitry Andric Result.Level = Level; 513*753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 514*753f127fSDimitry Andric Result.Tokens.push_back(N->Tok); 515*753f127fSDimitry Andric UnwrappedLineNode &Current = Result.Tokens.back(); 516*753f127fSDimitry Andric for (const auto &Child : N->Children) { 517*753f127fSDimitry Andric if (Child->Tokens.empty()) 518*753f127fSDimitry Andric continue; 519*753f127fSDimitry Andric Current.Children.push_back(createUnwrappedLine(*Child, Level + 1)); 520*753f127fSDimitry Andric } 521*753f127fSDimitry Andric if (Current.Children.size() == 1 && 522*753f127fSDimitry Andric Current.Tok->isOneOf(tok::l_paren, tok::comma)) { 523*753f127fSDimitry Andric Result.Tokens.splice(Result.Tokens.end(), 524*753f127fSDimitry Andric Current.Children.front().Tokens); 525*753f127fSDimitry Andric Current.Children.clear(); 526*753f127fSDimitry Andric } 527*753f127fSDimitry Andric } 528*753f127fSDimitry Andric return Result; 529*753f127fSDimitry Andric } 530*753f127fSDimitry Andric 531*753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) { 532*753f127fSDimitry Andric for (int i = 0; i < Level; ++i) 533*753f127fSDimitry Andric llvm::dbgs() << " "; 534*753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 535*753f127fSDimitry Andric if (!N) 536*753f127fSDimitry Andric continue; 537*753f127fSDimitry Andric if (N->Tok) 538*753f127fSDimitry Andric llvm::dbgs() << N->Tok->TokenText << " "; 539*753f127fSDimitry Andric for (const auto &Child : N->Children) { 540*753f127fSDimitry Andric llvm::dbgs() << "\n"; 541*753f127fSDimitry Andric debug(*Child, Level + 1); 542*753f127fSDimitry Andric for (int i = 0; i < Level; ++i) 543*753f127fSDimitry Andric llvm::dbgs() << " "; 544*753f127fSDimitry Andric } 545*753f127fSDimitry Andric } 546*753f127fSDimitry Andric llvm::dbgs() << "\n"; 547*753f127fSDimitry Andric } 548*753f127fSDimitry Andric 549*753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine & 550*753f127fSDimitry Andric MacroCallReconstructor::parentLine() { 551*753f127fSDimitry Andric return **std::prev(std::prev(ActiveReconstructedLines.end())); 552*753f127fSDimitry Andric } 553*753f127fSDimitry Andric 554*753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine * 555*753f127fSDimitry Andric MacroCallReconstructor::currentLine() { 556*753f127fSDimitry Andric return ActiveReconstructedLines.back(); 557*753f127fSDimitry Andric } 558*753f127fSDimitry Andric 559*753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState( 560*753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *Line, 561*753f127fSDimitry Andric FormatToken *ParentLastToken, FormatToken *MacroCallLParen) 562*753f127fSDimitry Andric : Line(Line), ParentLastToken(ParentLastToken), 563*753f127fSDimitry Andric MacroCallLParen(MacroCallLParen) { 564*753f127fSDimitry Andric LLVM_DEBUG( 565*753f127fSDimitry Andric llvm::dbgs() << "ParentLastToken: " 566*753f127fSDimitry Andric << (ParentLastToken ? ParentLastToken->TokenText : "<null>") 567*753f127fSDimitry Andric << "\n"); 568*753f127fSDimitry Andric 569*753f127fSDimitry Andric assert(MacroCallLParen->is(tok::l_paren)); 570*753f127fSDimitry Andric } 571*753f127fSDimitry Andric 572*753f127fSDimitry Andric } // namespace format 573*753f127fSDimitry Andric } // namespace clang 574