1753f127fSDimitry Andric //===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===// 2753f127fSDimitry Andric // 3753f127fSDimitry Andric // The LLVM Compiler Infrastructure 4753f127fSDimitry Andric // 5753f127fSDimitry Andric // This file is distributed under the University of Illinois Open Source 6753f127fSDimitry Andric // License. See LICENSE.TXT for details. 7753f127fSDimitry Andric // 8753f127fSDimitry Andric //===----------------------------------------------------------------------===// 9753f127fSDimitry Andric /// 10753f127fSDimitry Andric /// \file 11753f127fSDimitry Andric /// This file contains the implementation of MacroCallReconstructor, which fits 12753f127fSDimitry Andric /// an reconstructed macro call to a parsed set of UnwrappedLines. 13753f127fSDimitry Andric /// 14753f127fSDimitry Andric //===----------------------------------------------------------------------===// 15753f127fSDimitry Andric 16753f127fSDimitry Andric #include "Macros.h" 17753f127fSDimitry Andric 18753f127fSDimitry Andric #include "UnwrappedLineParser.h" 19753f127fSDimitry Andric #include "clang/Basic/TokenKinds.h" 20753f127fSDimitry Andric #include "llvm/ADT/DenseSet.h" 21753f127fSDimitry Andric #include "llvm/Support/Debug.h" 22753f127fSDimitry Andric #include <cassert> 23753f127fSDimitry Andric 24753f127fSDimitry Andric #define DEBUG_TYPE "format-reconstruct" 25753f127fSDimitry Andric 26753f127fSDimitry Andric namespace clang { 27753f127fSDimitry Andric namespace format { 28753f127fSDimitry Andric 29753f127fSDimitry Andric // Call \p Call for each token in the unwrapped line given, passing 30753f127fSDimitry Andric // the token, its parent and whether it is the first token in the line. 31753f127fSDimitry Andric template <typename T> 32753f127fSDimitry Andric void forEachToken(const UnwrappedLine &Line, const T &Call, 33753f127fSDimitry Andric FormatToken *Parent = nullptr) { 34753f127fSDimitry Andric bool First = true; 35753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 36*0fca6ea1SDimitry Andric Call(N.Tok, Parent, First, Line.Level); 37753f127fSDimitry Andric First = false; 38bdd1243dSDimitry Andric for (const auto &Child : N.Children) 39753f127fSDimitry Andric forEachToken(Child, Call, N.Tok); 40753f127fSDimitry Andric } 41753f127fSDimitry Andric } 42753f127fSDimitry Andric 43753f127fSDimitry Andric MacroCallReconstructor::MacroCallReconstructor( 44753f127fSDimitry Andric unsigned Level, 45753f127fSDimitry Andric const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> 46753f127fSDimitry Andric &ActiveExpansions) 47*0fca6ea1SDimitry Andric : Result(Level), IdToReconstructed(ActiveExpansions) { 48753f127fSDimitry Andric Result.Tokens.push_back(std::make_unique<LineNode>()); 49753f127fSDimitry Andric ActiveReconstructedLines.push_back(&Result); 50753f127fSDimitry Andric } 51753f127fSDimitry Andric 52753f127fSDimitry Andric void MacroCallReconstructor::addLine(const UnwrappedLine &Line) { 53753f127fSDimitry Andric assert(State != Finalized); 54753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n"); 55*0fca6ea1SDimitry Andric forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First, 56*0fca6ea1SDimitry Andric unsigned Level) { add(Token, Parent, First, Level); }); 57753f127fSDimitry Andric assert(InProgress || finished()); 58753f127fSDimitry Andric } 59753f127fSDimitry Andric 60753f127fSDimitry Andric UnwrappedLine MacroCallReconstructor::takeResult() && { 61753f127fSDimitry Andric finalize(); 62bdd1243dSDimitry Andric assert(Result.Tokens.size() == 1 && 63bdd1243dSDimitry Andric Result.Tokens.front()->Children.size() == 1); 64*0fca6ea1SDimitry Andric UnwrappedLine Final = createUnwrappedLine( 65*0fca6ea1SDimitry Andric *Result.Tokens.front()->Children.front(), Result.Level); 66753f127fSDimitry Andric assert(!Final.Tokens.empty()); 67753f127fSDimitry Andric return Final; 68753f127fSDimitry Andric } 69753f127fSDimitry Andric 70753f127fSDimitry Andric // Reconstruct the position of the next \p Token, given its parent \p 71753f127fSDimitry Andric // ExpandedParent in the incoming unwrapped line. \p First specifies whether it 72753f127fSDimitry Andric // is the first token in a given unwrapped line. 73753f127fSDimitry Andric void MacroCallReconstructor::add(FormatToken *Token, 74*0fca6ea1SDimitry Andric FormatToken *ExpandedParent, bool First, 75*0fca6ea1SDimitry Andric unsigned Level) { 76753f127fSDimitry Andric LLVM_DEBUG( 77753f127fSDimitry Andric llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: " 78753f127fSDimitry Andric << (ExpandedParent ? ExpandedParent->TokenText : "<null>") 79753f127fSDimitry Andric << ", First: " << First << "\n"); 80753f127fSDimitry Andric // In order to be able to find the correct parent in the reconstructed token 81753f127fSDimitry Andric // stream, we need to continue the last open reconstruction until we find the 82753f127fSDimitry Andric // given token if it is part of the reconstructed token stream. 83753f127fSDimitry Andric // 84753f127fSDimitry Andric // Note that hidden tokens can be part of the reconstructed stream in nested 85753f127fSDimitry Andric // macro calls. 86753f127fSDimitry Andric // For example, given 87753f127fSDimitry Andric // #define C(x, y) x y 88753f127fSDimitry Andric // #define B(x) {x} 89753f127fSDimitry Andric // And the call: 90753f127fSDimitry Andric // C(a, B(b)) 91753f127fSDimitry Andric // The outer macro call will be C(a, {b}), and the hidden token '}' can be 92753f127fSDimitry Andric // found in the reconstructed token stream of that expansion level. 93753f127fSDimitry Andric // In the expanded token stream 94753f127fSDimitry Andric // a {b} 95753f127fSDimitry Andric // 'b' is a child of '{'. We need to continue the open expansion of the ',' 96753f127fSDimitry Andric // in the call of 'C' in order to correctly set the ',' as the parent of '{', 97753f127fSDimitry Andric // so we later set the spelled token 'b' as a child of the ','. 98753f127fSDimitry Andric if (!ActiveExpansions.empty() && Token->MacroCtx && 99753f127fSDimitry Andric (Token->MacroCtx->Role != MR_Hidden || 100753f127fSDimitry Andric ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) { 101753f127fSDimitry Andric if (/*PassedMacroComma = */ reconstructActiveCallUntil(Token)) 102753f127fSDimitry Andric First = true; 103753f127fSDimitry Andric } 104753f127fSDimitry Andric 105*0fca6ea1SDimitry Andric prepareParent(ExpandedParent, First, Level); 106753f127fSDimitry Andric 107753f127fSDimitry Andric if (Token->MacroCtx) { 108753f127fSDimitry Andric // If this token was generated by a macro call, add the reconstructed 109753f127fSDimitry Andric // equivalent of the token. 110753f127fSDimitry Andric reconstruct(Token); 111753f127fSDimitry Andric } else { 112753f127fSDimitry Andric // Otherwise, we add it to the current line. 113753f127fSDimitry Andric appendToken(Token); 114753f127fSDimitry Andric } 115753f127fSDimitry Andric } 116753f127fSDimitry Andric 117753f127fSDimitry Andric // Adjusts the stack of active reconstructed lines so we're ready to push 118753f127fSDimitry Andric // tokens. The tokens to be pushed are children of ExpandedParent in the 119753f127fSDimitry Andric // expanded code. 120753f127fSDimitry Andric // 121753f127fSDimitry Andric // This may entail: 122753f127fSDimitry Andric // - creating a new line, if the parent is on the active line 123753f127fSDimitry Andric // - popping active lines, if the parent is further up the stack 124753f127fSDimitry Andric // 125753f127fSDimitry Andric // Postcondition: 126753f127fSDimitry Andric // ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its 127753f127fSDimitry Andric // reconstructed replacement token as a parent (when possible) - that is, the 128753f127fSDimitry Andric // last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2] 129753f127fSDimitry Andric // is the parent of ActiveReconstructedLines.back() in the reconstructed 130753f127fSDimitry Andric // unwrapped line. 131753f127fSDimitry Andric void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent, 132*0fca6ea1SDimitry Andric bool NewLine, unsigned Level) { 133753f127fSDimitry Andric LLVM_DEBUG({ 134753f127fSDimitry Andric llvm::dbgs() << "ParentMap:\n"; 135753f127fSDimitry Andric debugParentMap(); 136753f127fSDimitry Andric }); 137753f127fSDimitry Andric // We want to find the parent in the new unwrapped line, where the expanded 138753f127fSDimitry Andric // parent might have been replaced during reconstruction. 139753f127fSDimitry Andric FormatToken *Parent = getParentInResult(ExpandedParent); 140753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: " 141753f127fSDimitry Andric << (Parent ? Parent->TokenText : "<null>") << "\n"); 142753f127fSDimitry Andric 143753f127fSDimitry Andric FormatToken *OpenMacroParent = nullptr; 144753f127fSDimitry Andric if (!MacroCallStructure.empty()) { 145753f127fSDimitry Andric // Inside a macro expansion, it is possible to lose track of the correct 146753f127fSDimitry Andric // parent - either because it is already popped, for example because it was 147753f127fSDimitry Andric // in a different macro argument (e.g. M({, })), or when we work on invalid 148753f127fSDimitry Andric // code. 149753f127fSDimitry Andric // Thus, we use the innermost macro call's parent as the parent at which 150753f127fSDimitry Andric // we stop; this allows us to stay within the macro expansion and keeps 151753f127fSDimitry Andric // any problems confined to the extent of the macro call. 152753f127fSDimitry Andric OpenMacroParent = 153753f127fSDimitry Andric getParentInResult(MacroCallStructure.back().MacroCallLParen); 154753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() 155753f127fSDimitry Andric << "MacroCallLParen: " 156753f127fSDimitry Andric << MacroCallStructure.back().MacroCallLParen->TokenText 157753f127fSDimitry Andric << ", OpenMacroParent: " 158753f127fSDimitry Andric << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>") 159753f127fSDimitry Andric << "\n"); 160753f127fSDimitry Andric } 161753f127fSDimitry Andric if (NewLine || 162753f127fSDimitry Andric (!ActiveReconstructedLines.back()->Tokens.empty() && 163753f127fSDimitry Andric Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) { 164753f127fSDimitry Andric // If we are at the first token in a new line, we want to also 165753f127fSDimitry Andric // create a new line in the resulting reconstructed unwrapped line. 166753f127fSDimitry Andric while (ActiveReconstructedLines.back()->Tokens.empty() || 167753f127fSDimitry Andric (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok && 168753f127fSDimitry Andric ActiveReconstructedLines.back()->Tokens.back()->Tok != 169753f127fSDimitry Andric OpenMacroParent)) { 170753f127fSDimitry Andric ActiveReconstructedLines.pop_back(); 171753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 172753f127fSDimitry Andric } 173753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 174753f127fSDimitry Andric ActiveReconstructedLines.back()->Tokens.back()->Children.push_back( 175*0fca6ea1SDimitry Andric std::make_unique<ReconstructedLine>(Level)); 176753f127fSDimitry Andric ActiveReconstructedLines.push_back( 177753f127fSDimitry Andric &*ActiveReconstructedLines.back()->Tokens.back()->Children.back()); 178753f127fSDimitry Andric } else if (parentLine().Tokens.back()->Tok != Parent) { 179753f127fSDimitry Andric // If we're not the first token in a new line, pop lines until we find 180753f127fSDimitry Andric // the child of \c Parent in the stack. 181753f127fSDimitry Andric while (Parent != parentLine().Tokens.back()->Tok && 182753f127fSDimitry Andric parentLine().Tokens.back()->Tok && 183753f127fSDimitry Andric parentLine().Tokens.back()->Tok != OpenMacroParent) { 184753f127fSDimitry Andric ActiveReconstructedLines.pop_back(); 185753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 186753f127fSDimitry Andric } 187753f127fSDimitry Andric } 188753f127fSDimitry Andric assert(!ActiveReconstructedLines.empty()); 189753f127fSDimitry Andric } 190753f127fSDimitry Andric 191753f127fSDimitry Andric // For a given \p Parent in the incoming expanded token stream, find the 192753f127fSDimitry Andric // corresponding parent in the output. 193753f127fSDimitry Andric FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) { 194753f127fSDimitry Andric FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent); 195753f127fSDimitry Andric if (!Mapped) 196753f127fSDimitry Andric return Parent; 197bdd1243dSDimitry Andric for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) 198753f127fSDimitry Andric Parent = Mapped; 199753f127fSDimitry Andric // If we use a different token than the parent in the expanded token stream 200753f127fSDimitry Andric // as parent, mark it as a special parent, so the formatting code knows it 201753f127fSDimitry Andric // needs to have its children formatted. 202753f127fSDimitry Andric Parent->MacroParent = true; 203753f127fSDimitry Andric return Parent; 204753f127fSDimitry Andric } 205753f127fSDimitry Andric 206753f127fSDimitry Andric // Reconstruct a \p Token that was expanded from a macro call. 207753f127fSDimitry Andric void MacroCallReconstructor::reconstruct(FormatToken *Token) { 208753f127fSDimitry Andric assert(Token->MacroCtx); 209753f127fSDimitry Andric // A single token can be the only result of a macro call: 210753f127fSDimitry Andric // Given: #define ID(x, y) ; 211753f127fSDimitry Andric // And the call: ID(<some>, <tokens>) 212753f127fSDimitry Andric // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>). 213753f127fSDimitry Andric if (Token->MacroCtx->StartOfExpansion) { 214753f127fSDimitry Andric startReconstruction(Token); 215753f127fSDimitry Andric // If the order of tokens in the expanded token stream is not the 216753f127fSDimitry Andric // same as the order of tokens in the reconstructed stream, we need 217753f127fSDimitry Andric // to reconstruct tokens that arrive later in the stream. 218bdd1243dSDimitry Andric if (Token->MacroCtx->Role != MR_Hidden) 219753f127fSDimitry Andric reconstructActiveCallUntil(Token); 220753f127fSDimitry Andric } 221753f127fSDimitry Andric assert(!ActiveExpansions.empty()); 222753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) { 223753f127fSDimitry Andric assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size()); 224753f127fSDimitry Andric if (Token->MacroCtx->Role != MR_Hidden) { 225753f127fSDimitry Andric // The current token in the reconstructed token stream must be the token 226753f127fSDimitry Andric // we're looking for - we either arrive here after startReconstruction, 227753f127fSDimitry Andric // which initiates the stream to the first token, or after 228753f127fSDimitry Andric // continueReconstructionUntil skipped until the expected token in the 229753f127fSDimitry Andric // reconstructed stream at the start of add(...). 230753f127fSDimitry Andric assert(ActiveExpansions.back().SpelledI->Tok == Token); 231753f127fSDimitry Andric processNextReconstructed(); 232753f127fSDimitry Andric } else if (!currentLine()->Tokens.empty()) { 233753f127fSDimitry Andric // Map all hidden tokens to the last visible token in the output. 234753f127fSDimitry Andric // If the hidden token is a parent, we'll use the last visible 235753f127fSDimitry Andric // token as the parent of the hidden token's children. 236753f127fSDimitry Andric SpelledParentToReconstructedParent[Token] = 237753f127fSDimitry Andric currentLine()->Tokens.back()->Tok; 238753f127fSDimitry Andric } else { 239753f127fSDimitry Andric for (auto I = ActiveReconstructedLines.rbegin(), 240753f127fSDimitry Andric E = ActiveReconstructedLines.rend(); 241753f127fSDimitry Andric I != E; ++I) { 242753f127fSDimitry Andric if (!(*I)->Tokens.empty()) { 243753f127fSDimitry Andric SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok; 244753f127fSDimitry Andric break; 245753f127fSDimitry Andric } 246753f127fSDimitry Andric } 247753f127fSDimitry Andric } 248753f127fSDimitry Andric } 249753f127fSDimitry Andric if (Token->MacroCtx->EndOfExpansion) 250753f127fSDimitry Andric endReconstruction(Token); 251753f127fSDimitry Andric } 252753f127fSDimitry Andric 253753f127fSDimitry Andric // Given a \p Token that starts an expansion, reconstruct the beginning of the 254753f127fSDimitry Andric // macro call. 255753f127fSDimitry Andric // For example, given: #define ID(x) x 256753f127fSDimitry Andric // And the call: ID(int a) 257753f127fSDimitry Andric // Reconstructs: ID( 258753f127fSDimitry Andric void MacroCallReconstructor::startReconstruction(FormatToken *Token) { 259753f127fSDimitry Andric assert(Token->MacroCtx); 260753f127fSDimitry Andric assert(!Token->MacroCtx->ExpandedFrom.empty()); 261753f127fSDimitry Andric assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size()); 262753f127fSDimitry Andric #ifndef NDEBUG 263753f127fSDimitry Andric // Check that the token's reconstruction stack matches our current 264753f127fSDimitry Andric // reconstruction stack. 265753f127fSDimitry Andric for (size_t I = 0; I < ActiveExpansions.size(); ++I) { 266753f127fSDimitry Andric assert(ActiveExpansions[I].ID == 267753f127fSDimitry Andric Token->MacroCtx 268753f127fSDimitry Andric ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]); 269753f127fSDimitry Andric } 270753f127fSDimitry Andric #endif 271753f127fSDimitry Andric // Start reconstruction for all calls for which this token is the first token 272753f127fSDimitry Andric // generated by the call. 273753f127fSDimitry Andric // Note that the token's expanded from stack is inside-to-outside, and the 274753f127fSDimitry Andric // expansions for which this token is not the first are the outermost ones. 275753f127fSDimitry Andric ArrayRef<FormatToken *> StartedMacros = 276bdd1243dSDimitry Andric ArrayRef(Token->MacroCtx->ExpandedFrom) 277753f127fSDimitry Andric .drop_back(ActiveExpansions.size()); 278753f127fSDimitry Andric assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion); 279753f127fSDimitry Andric // We reconstruct macro calls outside-to-inside. 280753f127fSDimitry Andric for (FormatToken *ID : llvm::reverse(StartedMacros)) { 281753f127fSDimitry Andric // We found a macro call to be reconstructed; the next time our 282753f127fSDimitry Andric // reconstruction stack is empty we know we finished an reconstruction. 283753f127fSDimitry Andric #ifndef NDEBUG 284753f127fSDimitry Andric State = InProgress; 285753f127fSDimitry Andric #endif 286753f127fSDimitry Andric // Put the reconstructed macro call's token into our reconstruction stack. 287753f127fSDimitry Andric auto IU = IdToReconstructed.find(ID); 288753f127fSDimitry Andric assert(IU != IdToReconstructed.end()); 289753f127fSDimitry Andric ActiveExpansions.push_back( 290753f127fSDimitry Andric {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()}); 291753f127fSDimitry Andric // Process the macro call's identifier. 292753f127fSDimitry Andric processNextReconstructed(); 293753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE) 294753f127fSDimitry Andric continue; 295753f127fSDimitry Andric if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) { 296753f127fSDimitry Andric // Process the optional opening parenthesis. 297753f127fSDimitry Andric processNextReconstructed(); 298753f127fSDimitry Andric } 299753f127fSDimitry Andric } 300753f127fSDimitry Andric } 301753f127fSDimitry Andric 302753f127fSDimitry Andric // Add all tokens in the reconstruction stream to the output until we find the 303753f127fSDimitry Andric // given \p Token. 304753f127fSDimitry Andric bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) { 305753f127fSDimitry Andric assert(!ActiveExpansions.empty()); 306753f127fSDimitry Andric bool PassedMacroComma = false; 307753f127fSDimitry Andric // FIXME: If Token was already expanded earlier, due to 308753f127fSDimitry Andric // a change in order, we will not find it, but need to 309753f127fSDimitry Andric // skip it. 310753f127fSDimitry Andric while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE && 311753f127fSDimitry Andric ActiveExpansions.back().SpelledI->Tok != Token) { 312753f127fSDimitry Andric PassedMacroComma = processNextReconstructed() || PassedMacroComma; 313753f127fSDimitry Andric } 314753f127fSDimitry Andric return PassedMacroComma; 315753f127fSDimitry Andric } 316753f127fSDimitry Andric 317753f127fSDimitry Andric // End all reconstructions for which \p Token is the final token. 318753f127fSDimitry Andric void MacroCallReconstructor::endReconstruction(FormatToken *Token) { 319753f127fSDimitry Andric assert(Token->MacroCtx && 320753f127fSDimitry Andric (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion)); 321753f127fSDimitry Andric for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) { 3225f757f3fSDimitry Andric LLVM_DEBUG([&] { 3235f757f3fSDimitry Andric // Check all remaining tokens but the final closing parenthesis and 3245f757f3fSDimitry Andric // optional trailing comment were already reconstructed at an inner 3255f757f3fSDimitry Andric // expansion level. 326753f127fSDimitry Andric for (auto T = ActiveExpansions.back().SpelledI; 327753f127fSDimitry Andric T != ActiveExpansions.back().SpelledE; ++T) { 328753f127fSDimitry Andric FormatToken *Token = T->Tok; 329753f127fSDimitry Andric bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE || 330753f127fSDimitry Andric std::next(T)->Tok->isTrailingComment()) && 331753f127fSDimitry Andric !Token->MacroCtx && Token->is(tok::r_paren); 332753f127fSDimitry Andric bool TrailingComment = Token->isTrailingComment(); 333753f127fSDimitry Andric bool PreviousLevel = 334753f127fSDimitry Andric Token->MacroCtx && 335753f127fSDimitry Andric (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()); 336bdd1243dSDimitry Andric if (!ClosingParen && !TrailingComment && !PreviousLevel) 337753f127fSDimitry Andric llvm::dbgs() << "At token: " << Token->TokenText << "\n"; 338753f127fSDimitry Andric // In addition to the following cases, we can also run into this 339753f127fSDimitry Andric // when a macro call had more arguments than expected; in that case, 3405f757f3fSDimitry Andric // the comma and the remaining tokens in the macro call will 3415f757f3fSDimitry Andric // potentially end up in the line when we finish the expansion. 342753f127fSDimitry Andric // FIXME: Add the information which arguments are unused, and assert 343753f127fSDimitry Andric // one of the cases below plus reconstructed macro argument tokens. 344753f127fSDimitry Andric // assert(ClosingParen || TrailingComment || PreviousLevel); 345753f127fSDimitry Andric } 3465f757f3fSDimitry Andric }()); 347753f127fSDimitry Andric // Handle the remaining open tokens: 348753f127fSDimitry Andric // - expand the closing parenthesis, if it exists, including an optional 349753f127fSDimitry Andric // trailing comment 350753f127fSDimitry Andric // - handle tokens that were already reconstructed at an inner expansion 351753f127fSDimitry Andric // level 352753f127fSDimitry Andric // - handle tokens when a macro call had more than the expected number of 353753f127fSDimitry Andric // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end 354753f127fSDimitry Andric // up with the sequence ", b, c)" being open at the end of the 355753f127fSDimitry Andric // reconstruction; we want to gracefully handle that case 356753f127fSDimitry Andric // 357753f127fSDimitry Andric // FIXME: See the above debug-check for what we will need to do to be 358753f127fSDimitry Andric // able to assert this. 359753f127fSDimitry Andric for (auto T = ActiveExpansions.back().SpelledI; 360753f127fSDimitry Andric T != ActiveExpansions.back().SpelledE; ++T) { 361753f127fSDimitry Andric processNextReconstructed(); 362753f127fSDimitry Andric } 363753f127fSDimitry Andric ActiveExpansions.pop_back(); 364753f127fSDimitry Andric } 365753f127fSDimitry Andric } 366753f127fSDimitry Andric 367753f127fSDimitry Andric void MacroCallReconstructor::debugParentMap() const { 368753f127fSDimitry Andric llvm::DenseSet<FormatToken *> Values; 369753f127fSDimitry Andric for (const auto &P : SpelledParentToReconstructedParent) 370753f127fSDimitry Andric Values.insert(P.second); 371753f127fSDimitry Andric 372753f127fSDimitry Andric for (const auto &P : SpelledParentToReconstructedParent) { 373753f127fSDimitry Andric if (Values.contains(P.first)) 374753f127fSDimitry Andric continue; 375753f127fSDimitry Andric llvm::dbgs() << (P.first ? P.first->TokenText : "<null>"); 376753f127fSDimitry Andric for (auto I = SpelledParentToReconstructedParent.find(P.first), 377753f127fSDimitry Andric E = SpelledParentToReconstructedParent.end(); 378753f127fSDimitry Andric I != E; I = SpelledParentToReconstructedParent.find(I->second)) { 379753f127fSDimitry Andric llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>"); 380753f127fSDimitry Andric } 381753f127fSDimitry Andric llvm::dbgs() << "\n"; 382753f127fSDimitry Andric } 383753f127fSDimitry Andric } 384753f127fSDimitry Andric 385753f127fSDimitry Andric // If visible, add the next token of the reconstructed token sequence to the 386753f127fSDimitry Andric // output. Returns whether reconstruction passed a comma that is part of a 387753f127fSDimitry Andric // macro call. 388753f127fSDimitry Andric bool MacroCallReconstructor::processNextReconstructed() { 389753f127fSDimitry Andric FormatToken *Token = ActiveExpansions.back().SpelledI->Tok; 390753f127fSDimitry Andric ++ActiveExpansions.back().SpelledI; 391753f127fSDimitry Andric if (Token->MacroCtx) { 392753f127fSDimitry Andric // Skip tokens that are not part of the macro call. 393bdd1243dSDimitry Andric if (Token->MacroCtx->Role == MR_Hidden) 394753f127fSDimitry Andric return false; 395753f127fSDimitry Andric // Skip tokens we already expanded during an inner reconstruction. 396753f127fSDimitry Andric // For example, given: #define ID(x) {x} 397753f127fSDimitry Andric // And the call: ID(ID(f)) 398753f127fSDimitry Andric // We get two reconstructions: 399753f127fSDimitry Andric // ID(f) -> {f} 400753f127fSDimitry Andric // ID({f}) -> {{f}} 401753f127fSDimitry Andric // We reconstruct f during the first reconstruction, and skip it during the 402753f127fSDimitry Andric // second reconstruction. 403bdd1243dSDimitry Andric if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) 404753f127fSDimitry Andric return false; 405753f127fSDimitry Andric } 406753f127fSDimitry Andric // Tokens that do not have a macro context are tokens in that are part of the 407753f127fSDimitry Andric // macro call that have not taken part in expansion. 408753f127fSDimitry Andric if (!Token->MacroCtx) { 409753f127fSDimitry Andric // Put the parentheses and commas of a macro call into the same line; 410753f127fSDimitry Andric // if the arguments produce new unwrapped lines, they will become children 411753f127fSDimitry Andric // of the corresponding opening parenthesis or comma tokens in the 412753f127fSDimitry Andric // reconstructed call. 413753f127fSDimitry Andric if (Token->is(tok::l_paren)) { 414753f127fSDimitry Andric MacroCallStructure.push_back(MacroCallState( 415753f127fSDimitry Andric currentLine(), parentLine().Tokens.back()->Tok, Token)); 416753f127fSDimitry Andric // All tokens that are children of the previous line's last token in the 417753f127fSDimitry Andric // reconstructed token stream will now be children of the l_paren token. 418753f127fSDimitry Andric // For example, for the line containing the macro calls: 419753f127fSDimitry Andric // auto x = ID({ID(2)}); 420753f127fSDimitry Andric // We will build up a map <null> -> ( -> ( with the first and second 421753f127fSDimitry Andric // l_paren of the macro call respectively. New lines that come in with a 422753f127fSDimitry Andric // <null> parent will then become children of the l_paren token of the 423753f127fSDimitry Andric // currently innermost macro call. 424753f127fSDimitry Andric SpelledParentToReconstructedParent[MacroCallStructure.back() 425753f127fSDimitry Andric .ParentLastToken] = Token; 426753f127fSDimitry Andric appendToken(Token); 427*0fca6ea1SDimitry Andric prepareParent(Token, /*NewLine=*/true, 428*0fca6ea1SDimitry Andric MacroCallStructure.back().Line->Level); 429753f127fSDimitry Andric Token->MacroParent = true; 430753f127fSDimitry Andric return false; 431753f127fSDimitry Andric } 432753f127fSDimitry Andric if (!MacroCallStructure.empty()) { 433753f127fSDimitry Andric if (Token->is(tok::comma)) { 434753f127fSDimitry Andric // Make new lines inside the next argument children of the comma token. 435753f127fSDimitry Andric SpelledParentToReconstructedParent 436753f127fSDimitry Andric [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token; 437753f127fSDimitry Andric Token->MacroParent = true; 438753f127fSDimitry Andric appendToken(Token, MacroCallStructure.back().Line); 439*0fca6ea1SDimitry Andric prepareParent(Token, /*NewLine=*/true, 440*0fca6ea1SDimitry Andric MacroCallStructure.back().Line->Level); 441753f127fSDimitry Andric return true; 442753f127fSDimitry Andric } 443753f127fSDimitry Andric if (Token->is(tok::r_paren)) { 444753f127fSDimitry Andric appendToken(Token, MacroCallStructure.back().Line); 445753f127fSDimitry Andric SpelledParentToReconstructedParent.erase( 446753f127fSDimitry Andric MacroCallStructure.back().ParentLastToken); 447753f127fSDimitry Andric MacroCallStructure.pop_back(); 448753f127fSDimitry Andric return false; 449753f127fSDimitry Andric } 450753f127fSDimitry Andric } 451753f127fSDimitry Andric } 452753f127fSDimitry Andric // Note that any tokens that are tagged with MR_None have been passed as 453753f127fSDimitry Andric // arguments to the macro that have not been expanded, for example: 454753f127fSDimitry Andric // Given: #define ID(X) x 455753f127fSDimitry Andric // When calling: ID(a, b) 456753f127fSDimitry Andric // 'b' will be part of the reconstructed token stream, but tagged MR_None. 457753f127fSDimitry Andric // Given that erroring out in this case would be disruptive, we continue 458753f127fSDimitry Andric // pushing the (unformatted) token. 459753f127fSDimitry Andric // FIXME: This can lead to unfortunate formatting decisions - give the user 460753f127fSDimitry Andric // a hint that their macro definition is broken. 461753f127fSDimitry Andric appendToken(Token); 462753f127fSDimitry Andric return false; 463753f127fSDimitry Andric } 464753f127fSDimitry Andric 465753f127fSDimitry Andric void MacroCallReconstructor::finalize() { 466753f127fSDimitry Andric #ifndef NDEBUG 467753f127fSDimitry Andric assert(State != Finalized && finished()); 468753f127fSDimitry Andric State = Finalized; 469753f127fSDimitry Andric #endif 470753f127fSDimitry Andric 471753f127fSDimitry Andric // We created corresponding unwrapped lines for each incoming line as children 472753f127fSDimitry Andric // the the toplevel null token. 473753f127fSDimitry Andric assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty()); 474753f127fSDimitry Andric LLVM_DEBUG({ 475753f127fSDimitry Andric llvm::dbgs() << "Finalizing reconstructed lines:\n"; 476753f127fSDimitry Andric debug(Result, 0); 477753f127fSDimitry Andric }); 478753f127fSDimitry Andric 479753f127fSDimitry Andric // The first line becomes the top level line in the resulting unwrapped line. 480753f127fSDimitry Andric LineNode &Top = *Result.Tokens.front(); 481753f127fSDimitry Andric auto *I = Top.Children.begin(); 482753f127fSDimitry Andric // Every subsequent line will become a child of the last token in the previous 483753f127fSDimitry Andric // line, which is the token prior to the first token in the line. 484753f127fSDimitry Andric LineNode *Last = (*I)->Tokens.back().get(); 485753f127fSDimitry Andric ++I; 486753f127fSDimitry Andric for (auto *E = Top.Children.end(); I != E; ++I) { 487753f127fSDimitry Andric assert(Last->Children.empty()); 488753f127fSDimitry Andric Last->Children.push_back(std::move(*I)); 489753f127fSDimitry Andric 490753f127fSDimitry Andric // Mark the previous line's last token as generated by a macro expansion 491753f127fSDimitry Andric // so the formatting algorithm can take that into account. 492753f127fSDimitry Andric Last->Tok->MacroParent = true; 493753f127fSDimitry Andric 494753f127fSDimitry Andric Last = Last->Children.back()->Tokens.back().get(); 495753f127fSDimitry Andric } 496753f127fSDimitry Andric Top.Children.resize(1); 497753f127fSDimitry Andric } 498753f127fSDimitry Andric 499753f127fSDimitry Andric void MacroCallReconstructor::appendToken(FormatToken *Token, 500753f127fSDimitry Andric ReconstructedLine *L) { 501753f127fSDimitry Andric L = L ? L : currentLine(); 502753f127fSDimitry Andric LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n"); 503753f127fSDimitry Andric L->Tokens.push_back(std::make_unique<LineNode>(Token)); 504753f127fSDimitry Andric } 505753f127fSDimitry Andric 506753f127fSDimitry Andric UnwrappedLine 507753f127fSDimitry Andric MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line, 508753f127fSDimitry Andric int Level) { 509753f127fSDimitry Andric UnwrappedLine Result; 510753f127fSDimitry Andric Result.Level = Level; 511753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 512753f127fSDimitry Andric Result.Tokens.push_back(N->Tok); 513753f127fSDimitry Andric UnwrappedLineNode &Current = Result.Tokens.back(); 514*0fca6ea1SDimitry Andric auto NumChildren = 515*0fca6ea1SDimitry Andric std::count_if(N->Children.begin(), N->Children.end(), 516*0fca6ea1SDimitry Andric [](const auto &Child) { return !Child->Tokens.empty(); }); 517*0fca6ea1SDimitry Andric if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) { 518*0fca6ea1SDimitry Andric // If we only have one child, and the child is due to a macro expansion 519*0fca6ea1SDimitry Andric // (either attached to a left parenthesis or comma), merge the child into 520*0fca6ea1SDimitry Andric // the current line to prevent forced breaks for macro arguments. 521*0fca6ea1SDimitry Andric auto *Child = std::find_if( 522*0fca6ea1SDimitry Andric N->Children.begin(), N->Children.end(), 523*0fca6ea1SDimitry Andric [](const auto &Child) { return !Child->Tokens.empty(); }); 524*0fca6ea1SDimitry Andric auto Line = createUnwrappedLine(**Child, Level); 525*0fca6ea1SDimitry Andric Result.Tokens.splice(Result.Tokens.end(), Line.Tokens); 526*0fca6ea1SDimitry Andric } else if (NumChildren > 0) { 527*0fca6ea1SDimitry Andric // When there are multiple children with different indent, make sure that 528*0fca6ea1SDimitry Andric // we indent them: 529*0fca6ea1SDimitry Andric // 1. One level below the current line's level. 530*0fca6ea1SDimitry Andric // 2. At the correct level relative to each other. 531*0fca6ea1SDimitry Andric unsigned MinChildLevel = 532*0fca6ea1SDimitry Andric std::min_element(N->Children.begin(), N->Children.end(), 533*0fca6ea1SDimitry Andric [](const auto &E1, const auto &E2) { 534*0fca6ea1SDimitry Andric return E1->Level < E2->Level; 535*0fca6ea1SDimitry Andric }) 536*0fca6ea1SDimitry Andric ->get() 537*0fca6ea1SDimitry Andric ->Level; 538753f127fSDimitry Andric for (const auto &Child : N->Children) { 539753f127fSDimitry Andric if (Child->Tokens.empty()) 540753f127fSDimitry Andric continue; 541*0fca6ea1SDimitry Andric Current.Children.push_back(createUnwrappedLine( 542*0fca6ea1SDimitry Andric *Child, Level + 1 + (Child->Level - MinChildLevel))); 543753f127fSDimitry Andric } 544753f127fSDimitry Andric } 545753f127fSDimitry Andric } 546753f127fSDimitry Andric return Result; 547753f127fSDimitry Andric } 548753f127fSDimitry Andric 549753f127fSDimitry Andric void MacroCallReconstructor::debug(const ReconstructedLine &Line, int Level) { 550753f127fSDimitry Andric for (int i = 0; i < Level; ++i) 551753f127fSDimitry Andric llvm::dbgs() << " "; 552753f127fSDimitry Andric for (const auto &N : Line.Tokens) { 553753f127fSDimitry Andric if (!N) 554753f127fSDimitry Andric continue; 555753f127fSDimitry Andric if (N->Tok) 556753f127fSDimitry Andric llvm::dbgs() << N->Tok->TokenText << " "; 557753f127fSDimitry Andric for (const auto &Child : N->Children) { 558753f127fSDimitry Andric llvm::dbgs() << "\n"; 559753f127fSDimitry Andric debug(*Child, Level + 1); 560753f127fSDimitry Andric for (int i = 0; i < Level; ++i) 561753f127fSDimitry Andric llvm::dbgs() << " "; 562753f127fSDimitry Andric } 563753f127fSDimitry Andric } 564753f127fSDimitry Andric llvm::dbgs() << "\n"; 565753f127fSDimitry Andric } 566753f127fSDimitry Andric 567753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine & 568753f127fSDimitry Andric MacroCallReconstructor::parentLine() { 569753f127fSDimitry Andric return **std::prev(std::prev(ActiveReconstructedLines.end())); 570753f127fSDimitry Andric } 571753f127fSDimitry Andric 572753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine * 573753f127fSDimitry Andric MacroCallReconstructor::currentLine() { 574753f127fSDimitry Andric return ActiveReconstructedLines.back(); 575753f127fSDimitry Andric } 576753f127fSDimitry Andric 577753f127fSDimitry Andric MacroCallReconstructor::MacroCallState::MacroCallState( 578753f127fSDimitry Andric MacroCallReconstructor::ReconstructedLine *Line, 579753f127fSDimitry Andric FormatToken *ParentLastToken, FormatToken *MacroCallLParen) 580753f127fSDimitry Andric : Line(Line), ParentLastToken(ParentLastToken), 581753f127fSDimitry Andric MacroCallLParen(MacroCallLParen) { 582753f127fSDimitry Andric LLVM_DEBUG( 583753f127fSDimitry Andric llvm::dbgs() << "ParentLastToken: " 584753f127fSDimitry Andric << (ParentLastToken ? ParentLastToken->TokenText : "<null>") 585753f127fSDimitry Andric << "\n"); 586753f127fSDimitry Andric 587753f127fSDimitry Andric assert(MacroCallLParen->is(tok::l_paren)); 588753f127fSDimitry Andric } 589753f127fSDimitry Andric 590753f127fSDimitry Andric } // namespace format 591753f127fSDimitry Andric } // namespace clang 592