1e8d8bef9SDimitry Andric //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// 2e8d8bef9SDimitry Andric // 3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric /// 9e8d8bef9SDimitry Andric /// \file 10e8d8bef9SDimitry Andric /// This file contains the implementation of MacroExpander, which handles macro 11e8d8bef9SDimitry Andric /// configuration and expansion while formatting. 12e8d8bef9SDimitry Andric /// 13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 14e8d8bef9SDimitry Andric 15e8d8bef9SDimitry Andric #include "Macros.h" 16e8d8bef9SDimitry Andric 17e8d8bef9SDimitry Andric #include "Encoding.h" 18e8d8bef9SDimitry Andric #include "FormatToken.h" 19e8d8bef9SDimitry Andric #include "FormatTokenLexer.h" 20e8d8bef9SDimitry Andric #include "clang/Basic/TokenKinds.h" 21e8d8bef9SDimitry Andric #include "clang/Format/Format.h" 22e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearch.h" 23e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearchOptions.h" 24e8d8bef9SDimitry Andric #include "clang/Lex/Lexer.h" 25e8d8bef9SDimitry Andric #include "clang/Lex/ModuleLoader.h" 26e8d8bef9SDimitry Andric #include "clang/Lex/Preprocessor.h" 27e8d8bef9SDimitry Andric #include "clang/Lex/PreprocessorOptions.h" 28e8d8bef9SDimitry Andric #include "llvm/ADT/StringSet.h" 29e8d8bef9SDimitry Andric #include "llvm/Support/ErrorHandling.h" 30e8d8bef9SDimitry Andric 31e8d8bef9SDimitry Andric namespace clang { 32e8d8bef9SDimitry Andric namespace format { 33e8d8bef9SDimitry Andric 34e8d8bef9SDimitry Andric struct MacroExpander::Definition { 35e8d8bef9SDimitry Andric StringRef Name; 36e8d8bef9SDimitry Andric SmallVector<FormatToken *, 8> Params; 37e8d8bef9SDimitry Andric SmallVector<FormatToken *, 8> Body; 38e8d8bef9SDimitry Andric 39e8d8bef9SDimitry Andric // Map from each argument's name to its position in the argument list. 40e8d8bef9SDimitry Andric // With "M(x, y) x + y": 41e8d8bef9SDimitry Andric // x -> 0 42e8d8bef9SDimitry Andric // y -> 1 43e8d8bef9SDimitry Andric llvm::StringMap<size_t> ArgMap; 44e8d8bef9SDimitry Andric 45e8d8bef9SDimitry Andric bool ObjectLike = true; 46e8d8bef9SDimitry Andric }; 47e8d8bef9SDimitry Andric 48e8d8bef9SDimitry Andric class MacroExpander::DefinitionParser { 49e8d8bef9SDimitry Andric public: 50e8d8bef9SDimitry Andric DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { 51e8d8bef9SDimitry Andric assert(!Tokens.empty()); 52e8d8bef9SDimitry Andric Current = Tokens[0]; 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric 55349cc55cSDimitry Andric // Parse the token stream and return the corresponding Definition object. 56e8d8bef9SDimitry Andric // Returns an empty definition object with a null-Name on error. 57e8d8bef9SDimitry Andric MacroExpander::Definition parse() { 585f757f3fSDimitry Andric if (Current->isNot(tok::identifier)) 59e8d8bef9SDimitry Andric return {}; 60e8d8bef9SDimitry Andric Def.Name = Current->TokenText; 61e8d8bef9SDimitry Andric nextToken(); 62e8d8bef9SDimitry Andric if (Current->is(tok::l_paren)) { 63e8d8bef9SDimitry Andric Def.ObjectLike = false; 64e8d8bef9SDimitry Andric if (!parseParams()) 65e8d8bef9SDimitry Andric return {}; 66e8d8bef9SDimitry Andric } 67e8d8bef9SDimitry Andric if (!parseExpansion()) 68e8d8bef9SDimitry Andric return {}; 69e8d8bef9SDimitry Andric 70e8d8bef9SDimitry Andric return Def; 71e8d8bef9SDimitry Andric } 72e8d8bef9SDimitry Andric 73e8d8bef9SDimitry Andric private: 74e8d8bef9SDimitry Andric bool parseParams() { 75e8d8bef9SDimitry Andric assert(Current->is(tok::l_paren)); 76e8d8bef9SDimitry Andric nextToken(); 77e8d8bef9SDimitry Andric while (Current->is(tok::identifier)) { 78e8d8bef9SDimitry Andric Def.Params.push_back(Current); 79e8d8bef9SDimitry Andric Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; 80e8d8bef9SDimitry Andric nextToken(); 81e8d8bef9SDimitry Andric if (Current->isNot(tok::comma)) 82e8d8bef9SDimitry Andric break; 83e8d8bef9SDimitry Andric nextToken(); 84e8d8bef9SDimitry Andric } 85e8d8bef9SDimitry Andric if (Current->isNot(tok::r_paren)) 86e8d8bef9SDimitry Andric return false; 87e8d8bef9SDimitry Andric nextToken(); 88e8d8bef9SDimitry Andric return true; 89e8d8bef9SDimitry Andric } 90e8d8bef9SDimitry Andric 91e8d8bef9SDimitry Andric bool parseExpansion() { 92e8d8bef9SDimitry Andric if (!Current->isOneOf(tok::equal, tok::eof)) 93e8d8bef9SDimitry Andric return false; 94e8d8bef9SDimitry Andric if (Current->is(tok::equal)) 95e8d8bef9SDimitry Andric nextToken(); 96e8d8bef9SDimitry Andric parseTail(); 97e8d8bef9SDimitry Andric return true; 98e8d8bef9SDimitry Andric } 99e8d8bef9SDimitry Andric 100e8d8bef9SDimitry Andric void parseTail() { 101e8d8bef9SDimitry Andric while (Current->isNot(tok::eof)) { 102e8d8bef9SDimitry Andric Def.Body.push_back(Current); 103e8d8bef9SDimitry Andric nextToken(); 104e8d8bef9SDimitry Andric } 105e8d8bef9SDimitry Andric Def.Body.push_back(Current); 106e8d8bef9SDimitry Andric } 107e8d8bef9SDimitry Andric 108e8d8bef9SDimitry Andric void nextToken() { 109e8d8bef9SDimitry Andric if (Pos + 1 < Tokens.size()) 110e8d8bef9SDimitry Andric ++Pos; 111e8d8bef9SDimitry Andric Current = Tokens[Pos]; 112e8d8bef9SDimitry Andric Current->Finalized = true; 113e8d8bef9SDimitry Andric } 114e8d8bef9SDimitry Andric 115e8d8bef9SDimitry Andric size_t Pos = 0; 116e8d8bef9SDimitry Andric FormatToken *Current = nullptr; 117e8d8bef9SDimitry Andric Definition Def; 118e8d8bef9SDimitry Andric ArrayRef<FormatToken *> Tokens; 119e8d8bef9SDimitry Andric }; 120e8d8bef9SDimitry Andric 121e8d8bef9SDimitry Andric MacroExpander::MacroExpander( 122*0fca6ea1SDimitry Andric const std::vector<std::string> &Macros, SourceManager &SourceMgr, 123e8d8bef9SDimitry Andric const FormatStyle &Style, 124e8d8bef9SDimitry Andric llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 125e8d8bef9SDimitry Andric IdentifierTable &IdentTable) 126e8d8bef9SDimitry Andric : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), 127e8d8bef9SDimitry Andric IdentTable(IdentTable) { 12881ad6265SDimitry Andric for (const std::string &Macro : Macros) 129e8d8bef9SDimitry Andric parseDefinition(Macro); 130e8d8bef9SDimitry Andric } 131e8d8bef9SDimitry Andric 132e8d8bef9SDimitry Andric MacroExpander::~MacroExpander() = default; 133e8d8bef9SDimitry Andric 134e8d8bef9SDimitry Andric void MacroExpander::parseDefinition(const std::string &Macro) { 135e8d8bef9SDimitry Andric Buffers.push_back( 136e8d8bef9SDimitry Andric llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>")); 137*0fca6ea1SDimitry Andric FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef()); 138e8d8bef9SDimitry Andric FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, 139e8d8bef9SDimitry Andric Allocator, IdentTable); 140e8d8bef9SDimitry Andric const auto Tokens = Lex.lex(); 141e8d8bef9SDimitry Andric if (!Tokens.empty()) { 142e8d8bef9SDimitry Andric DefinitionParser Parser(Tokens); 143e8d8bef9SDimitry Andric auto Definition = Parser.parse(); 14406c3fb27SDimitry Andric if (Definition.ObjectLike) { 14506c3fb27SDimitry Andric ObjectLike[Definition.Name] = std::move(Definition); 14606c3fb27SDimitry Andric } else { 14706c3fb27SDimitry Andric FunctionLike[Definition.Name][Definition.Params.size()] = 14806c3fb27SDimitry Andric std::move(Definition); 14906c3fb27SDimitry Andric } 150e8d8bef9SDimitry Andric } 151e8d8bef9SDimitry Andric } 152e8d8bef9SDimitry Andric 153*0fca6ea1SDimitry Andric bool MacroExpander::defined(StringRef Name) const { 15406c3fb27SDimitry Andric return FunctionLike.contains(Name) || ObjectLike.contains(Name); 155e8d8bef9SDimitry Andric } 156e8d8bef9SDimitry Andric 157*0fca6ea1SDimitry Andric bool MacroExpander::objectLike(StringRef Name) const { 15806c3fb27SDimitry Andric return ObjectLike.contains(Name); 159e8d8bef9SDimitry Andric } 160e8d8bef9SDimitry Andric 161*0fca6ea1SDimitry Andric bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { 16206c3fb27SDimitry Andric auto it = FunctionLike.find(Name); 16306c3fb27SDimitry Andric return it != FunctionLike.end() && it->second.contains(Arity); 16406c3fb27SDimitry Andric } 165e8d8bef9SDimitry Andric 166*0fca6ea1SDimitry Andric SmallVector<FormatToken *, 8> 16706c3fb27SDimitry Andric MacroExpander::expand(FormatToken *ID, 16806c3fb27SDimitry Andric std::optional<ArgsList> OptionalArgs) const { 16906c3fb27SDimitry Andric if (OptionalArgs) 17006c3fb27SDimitry Andric assert(hasArity(ID->TokenText, OptionalArgs->size())); 17106c3fb27SDimitry Andric else 17206c3fb27SDimitry Andric assert(objectLike(ID->TokenText)); 17306c3fb27SDimitry Andric const Definition &Def = OptionalArgs 17406c3fb27SDimitry Andric ? FunctionLike.find(ID->TokenText) 17506c3fb27SDimitry Andric ->second.find(OptionalArgs.value().size()) 17606c3fb27SDimitry Andric ->second 17706c3fb27SDimitry Andric : ObjectLike.find(ID->TokenText)->second; 17806c3fb27SDimitry Andric ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); 17906c3fb27SDimitry Andric SmallVector<FormatToken *, 8> Result; 180e8d8bef9SDimitry Andric // Expand each argument at most once. 181e8d8bef9SDimitry Andric llvm::StringSet<> ExpandedArgs; 182e8d8bef9SDimitry Andric 183e8d8bef9SDimitry Andric // Adds the given token to Result. 184e8d8bef9SDimitry Andric auto pushToken = [&](FormatToken *Tok) { 185e8d8bef9SDimitry Andric Tok->MacroCtx->ExpandedFrom.push_back(ID); 186e8d8bef9SDimitry Andric Result.push_back(Tok); 187e8d8bef9SDimitry Andric }; 188e8d8bef9SDimitry Andric 189e8d8bef9SDimitry Andric // If Tok references a parameter, adds the corresponding argument to Result. 190e8d8bef9SDimitry Andric // Returns false if Tok does not reference a parameter. 191e8d8bef9SDimitry Andric auto expandArgument = [&](FormatToken *Tok) -> bool { 192e8d8bef9SDimitry Andric // If the current token references a parameter, expand the corresponding 193e8d8bef9SDimitry Andric // argument. 1945f757f3fSDimitry Andric if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText)) 195e8d8bef9SDimitry Andric return false; 196e8d8bef9SDimitry Andric ExpandedArgs.insert(Tok->TokenText); 197e8d8bef9SDimitry Andric auto I = Def.ArgMap.find(Tok->TokenText); 198e8d8bef9SDimitry Andric if (I == Def.ArgMap.end()) 199e8d8bef9SDimitry Andric return false; 200e8d8bef9SDimitry Andric // If there are fewer arguments than referenced parameters, treat the 201e8d8bef9SDimitry Andric // parameter as empty. 202e8d8bef9SDimitry Andric // FIXME: Potentially fully abort the expansion instead. 203e8d8bef9SDimitry Andric if (I->getValue() >= Args.size()) 204e8d8bef9SDimitry Andric return true; 205e8d8bef9SDimitry Andric for (FormatToken *Arg : Args[I->getValue()]) { 206e8d8bef9SDimitry Andric // A token can be part of a macro argument at multiple levels. 207e8d8bef9SDimitry Andric // For example, with "ID(x) x": 208e8d8bef9SDimitry Andric // in ID(ID(x)), 'x' is expanded first as argument to the inner 209e8d8bef9SDimitry Andric // ID, then again as argument to the outer ID. We keep the macro 210e8d8bef9SDimitry Andric // role the token had from the inner expansion. 211e8d8bef9SDimitry Andric if (!Arg->MacroCtx) 212e8d8bef9SDimitry Andric Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); 213e8d8bef9SDimitry Andric pushToken(Arg); 214e8d8bef9SDimitry Andric } 215e8d8bef9SDimitry Andric return true; 216e8d8bef9SDimitry Andric }; 217e8d8bef9SDimitry Andric 218e8d8bef9SDimitry Andric // Expand the definition into Result. 219e8d8bef9SDimitry Andric for (FormatToken *Tok : Def.Body) { 220e8d8bef9SDimitry Andric if (expandArgument(Tok)) 221e8d8bef9SDimitry Andric continue; 222e8d8bef9SDimitry Andric // Create a copy of the tokens from the macro body, i.e. were not provided 223e8d8bef9SDimitry Andric // by user code. 224e8d8bef9SDimitry Andric FormatToken *New = new (Allocator.Allocate()) FormatToken; 225e8d8bef9SDimitry Andric New->copyFrom(*Tok); 226e8d8bef9SDimitry Andric assert(!New->MacroCtx); 227e8d8bef9SDimitry Andric // Tokens that are not part of the user code are not formatted. 228e8d8bef9SDimitry Andric New->MacroCtx = MacroExpansion(MR_Hidden); 229e8d8bef9SDimitry Andric pushToken(New); 230e8d8bef9SDimitry Andric } 231e8d8bef9SDimitry Andric assert(Result.size() >= 1 && Result.back()->is(tok::eof)); 232e8d8bef9SDimitry Andric if (Result.size() > 1) { 233e8d8bef9SDimitry Andric ++Result[0]->MacroCtx->StartOfExpansion; 234e8d8bef9SDimitry Andric ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; 235e8d8bef9SDimitry Andric } 236e8d8bef9SDimitry Andric return Result; 237e8d8bef9SDimitry Andric } 238e8d8bef9SDimitry Andric 239e8d8bef9SDimitry Andric } // namespace format 240e8d8bef9SDimitry Andric } // namespace clang 241