1e336b74cSManuel Klimek //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// 2e336b74cSManuel Klimek // 3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information. 5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e336b74cSManuel Klimek // 7e336b74cSManuel Klimek //===----------------------------------------------------------------------===// 8e336b74cSManuel Klimek /// 9e336b74cSManuel Klimek /// \file 10e336b74cSManuel Klimek /// This file contains the implementation of MacroExpander, which handles macro 11e336b74cSManuel Klimek /// configuration and expansion while formatting. 12e336b74cSManuel Klimek /// 13e336b74cSManuel Klimek //===----------------------------------------------------------------------===// 14e336b74cSManuel Klimek 15e336b74cSManuel Klimek #include "Macros.h" 16e336b74cSManuel Klimek 17b2082a98SOwen Pan #include "Encoding.h" 18b2082a98SOwen Pan #include "FormatToken.h" 19e336b74cSManuel Klimek #include "FormatTokenLexer.h" 20b2082a98SOwen Pan #include "clang/Basic/TokenKinds.h" 21b2082a98SOwen Pan #include "clang/Format/Format.h" 22b2082a98SOwen Pan #include "clang/Lex/HeaderSearch.h" 23b2082a98SOwen Pan #include "clang/Lex/HeaderSearchOptions.h" 24b2082a98SOwen Pan #include "clang/Lex/Lexer.h" 25b2082a98SOwen Pan #include "clang/Lex/ModuleLoader.h" 26e336b74cSManuel Klimek #include "clang/Lex/Preprocessor.h" 27b2082a98SOwen Pan #include "clang/Lex/PreprocessorOptions.h" 28b2082a98SOwen Pan #include "llvm/ADT/StringSet.h" 29b2082a98SOwen Pan #include "llvm/Support/ErrorHandling.h" 30e336b74cSManuel Klimek 31e336b74cSManuel Klimek namespace clang { 32e336b74cSManuel Klimek namespace format { 33e336b74cSManuel Klimek 34e336b74cSManuel Klimek struct MacroExpander::Definition { 35e336b74cSManuel Klimek StringRef Name; 36e336b74cSManuel Klimek SmallVector<FormatToken *, 8> Params; 37e336b74cSManuel Klimek SmallVector<FormatToken *, 8> Body; 38e336b74cSManuel Klimek 39e336b74cSManuel Klimek // Map from each argument's name to its position in the argument list. 40e336b74cSManuel Klimek // With "M(x, y) x + y": 41e336b74cSManuel Klimek // x -> 0 42e336b74cSManuel Klimek // y -> 1 43e336b74cSManuel Klimek llvm::StringMap<size_t> ArgMap; 44e336b74cSManuel Klimek 45e336b74cSManuel Klimek bool ObjectLike = true; 46e336b74cSManuel Klimek }; 47e336b74cSManuel Klimek 48e336b74cSManuel Klimek class MacroExpander::DefinitionParser { 49e336b74cSManuel Klimek public: 50e336b74cSManuel Klimek DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { 51e336b74cSManuel Klimek assert(!Tokens.empty()); 52e336b74cSManuel Klimek Current = Tokens[0]; 53e336b74cSManuel Klimek } 54e336b74cSManuel Klimek 55f1191705SNico Weber // Parse the token stream and return the corresponding Definition object. 56e336b74cSManuel Klimek // Returns an empty definition object with a null-Name on error. 57e336b74cSManuel Klimek MacroExpander::Definition parse() { 5891c4db00SOwen Pan if (Current->isNot(tok::identifier)) 59e336b74cSManuel Klimek return {}; 60e336b74cSManuel Klimek Def.Name = Current->TokenText; 61e336b74cSManuel Klimek nextToken(); 62e336b74cSManuel Klimek if (Current->is(tok::l_paren)) { 63e336b74cSManuel Klimek Def.ObjectLike = false; 64e336b74cSManuel Klimek if (!parseParams()) 65e336b74cSManuel Klimek return {}; 66e336b74cSManuel Klimek } 67e336b74cSManuel Klimek if (!parseExpansion()) 68e336b74cSManuel Klimek return {}; 69e336b74cSManuel Klimek 70e336b74cSManuel Klimek return Def; 71e336b74cSManuel Klimek } 72e336b74cSManuel Klimek 73e336b74cSManuel Klimek private: 74e336b74cSManuel Klimek bool parseParams() { 75e336b74cSManuel Klimek assert(Current->is(tok::l_paren)); 76e336b74cSManuel Klimek nextToken(); 77e336b74cSManuel Klimek while (Current->is(tok::identifier)) { 78e336b74cSManuel Klimek Def.Params.push_back(Current); 79e336b74cSManuel Klimek Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; 80e336b74cSManuel Klimek nextToken(); 81e336b74cSManuel Klimek if (Current->isNot(tok::comma)) 82e336b74cSManuel Klimek break; 83e336b74cSManuel Klimek nextToken(); 84e336b74cSManuel Klimek } 85e336b74cSManuel Klimek if (Current->isNot(tok::r_paren)) 86e336b74cSManuel Klimek return false; 87e336b74cSManuel Klimek nextToken(); 88e336b74cSManuel Klimek return true; 89e336b74cSManuel Klimek } 90e336b74cSManuel Klimek 91e336b74cSManuel Klimek bool parseExpansion() { 92e336b74cSManuel Klimek if (!Current->isOneOf(tok::equal, tok::eof)) 93e336b74cSManuel Klimek return false; 94e336b74cSManuel Klimek if (Current->is(tok::equal)) 95e336b74cSManuel Klimek nextToken(); 96e336b74cSManuel Klimek parseTail(); 97e336b74cSManuel Klimek return true; 98e336b74cSManuel Klimek } 99e336b74cSManuel Klimek 100e336b74cSManuel Klimek void parseTail() { 101e336b74cSManuel Klimek while (Current->isNot(tok::eof)) { 102e336b74cSManuel Klimek Def.Body.push_back(Current); 103e336b74cSManuel Klimek nextToken(); 104e336b74cSManuel Klimek } 105e336b74cSManuel Klimek Def.Body.push_back(Current); 106e336b74cSManuel Klimek } 107e336b74cSManuel Klimek 108e336b74cSManuel Klimek void nextToken() { 109e336b74cSManuel Klimek if (Pos + 1 < Tokens.size()) 110e336b74cSManuel Klimek ++Pos; 111e336b74cSManuel Klimek Current = Tokens[Pos]; 112e336b74cSManuel Klimek Current->Finalized = true; 113e336b74cSManuel Klimek } 114e336b74cSManuel Klimek 115e336b74cSManuel Klimek size_t Pos = 0; 116e336b74cSManuel Klimek FormatToken *Current = nullptr; 117e336b74cSManuel Klimek Definition Def; 118e336b74cSManuel Klimek ArrayRef<FormatToken *> Tokens; 119e336b74cSManuel Klimek }; 120e336b74cSManuel Klimek 121e336b74cSManuel Klimek MacroExpander::MacroExpander( 1221c58208dSOwen Pan const std::vector<std::string> &Macros, SourceManager &SourceMgr, 123e336b74cSManuel Klimek const FormatStyle &Style, 124e336b74cSManuel Klimek llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 125e336b74cSManuel Klimek IdentifierTable &IdentTable) 126e336b74cSManuel Klimek : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), 127e336b74cSManuel Klimek IdentTable(IdentTable) { 128630c7360SMarek Kurdej for (const std::string &Macro : Macros) 129e336b74cSManuel Klimek parseDefinition(Macro); 130e336b74cSManuel Klimek } 131e336b74cSManuel Klimek 132e336b74cSManuel Klimek MacroExpander::~MacroExpander() = default; 133e336b74cSManuel Klimek 134e336b74cSManuel Klimek void MacroExpander::parseDefinition(const std::string &Macro) { 135e336b74cSManuel Klimek Buffers.push_back( 136e336b74cSManuel Klimek llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>")); 1371c58208dSOwen Pan FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef()); 138e336b74cSManuel Klimek FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, 139e336b74cSManuel Klimek Allocator, IdentTable); 140e336b74cSManuel Klimek const auto Tokens = Lex.lex(); 141e336b74cSManuel Klimek if (!Tokens.empty()) { 142e336b74cSManuel Klimek DefinitionParser Parser(Tokens); 143e336b74cSManuel Klimek auto Definition = Parser.parse(); 14401402831SManuel Klimek if (Definition.ObjectLike) { 14501402831SManuel Klimek ObjectLike[Definition.Name] = std::move(Definition); 14601402831SManuel Klimek } else { 14701402831SManuel Klimek FunctionLike[Definition.Name][Definition.Params.size()] = 14801402831SManuel Klimek std::move(Definition); 14901402831SManuel Klimek } 150e336b74cSManuel Klimek } 151e336b74cSManuel Klimek } 152e336b74cSManuel Klimek 1531c58208dSOwen Pan bool MacroExpander::defined(StringRef Name) const { 154ea9d4040SKazu Hirata return FunctionLike.contains(Name) || ObjectLike.contains(Name); 155e336b74cSManuel Klimek } 156e336b74cSManuel Klimek 1571c58208dSOwen Pan bool MacroExpander::objectLike(StringRef Name) const { 158ea9d4040SKazu Hirata return ObjectLike.contains(Name); 159e336b74cSManuel Klimek } 160e336b74cSManuel Klimek 1611c58208dSOwen Pan bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { 16201402831SManuel Klimek auto it = FunctionLike.find(Name); 163ea9d4040SKazu Hirata return it != FunctionLike.end() && it->second.contains(Arity); 16401402831SManuel Klimek } 165e336b74cSManuel Klimek 1661c58208dSOwen Pan SmallVector<FormatToken *, 8> 16701402831SManuel Klimek MacroExpander::expand(FormatToken *ID, 16801402831SManuel Klimek std::optional<ArgsList> OptionalArgs) const { 16901402831SManuel Klimek if (OptionalArgs) 17001402831SManuel Klimek assert(hasArity(ID->TokenText, OptionalArgs->size())); 17101402831SManuel Klimek else 17201402831SManuel Klimek assert(objectLike(ID->TokenText)); 17301402831SManuel Klimek const Definition &Def = OptionalArgs 17401402831SManuel Klimek ? FunctionLike.find(ID->TokenText) 17501402831SManuel Klimek ->second.find(OptionalArgs.value().size()) 17601402831SManuel Klimek ->second 17701402831SManuel Klimek : ObjectLike.find(ID->TokenText)->second; 17801402831SManuel Klimek ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); 17901402831SManuel Klimek SmallVector<FormatToken *, 8> Result; 180e336b74cSManuel Klimek // Expand each argument at most once. 181e336b74cSManuel Klimek llvm::StringSet<> ExpandedArgs; 182e336b74cSManuel Klimek 183e336b74cSManuel Klimek // Adds the given token to Result. 184e336b74cSManuel Klimek auto pushToken = [&](FormatToken *Tok) { 185e336b74cSManuel Klimek Tok->MacroCtx->ExpandedFrom.push_back(ID); 186e336b74cSManuel Klimek Result.push_back(Tok); 187e336b74cSManuel Klimek }; 188e336b74cSManuel Klimek 189e336b74cSManuel Klimek // If Tok references a parameter, adds the corresponding argument to Result. 190e336b74cSManuel Klimek // Returns false if Tok does not reference a parameter. 191e336b74cSManuel Klimek auto expandArgument = [&](FormatToken *Tok) -> bool { 192e336b74cSManuel Klimek // If the current token references a parameter, expand the corresponding 193e336b74cSManuel Klimek // argument. 19491591794SKazu Hirata if (Tok->isNot(tok::identifier)) 195e336b74cSManuel Klimek return false; 19691591794SKazu Hirata if (!ExpandedArgs.insert(Tok->TokenText).second) 19791591794SKazu Hirata return false; 198e336b74cSManuel Klimek auto I = Def.ArgMap.find(Tok->TokenText); 199e336b74cSManuel Klimek if (I == Def.ArgMap.end()) 200e336b74cSManuel Klimek return false; 201e336b74cSManuel Klimek // If there are fewer arguments than referenced parameters, treat the 202e336b74cSManuel Klimek // parameter as empty. 203e336b74cSManuel Klimek // FIXME: Potentially fully abort the expansion instead. 204e336b74cSManuel Klimek if (I->getValue() >= Args.size()) 205e336b74cSManuel Klimek return true; 206e336b74cSManuel Klimek for (FormatToken *Arg : Args[I->getValue()]) { 207e336b74cSManuel Klimek // A token can be part of a macro argument at multiple levels. 208e336b74cSManuel Klimek // For example, with "ID(x) x": 209e336b74cSManuel Klimek // in ID(ID(x)), 'x' is expanded first as argument to the inner 210e336b74cSManuel Klimek // ID, then again as argument to the outer ID. We keep the macro 211e336b74cSManuel Klimek // role the token had from the inner expansion. 212e336b74cSManuel Klimek if (!Arg->MacroCtx) 213e336b74cSManuel Klimek Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); 214e336b74cSManuel Klimek pushToken(Arg); 215e336b74cSManuel Klimek } 216e336b74cSManuel Klimek return true; 217e336b74cSManuel Klimek }; 218e336b74cSManuel Klimek 219e336b74cSManuel Klimek // Expand the definition into Result. 220e336b74cSManuel Klimek for (FormatToken *Tok : Def.Body) { 221e336b74cSManuel Klimek if (expandArgument(Tok)) 222e336b74cSManuel Klimek continue; 223e336b74cSManuel Klimek // Create a copy of the tokens from the macro body, i.e. were not provided 224e336b74cSManuel Klimek // by user code. 225e336b74cSManuel Klimek FormatToken *New = new (Allocator.Allocate()) FormatToken; 226e336b74cSManuel Klimek New->copyFrom(*Tok); 227e336b74cSManuel Klimek assert(!New->MacroCtx); 228e336b74cSManuel Klimek // Tokens that are not part of the user code are not formatted. 229e336b74cSManuel Klimek New->MacroCtx = MacroExpansion(MR_Hidden); 230e336b74cSManuel Klimek pushToken(New); 231e336b74cSManuel Klimek } 232e336b74cSManuel Klimek assert(Result.size() >= 1 && Result.back()->is(tok::eof)); 233e336b74cSManuel Klimek if (Result.size() > 1) { 234e336b74cSManuel Klimek ++Result[0]->MacroCtx->StartOfExpansion; 235e336b74cSManuel Klimek ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; 236*cac6777cS天音あめ } else { 237*cac6777cS天音あめ // If the macro expansion is empty, mark the start and end. 238*cac6777cS天音あめ Result[0]->MacroCtx->StartOfExpansion = 1; 239*cac6777cS天音あめ Result[0]->MacroCtx->EndOfExpansion = 1; 240e336b74cSManuel Klimek } 241e336b74cSManuel Klimek return Result; 242e336b74cSManuel Klimek } 243e336b74cSManuel Klimek 244e336b74cSManuel Klimek } // namespace format 245e336b74cSManuel Klimek } // namespace clang 246