1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of MacroExpander, which handles macro 11 /// configuration and expansion while formatting. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "Macros.h" 16 17 #include "Encoding.h" 18 #include "FormatToken.h" 19 #include "FormatTokenLexer.h" 20 #include "clang/Basic/TokenKinds.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/HeaderSearch.h" 23 #include "clang/Lex/HeaderSearchOptions.h" 24 #include "clang/Lex/Lexer.h" 25 #include "clang/Lex/ModuleLoader.h" 26 #include "clang/Lex/Preprocessor.h" 27 #include "clang/Lex/PreprocessorOptions.h" 28 #include "llvm/ADT/StringSet.h" 29 #include "llvm/Support/ErrorHandling.h" 30 31 namespace clang { 32 namespace format { 33 34 struct MacroExpander::Definition { 35 StringRef Name; 36 SmallVector<FormatToken *, 8> Params; 37 SmallVector<FormatToken *, 8> Body; 38 39 // Map from each argument's name to its position in the argument list. 40 // With "M(x, y) x + y": 41 // x -> 0 42 // y -> 1 43 llvm::StringMap<size_t> ArgMap; 44 45 bool ObjectLike = true; 46 }; 47 48 class MacroExpander::DefinitionParser { 49 public: 50 DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { 51 assert(!Tokens.empty()); 52 Current = Tokens[0]; 53 } 54 55 // Parse the token stream and return the corresponding Definition object. 56 // Returns an empty definition object with a null-Name on error. 57 MacroExpander::Definition parse() { 58 if (Current->isNot(tok::identifier)) 59 return {}; 60 Def.Name = Current->TokenText; 61 nextToken(); 62 if (Current->is(tok::l_paren)) { 63 Def.ObjectLike = false; 64 if (!parseParams()) 65 return {}; 66 } 67 if (!parseExpansion()) 68 return {}; 69 70 return Def; 71 } 72 73 private: 74 bool parseParams() { 75 assert(Current->is(tok::l_paren)); 76 nextToken(); 77 while (Current->is(tok::identifier)) { 78 Def.Params.push_back(Current); 79 Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; 80 nextToken(); 81 if (Current->isNot(tok::comma)) 82 break; 83 nextToken(); 84 } 85 if (Current->isNot(tok::r_paren)) 86 return false; 87 nextToken(); 88 return true; 89 } 90 91 bool parseExpansion() { 92 if (!Current->isOneOf(tok::equal, tok::eof)) 93 return false; 94 if (Current->is(tok::equal)) 95 nextToken(); 96 parseTail(); 97 return true; 98 } 99 100 void parseTail() { 101 while (Current->isNot(tok::eof)) { 102 Def.Body.push_back(Current); 103 nextToken(); 104 } 105 Def.Body.push_back(Current); 106 } 107 108 void nextToken() { 109 if (Pos + 1 < Tokens.size()) 110 ++Pos; 111 Current = Tokens[Pos]; 112 Current->Finalized = true; 113 } 114 115 size_t Pos = 0; 116 FormatToken *Current = nullptr; 117 Definition Def; 118 ArrayRef<FormatToken *> Tokens; 119 }; 120 121 MacroExpander::MacroExpander( 122 const std::vector<std::string> &Macros, SourceManager &SourceMgr, 123 const FormatStyle &Style, 124 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 125 IdentifierTable &IdentTable) 126 : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), 127 IdentTable(IdentTable) { 128 for (const std::string &Macro : Macros) 129 parseDefinition(Macro); 130 } 131 132 MacroExpander::~MacroExpander() = default; 133 134 void MacroExpander::parseDefinition(const std::string &Macro) { 135 Buffers.push_back( 136 llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>")); 137 FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef()); 138 FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, 139 Allocator, IdentTable); 140 const auto Tokens = Lex.lex(); 141 if (!Tokens.empty()) { 142 DefinitionParser Parser(Tokens); 143 auto Definition = Parser.parse(); 144 if (Definition.ObjectLike) { 145 ObjectLike[Definition.Name] = std::move(Definition); 146 } else { 147 FunctionLike[Definition.Name][Definition.Params.size()] = 148 std::move(Definition); 149 } 150 } 151 } 152 153 bool MacroExpander::defined(StringRef Name) const { 154 return FunctionLike.contains(Name) || ObjectLike.contains(Name); 155 } 156 157 bool MacroExpander::objectLike(StringRef Name) const { 158 return ObjectLike.contains(Name); 159 } 160 161 bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { 162 auto it = FunctionLike.find(Name); 163 return it != FunctionLike.end() && it->second.contains(Arity); 164 } 165 166 SmallVector<FormatToken *, 8> 167 MacroExpander::expand(FormatToken *ID, 168 std::optional<ArgsList> OptionalArgs) const { 169 if (OptionalArgs) 170 assert(hasArity(ID->TokenText, OptionalArgs->size())); 171 else 172 assert(objectLike(ID->TokenText)); 173 const Definition &Def = OptionalArgs 174 ? FunctionLike.find(ID->TokenText) 175 ->second.find(OptionalArgs.value().size()) 176 ->second 177 : ObjectLike.find(ID->TokenText)->second; 178 ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); 179 SmallVector<FormatToken *, 8> Result; 180 // Expand each argument at most once. 181 llvm::StringSet<> ExpandedArgs; 182 183 // Adds the given token to Result. 184 auto pushToken = [&](FormatToken *Tok) { 185 Tok->MacroCtx->ExpandedFrom.push_back(ID); 186 Result.push_back(Tok); 187 }; 188 189 // If Tok references a parameter, adds the corresponding argument to Result. 190 // Returns false if Tok does not reference a parameter. 191 auto expandArgument = [&](FormatToken *Tok) -> bool { 192 // If the current token references a parameter, expand the corresponding 193 // argument. 194 if (Tok->isNot(tok::identifier)) 195 return false; 196 if (!ExpandedArgs.insert(Tok->TokenText).second) 197 return false; 198 auto I = Def.ArgMap.find(Tok->TokenText); 199 if (I == Def.ArgMap.end()) 200 return false; 201 // If there are fewer arguments than referenced parameters, treat the 202 // parameter as empty. 203 // FIXME: Potentially fully abort the expansion instead. 204 if (I->getValue() >= Args.size()) 205 return true; 206 for (FormatToken *Arg : Args[I->getValue()]) { 207 // A token can be part of a macro argument at multiple levels. 208 // For example, with "ID(x) x": 209 // in ID(ID(x)), 'x' is expanded first as argument to the inner 210 // ID, then again as argument to the outer ID. We keep the macro 211 // role the token had from the inner expansion. 212 if (!Arg->MacroCtx) 213 Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); 214 pushToken(Arg); 215 } 216 return true; 217 }; 218 219 // Expand the definition into Result. 220 for (FormatToken *Tok : Def.Body) { 221 if (expandArgument(Tok)) 222 continue; 223 // Create a copy of the tokens from the macro body, i.e. were not provided 224 // by user code. 225 FormatToken *New = new (Allocator.Allocate()) FormatToken; 226 New->copyFrom(*Tok); 227 assert(!New->MacroCtx); 228 // Tokens that are not part of the user code are not formatted. 229 New->MacroCtx = MacroExpansion(MR_Hidden); 230 pushToken(New); 231 } 232 assert(Result.size() >= 1 && Result.back()->is(tok::eof)); 233 if (Result.size() > 1) { 234 ++Result[0]->MacroCtx->StartOfExpansion; 235 ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; 236 } else { 237 // If the macro expansion is empty, mark the start and end. 238 Result[0]->MacroCtx->StartOfExpansion = 1; 239 Result[0]->MacroCtx->EndOfExpansion = 1; 240 } 241 return Result; 242 } 243 244 } // namespace format 245 } // namespace clang 246