xref: /llvm-project/clang/lib/Format/MacroExpander.cpp (revision cac6777ca14e162eb6e97e20da266802846ab953)
1e336b74cSManuel Klimek //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2e336b74cSManuel Klimek //
3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information.
5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e336b74cSManuel Klimek //
7e336b74cSManuel Klimek //===----------------------------------------------------------------------===//
8e336b74cSManuel Klimek ///
9e336b74cSManuel Klimek /// \file
10e336b74cSManuel Klimek /// This file contains the implementation of MacroExpander, which handles macro
11e336b74cSManuel Klimek /// configuration and expansion while formatting.
12e336b74cSManuel Klimek ///
13e336b74cSManuel Klimek //===----------------------------------------------------------------------===//
14e336b74cSManuel Klimek 
15e336b74cSManuel Klimek #include "Macros.h"
16e336b74cSManuel Klimek 
17b2082a98SOwen Pan #include "Encoding.h"
18b2082a98SOwen Pan #include "FormatToken.h"
19e336b74cSManuel Klimek #include "FormatTokenLexer.h"
20b2082a98SOwen Pan #include "clang/Basic/TokenKinds.h"
21b2082a98SOwen Pan #include "clang/Format/Format.h"
22b2082a98SOwen Pan #include "clang/Lex/HeaderSearch.h"
23b2082a98SOwen Pan #include "clang/Lex/HeaderSearchOptions.h"
24b2082a98SOwen Pan #include "clang/Lex/Lexer.h"
25b2082a98SOwen Pan #include "clang/Lex/ModuleLoader.h"
26e336b74cSManuel Klimek #include "clang/Lex/Preprocessor.h"
27b2082a98SOwen Pan #include "clang/Lex/PreprocessorOptions.h"
28b2082a98SOwen Pan #include "llvm/ADT/StringSet.h"
29b2082a98SOwen Pan #include "llvm/Support/ErrorHandling.h"
30e336b74cSManuel Klimek 
31e336b74cSManuel Klimek namespace clang {
32e336b74cSManuel Klimek namespace format {
33e336b74cSManuel Klimek 
34e336b74cSManuel Klimek struct MacroExpander::Definition {
35e336b74cSManuel Klimek   StringRef Name;
36e336b74cSManuel Klimek   SmallVector<FormatToken *, 8> Params;
37e336b74cSManuel Klimek   SmallVector<FormatToken *, 8> Body;
38e336b74cSManuel Klimek 
39e336b74cSManuel Klimek   // Map from each argument's name to its position in the argument list.
40e336b74cSManuel Klimek   // With "M(x, y) x + y":
41e336b74cSManuel Klimek   //   x -> 0
42e336b74cSManuel Klimek   //   y -> 1
43e336b74cSManuel Klimek   llvm::StringMap<size_t> ArgMap;
44e336b74cSManuel Klimek 
45e336b74cSManuel Klimek   bool ObjectLike = true;
46e336b74cSManuel Klimek };
47e336b74cSManuel Klimek 
48e336b74cSManuel Klimek class MacroExpander::DefinitionParser {
49e336b74cSManuel Klimek public:
50e336b74cSManuel Klimek   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51e336b74cSManuel Klimek     assert(!Tokens.empty());
52e336b74cSManuel Klimek     Current = Tokens[0];
53e336b74cSManuel Klimek   }
54e336b74cSManuel Klimek 
55f1191705SNico Weber   // Parse the token stream and return the corresponding Definition object.
56e336b74cSManuel Klimek   // Returns an empty definition object with a null-Name on error.
57e336b74cSManuel Klimek   MacroExpander::Definition parse() {
5891c4db00SOwen Pan     if (Current->isNot(tok::identifier))
59e336b74cSManuel Klimek       return {};
60e336b74cSManuel Klimek     Def.Name = Current->TokenText;
61e336b74cSManuel Klimek     nextToken();
62e336b74cSManuel Klimek     if (Current->is(tok::l_paren)) {
63e336b74cSManuel Klimek       Def.ObjectLike = false;
64e336b74cSManuel Klimek       if (!parseParams())
65e336b74cSManuel Klimek         return {};
66e336b74cSManuel Klimek     }
67e336b74cSManuel Klimek     if (!parseExpansion())
68e336b74cSManuel Klimek       return {};
69e336b74cSManuel Klimek 
70e336b74cSManuel Klimek     return Def;
71e336b74cSManuel Klimek   }
72e336b74cSManuel Klimek 
73e336b74cSManuel Klimek private:
74e336b74cSManuel Klimek   bool parseParams() {
75e336b74cSManuel Klimek     assert(Current->is(tok::l_paren));
76e336b74cSManuel Klimek     nextToken();
77e336b74cSManuel Klimek     while (Current->is(tok::identifier)) {
78e336b74cSManuel Klimek       Def.Params.push_back(Current);
79e336b74cSManuel Klimek       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80e336b74cSManuel Klimek       nextToken();
81e336b74cSManuel Klimek       if (Current->isNot(tok::comma))
82e336b74cSManuel Klimek         break;
83e336b74cSManuel Klimek       nextToken();
84e336b74cSManuel Klimek     }
85e336b74cSManuel Klimek     if (Current->isNot(tok::r_paren))
86e336b74cSManuel Klimek       return false;
87e336b74cSManuel Klimek     nextToken();
88e336b74cSManuel Klimek     return true;
89e336b74cSManuel Klimek   }
90e336b74cSManuel Klimek 
91e336b74cSManuel Klimek   bool parseExpansion() {
92e336b74cSManuel Klimek     if (!Current->isOneOf(tok::equal, tok::eof))
93e336b74cSManuel Klimek       return false;
94e336b74cSManuel Klimek     if (Current->is(tok::equal))
95e336b74cSManuel Klimek       nextToken();
96e336b74cSManuel Klimek     parseTail();
97e336b74cSManuel Klimek     return true;
98e336b74cSManuel Klimek   }
99e336b74cSManuel Klimek 
100e336b74cSManuel Klimek   void parseTail() {
101e336b74cSManuel Klimek     while (Current->isNot(tok::eof)) {
102e336b74cSManuel Klimek       Def.Body.push_back(Current);
103e336b74cSManuel Klimek       nextToken();
104e336b74cSManuel Klimek     }
105e336b74cSManuel Klimek     Def.Body.push_back(Current);
106e336b74cSManuel Klimek   }
107e336b74cSManuel Klimek 
108e336b74cSManuel Klimek   void nextToken() {
109e336b74cSManuel Klimek     if (Pos + 1 < Tokens.size())
110e336b74cSManuel Klimek       ++Pos;
111e336b74cSManuel Klimek     Current = Tokens[Pos];
112e336b74cSManuel Klimek     Current->Finalized = true;
113e336b74cSManuel Klimek   }
114e336b74cSManuel Klimek 
115e336b74cSManuel Klimek   size_t Pos = 0;
116e336b74cSManuel Klimek   FormatToken *Current = nullptr;
117e336b74cSManuel Klimek   Definition Def;
118e336b74cSManuel Klimek   ArrayRef<FormatToken *> Tokens;
119e336b74cSManuel Klimek };
120e336b74cSManuel Klimek 
121e336b74cSManuel Klimek MacroExpander::MacroExpander(
1221c58208dSOwen Pan     const std::vector<std::string> &Macros, SourceManager &SourceMgr,
123e336b74cSManuel Klimek     const FormatStyle &Style,
124e336b74cSManuel Klimek     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125e336b74cSManuel Klimek     IdentifierTable &IdentTable)
126e336b74cSManuel Klimek     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127e336b74cSManuel Klimek       IdentTable(IdentTable) {
128630c7360SMarek Kurdej   for (const std::string &Macro : Macros)
129e336b74cSManuel Klimek     parseDefinition(Macro);
130e336b74cSManuel Klimek }
131e336b74cSManuel Klimek 
132e336b74cSManuel Klimek MacroExpander::~MacroExpander() = default;
133e336b74cSManuel Klimek 
134e336b74cSManuel Klimek void MacroExpander::parseDefinition(const std::string &Macro) {
135e336b74cSManuel Klimek   Buffers.push_back(
136e336b74cSManuel Klimek       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
1371c58208dSOwen Pan   FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138e336b74cSManuel Klimek   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139e336b74cSManuel Klimek                        Allocator, IdentTable);
140e336b74cSManuel Klimek   const auto Tokens = Lex.lex();
141e336b74cSManuel Klimek   if (!Tokens.empty()) {
142e336b74cSManuel Klimek     DefinitionParser Parser(Tokens);
143e336b74cSManuel Klimek     auto Definition = Parser.parse();
14401402831SManuel Klimek     if (Definition.ObjectLike) {
14501402831SManuel Klimek       ObjectLike[Definition.Name] = std::move(Definition);
14601402831SManuel Klimek     } else {
14701402831SManuel Klimek       FunctionLike[Definition.Name][Definition.Params.size()] =
14801402831SManuel Klimek           std::move(Definition);
14901402831SManuel Klimek     }
150e336b74cSManuel Klimek   }
151e336b74cSManuel Klimek }
152e336b74cSManuel Klimek 
1531c58208dSOwen Pan bool MacroExpander::defined(StringRef Name) const {
154ea9d4040SKazu Hirata   return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155e336b74cSManuel Klimek }
156e336b74cSManuel Klimek 
1571c58208dSOwen Pan bool MacroExpander::objectLike(StringRef Name) const {
158ea9d4040SKazu Hirata   return ObjectLike.contains(Name);
159e336b74cSManuel Klimek }
160e336b74cSManuel Klimek 
1611c58208dSOwen Pan bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
16201402831SManuel Klimek   auto it = FunctionLike.find(Name);
163ea9d4040SKazu Hirata   return it != FunctionLike.end() && it->second.contains(Arity);
16401402831SManuel Klimek }
165e336b74cSManuel Klimek 
1661c58208dSOwen Pan SmallVector<FormatToken *, 8>
16701402831SManuel Klimek MacroExpander::expand(FormatToken *ID,
16801402831SManuel Klimek                       std::optional<ArgsList> OptionalArgs) const {
16901402831SManuel Klimek   if (OptionalArgs)
17001402831SManuel Klimek     assert(hasArity(ID->TokenText, OptionalArgs->size()));
17101402831SManuel Klimek   else
17201402831SManuel Klimek     assert(objectLike(ID->TokenText));
17301402831SManuel Klimek   const Definition &Def = OptionalArgs
17401402831SManuel Klimek                               ? FunctionLike.find(ID->TokenText)
17501402831SManuel Klimek                                     ->second.find(OptionalArgs.value().size())
17601402831SManuel Klimek                                     ->second
17701402831SManuel Klimek                               : ObjectLike.find(ID->TokenText)->second;
17801402831SManuel Klimek   ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
17901402831SManuel Klimek   SmallVector<FormatToken *, 8> Result;
180e336b74cSManuel Klimek   // Expand each argument at most once.
181e336b74cSManuel Klimek   llvm::StringSet<> ExpandedArgs;
182e336b74cSManuel Klimek 
183e336b74cSManuel Klimek   // Adds the given token to Result.
184e336b74cSManuel Klimek   auto pushToken = [&](FormatToken *Tok) {
185e336b74cSManuel Klimek     Tok->MacroCtx->ExpandedFrom.push_back(ID);
186e336b74cSManuel Klimek     Result.push_back(Tok);
187e336b74cSManuel Klimek   };
188e336b74cSManuel Klimek 
189e336b74cSManuel Klimek   // If Tok references a parameter, adds the corresponding argument to Result.
190e336b74cSManuel Klimek   // Returns false if Tok does not reference a parameter.
191e336b74cSManuel Klimek   auto expandArgument = [&](FormatToken *Tok) -> bool {
192e336b74cSManuel Klimek     // If the current token references a parameter, expand the corresponding
193e336b74cSManuel Klimek     // argument.
19491591794SKazu Hirata     if (Tok->isNot(tok::identifier))
195e336b74cSManuel Klimek       return false;
19691591794SKazu Hirata     if (!ExpandedArgs.insert(Tok->TokenText).second)
19791591794SKazu Hirata       return false;
198e336b74cSManuel Klimek     auto I = Def.ArgMap.find(Tok->TokenText);
199e336b74cSManuel Klimek     if (I == Def.ArgMap.end())
200e336b74cSManuel Klimek       return false;
201e336b74cSManuel Klimek     // If there are fewer arguments than referenced parameters, treat the
202e336b74cSManuel Klimek     // parameter as empty.
203e336b74cSManuel Klimek     // FIXME: Potentially fully abort the expansion instead.
204e336b74cSManuel Klimek     if (I->getValue() >= Args.size())
205e336b74cSManuel Klimek       return true;
206e336b74cSManuel Klimek     for (FormatToken *Arg : Args[I->getValue()]) {
207e336b74cSManuel Klimek       // A token can be part of a macro argument at multiple levels.
208e336b74cSManuel Klimek       // For example, with "ID(x) x":
209e336b74cSManuel Klimek       // in ID(ID(x)), 'x' is expanded first as argument to the inner
210e336b74cSManuel Klimek       // ID, then again as argument to the outer ID. We keep the macro
211e336b74cSManuel Klimek       // role the token had from the inner expansion.
212e336b74cSManuel Klimek       if (!Arg->MacroCtx)
213e336b74cSManuel Klimek         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
214e336b74cSManuel Klimek       pushToken(Arg);
215e336b74cSManuel Klimek     }
216e336b74cSManuel Klimek     return true;
217e336b74cSManuel Klimek   };
218e336b74cSManuel Klimek 
219e336b74cSManuel Klimek   // Expand the definition into Result.
220e336b74cSManuel Klimek   for (FormatToken *Tok : Def.Body) {
221e336b74cSManuel Klimek     if (expandArgument(Tok))
222e336b74cSManuel Klimek       continue;
223e336b74cSManuel Klimek     // Create a copy of the tokens from the macro body, i.e. were not provided
224e336b74cSManuel Klimek     // by user code.
225e336b74cSManuel Klimek     FormatToken *New = new (Allocator.Allocate()) FormatToken;
226e336b74cSManuel Klimek     New->copyFrom(*Tok);
227e336b74cSManuel Klimek     assert(!New->MacroCtx);
228e336b74cSManuel Klimek     // Tokens that are not part of the user code are not formatted.
229e336b74cSManuel Klimek     New->MacroCtx = MacroExpansion(MR_Hidden);
230e336b74cSManuel Klimek     pushToken(New);
231e336b74cSManuel Klimek   }
232e336b74cSManuel Klimek   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
233e336b74cSManuel Klimek   if (Result.size() > 1) {
234e336b74cSManuel Klimek     ++Result[0]->MacroCtx->StartOfExpansion;
235e336b74cSManuel Klimek     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
236*cac6777cS天音あめ   } else {
237*cac6777cS天音あめ     // If the macro expansion is empty, mark the start and end.
238*cac6777cS天音あめ     Result[0]->MacroCtx->StartOfExpansion = 1;
239*cac6777cS天音あめ     Result[0]->MacroCtx->EndOfExpansion = 1;
240e336b74cSManuel Klimek   }
241e336b74cSManuel Klimek   return Result;
242e336b74cSManuel Klimek }
243e336b74cSManuel Klimek 
244e336b74cSManuel Klimek } // namespace format
245e336b74cSManuel Klimek } // namespace clang
246