xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/MacroExpander.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1e8d8bef9SDimitry Andric //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric ///
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric /// This file contains the implementation of MacroExpander, which handles macro
11e8d8bef9SDimitry Andric /// configuration and expansion while formatting.
12e8d8bef9SDimitry Andric ///
13e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
14e8d8bef9SDimitry Andric 
15e8d8bef9SDimitry Andric #include "Macros.h"
16e8d8bef9SDimitry Andric 
17e8d8bef9SDimitry Andric #include "Encoding.h"
18e8d8bef9SDimitry Andric #include "FormatToken.h"
19e8d8bef9SDimitry Andric #include "FormatTokenLexer.h"
20e8d8bef9SDimitry Andric #include "clang/Basic/TokenKinds.h"
21e8d8bef9SDimitry Andric #include "clang/Format/Format.h"
22e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearch.h"
23e8d8bef9SDimitry Andric #include "clang/Lex/HeaderSearchOptions.h"
24e8d8bef9SDimitry Andric #include "clang/Lex/Lexer.h"
25e8d8bef9SDimitry Andric #include "clang/Lex/ModuleLoader.h"
26e8d8bef9SDimitry Andric #include "clang/Lex/Preprocessor.h"
27e8d8bef9SDimitry Andric #include "clang/Lex/PreprocessorOptions.h"
28e8d8bef9SDimitry Andric #include "llvm/ADT/StringSet.h"
29e8d8bef9SDimitry Andric #include "llvm/Support/ErrorHandling.h"
30e8d8bef9SDimitry Andric 
31e8d8bef9SDimitry Andric namespace clang {
32e8d8bef9SDimitry Andric namespace format {
33e8d8bef9SDimitry Andric 
34e8d8bef9SDimitry Andric struct MacroExpander::Definition {
35e8d8bef9SDimitry Andric   StringRef Name;
36e8d8bef9SDimitry Andric   SmallVector<FormatToken *, 8> Params;
37e8d8bef9SDimitry Andric   SmallVector<FormatToken *, 8> Body;
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric   // Map from each argument's name to its position in the argument list.
40e8d8bef9SDimitry Andric   // With "M(x, y) x + y":
41e8d8bef9SDimitry Andric   //   x -> 0
42e8d8bef9SDimitry Andric   //   y -> 1
43e8d8bef9SDimitry Andric   llvm::StringMap<size_t> ArgMap;
44e8d8bef9SDimitry Andric 
45e8d8bef9SDimitry Andric   bool ObjectLike = true;
46e8d8bef9SDimitry Andric };
47e8d8bef9SDimitry Andric 
48e8d8bef9SDimitry Andric class MacroExpander::DefinitionParser {
49e8d8bef9SDimitry Andric public:
50e8d8bef9SDimitry Andric   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51e8d8bef9SDimitry Andric     assert(!Tokens.empty());
52e8d8bef9SDimitry Andric     Current = Tokens[0];
53e8d8bef9SDimitry Andric   }
54e8d8bef9SDimitry Andric 
55349cc55cSDimitry Andric   // Parse the token stream and return the corresponding Definition object.
56e8d8bef9SDimitry Andric   // Returns an empty definition object with a null-Name on error.
57e8d8bef9SDimitry Andric   MacroExpander::Definition parse() {
585f757f3fSDimitry Andric     if (Current->isNot(tok::identifier))
59e8d8bef9SDimitry Andric       return {};
60e8d8bef9SDimitry Andric     Def.Name = Current->TokenText;
61e8d8bef9SDimitry Andric     nextToken();
62e8d8bef9SDimitry Andric     if (Current->is(tok::l_paren)) {
63e8d8bef9SDimitry Andric       Def.ObjectLike = false;
64e8d8bef9SDimitry Andric       if (!parseParams())
65e8d8bef9SDimitry Andric         return {};
66e8d8bef9SDimitry Andric     }
67e8d8bef9SDimitry Andric     if (!parseExpansion())
68e8d8bef9SDimitry Andric       return {};
69e8d8bef9SDimitry Andric 
70e8d8bef9SDimitry Andric     return Def;
71e8d8bef9SDimitry Andric   }
72e8d8bef9SDimitry Andric 
73e8d8bef9SDimitry Andric private:
74e8d8bef9SDimitry Andric   bool parseParams() {
75e8d8bef9SDimitry Andric     assert(Current->is(tok::l_paren));
76e8d8bef9SDimitry Andric     nextToken();
77e8d8bef9SDimitry Andric     while (Current->is(tok::identifier)) {
78e8d8bef9SDimitry Andric       Def.Params.push_back(Current);
79e8d8bef9SDimitry Andric       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80e8d8bef9SDimitry Andric       nextToken();
81e8d8bef9SDimitry Andric       if (Current->isNot(tok::comma))
82e8d8bef9SDimitry Andric         break;
83e8d8bef9SDimitry Andric       nextToken();
84e8d8bef9SDimitry Andric     }
85e8d8bef9SDimitry Andric     if (Current->isNot(tok::r_paren))
86e8d8bef9SDimitry Andric       return false;
87e8d8bef9SDimitry Andric     nextToken();
88e8d8bef9SDimitry Andric     return true;
89e8d8bef9SDimitry Andric   }
90e8d8bef9SDimitry Andric 
91e8d8bef9SDimitry Andric   bool parseExpansion() {
92e8d8bef9SDimitry Andric     if (!Current->isOneOf(tok::equal, tok::eof))
93e8d8bef9SDimitry Andric       return false;
94e8d8bef9SDimitry Andric     if (Current->is(tok::equal))
95e8d8bef9SDimitry Andric       nextToken();
96e8d8bef9SDimitry Andric     parseTail();
97e8d8bef9SDimitry Andric     return true;
98e8d8bef9SDimitry Andric   }
99e8d8bef9SDimitry Andric 
100e8d8bef9SDimitry Andric   void parseTail() {
101e8d8bef9SDimitry Andric     while (Current->isNot(tok::eof)) {
102e8d8bef9SDimitry Andric       Def.Body.push_back(Current);
103e8d8bef9SDimitry Andric       nextToken();
104e8d8bef9SDimitry Andric     }
105e8d8bef9SDimitry Andric     Def.Body.push_back(Current);
106e8d8bef9SDimitry Andric   }
107e8d8bef9SDimitry Andric 
108e8d8bef9SDimitry Andric   void nextToken() {
109e8d8bef9SDimitry Andric     if (Pos + 1 < Tokens.size())
110e8d8bef9SDimitry Andric       ++Pos;
111e8d8bef9SDimitry Andric     Current = Tokens[Pos];
112e8d8bef9SDimitry Andric     Current->Finalized = true;
113e8d8bef9SDimitry Andric   }
114e8d8bef9SDimitry Andric 
115e8d8bef9SDimitry Andric   size_t Pos = 0;
116e8d8bef9SDimitry Andric   FormatToken *Current = nullptr;
117e8d8bef9SDimitry Andric   Definition Def;
118e8d8bef9SDimitry Andric   ArrayRef<FormatToken *> Tokens;
119e8d8bef9SDimitry Andric };
120e8d8bef9SDimitry Andric 
121e8d8bef9SDimitry Andric MacroExpander::MacroExpander(
122*0fca6ea1SDimitry Andric     const std::vector<std::string> &Macros, SourceManager &SourceMgr,
123e8d8bef9SDimitry Andric     const FormatStyle &Style,
124e8d8bef9SDimitry Andric     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125e8d8bef9SDimitry Andric     IdentifierTable &IdentTable)
126e8d8bef9SDimitry Andric     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127e8d8bef9SDimitry Andric       IdentTable(IdentTable) {
12881ad6265SDimitry Andric   for (const std::string &Macro : Macros)
129e8d8bef9SDimitry Andric     parseDefinition(Macro);
130e8d8bef9SDimitry Andric }
131e8d8bef9SDimitry Andric 
132e8d8bef9SDimitry Andric MacroExpander::~MacroExpander() = default;
133e8d8bef9SDimitry Andric 
134e8d8bef9SDimitry Andric void MacroExpander::parseDefinition(const std::string &Macro) {
135e8d8bef9SDimitry Andric   Buffers.push_back(
136e8d8bef9SDimitry Andric       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
137*0fca6ea1SDimitry Andric   FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138e8d8bef9SDimitry Andric   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139e8d8bef9SDimitry Andric                        Allocator, IdentTable);
140e8d8bef9SDimitry Andric   const auto Tokens = Lex.lex();
141e8d8bef9SDimitry Andric   if (!Tokens.empty()) {
142e8d8bef9SDimitry Andric     DefinitionParser Parser(Tokens);
143e8d8bef9SDimitry Andric     auto Definition = Parser.parse();
14406c3fb27SDimitry Andric     if (Definition.ObjectLike) {
14506c3fb27SDimitry Andric       ObjectLike[Definition.Name] = std::move(Definition);
14606c3fb27SDimitry Andric     } else {
14706c3fb27SDimitry Andric       FunctionLike[Definition.Name][Definition.Params.size()] =
14806c3fb27SDimitry Andric           std::move(Definition);
14906c3fb27SDimitry Andric     }
150e8d8bef9SDimitry Andric   }
151e8d8bef9SDimitry Andric }
152e8d8bef9SDimitry Andric 
153*0fca6ea1SDimitry Andric bool MacroExpander::defined(StringRef Name) const {
15406c3fb27SDimitry Andric   return FunctionLike.contains(Name) || ObjectLike.contains(Name);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric 
157*0fca6ea1SDimitry Andric bool MacroExpander::objectLike(StringRef Name) const {
15806c3fb27SDimitry Andric   return ObjectLike.contains(Name);
159e8d8bef9SDimitry Andric }
160e8d8bef9SDimitry Andric 
161*0fca6ea1SDimitry Andric bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const {
16206c3fb27SDimitry Andric   auto it = FunctionLike.find(Name);
16306c3fb27SDimitry Andric   return it != FunctionLike.end() && it->second.contains(Arity);
16406c3fb27SDimitry Andric }
165e8d8bef9SDimitry Andric 
166*0fca6ea1SDimitry Andric SmallVector<FormatToken *, 8>
16706c3fb27SDimitry Andric MacroExpander::expand(FormatToken *ID,
16806c3fb27SDimitry Andric                       std::optional<ArgsList> OptionalArgs) const {
16906c3fb27SDimitry Andric   if (OptionalArgs)
17006c3fb27SDimitry Andric     assert(hasArity(ID->TokenText, OptionalArgs->size()));
17106c3fb27SDimitry Andric   else
17206c3fb27SDimitry Andric     assert(objectLike(ID->TokenText));
17306c3fb27SDimitry Andric   const Definition &Def = OptionalArgs
17406c3fb27SDimitry Andric                               ? FunctionLike.find(ID->TokenText)
17506c3fb27SDimitry Andric                                     ->second.find(OptionalArgs.value().size())
17606c3fb27SDimitry Andric                                     ->second
17706c3fb27SDimitry Andric                               : ObjectLike.find(ID->TokenText)->second;
17806c3fb27SDimitry Andric   ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
17906c3fb27SDimitry Andric   SmallVector<FormatToken *, 8> Result;
180e8d8bef9SDimitry Andric   // Expand each argument at most once.
181e8d8bef9SDimitry Andric   llvm::StringSet<> ExpandedArgs;
182e8d8bef9SDimitry Andric 
183e8d8bef9SDimitry Andric   // Adds the given token to Result.
184e8d8bef9SDimitry Andric   auto pushToken = [&](FormatToken *Tok) {
185e8d8bef9SDimitry Andric     Tok->MacroCtx->ExpandedFrom.push_back(ID);
186e8d8bef9SDimitry Andric     Result.push_back(Tok);
187e8d8bef9SDimitry Andric   };
188e8d8bef9SDimitry Andric 
189e8d8bef9SDimitry Andric   // If Tok references a parameter, adds the corresponding argument to Result.
190e8d8bef9SDimitry Andric   // Returns false if Tok does not reference a parameter.
191e8d8bef9SDimitry Andric   auto expandArgument = [&](FormatToken *Tok) -> bool {
192e8d8bef9SDimitry Andric     // If the current token references a parameter, expand the corresponding
193e8d8bef9SDimitry Andric     // argument.
1945f757f3fSDimitry Andric     if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
195e8d8bef9SDimitry Andric       return false;
196e8d8bef9SDimitry Andric     ExpandedArgs.insert(Tok->TokenText);
197e8d8bef9SDimitry Andric     auto I = Def.ArgMap.find(Tok->TokenText);
198e8d8bef9SDimitry Andric     if (I == Def.ArgMap.end())
199e8d8bef9SDimitry Andric       return false;
200e8d8bef9SDimitry Andric     // If there are fewer arguments than referenced parameters, treat the
201e8d8bef9SDimitry Andric     // parameter as empty.
202e8d8bef9SDimitry Andric     // FIXME: Potentially fully abort the expansion instead.
203e8d8bef9SDimitry Andric     if (I->getValue() >= Args.size())
204e8d8bef9SDimitry Andric       return true;
205e8d8bef9SDimitry Andric     for (FormatToken *Arg : Args[I->getValue()]) {
206e8d8bef9SDimitry Andric       // A token can be part of a macro argument at multiple levels.
207e8d8bef9SDimitry Andric       // For example, with "ID(x) x":
208e8d8bef9SDimitry Andric       // in ID(ID(x)), 'x' is expanded first as argument to the inner
209e8d8bef9SDimitry Andric       // ID, then again as argument to the outer ID. We keep the macro
210e8d8bef9SDimitry Andric       // role the token had from the inner expansion.
211e8d8bef9SDimitry Andric       if (!Arg->MacroCtx)
212e8d8bef9SDimitry Andric         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
213e8d8bef9SDimitry Andric       pushToken(Arg);
214e8d8bef9SDimitry Andric     }
215e8d8bef9SDimitry Andric     return true;
216e8d8bef9SDimitry Andric   };
217e8d8bef9SDimitry Andric 
218e8d8bef9SDimitry Andric   // Expand the definition into Result.
219e8d8bef9SDimitry Andric   for (FormatToken *Tok : Def.Body) {
220e8d8bef9SDimitry Andric     if (expandArgument(Tok))
221e8d8bef9SDimitry Andric       continue;
222e8d8bef9SDimitry Andric     // Create a copy of the tokens from the macro body, i.e. were not provided
223e8d8bef9SDimitry Andric     // by user code.
224e8d8bef9SDimitry Andric     FormatToken *New = new (Allocator.Allocate()) FormatToken;
225e8d8bef9SDimitry Andric     New->copyFrom(*Tok);
226e8d8bef9SDimitry Andric     assert(!New->MacroCtx);
227e8d8bef9SDimitry Andric     // Tokens that are not part of the user code are not formatted.
228e8d8bef9SDimitry Andric     New->MacroCtx = MacroExpansion(MR_Hidden);
229e8d8bef9SDimitry Andric     pushToken(New);
230e8d8bef9SDimitry Andric   }
231e8d8bef9SDimitry Andric   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
232e8d8bef9SDimitry Andric   if (Result.size() > 1) {
233e8d8bef9SDimitry Andric     ++Result[0]->MacroCtx->StartOfExpansion;
234e8d8bef9SDimitry Andric     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
235e8d8bef9SDimitry Andric   }
236e8d8bef9SDimitry Andric   return Result;
237e8d8bef9SDimitry Andric }
238e8d8bef9SDimitry Andric 
239e8d8bef9SDimitry Andric } // namespace format
240e8d8bef9SDimitry Andric } // namespace clang
241