xref: /llvm-project/clang/lib/Format/MacroExpander.cpp (revision b92d6dd704d789240685a336ad8b25a9f381b4cc)
1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of MacroExpander, which handles macro
11 /// configuration and expansion while formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Macros.h"
16 
17 #include "FormatTokenLexer.h"
18 #include "clang/Lex/Preprocessor.h"
19 
20 namespace clang {
21 namespace format {
22 
23 struct MacroExpander::Definition {
24   StringRef Name;
25   SmallVector<FormatToken *, 8> Params;
26   SmallVector<FormatToken *, 8> Body;
27 
28   // Map from each argument's name to its position in the argument list.
29   // With "M(x, y) x + y":
30   //   x -> 0
31   //   y -> 1
32   llvm::StringMap<size_t> ArgMap;
33 
34   bool ObjectLike = true;
35 };
36 
37 class MacroExpander::DefinitionParser {
38 public:
39   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
40     assert(!Tokens.empty());
41     Current = Tokens[0];
42   }
43 
44   // Parse the token stream and return the corresponding Definition object.
45   // Returns an empty definition object with a null-Name on error.
46   MacroExpander::Definition parse() {
47     if (Current->isNot(tok::identifier))
48       return {};
49     Def.Name = Current->TokenText;
50     nextToken();
51     if (Current->is(tok::l_paren)) {
52       Def.ObjectLike = false;
53       if (!parseParams())
54         return {};
55     }
56     if (!parseExpansion())
57       return {};
58 
59     return Def;
60   }
61 
62 private:
63   bool parseParams() {
64     assert(Current->is(tok::l_paren));
65     nextToken();
66     while (Current->is(tok::identifier)) {
67       Def.Params.push_back(Current);
68       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
69       nextToken();
70       if (Current->isNot(tok::comma))
71         break;
72       nextToken();
73     }
74     if (Current->isNot(tok::r_paren))
75       return false;
76     nextToken();
77     return true;
78   }
79 
80   bool parseExpansion() {
81     if (!Current->isOneOf(tok::equal, tok::eof))
82       return false;
83     if (Current->is(tok::equal))
84       nextToken();
85     parseTail();
86     return true;
87   }
88 
89   void parseTail() {
90     while (Current->isNot(tok::eof)) {
91       Def.Body.push_back(Current);
92       nextToken();
93     }
94     Def.Body.push_back(Current);
95   }
96 
97   void nextToken() {
98     if (Pos + 1 < Tokens.size())
99       ++Pos;
100     Current = Tokens[Pos];
101     Current->Finalized = true;
102   }
103 
104   size_t Pos = 0;
105   FormatToken *Current = nullptr;
106   Definition Def;
107   ArrayRef<FormatToken *> Tokens;
108 };
109 
110 MacroExpander::MacroExpander(
111     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
112     const FormatStyle &Style,
113     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
114     IdentifierTable &IdentTable)
115     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
116       IdentTable(IdentTable) {
117   for (const std::string &Macro : Macros)
118     parseDefinition(Macro);
119 }
120 
121 MacroExpander::~MacroExpander() = default;
122 
123 void MacroExpander::parseDefinition(const std::string &Macro) {
124   Buffers.push_back(
125       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
126   clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
127   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
128                        Allocator, IdentTable);
129   const auto Tokens = Lex.lex();
130   if (!Tokens.empty()) {
131     DefinitionParser Parser(Tokens);
132     auto Definition = Parser.parse();
133     if (Definition.ObjectLike) {
134       ObjectLike[Definition.Name] = std::move(Definition);
135     } else {
136       FunctionLike[Definition.Name][Definition.Params.size()] =
137           std::move(Definition);
138     }
139   }
140 }
141 
142 bool MacroExpander::defined(llvm::StringRef Name) const {
143   return FunctionLike.contains(Name) || ObjectLike.contains(Name);
144 }
145 
146 bool MacroExpander::objectLike(llvm::StringRef Name) const {
147   return ObjectLike.contains(Name);
148 }
149 
150 bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const {
151   auto it = FunctionLike.find(Name);
152   return it != FunctionLike.end() && it->second.contains(Arity);
153 }
154 
155 llvm::SmallVector<FormatToken *, 8>
156 MacroExpander::expand(FormatToken *ID,
157                       std::optional<ArgsList> OptionalArgs) const {
158   if (OptionalArgs)
159     assert(hasArity(ID->TokenText, OptionalArgs->size()));
160   else
161     assert(objectLike(ID->TokenText));
162   const Definition &Def = OptionalArgs
163                               ? FunctionLike.find(ID->TokenText)
164                                     ->second.find(OptionalArgs.value().size())
165                                     ->second
166                               : ObjectLike.find(ID->TokenText)->second;
167   ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
168   SmallVector<FormatToken *, 8> Result;
169   // Expand each argument at most once.
170   llvm::StringSet<> ExpandedArgs;
171 
172   // Adds the given token to Result.
173   auto pushToken = [&](FormatToken *Tok) {
174     Tok->MacroCtx->ExpandedFrom.push_back(ID);
175     Result.push_back(Tok);
176   };
177 
178   // If Tok references a parameter, adds the corresponding argument to Result.
179   // Returns false if Tok does not reference a parameter.
180   auto expandArgument = [&](FormatToken *Tok) -> bool {
181     // If the current token references a parameter, expand the corresponding
182     // argument.
183     if (Tok->isNot(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
184       return false;
185     ExpandedArgs.insert(Tok->TokenText);
186     auto I = Def.ArgMap.find(Tok->TokenText);
187     if (I == Def.ArgMap.end())
188       return false;
189     // If there are fewer arguments than referenced parameters, treat the
190     // parameter as empty.
191     // FIXME: Potentially fully abort the expansion instead.
192     if (I->getValue() >= Args.size())
193       return true;
194     for (FormatToken *Arg : Args[I->getValue()]) {
195       // A token can be part of a macro argument at multiple levels.
196       // For example, with "ID(x) x":
197       // in ID(ID(x)), 'x' is expanded first as argument to the inner
198       // ID, then again as argument to the outer ID. We keep the macro
199       // role the token had from the inner expansion.
200       if (!Arg->MacroCtx)
201         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
202       pushToken(Arg);
203     }
204     return true;
205   };
206 
207   // Expand the definition into Result.
208   for (FormatToken *Tok : Def.Body) {
209     if (expandArgument(Tok))
210       continue;
211     // Create a copy of the tokens from the macro body, i.e. were not provided
212     // by user code.
213     FormatToken *New = new (Allocator.Allocate()) FormatToken;
214     New->copyFrom(*Tok);
215     assert(!New->MacroCtx);
216     // Tokens that are not part of the user code are not formatted.
217     New->MacroCtx = MacroExpansion(MR_Hidden);
218     pushToken(New);
219   }
220   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
221   if (Result.size() > 1) {
222     ++Result[0]->MacroCtx->StartOfExpansion;
223     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
224   }
225   return Result;
226 }
227 
228 } // namespace format
229 } // namespace clang
230