xref: /llvm-project/clang/lib/Format/MacroExpander.cpp (revision e336b74c995d665bc3fb75164375bbb0f78f516c)
1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of MacroExpander, which handles macro
12 /// configuration and expansion while formatting.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "Macros.h"
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "FormatTokenLexer.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Format/Format.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/HeaderSearchOptions.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/ModuleLoader.h"
27 #include "clang/Lex/Preprocessor.h"
28 #include "clang/Lex/PreprocessorOptions.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/ErrorHandling.h"
31 
32 namespace clang {
33 namespace format {
34 
35 struct MacroExpander::Definition {
36   StringRef Name;
37   SmallVector<FormatToken *, 8> Params;
38   SmallVector<FormatToken *, 8> Body;
39 
40   // Map from each argument's name to its position in the argument list.
41   // With "M(x, y) x + y":
42   //   x -> 0
43   //   y -> 1
44   llvm::StringMap<size_t> ArgMap;
45 
46   bool ObjectLike = true;
47 };
48 
49 class MacroExpander::DefinitionParser {
50 public:
51   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
52     assert(!Tokens.empty());
53     Current = Tokens[0];
54   }
55 
56   // Parse the token stream and return the corresonding Definition object.
57   // Returns an empty definition object with a null-Name on error.
58   MacroExpander::Definition parse() {
59     if (!Current->is(tok::identifier))
60       return {};
61     Def.Name = Current->TokenText;
62     nextToken();
63     if (Current->is(tok::l_paren)) {
64       Def.ObjectLike = false;
65       if (!parseParams())
66         return {};
67     }
68     if (!parseExpansion())
69       return {};
70 
71     return Def;
72   }
73 
74 private:
75   bool parseParams() {
76     assert(Current->is(tok::l_paren));
77     nextToken();
78     while (Current->is(tok::identifier)) {
79       Def.Params.push_back(Current);
80       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
81       nextToken();
82       if (Current->isNot(tok::comma))
83         break;
84       nextToken();
85     }
86     if (Current->isNot(tok::r_paren))
87       return false;
88     nextToken();
89     return true;
90   }
91 
92   bool parseExpansion() {
93     if (!Current->isOneOf(tok::equal, tok::eof))
94       return false;
95     if (Current->is(tok::equal))
96       nextToken();
97     parseTail();
98     return true;
99   }
100 
101   void parseTail() {
102     while (Current->isNot(tok::eof)) {
103       Def.Body.push_back(Current);
104       nextToken();
105     }
106     Def.Body.push_back(Current);
107   }
108 
109   void nextToken() {
110     if (Pos + 1 < Tokens.size())
111       ++Pos;
112     Current = Tokens[Pos];
113     Current->Finalized = true;
114   }
115 
116   size_t Pos = 0;
117   FormatToken *Current = nullptr;
118   Definition Def;
119   ArrayRef<FormatToken *> Tokens;
120 };
121 
122 MacroExpander::MacroExpander(
123     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
124     const FormatStyle &Style,
125     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
126     IdentifierTable &IdentTable)
127     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
128       IdentTable(IdentTable) {
129   for (const std::string &Macro : Macros) {
130     parseDefinition(Macro);
131   }
132 }
133 
134 MacroExpander::~MacroExpander() = default;
135 
136 void MacroExpander::parseDefinition(const std::string &Macro) {
137   Buffers.push_back(
138       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
139   clang::FileID FID =
140       SourceMgr.createFileID(SourceManager::Unowned, Buffers.back().get());
141   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
142                        Allocator, IdentTable);
143   const auto Tokens = Lex.lex();
144   if (!Tokens.empty()) {
145     DefinitionParser Parser(Tokens);
146     auto Definition = Parser.parse();
147     Definitions[Definition.Name] = std::move(Definition);
148   }
149 }
150 
151 bool MacroExpander::defined(llvm::StringRef Name) const {
152   return Definitions.find(Name) != Definitions.end();
153 }
154 
155 bool MacroExpander::objectLike(llvm::StringRef Name) const {
156   return Definitions.find(Name)->second.ObjectLike;
157 }
158 
159 llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
160                                                           ArgsList Args) const {
161   assert(defined(ID->TokenText));
162   SmallVector<FormatToken *, 8> Result;
163   const Definition &Def = Definitions.find(ID->TokenText)->second;
164 
165   // Expand each argument at most once.
166   llvm::StringSet<> ExpandedArgs;
167 
168   // Adds the given token to Result.
169   auto pushToken = [&](FormatToken *Tok) {
170     Tok->MacroCtx->ExpandedFrom.push_back(ID);
171     Result.push_back(Tok);
172   };
173 
174   // If Tok references a parameter, adds the corresponding argument to Result.
175   // Returns false if Tok does not reference a parameter.
176   auto expandArgument = [&](FormatToken *Tok) -> bool {
177     // If the current token references a parameter, expand the corresponding
178     // argument.
179     if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
180       return false;
181     ExpandedArgs.insert(Tok->TokenText);
182     auto I = Def.ArgMap.find(Tok->TokenText);
183     if (I == Def.ArgMap.end())
184       return false;
185     // If there are fewer arguments than referenced parameters, treat the
186     // parameter as empty.
187     // FIXME: Potentially fully abort the expansion instead.
188     if (I->getValue() >= Args.size())
189       return true;
190     for (FormatToken *Arg : Args[I->getValue()]) {
191       // A token can be part of a macro argument at multiple levels.
192       // For example, with "ID(x) x":
193       // in ID(ID(x)), 'x' is expanded first as argument to the inner
194       // ID, then again as argument to the outer ID. We keep the macro
195       // role the token had from the inner expansion.
196       if (!Arg->MacroCtx)
197         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
198       pushToken(Arg);
199     }
200     return true;
201   };
202 
203   // Expand the definition into Result.
204   for (FormatToken *Tok : Def.Body) {
205     if (expandArgument(Tok))
206       continue;
207     // Create a copy of the tokens from the macro body, i.e. were not provided
208     // by user code.
209     FormatToken *New = new (Allocator.Allocate()) FormatToken;
210     New->copyFrom(*Tok);
211     assert(!New->MacroCtx);
212     // Tokens that are not part of the user code are not formatted.
213     New->MacroCtx = MacroExpansion(MR_Hidden);
214     pushToken(New);
215   }
216   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
217   if (Result.size() > 1) {
218     ++Result[0]->MacroCtx->StartOfExpansion;
219     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
220   }
221   return Result;
222 }
223 
224 } // namespace format
225 } // namespace clang
226