1dda28197Spatrick //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick
9061da546Spatrick #include "CPlusPlusNameParser.h"
10061da546Spatrick
11061da546Spatrick #include "clang/Basic/IdentifierTable.h"
12*f6aab3d8Srobert #include "clang/Basic/TokenKinds.h"
13061da546Spatrick #include "llvm/ADT/StringMap.h"
14061da546Spatrick #include "llvm/Support/Threading.h"
15*f6aab3d8Srobert #include <optional>
16061da546Spatrick
17061da546Spatrick using namespace lldb;
18061da546Spatrick using namespace lldb_private;
19061da546Spatrick using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20061da546Spatrick using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21061da546Spatrick namespace tok = clang::tok;
22061da546Spatrick
ParseAsFunctionDefinition()23*f6aab3d8Srobert std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24061da546Spatrick m_next_token_index = 0;
25*f6aab3d8Srobert std::optional<ParsedFunction> result(std::nullopt);
26061da546Spatrick
27061da546Spatrick // Try to parse the name as function without a return type specified e.g.
28061da546Spatrick // main(int, char*[])
29061da546Spatrick {
30061da546Spatrick Bookmark start_position = SetBookmark();
31061da546Spatrick result = ParseFunctionImpl(false);
32061da546Spatrick if (result && !HasMoreTokens())
33061da546Spatrick return result;
34061da546Spatrick }
35061da546Spatrick
36061da546Spatrick // Try to parse the name as function with function pointer return type e.g.
37061da546Spatrick // void (*get_func(const char*))()
38061da546Spatrick result = ParseFuncPtr(true);
39061da546Spatrick if (result)
40061da546Spatrick return result;
41061da546Spatrick
42061da546Spatrick // Finally try to parse the name as a function with non-function return type
43061da546Spatrick // e.g. int main(int, char*[])
44061da546Spatrick result = ParseFunctionImpl(true);
45061da546Spatrick if (HasMoreTokens())
46*f6aab3d8Srobert return std::nullopt;
47061da546Spatrick return result;
48061da546Spatrick }
49061da546Spatrick
ParseAsFullName()50*f6aab3d8Srobert std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51061da546Spatrick m_next_token_index = 0;
52*f6aab3d8Srobert std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53061da546Spatrick if (!name_ranges)
54*f6aab3d8Srobert return std::nullopt;
55061da546Spatrick if (HasMoreTokens())
56*f6aab3d8Srobert return std::nullopt;
57061da546Spatrick ParsedName result;
58*f6aab3d8Srobert result.basename = GetTextForRange(name_ranges->basename_range);
59*f6aab3d8Srobert result.context = GetTextForRange(name_ranges->context_range);
60061da546Spatrick return result;
61061da546Spatrick }
62061da546Spatrick
HasMoreTokens()63061da546Spatrick bool CPlusPlusNameParser::HasMoreTokens() {
64061da546Spatrick return m_next_token_index < m_tokens.size();
65061da546Spatrick }
66061da546Spatrick
Advance()67061da546Spatrick void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68061da546Spatrick
TakeBack()69061da546Spatrick void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70061da546Spatrick
ConsumeToken(tok::TokenKind kind)71061da546Spatrick bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72061da546Spatrick if (!HasMoreTokens())
73061da546Spatrick return false;
74061da546Spatrick
75061da546Spatrick if (!Peek().is(kind))
76061da546Spatrick return false;
77061da546Spatrick
78061da546Spatrick Advance();
79061da546Spatrick return true;
80061da546Spatrick }
81061da546Spatrick
ConsumeToken(Ts...kinds)82061da546Spatrick template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83061da546Spatrick if (!HasMoreTokens())
84061da546Spatrick return false;
85061da546Spatrick
86061da546Spatrick if (!Peek().isOneOf(kinds...))
87061da546Spatrick return false;
88061da546Spatrick
89061da546Spatrick Advance();
90061da546Spatrick return true;
91061da546Spatrick }
92061da546Spatrick
SetBookmark()93061da546Spatrick CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94061da546Spatrick return Bookmark(m_next_token_index);
95061da546Spatrick }
96061da546Spatrick
GetCurrentPosition()97061da546Spatrick size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98061da546Spatrick
Peek()99061da546Spatrick clang::Token &CPlusPlusNameParser::Peek() {
100061da546Spatrick assert(HasMoreTokens());
101061da546Spatrick return m_tokens[m_next_token_index];
102061da546Spatrick }
103061da546Spatrick
104*f6aab3d8Srobert std::optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105061da546Spatrick CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106061da546Spatrick Bookmark start_position = SetBookmark();
107*f6aab3d8Srobert
108*f6aab3d8Srobert ParsedFunction result;
109061da546Spatrick if (expect_return_type) {
110*f6aab3d8Srobert size_t return_start = GetCurrentPosition();
111061da546Spatrick // Consume return type if it's expected.
112*f6aab3d8Srobert if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
113*f6aab3d8Srobert return std::nullopt;
114*f6aab3d8Srobert
115*f6aab3d8Srobert size_t return_end = GetCurrentPosition();
116*f6aab3d8Srobert result.return_type = GetTextForRange(Range(return_start, return_end));
117061da546Spatrick }
118061da546Spatrick
119061da546Spatrick auto maybe_name = ParseFullNameImpl();
120061da546Spatrick if (!maybe_name) {
121*f6aab3d8Srobert return std::nullopt;
122061da546Spatrick }
123061da546Spatrick
124061da546Spatrick size_t argument_start = GetCurrentPosition();
125061da546Spatrick if (!ConsumeArguments()) {
126*f6aab3d8Srobert return std::nullopt;
127061da546Spatrick }
128061da546Spatrick
129061da546Spatrick size_t qualifiers_start = GetCurrentPosition();
130061da546Spatrick SkipFunctionQualifiers();
131061da546Spatrick size_t end_position = GetCurrentPosition();
132061da546Spatrick
133*f6aab3d8Srobert result.name.basename = GetTextForRange(maybe_name->basename_range);
134*f6aab3d8Srobert result.name.context = GetTextForRange(maybe_name->context_range);
135061da546Spatrick result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
136061da546Spatrick result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
137061da546Spatrick start_position.Remove();
138061da546Spatrick return result;
139061da546Spatrick }
140061da546Spatrick
141*f6aab3d8Srobert std::optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)142061da546Spatrick CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
143*f6aab3d8Srobert // This function parses a function definition
144*f6aab3d8Srobert // that returns a pointer type.
145*f6aab3d8Srobert // E.g., double (*(*func(long))(int))(float)
146*f6aab3d8Srobert
147*f6aab3d8Srobert // Step 1:
148*f6aab3d8Srobert // Remove the return type of the innermost
149*f6aab3d8Srobert // function pointer type.
150*f6aab3d8Srobert //
151*f6aab3d8Srobert // Leaves us with:
152*f6aab3d8Srobert // (*(*func(long))(int))(float)
153061da546Spatrick Bookmark start_position = SetBookmark();
154061da546Spatrick if (expect_return_type) {
155061da546Spatrick // Consume return type.
156061da546Spatrick if (!ConsumeTypename())
157*f6aab3d8Srobert return std::nullopt;
158061da546Spatrick }
159061da546Spatrick
160*f6aab3d8Srobert // Step 2:
161*f6aab3d8Srobert //
162*f6aab3d8Srobert // Skip a pointer and parenthesis pair.
163*f6aab3d8Srobert //
164*f6aab3d8Srobert // Leaves us with:
165*f6aab3d8Srobert // (*func(long))(int))(float)
166061da546Spatrick if (!ConsumeToken(tok::l_paren))
167*f6aab3d8Srobert return std::nullopt;
168061da546Spatrick if (!ConsumePtrsAndRefs())
169*f6aab3d8Srobert return std::nullopt;
170061da546Spatrick
171*f6aab3d8Srobert // Step 3:
172*f6aab3d8Srobert //
173*f6aab3d8Srobert // Consume inner function name. This will fail unless
174*f6aab3d8Srobert // we stripped all the pointers on the left hand side
175*f6aab3d8Srobert // of the funciton name.
176061da546Spatrick {
177061da546Spatrick Bookmark before_inner_function_pos = SetBookmark();
178061da546Spatrick auto maybe_inner_function_name = ParseFunctionImpl(false);
179061da546Spatrick if (maybe_inner_function_name)
180061da546Spatrick if (ConsumeToken(tok::r_paren))
181061da546Spatrick if (ConsumeArguments()) {
182061da546Spatrick SkipFunctionQualifiers();
183061da546Spatrick start_position.Remove();
184061da546Spatrick before_inner_function_pos.Remove();
185061da546Spatrick return maybe_inner_function_name;
186061da546Spatrick }
187061da546Spatrick }
188061da546Spatrick
189*f6aab3d8Srobert // Step 4:
190*f6aab3d8Srobert //
191*f6aab3d8Srobert // Parse the remaining string as a function pointer again.
192*f6aab3d8Srobert // This time don't consume the inner-most typename since
193*f6aab3d8Srobert // we're left with pointers only. This will strip another
194*f6aab3d8Srobert // layer of pointers until we're left with the innermost
195*f6aab3d8Srobert // function name/argument. I.e., func(long))(int))(float)
196*f6aab3d8Srobert //
197*f6aab3d8Srobert // Once we successfully stripped all pointers and gotten
198*f6aab3d8Srobert // the innermost function name from ParseFunctionImpl above,
199*f6aab3d8Srobert // we consume a single ')' and the arguments '(...)' that follows.
200*f6aab3d8Srobert //
201*f6aab3d8Srobert // Leaves us with:
202*f6aab3d8Srobert // )(float)
203*f6aab3d8Srobert //
204*f6aab3d8Srobert // This is the remnant of the outer function pointers' arguments.
205*f6aab3d8Srobert // Unwinding the recursive calls will remove the remaining
206*f6aab3d8Srobert // arguments.
207061da546Spatrick auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
208061da546Spatrick if (maybe_inner_function_ptr_name)
209061da546Spatrick if (ConsumeToken(tok::r_paren))
210061da546Spatrick if (ConsumeArguments()) {
211061da546Spatrick SkipFunctionQualifiers();
212061da546Spatrick start_position.Remove();
213061da546Spatrick return maybe_inner_function_ptr_name;
214061da546Spatrick }
215*f6aab3d8Srobert
216*f6aab3d8Srobert return std::nullopt;
217061da546Spatrick }
218061da546Spatrick
ConsumeArguments()219061da546Spatrick bool CPlusPlusNameParser::ConsumeArguments() {
220061da546Spatrick return ConsumeBrackets(tok::l_paren, tok::r_paren);
221061da546Spatrick }
222061da546Spatrick
ConsumeTemplateArgs()223061da546Spatrick bool CPlusPlusNameParser::ConsumeTemplateArgs() {
224061da546Spatrick Bookmark start_position = SetBookmark();
225061da546Spatrick if (!HasMoreTokens() || Peek().getKind() != tok::less)
226061da546Spatrick return false;
227061da546Spatrick Advance();
228061da546Spatrick
229061da546Spatrick // Consuming template arguments is a bit trickier than consuming function
230061da546Spatrick // arguments, because '<' '>' brackets are not always trivially balanced. In
231061da546Spatrick // some rare cases tokens '<' and '>' can appear inside template arguments as
232061da546Spatrick // arithmetic or shift operators not as template brackets. Examples:
233061da546Spatrick // std::enable_if<(10u)<(64), bool>
234061da546Spatrick // f<A<operator<(X,Y)::Subclass>>
235061da546Spatrick // Good thing that compiler makes sure that really ambiguous cases of '>'
236061da546Spatrick // usage should be enclosed within '()' brackets.
237061da546Spatrick int template_counter = 1;
238061da546Spatrick bool can_open_template = false;
239061da546Spatrick while (HasMoreTokens() && template_counter > 0) {
240061da546Spatrick tok::TokenKind kind = Peek().getKind();
241061da546Spatrick switch (kind) {
242061da546Spatrick case tok::greatergreater:
243061da546Spatrick template_counter -= 2;
244061da546Spatrick can_open_template = false;
245061da546Spatrick Advance();
246061da546Spatrick break;
247061da546Spatrick case tok::greater:
248061da546Spatrick --template_counter;
249061da546Spatrick can_open_template = false;
250061da546Spatrick Advance();
251061da546Spatrick break;
252061da546Spatrick case tok::less:
253061da546Spatrick // '<' is an attempt to open a subteamplte
254061da546Spatrick // check if parser is at the point where it's actually possible,
255061da546Spatrick // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
256061da546Spatrick // need to do the same for '>' because compiler actually makes sure that
257061da546Spatrick // '>' always surrounded by brackets to avoid ambiguity.
258061da546Spatrick if (can_open_template)
259061da546Spatrick ++template_counter;
260061da546Spatrick can_open_template = false;
261061da546Spatrick Advance();
262061da546Spatrick break;
263061da546Spatrick case tok::kw_operator: // C++ operator overloading.
264061da546Spatrick if (!ConsumeOperator())
265061da546Spatrick return false;
266061da546Spatrick can_open_template = true;
267061da546Spatrick break;
268061da546Spatrick case tok::raw_identifier:
269061da546Spatrick can_open_template = true;
270061da546Spatrick Advance();
271061da546Spatrick break;
272061da546Spatrick case tok::l_square:
273*f6aab3d8Srobert // Handle templates tagged with an ABI tag.
274*f6aab3d8Srobert // An example demangled/prettified version is:
275*f6aab3d8Srobert // func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
276*f6aab3d8Srobert if (ConsumeAbiTag())
277*f6aab3d8Srobert can_open_template = true;
278*f6aab3d8Srobert else if (ConsumeBrackets(tok::l_square, tok::r_square))
279061da546Spatrick can_open_template = false;
280*f6aab3d8Srobert else
281*f6aab3d8Srobert return false;
282061da546Spatrick break;
283061da546Spatrick case tok::l_paren:
284061da546Spatrick if (!ConsumeArguments())
285061da546Spatrick return false;
286061da546Spatrick can_open_template = false;
287061da546Spatrick break;
288061da546Spatrick default:
289061da546Spatrick can_open_template = false;
290061da546Spatrick Advance();
291061da546Spatrick break;
292061da546Spatrick }
293061da546Spatrick }
294061da546Spatrick
295061da546Spatrick if (template_counter != 0) {
296061da546Spatrick return false;
297061da546Spatrick }
298061da546Spatrick start_position.Remove();
299061da546Spatrick return true;
300061da546Spatrick }
301061da546Spatrick
ConsumeAbiTag()302*f6aab3d8Srobert bool CPlusPlusNameParser::ConsumeAbiTag() {
303*f6aab3d8Srobert Bookmark start_position = SetBookmark();
304*f6aab3d8Srobert if (!ConsumeToken(tok::l_square))
305*f6aab3d8Srobert return false;
306*f6aab3d8Srobert
307*f6aab3d8Srobert if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
308*f6aab3d8Srobert Peek().getRawIdentifier() == "abi")
309*f6aab3d8Srobert Advance();
310*f6aab3d8Srobert else
311*f6aab3d8Srobert return false;
312*f6aab3d8Srobert
313*f6aab3d8Srobert if (!ConsumeToken(tok::colon))
314*f6aab3d8Srobert return false;
315*f6aab3d8Srobert
316*f6aab3d8Srobert // Consume the actual tag string (and allow some special characters)
317*f6aab3d8Srobert while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
318*f6aab3d8Srobert tok::numeric_constant))
319*f6aab3d8Srobert ;
320*f6aab3d8Srobert
321*f6aab3d8Srobert if (!ConsumeToken(tok::r_square))
322*f6aab3d8Srobert return false;
323*f6aab3d8Srobert
324*f6aab3d8Srobert start_position.Remove();
325*f6aab3d8Srobert return true;
326*f6aab3d8Srobert }
327*f6aab3d8Srobert
ConsumeAnonymousNamespace()328061da546Spatrick bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
329061da546Spatrick Bookmark start_position = SetBookmark();
330061da546Spatrick if (!ConsumeToken(tok::l_paren)) {
331061da546Spatrick return false;
332061da546Spatrick }
333061da546Spatrick constexpr llvm::StringLiteral g_anonymous("anonymous");
334061da546Spatrick if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
335061da546Spatrick Peek().getRawIdentifier() == g_anonymous) {
336061da546Spatrick Advance();
337061da546Spatrick } else {
338061da546Spatrick return false;
339061da546Spatrick }
340061da546Spatrick
341061da546Spatrick if (!ConsumeToken(tok::kw_namespace)) {
342061da546Spatrick return false;
343061da546Spatrick }
344061da546Spatrick
345061da546Spatrick if (!ConsumeToken(tok::r_paren)) {
346061da546Spatrick return false;
347061da546Spatrick }
348061da546Spatrick start_position.Remove();
349061da546Spatrick return true;
350061da546Spatrick }
351061da546Spatrick
ConsumeLambda()352061da546Spatrick bool CPlusPlusNameParser::ConsumeLambda() {
353061da546Spatrick Bookmark start_position = SetBookmark();
354061da546Spatrick if (!ConsumeToken(tok::l_brace)) {
355061da546Spatrick return false;
356061da546Spatrick }
357061da546Spatrick constexpr llvm::StringLiteral g_lambda("lambda");
358061da546Spatrick if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
359061da546Spatrick Peek().getRawIdentifier() == g_lambda) {
360061da546Spatrick // Put the matched brace back so we can use ConsumeBrackets
361061da546Spatrick TakeBack();
362061da546Spatrick } else {
363061da546Spatrick return false;
364061da546Spatrick }
365061da546Spatrick
366061da546Spatrick if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
367061da546Spatrick return false;
368061da546Spatrick }
369061da546Spatrick
370061da546Spatrick start_position.Remove();
371061da546Spatrick return true;
372061da546Spatrick }
373061da546Spatrick
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)374061da546Spatrick bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
375061da546Spatrick tok::TokenKind right) {
376061da546Spatrick Bookmark start_position = SetBookmark();
377061da546Spatrick if (!HasMoreTokens() || Peek().getKind() != left)
378061da546Spatrick return false;
379061da546Spatrick Advance();
380061da546Spatrick
381061da546Spatrick int counter = 1;
382061da546Spatrick while (HasMoreTokens() && counter > 0) {
383061da546Spatrick tok::TokenKind kind = Peek().getKind();
384061da546Spatrick if (kind == right)
385061da546Spatrick --counter;
386061da546Spatrick else if (kind == left)
387061da546Spatrick ++counter;
388061da546Spatrick Advance();
389061da546Spatrick }
390061da546Spatrick
391061da546Spatrick assert(counter >= 0);
392061da546Spatrick if (counter > 0) {
393061da546Spatrick return false;
394061da546Spatrick }
395061da546Spatrick start_position.Remove();
396061da546Spatrick return true;
397061da546Spatrick }
398061da546Spatrick
ConsumeOperator()399061da546Spatrick bool CPlusPlusNameParser::ConsumeOperator() {
400061da546Spatrick Bookmark start_position = SetBookmark();
401061da546Spatrick if (!ConsumeToken(tok::kw_operator))
402061da546Spatrick return false;
403061da546Spatrick
404061da546Spatrick if (!HasMoreTokens()) {
405061da546Spatrick return false;
406061da546Spatrick }
407061da546Spatrick
408061da546Spatrick const auto &token = Peek();
409dda28197Spatrick
410dda28197Spatrick // When clang generates debug info it adds template parameters to names.
411dda28197Spatrick // Since clang doesn't add a space between the name and the template parameter
412dda28197Spatrick // in some cases we are not generating valid C++ names e.g.:
413dda28197Spatrick //
414dda28197Spatrick // operator<<A::B>
415dda28197Spatrick //
416dda28197Spatrick // In some of these cases we will not parse them correctly. This fixes the
417dda28197Spatrick // issue by detecting this case and inserting tok::less in place of
418dda28197Spatrick // tok::lessless and returning successfully that we consumed the operator.
419dda28197Spatrick if (token.getKind() == tok::lessless) {
420dda28197Spatrick // Make sure we have more tokens before attempting to look ahead one more.
421dda28197Spatrick if (m_next_token_index + 1 < m_tokens.size()) {
422dda28197Spatrick // Look ahead two tokens.
423dda28197Spatrick clang::Token n_token = m_tokens[m_next_token_index + 1];
424dda28197Spatrick // If we find ( or < then this is indeed operator<< no need for fix.
425dda28197Spatrick if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
426dda28197Spatrick clang::Token tmp_tok;
427dda28197Spatrick tmp_tok.startToken();
428dda28197Spatrick tmp_tok.setLength(1);
429dda28197Spatrick tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
430dda28197Spatrick tmp_tok.setKind(tok::less);
431dda28197Spatrick
432dda28197Spatrick m_tokens[m_next_token_index] = tmp_tok;
433dda28197Spatrick
434dda28197Spatrick start_position.Remove();
435dda28197Spatrick return true;
436dda28197Spatrick }
437dda28197Spatrick }
438dda28197Spatrick }
439dda28197Spatrick
440061da546Spatrick switch (token.getKind()) {
441061da546Spatrick case tok::kw_new:
442061da546Spatrick case tok::kw_delete:
443061da546Spatrick // This is 'new' or 'delete' operators.
444061da546Spatrick Advance();
445061da546Spatrick // Check for array new/delete.
446061da546Spatrick if (HasMoreTokens() && Peek().is(tok::l_square)) {
447061da546Spatrick // Consume the '[' and ']'.
448061da546Spatrick if (!ConsumeBrackets(tok::l_square, tok::r_square))
449061da546Spatrick return false;
450061da546Spatrick }
451061da546Spatrick break;
452061da546Spatrick
453061da546Spatrick #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
454061da546Spatrick case tok::Token: \
455061da546Spatrick Advance(); \
456061da546Spatrick break;
457061da546Spatrick #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
458061da546Spatrick #include "clang/Basic/OperatorKinds.def"
459061da546Spatrick #undef OVERLOADED_OPERATOR
460061da546Spatrick #undef OVERLOADED_OPERATOR_MULTI
461061da546Spatrick
462061da546Spatrick case tok::l_paren:
463061da546Spatrick // Call operator consume '(' ... ')'.
464061da546Spatrick if (ConsumeBrackets(tok::l_paren, tok::r_paren))
465061da546Spatrick break;
466061da546Spatrick return false;
467061da546Spatrick
468061da546Spatrick case tok::l_square:
469061da546Spatrick // This is a [] operator.
470061da546Spatrick // Consume the '[' and ']'.
471061da546Spatrick if (ConsumeBrackets(tok::l_square, tok::r_square))
472061da546Spatrick break;
473061da546Spatrick return false;
474061da546Spatrick
475061da546Spatrick default:
476061da546Spatrick // This might be a cast operator.
477061da546Spatrick if (ConsumeTypename())
478061da546Spatrick break;
479061da546Spatrick return false;
480061da546Spatrick }
481061da546Spatrick start_position.Remove();
482061da546Spatrick return true;
483061da546Spatrick }
484061da546Spatrick
SkipTypeQualifiers()485061da546Spatrick void CPlusPlusNameParser::SkipTypeQualifiers() {
486061da546Spatrick while (ConsumeToken(tok::kw_const, tok::kw_volatile))
487061da546Spatrick ;
488061da546Spatrick }
489061da546Spatrick
SkipFunctionQualifiers()490061da546Spatrick void CPlusPlusNameParser::SkipFunctionQualifiers() {
491061da546Spatrick while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
492061da546Spatrick ;
493061da546Spatrick }
494061da546Spatrick
ConsumeBuiltinType()495061da546Spatrick bool CPlusPlusNameParser::ConsumeBuiltinType() {
496061da546Spatrick bool result = false;
497061da546Spatrick bool continue_parsing = true;
498061da546Spatrick // Built-in types can be made of a few keywords like 'unsigned long long
499061da546Spatrick // int'. This function consumes all built-in type keywords without checking
500061da546Spatrick // if they make sense like 'unsigned char void'.
501061da546Spatrick while (continue_parsing && HasMoreTokens()) {
502061da546Spatrick switch (Peek().getKind()) {
503061da546Spatrick case tok::kw_short:
504061da546Spatrick case tok::kw_long:
505061da546Spatrick case tok::kw___int64:
506061da546Spatrick case tok::kw___int128:
507061da546Spatrick case tok::kw_signed:
508061da546Spatrick case tok::kw_unsigned:
509061da546Spatrick case tok::kw_void:
510061da546Spatrick case tok::kw_char:
511061da546Spatrick case tok::kw_int:
512061da546Spatrick case tok::kw_half:
513061da546Spatrick case tok::kw_float:
514061da546Spatrick case tok::kw_double:
515061da546Spatrick case tok::kw___float128:
516061da546Spatrick case tok::kw_wchar_t:
517061da546Spatrick case tok::kw_bool:
518061da546Spatrick case tok::kw_char16_t:
519061da546Spatrick case tok::kw_char32_t:
520061da546Spatrick result = true;
521061da546Spatrick Advance();
522061da546Spatrick break;
523061da546Spatrick default:
524061da546Spatrick continue_parsing = false;
525061da546Spatrick break;
526061da546Spatrick }
527061da546Spatrick }
528061da546Spatrick return result;
529061da546Spatrick }
530061da546Spatrick
SkipPtrsAndRefs()531061da546Spatrick void CPlusPlusNameParser::SkipPtrsAndRefs() {
532061da546Spatrick // Ignoring result.
533061da546Spatrick ConsumePtrsAndRefs();
534061da546Spatrick }
535061da546Spatrick
ConsumePtrsAndRefs()536061da546Spatrick bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
537061da546Spatrick bool found = false;
538061da546Spatrick SkipTypeQualifiers();
539061da546Spatrick while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
540061da546Spatrick tok::kw_volatile)) {
541061da546Spatrick found = true;
542061da546Spatrick SkipTypeQualifiers();
543061da546Spatrick }
544061da546Spatrick return found;
545061da546Spatrick }
546061da546Spatrick
ConsumeDecltype()547061da546Spatrick bool CPlusPlusNameParser::ConsumeDecltype() {
548061da546Spatrick Bookmark start_position = SetBookmark();
549061da546Spatrick if (!ConsumeToken(tok::kw_decltype))
550061da546Spatrick return false;
551061da546Spatrick
552061da546Spatrick if (!ConsumeArguments())
553061da546Spatrick return false;
554061da546Spatrick
555061da546Spatrick start_position.Remove();
556061da546Spatrick return true;
557061da546Spatrick }
558061da546Spatrick
ConsumeTypename()559061da546Spatrick bool CPlusPlusNameParser::ConsumeTypename() {
560061da546Spatrick Bookmark start_position = SetBookmark();
561061da546Spatrick SkipTypeQualifiers();
562061da546Spatrick if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
563061da546Spatrick if (!ParseFullNameImpl())
564061da546Spatrick return false;
565061da546Spatrick }
566061da546Spatrick SkipPtrsAndRefs();
567061da546Spatrick start_position.Remove();
568061da546Spatrick return true;
569061da546Spatrick }
570061da546Spatrick
571*f6aab3d8Srobert std::optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()572061da546Spatrick CPlusPlusNameParser::ParseFullNameImpl() {
573061da546Spatrick // Name parsing state machine.
574061da546Spatrick enum class State {
575061da546Spatrick Beginning, // start of the name
576061da546Spatrick AfterTwoColons, // right after ::
577061da546Spatrick AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
578061da546Spatrick AfterTemplate, // right after template brackets (<something>)
579061da546Spatrick AfterOperator, // right after name of C++ operator
580061da546Spatrick };
581061da546Spatrick
582061da546Spatrick Bookmark start_position = SetBookmark();
583061da546Spatrick State state = State::Beginning;
584061da546Spatrick bool continue_parsing = true;
585*f6aab3d8Srobert std::optional<size_t> last_coloncolon_position;
586061da546Spatrick
587061da546Spatrick while (continue_parsing && HasMoreTokens()) {
588061da546Spatrick const auto &token = Peek();
589061da546Spatrick switch (token.getKind()) {
590061da546Spatrick case tok::raw_identifier: // Just a name.
591061da546Spatrick if (state != State::Beginning && state != State::AfterTwoColons) {
592061da546Spatrick continue_parsing = false;
593061da546Spatrick break;
594061da546Spatrick }
595061da546Spatrick Advance();
596061da546Spatrick state = State::AfterIdentifier;
597061da546Spatrick break;
598*f6aab3d8Srobert case tok::l_square: {
599*f6aab3d8Srobert // Handles types or functions that were tagged
600*f6aab3d8Srobert // with, e.g.,
601*f6aab3d8Srobert // [[gnu::abi_tag("tag1","tag2")]] func()
602*f6aab3d8Srobert // and demangled/prettified into:
603*f6aab3d8Srobert // func[abi:tag1][abi:tag2]()
604*f6aab3d8Srobert
605*f6aab3d8Srobert // ABI tags only appear after a method or type name
606*f6aab3d8Srobert const bool valid_state =
607*f6aab3d8Srobert state == State::AfterIdentifier || state == State::AfterOperator;
608*f6aab3d8Srobert if (!valid_state || !ConsumeAbiTag()) {
609*f6aab3d8Srobert continue_parsing = false;
610*f6aab3d8Srobert }
611*f6aab3d8Srobert
612*f6aab3d8Srobert break;
613*f6aab3d8Srobert }
614061da546Spatrick case tok::l_paren: {
615061da546Spatrick if (state == State::Beginning || state == State::AfterTwoColons) {
616061da546Spatrick // (anonymous namespace)
617061da546Spatrick if (ConsumeAnonymousNamespace()) {
618061da546Spatrick state = State::AfterIdentifier;
619061da546Spatrick break;
620061da546Spatrick }
621061da546Spatrick }
622061da546Spatrick
623061da546Spatrick // Type declared inside a function 'func()::Type'
624061da546Spatrick if (state != State::AfterIdentifier && state != State::AfterTemplate &&
625061da546Spatrick state != State::AfterOperator) {
626061da546Spatrick continue_parsing = false;
627061da546Spatrick break;
628061da546Spatrick }
629061da546Spatrick Bookmark l_paren_position = SetBookmark();
630061da546Spatrick // Consume the '(' ... ') [const]'.
631061da546Spatrick if (!ConsumeArguments()) {
632061da546Spatrick continue_parsing = false;
633061da546Spatrick break;
634061da546Spatrick }
635061da546Spatrick SkipFunctionQualifiers();
636061da546Spatrick
637061da546Spatrick // Consume '::'
638061da546Spatrick size_t coloncolon_position = GetCurrentPosition();
639061da546Spatrick if (!ConsumeToken(tok::coloncolon)) {
640061da546Spatrick continue_parsing = false;
641061da546Spatrick break;
642061da546Spatrick }
643061da546Spatrick l_paren_position.Remove();
644061da546Spatrick last_coloncolon_position = coloncolon_position;
645061da546Spatrick state = State::AfterTwoColons;
646061da546Spatrick break;
647061da546Spatrick }
648061da546Spatrick case tok::l_brace:
649061da546Spatrick if (state == State::Beginning || state == State::AfterTwoColons) {
650061da546Spatrick if (ConsumeLambda()) {
651061da546Spatrick state = State::AfterIdentifier;
652061da546Spatrick break;
653061da546Spatrick }
654061da546Spatrick }
655061da546Spatrick continue_parsing = false;
656061da546Spatrick break;
657061da546Spatrick case tok::coloncolon: // Type nesting delimiter.
658061da546Spatrick if (state != State::Beginning && state != State::AfterIdentifier &&
659061da546Spatrick state != State::AfterTemplate) {
660061da546Spatrick continue_parsing = false;
661061da546Spatrick break;
662061da546Spatrick }
663061da546Spatrick last_coloncolon_position = GetCurrentPosition();
664061da546Spatrick Advance();
665061da546Spatrick state = State::AfterTwoColons;
666061da546Spatrick break;
667061da546Spatrick case tok::less: // Template brackets.
668061da546Spatrick if (state != State::AfterIdentifier && state != State::AfterOperator) {
669061da546Spatrick continue_parsing = false;
670061da546Spatrick break;
671061da546Spatrick }
672061da546Spatrick if (!ConsumeTemplateArgs()) {
673061da546Spatrick continue_parsing = false;
674061da546Spatrick break;
675061da546Spatrick }
676061da546Spatrick state = State::AfterTemplate;
677061da546Spatrick break;
678061da546Spatrick case tok::kw_operator: // C++ operator overloading.
679061da546Spatrick if (state != State::Beginning && state != State::AfterTwoColons) {
680061da546Spatrick continue_parsing = false;
681061da546Spatrick break;
682061da546Spatrick }
683061da546Spatrick if (!ConsumeOperator()) {
684061da546Spatrick continue_parsing = false;
685061da546Spatrick break;
686061da546Spatrick }
687061da546Spatrick state = State::AfterOperator;
688061da546Spatrick break;
689061da546Spatrick case tok::tilde: // Destructor.
690061da546Spatrick if (state != State::Beginning && state != State::AfterTwoColons) {
691061da546Spatrick continue_parsing = false;
692061da546Spatrick break;
693061da546Spatrick }
694061da546Spatrick Advance();
695061da546Spatrick if (ConsumeToken(tok::raw_identifier)) {
696061da546Spatrick state = State::AfterIdentifier;
697061da546Spatrick } else {
698061da546Spatrick TakeBack();
699061da546Spatrick continue_parsing = false;
700061da546Spatrick }
701061da546Spatrick break;
702061da546Spatrick default:
703061da546Spatrick continue_parsing = false;
704061da546Spatrick break;
705061da546Spatrick }
706061da546Spatrick }
707061da546Spatrick
708061da546Spatrick if (state == State::AfterIdentifier || state == State::AfterOperator ||
709061da546Spatrick state == State::AfterTemplate) {
710061da546Spatrick ParsedNameRanges result;
711061da546Spatrick if (last_coloncolon_position) {
712*f6aab3d8Srobert result.context_range =
713*f6aab3d8Srobert Range(start_position.GetSavedPosition(), *last_coloncolon_position);
714061da546Spatrick result.basename_range =
715*f6aab3d8Srobert Range(*last_coloncolon_position + 1, GetCurrentPosition());
716061da546Spatrick } else {
717061da546Spatrick result.basename_range =
718061da546Spatrick Range(start_position.GetSavedPosition(), GetCurrentPosition());
719061da546Spatrick }
720061da546Spatrick start_position.Remove();
721061da546Spatrick return result;
722061da546Spatrick } else {
723*f6aab3d8Srobert return std::nullopt;
724061da546Spatrick }
725061da546Spatrick }
726061da546Spatrick
GetTextForRange(const Range & range)727061da546Spatrick llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
728061da546Spatrick if (range.empty())
729061da546Spatrick return llvm::StringRef();
730061da546Spatrick assert(range.begin_index < range.end_index);
731061da546Spatrick assert(range.begin_index < m_tokens.size());
732061da546Spatrick assert(range.end_index <= m_tokens.size());
733061da546Spatrick clang::Token &first_token = m_tokens[range.begin_index];
734061da546Spatrick clang::Token &last_token = m_tokens[range.end_index - 1];
735061da546Spatrick clang::SourceLocation start_loc = first_token.getLocation();
736061da546Spatrick clang::SourceLocation end_loc = last_token.getLocation();
737061da546Spatrick unsigned start_pos = start_loc.getRawEncoding();
738061da546Spatrick unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
739061da546Spatrick return m_text.take_front(end_pos).drop_front(start_pos);
740061da546Spatrick }
741061da546Spatrick
GetLangOptions()742061da546Spatrick static const clang::LangOptions &GetLangOptions() {
743061da546Spatrick static clang::LangOptions g_options;
744061da546Spatrick static llvm::once_flag g_once_flag;
745061da546Spatrick llvm::call_once(g_once_flag, []() {
746061da546Spatrick g_options.LineComment = true;
747061da546Spatrick g_options.C99 = true;
748061da546Spatrick g_options.C11 = true;
749061da546Spatrick g_options.CPlusPlus = true;
750061da546Spatrick g_options.CPlusPlus11 = true;
751061da546Spatrick g_options.CPlusPlus14 = true;
752061da546Spatrick g_options.CPlusPlus17 = true;
753061da546Spatrick });
754061da546Spatrick return g_options;
755061da546Spatrick }
756061da546Spatrick
GetKeywordsMap()757061da546Spatrick static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
758061da546Spatrick static llvm::StringMap<tok::TokenKind> g_map{
759061da546Spatrick #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
760061da546Spatrick #include "clang/Basic/TokenKinds.def"
761061da546Spatrick #undef KEYWORD
762061da546Spatrick };
763061da546Spatrick return g_map;
764061da546Spatrick }
765061da546Spatrick
ExtractTokens()766061da546Spatrick void CPlusPlusNameParser::ExtractTokens() {
767061da546Spatrick if (m_text.empty())
768061da546Spatrick return;
769061da546Spatrick clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
770061da546Spatrick m_text.data(), m_text.data() + m_text.size());
771061da546Spatrick const auto &kw_map = GetKeywordsMap();
772061da546Spatrick clang::Token token;
773061da546Spatrick for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
774061da546Spatrick lexer.LexFromRawLexer(token)) {
775061da546Spatrick if (token.is(clang::tok::raw_identifier)) {
776061da546Spatrick auto it = kw_map.find(token.getRawIdentifier());
777061da546Spatrick if (it != kw_map.end()) {
778061da546Spatrick token.setKind(it->getValue());
779061da546Spatrick }
780061da546Spatrick }
781061da546Spatrick
782061da546Spatrick m_tokens.push_back(token);
783061da546Spatrick }
784061da546Spatrick }
785