xref: /openbsd-src/gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1dda28197Spatrick //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick 
9061da546Spatrick #include "CPlusPlusNameParser.h"
10061da546Spatrick 
11061da546Spatrick #include "clang/Basic/IdentifierTable.h"
12*f6aab3d8Srobert #include "clang/Basic/TokenKinds.h"
13061da546Spatrick #include "llvm/ADT/StringMap.h"
14061da546Spatrick #include "llvm/Support/Threading.h"
15*f6aab3d8Srobert #include <optional>
16061da546Spatrick 
17061da546Spatrick using namespace lldb;
18061da546Spatrick using namespace lldb_private;
19061da546Spatrick using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
20061da546Spatrick using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
21061da546Spatrick namespace tok = clang::tok;
22061da546Spatrick 
ParseAsFunctionDefinition()23*f6aab3d8Srobert std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
24061da546Spatrick   m_next_token_index = 0;
25*f6aab3d8Srobert   std::optional<ParsedFunction> result(std::nullopt);
26061da546Spatrick 
27061da546Spatrick   // Try to parse the name as function without a return type specified e.g.
28061da546Spatrick   // main(int, char*[])
29061da546Spatrick   {
30061da546Spatrick     Bookmark start_position = SetBookmark();
31061da546Spatrick     result = ParseFunctionImpl(false);
32061da546Spatrick     if (result && !HasMoreTokens())
33061da546Spatrick       return result;
34061da546Spatrick   }
35061da546Spatrick 
36061da546Spatrick   // Try to parse the name as function with function pointer return type e.g.
37061da546Spatrick   // void (*get_func(const char*))()
38061da546Spatrick   result = ParseFuncPtr(true);
39061da546Spatrick   if (result)
40061da546Spatrick     return result;
41061da546Spatrick 
42061da546Spatrick   // Finally try to parse the name as a function with non-function return type
43061da546Spatrick   // e.g. int main(int, char*[])
44061da546Spatrick   result = ParseFunctionImpl(true);
45061da546Spatrick   if (HasMoreTokens())
46*f6aab3d8Srobert     return std::nullopt;
47061da546Spatrick   return result;
48061da546Spatrick }
49061da546Spatrick 
ParseAsFullName()50*f6aab3d8Srobert std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
51061da546Spatrick   m_next_token_index = 0;
52*f6aab3d8Srobert   std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53061da546Spatrick   if (!name_ranges)
54*f6aab3d8Srobert     return std::nullopt;
55061da546Spatrick   if (HasMoreTokens())
56*f6aab3d8Srobert     return std::nullopt;
57061da546Spatrick   ParsedName result;
58*f6aab3d8Srobert   result.basename = GetTextForRange(name_ranges->basename_range);
59*f6aab3d8Srobert   result.context = GetTextForRange(name_ranges->context_range);
60061da546Spatrick   return result;
61061da546Spatrick }
62061da546Spatrick 
HasMoreTokens()63061da546Spatrick bool CPlusPlusNameParser::HasMoreTokens() {
64061da546Spatrick   return m_next_token_index < m_tokens.size();
65061da546Spatrick }
66061da546Spatrick 
Advance()67061da546Spatrick void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
68061da546Spatrick 
TakeBack()69061da546Spatrick void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
70061da546Spatrick 
ConsumeToken(tok::TokenKind kind)71061da546Spatrick bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72061da546Spatrick   if (!HasMoreTokens())
73061da546Spatrick     return false;
74061da546Spatrick 
75061da546Spatrick   if (!Peek().is(kind))
76061da546Spatrick     return false;
77061da546Spatrick 
78061da546Spatrick   Advance();
79061da546Spatrick   return true;
80061da546Spatrick }
81061da546Spatrick 
ConsumeToken(Ts...kinds)82061da546Spatrick template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83061da546Spatrick   if (!HasMoreTokens())
84061da546Spatrick     return false;
85061da546Spatrick 
86061da546Spatrick   if (!Peek().isOneOf(kinds...))
87061da546Spatrick     return false;
88061da546Spatrick 
89061da546Spatrick   Advance();
90061da546Spatrick   return true;
91061da546Spatrick }
92061da546Spatrick 
SetBookmark()93061da546Spatrick CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
94061da546Spatrick   return Bookmark(m_next_token_index);
95061da546Spatrick }
96061da546Spatrick 
GetCurrentPosition()97061da546Spatrick size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
98061da546Spatrick 
Peek()99061da546Spatrick clang::Token &CPlusPlusNameParser::Peek() {
100061da546Spatrick   assert(HasMoreTokens());
101061da546Spatrick   return m_tokens[m_next_token_index];
102061da546Spatrick }
103061da546Spatrick 
104*f6aab3d8Srobert std::optional<ParsedFunction>
ParseFunctionImpl(bool expect_return_type)105061da546Spatrick CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
106061da546Spatrick   Bookmark start_position = SetBookmark();
107*f6aab3d8Srobert 
108*f6aab3d8Srobert   ParsedFunction result;
109061da546Spatrick   if (expect_return_type) {
110*f6aab3d8Srobert     size_t return_start = GetCurrentPosition();
111061da546Spatrick     // Consume return type if it's expected.
112*f6aab3d8Srobert     if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
113*f6aab3d8Srobert       return std::nullopt;
114*f6aab3d8Srobert 
115*f6aab3d8Srobert     size_t return_end = GetCurrentPosition();
116*f6aab3d8Srobert     result.return_type = GetTextForRange(Range(return_start, return_end));
117061da546Spatrick   }
118061da546Spatrick 
119061da546Spatrick   auto maybe_name = ParseFullNameImpl();
120061da546Spatrick   if (!maybe_name) {
121*f6aab3d8Srobert     return std::nullopt;
122061da546Spatrick   }
123061da546Spatrick 
124061da546Spatrick   size_t argument_start = GetCurrentPosition();
125061da546Spatrick   if (!ConsumeArguments()) {
126*f6aab3d8Srobert     return std::nullopt;
127061da546Spatrick   }
128061da546Spatrick 
129061da546Spatrick   size_t qualifiers_start = GetCurrentPosition();
130061da546Spatrick   SkipFunctionQualifiers();
131061da546Spatrick   size_t end_position = GetCurrentPosition();
132061da546Spatrick 
133*f6aab3d8Srobert   result.name.basename = GetTextForRange(maybe_name->basename_range);
134*f6aab3d8Srobert   result.name.context = GetTextForRange(maybe_name->context_range);
135061da546Spatrick   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
136061da546Spatrick   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
137061da546Spatrick   start_position.Remove();
138061da546Spatrick   return result;
139061da546Spatrick }
140061da546Spatrick 
141*f6aab3d8Srobert std::optional<ParsedFunction>
ParseFuncPtr(bool expect_return_type)142061da546Spatrick CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
143*f6aab3d8Srobert   // This function parses a function definition
144*f6aab3d8Srobert   // that returns a pointer type.
145*f6aab3d8Srobert   // E.g., double (*(*func(long))(int))(float)
146*f6aab3d8Srobert 
147*f6aab3d8Srobert   // Step 1:
148*f6aab3d8Srobert   // Remove the return type of the innermost
149*f6aab3d8Srobert   // function pointer type.
150*f6aab3d8Srobert   //
151*f6aab3d8Srobert   // Leaves us with:
152*f6aab3d8Srobert   //   (*(*func(long))(int))(float)
153061da546Spatrick   Bookmark start_position = SetBookmark();
154061da546Spatrick   if (expect_return_type) {
155061da546Spatrick     // Consume return type.
156061da546Spatrick     if (!ConsumeTypename())
157*f6aab3d8Srobert       return std::nullopt;
158061da546Spatrick   }
159061da546Spatrick 
160*f6aab3d8Srobert   // Step 2:
161*f6aab3d8Srobert   //
162*f6aab3d8Srobert   // Skip a pointer and parenthesis pair.
163*f6aab3d8Srobert   //
164*f6aab3d8Srobert   // Leaves us with:
165*f6aab3d8Srobert   //   (*func(long))(int))(float)
166061da546Spatrick   if (!ConsumeToken(tok::l_paren))
167*f6aab3d8Srobert     return std::nullopt;
168061da546Spatrick   if (!ConsumePtrsAndRefs())
169*f6aab3d8Srobert     return std::nullopt;
170061da546Spatrick 
171*f6aab3d8Srobert   // Step 3:
172*f6aab3d8Srobert   //
173*f6aab3d8Srobert   // Consume inner function name. This will fail unless
174*f6aab3d8Srobert   // we stripped all the pointers on the left hand side
175*f6aab3d8Srobert   // of the funciton name.
176061da546Spatrick   {
177061da546Spatrick     Bookmark before_inner_function_pos = SetBookmark();
178061da546Spatrick     auto maybe_inner_function_name = ParseFunctionImpl(false);
179061da546Spatrick     if (maybe_inner_function_name)
180061da546Spatrick       if (ConsumeToken(tok::r_paren))
181061da546Spatrick         if (ConsumeArguments()) {
182061da546Spatrick           SkipFunctionQualifiers();
183061da546Spatrick           start_position.Remove();
184061da546Spatrick           before_inner_function_pos.Remove();
185061da546Spatrick           return maybe_inner_function_name;
186061da546Spatrick         }
187061da546Spatrick   }
188061da546Spatrick 
189*f6aab3d8Srobert   // Step 4:
190*f6aab3d8Srobert   //
191*f6aab3d8Srobert   // Parse the remaining string as a function pointer again.
192*f6aab3d8Srobert   // This time don't consume the inner-most typename since
193*f6aab3d8Srobert   // we're left with pointers only. This will strip another
194*f6aab3d8Srobert   // layer of pointers until we're left with the innermost
195*f6aab3d8Srobert   // function name/argument. I.e., func(long))(int))(float)
196*f6aab3d8Srobert   //
197*f6aab3d8Srobert   // Once we successfully stripped all pointers and gotten
198*f6aab3d8Srobert   // the innermost function name from ParseFunctionImpl above,
199*f6aab3d8Srobert   // we consume a single ')' and the arguments '(...)' that follows.
200*f6aab3d8Srobert   //
201*f6aab3d8Srobert   // Leaves us with:
202*f6aab3d8Srobert   //   )(float)
203*f6aab3d8Srobert   //
204*f6aab3d8Srobert   // This is the remnant of the outer function pointers' arguments.
205*f6aab3d8Srobert   // Unwinding the recursive calls will remove the remaining
206*f6aab3d8Srobert   // arguments.
207061da546Spatrick   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
208061da546Spatrick   if (maybe_inner_function_ptr_name)
209061da546Spatrick     if (ConsumeToken(tok::r_paren))
210061da546Spatrick       if (ConsumeArguments()) {
211061da546Spatrick         SkipFunctionQualifiers();
212061da546Spatrick         start_position.Remove();
213061da546Spatrick         return maybe_inner_function_ptr_name;
214061da546Spatrick       }
215*f6aab3d8Srobert 
216*f6aab3d8Srobert   return std::nullopt;
217061da546Spatrick }
218061da546Spatrick 
ConsumeArguments()219061da546Spatrick bool CPlusPlusNameParser::ConsumeArguments() {
220061da546Spatrick   return ConsumeBrackets(tok::l_paren, tok::r_paren);
221061da546Spatrick }
222061da546Spatrick 
ConsumeTemplateArgs()223061da546Spatrick bool CPlusPlusNameParser::ConsumeTemplateArgs() {
224061da546Spatrick   Bookmark start_position = SetBookmark();
225061da546Spatrick   if (!HasMoreTokens() || Peek().getKind() != tok::less)
226061da546Spatrick     return false;
227061da546Spatrick   Advance();
228061da546Spatrick 
229061da546Spatrick   // Consuming template arguments is a bit trickier than consuming function
230061da546Spatrick   // arguments, because '<' '>' brackets are not always trivially balanced. In
231061da546Spatrick   // some rare cases tokens '<' and '>' can appear inside template arguments as
232061da546Spatrick   // arithmetic or shift operators not as template brackets. Examples:
233061da546Spatrick   // std::enable_if<(10u)<(64), bool>
234061da546Spatrick   //           f<A<operator<(X,Y)::Subclass>>
235061da546Spatrick   // Good thing that compiler makes sure that really ambiguous cases of '>'
236061da546Spatrick   // usage should be enclosed within '()' brackets.
237061da546Spatrick   int template_counter = 1;
238061da546Spatrick   bool can_open_template = false;
239061da546Spatrick   while (HasMoreTokens() && template_counter > 0) {
240061da546Spatrick     tok::TokenKind kind = Peek().getKind();
241061da546Spatrick     switch (kind) {
242061da546Spatrick     case tok::greatergreater:
243061da546Spatrick       template_counter -= 2;
244061da546Spatrick       can_open_template = false;
245061da546Spatrick       Advance();
246061da546Spatrick       break;
247061da546Spatrick     case tok::greater:
248061da546Spatrick       --template_counter;
249061da546Spatrick       can_open_template = false;
250061da546Spatrick       Advance();
251061da546Spatrick       break;
252061da546Spatrick     case tok::less:
253061da546Spatrick       // '<' is an attempt to open a subteamplte
254061da546Spatrick       // check if parser is at the point where it's actually possible,
255061da546Spatrick       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
256061da546Spatrick       // need to do the same for '>' because compiler actually makes sure that
257061da546Spatrick       // '>' always surrounded by brackets to avoid ambiguity.
258061da546Spatrick       if (can_open_template)
259061da546Spatrick         ++template_counter;
260061da546Spatrick       can_open_template = false;
261061da546Spatrick       Advance();
262061da546Spatrick       break;
263061da546Spatrick     case tok::kw_operator: // C++ operator overloading.
264061da546Spatrick       if (!ConsumeOperator())
265061da546Spatrick         return false;
266061da546Spatrick       can_open_template = true;
267061da546Spatrick       break;
268061da546Spatrick     case tok::raw_identifier:
269061da546Spatrick       can_open_template = true;
270061da546Spatrick       Advance();
271061da546Spatrick       break;
272061da546Spatrick     case tok::l_square:
273*f6aab3d8Srobert       // Handle templates tagged with an ABI tag.
274*f6aab3d8Srobert       // An example demangled/prettified version is:
275*f6aab3d8Srobert       //   func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
276*f6aab3d8Srobert       if (ConsumeAbiTag())
277*f6aab3d8Srobert         can_open_template = true;
278*f6aab3d8Srobert       else if (ConsumeBrackets(tok::l_square, tok::r_square))
279061da546Spatrick         can_open_template = false;
280*f6aab3d8Srobert       else
281*f6aab3d8Srobert         return false;
282061da546Spatrick       break;
283061da546Spatrick     case tok::l_paren:
284061da546Spatrick       if (!ConsumeArguments())
285061da546Spatrick         return false;
286061da546Spatrick       can_open_template = false;
287061da546Spatrick       break;
288061da546Spatrick     default:
289061da546Spatrick       can_open_template = false;
290061da546Spatrick       Advance();
291061da546Spatrick       break;
292061da546Spatrick     }
293061da546Spatrick   }
294061da546Spatrick 
295061da546Spatrick   if (template_counter != 0) {
296061da546Spatrick     return false;
297061da546Spatrick   }
298061da546Spatrick   start_position.Remove();
299061da546Spatrick   return true;
300061da546Spatrick }
301061da546Spatrick 
ConsumeAbiTag()302*f6aab3d8Srobert bool CPlusPlusNameParser::ConsumeAbiTag() {
303*f6aab3d8Srobert   Bookmark start_position = SetBookmark();
304*f6aab3d8Srobert   if (!ConsumeToken(tok::l_square))
305*f6aab3d8Srobert     return false;
306*f6aab3d8Srobert 
307*f6aab3d8Srobert   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
308*f6aab3d8Srobert       Peek().getRawIdentifier() == "abi")
309*f6aab3d8Srobert     Advance();
310*f6aab3d8Srobert   else
311*f6aab3d8Srobert     return false;
312*f6aab3d8Srobert 
313*f6aab3d8Srobert   if (!ConsumeToken(tok::colon))
314*f6aab3d8Srobert     return false;
315*f6aab3d8Srobert 
316*f6aab3d8Srobert   // Consume the actual tag string (and allow some special characters)
317*f6aab3d8Srobert   while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
318*f6aab3d8Srobert                       tok::numeric_constant))
319*f6aab3d8Srobert     ;
320*f6aab3d8Srobert 
321*f6aab3d8Srobert   if (!ConsumeToken(tok::r_square))
322*f6aab3d8Srobert     return false;
323*f6aab3d8Srobert 
324*f6aab3d8Srobert   start_position.Remove();
325*f6aab3d8Srobert   return true;
326*f6aab3d8Srobert }
327*f6aab3d8Srobert 
ConsumeAnonymousNamespace()328061da546Spatrick bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
329061da546Spatrick   Bookmark start_position = SetBookmark();
330061da546Spatrick   if (!ConsumeToken(tok::l_paren)) {
331061da546Spatrick     return false;
332061da546Spatrick   }
333061da546Spatrick   constexpr llvm::StringLiteral g_anonymous("anonymous");
334061da546Spatrick   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
335061da546Spatrick       Peek().getRawIdentifier() == g_anonymous) {
336061da546Spatrick     Advance();
337061da546Spatrick   } else {
338061da546Spatrick     return false;
339061da546Spatrick   }
340061da546Spatrick 
341061da546Spatrick   if (!ConsumeToken(tok::kw_namespace)) {
342061da546Spatrick     return false;
343061da546Spatrick   }
344061da546Spatrick 
345061da546Spatrick   if (!ConsumeToken(tok::r_paren)) {
346061da546Spatrick     return false;
347061da546Spatrick   }
348061da546Spatrick   start_position.Remove();
349061da546Spatrick   return true;
350061da546Spatrick }
351061da546Spatrick 
ConsumeLambda()352061da546Spatrick bool CPlusPlusNameParser::ConsumeLambda() {
353061da546Spatrick   Bookmark start_position = SetBookmark();
354061da546Spatrick   if (!ConsumeToken(tok::l_brace)) {
355061da546Spatrick     return false;
356061da546Spatrick   }
357061da546Spatrick   constexpr llvm::StringLiteral g_lambda("lambda");
358061da546Spatrick   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
359061da546Spatrick       Peek().getRawIdentifier() == g_lambda) {
360061da546Spatrick     // Put the matched brace back so we can use ConsumeBrackets
361061da546Spatrick     TakeBack();
362061da546Spatrick   } else {
363061da546Spatrick     return false;
364061da546Spatrick   }
365061da546Spatrick 
366061da546Spatrick   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
367061da546Spatrick     return false;
368061da546Spatrick   }
369061da546Spatrick 
370061da546Spatrick   start_position.Remove();
371061da546Spatrick   return true;
372061da546Spatrick }
373061da546Spatrick 
ConsumeBrackets(tok::TokenKind left,tok::TokenKind right)374061da546Spatrick bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
375061da546Spatrick                                           tok::TokenKind right) {
376061da546Spatrick   Bookmark start_position = SetBookmark();
377061da546Spatrick   if (!HasMoreTokens() || Peek().getKind() != left)
378061da546Spatrick     return false;
379061da546Spatrick   Advance();
380061da546Spatrick 
381061da546Spatrick   int counter = 1;
382061da546Spatrick   while (HasMoreTokens() && counter > 0) {
383061da546Spatrick     tok::TokenKind kind = Peek().getKind();
384061da546Spatrick     if (kind == right)
385061da546Spatrick       --counter;
386061da546Spatrick     else if (kind == left)
387061da546Spatrick       ++counter;
388061da546Spatrick     Advance();
389061da546Spatrick   }
390061da546Spatrick 
391061da546Spatrick   assert(counter >= 0);
392061da546Spatrick   if (counter > 0) {
393061da546Spatrick     return false;
394061da546Spatrick   }
395061da546Spatrick   start_position.Remove();
396061da546Spatrick   return true;
397061da546Spatrick }
398061da546Spatrick 
ConsumeOperator()399061da546Spatrick bool CPlusPlusNameParser::ConsumeOperator() {
400061da546Spatrick   Bookmark start_position = SetBookmark();
401061da546Spatrick   if (!ConsumeToken(tok::kw_operator))
402061da546Spatrick     return false;
403061da546Spatrick 
404061da546Spatrick   if (!HasMoreTokens()) {
405061da546Spatrick     return false;
406061da546Spatrick   }
407061da546Spatrick 
408061da546Spatrick   const auto &token = Peek();
409dda28197Spatrick 
410dda28197Spatrick   // When clang generates debug info it adds template parameters to names.
411dda28197Spatrick   // Since clang doesn't add a space between the name and the template parameter
412dda28197Spatrick   // in some cases we are not generating valid C++ names e.g.:
413dda28197Spatrick   //
414dda28197Spatrick   //   operator<<A::B>
415dda28197Spatrick   //
416dda28197Spatrick   // In some of these cases we will not parse them correctly. This fixes the
417dda28197Spatrick   // issue by detecting this case and inserting tok::less in place of
418dda28197Spatrick   // tok::lessless and returning successfully that we consumed the operator.
419dda28197Spatrick   if (token.getKind() == tok::lessless) {
420dda28197Spatrick     // Make sure we have more tokens before attempting to look ahead one more.
421dda28197Spatrick     if (m_next_token_index + 1 < m_tokens.size()) {
422dda28197Spatrick       // Look ahead two tokens.
423dda28197Spatrick       clang::Token n_token = m_tokens[m_next_token_index + 1];
424dda28197Spatrick       // If we find ( or < then this is indeed operator<< no need for fix.
425dda28197Spatrick       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
426dda28197Spatrick         clang::Token tmp_tok;
427dda28197Spatrick         tmp_tok.startToken();
428dda28197Spatrick         tmp_tok.setLength(1);
429dda28197Spatrick         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
430dda28197Spatrick         tmp_tok.setKind(tok::less);
431dda28197Spatrick 
432dda28197Spatrick         m_tokens[m_next_token_index] = tmp_tok;
433dda28197Spatrick 
434dda28197Spatrick         start_position.Remove();
435dda28197Spatrick         return true;
436dda28197Spatrick       }
437dda28197Spatrick     }
438dda28197Spatrick   }
439dda28197Spatrick 
440061da546Spatrick   switch (token.getKind()) {
441061da546Spatrick   case tok::kw_new:
442061da546Spatrick   case tok::kw_delete:
443061da546Spatrick     // This is 'new' or 'delete' operators.
444061da546Spatrick     Advance();
445061da546Spatrick     // Check for array new/delete.
446061da546Spatrick     if (HasMoreTokens() && Peek().is(tok::l_square)) {
447061da546Spatrick       // Consume the '[' and ']'.
448061da546Spatrick       if (!ConsumeBrackets(tok::l_square, tok::r_square))
449061da546Spatrick         return false;
450061da546Spatrick     }
451061da546Spatrick     break;
452061da546Spatrick 
453061da546Spatrick #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
454061da546Spatrick   case tok::Token:                                                             \
455061da546Spatrick     Advance();                                                                 \
456061da546Spatrick     break;
457061da546Spatrick #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
458061da546Spatrick #include "clang/Basic/OperatorKinds.def"
459061da546Spatrick #undef OVERLOADED_OPERATOR
460061da546Spatrick #undef OVERLOADED_OPERATOR_MULTI
461061da546Spatrick 
462061da546Spatrick   case tok::l_paren:
463061da546Spatrick     // Call operator consume '(' ... ')'.
464061da546Spatrick     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
465061da546Spatrick       break;
466061da546Spatrick     return false;
467061da546Spatrick 
468061da546Spatrick   case tok::l_square:
469061da546Spatrick     // This is a [] operator.
470061da546Spatrick     // Consume the '[' and ']'.
471061da546Spatrick     if (ConsumeBrackets(tok::l_square, tok::r_square))
472061da546Spatrick       break;
473061da546Spatrick     return false;
474061da546Spatrick 
475061da546Spatrick   default:
476061da546Spatrick     // This might be a cast operator.
477061da546Spatrick     if (ConsumeTypename())
478061da546Spatrick       break;
479061da546Spatrick     return false;
480061da546Spatrick   }
481061da546Spatrick   start_position.Remove();
482061da546Spatrick   return true;
483061da546Spatrick }
484061da546Spatrick 
SkipTypeQualifiers()485061da546Spatrick void CPlusPlusNameParser::SkipTypeQualifiers() {
486061da546Spatrick   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
487061da546Spatrick     ;
488061da546Spatrick }
489061da546Spatrick 
SkipFunctionQualifiers()490061da546Spatrick void CPlusPlusNameParser::SkipFunctionQualifiers() {
491061da546Spatrick   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
492061da546Spatrick     ;
493061da546Spatrick }
494061da546Spatrick 
ConsumeBuiltinType()495061da546Spatrick bool CPlusPlusNameParser::ConsumeBuiltinType() {
496061da546Spatrick   bool result = false;
497061da546Spatrick   bool continue_parsing = true;
498061da546Spatrick   // Built-in types can be made of a few keywords like 'unsigned long long
499061da546Spatrick   // int'. This function consumes all built-in type keywords without checking
500061da546Spatrick   // if they make sense like 'unsigned char void'.
501061da546Spatrick   while (continue_parsing && HasMoreTokens()) {
502061da546Spatrick     switch (Peek().getKind()) {
503061da546Spatrick     case tok::kw_short:
504061da546Spatrick     case tok::kw_long:
505061da546Spatrick     case tok::kw___int64:
506061da546Spatrick     case tok::kw___int128:
507061da546Spatrick     case tok::kw_signed:
508061da546Spatrick     case tok::kw_unsigned:
509061da546Spatrick     case tok::kw_void:
510061da546Spatrick     case tok::kw_char:
511061da546Spatrick     case tok::kw_int:
512061da546Spatrick     case tok::kw_half:
513061da546Spatrick     case tok::kw_float:
514061da546Spatrick     case tok::kw_double:
515061da546Spatrick     case tok::kw___float128:
516061da546Spatrick     case tok::kw_wchar_t:
517061da546Spatrick     case tok::kw_bool:
518061da546Spatrick     case tok::kw_char16_t:
519061da546Spatrick     case tok::kw_char32_t:
520061da546Spatrick       result = true;
521061da546Spatrick       Advance();
522061da546Spatrick       break;
523061da546Spatrick     default:
524061da546Spatrick       continue_parsing = false;
525061da546Spatrick       break;
526061da546Spatrick     }
527061da546Spatrick   }
528061da546Spatrick   return result;
529061da546Spatrick }
530061da546Spatrick 
SkipPtrsAndRefs()531061da546Spatrick void CPlusPlusNameParser::SkipPtrsAndRefs() {
532061da546Spatrick   // Ignoring result.
533061da546Spatrick   ConsumePtrsAndRefs();
534061da546Spatrick }
535061da546Spatrick 
ConsumePtrsAndRefs()536061da546Spatrick bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
537061da546Spatrick   bool found = false;
538061da546Spatrick   SkipTypeQualifiers();
539061da546Spatrick   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
540061da546Spatrick                       tok::kw_volatile)) {
541061da546Spatrick     found = true;
542061da546Spatrick     SkipTypeQualifiers();
543061da546Spatrick   }
544061da546Spatrick   return found;
545061da546Spatrick }
546061da546Spatrick 
ConsumeDecltype()547061da546Spatrick bool CPlusPlusNameParser::ConsumeDecltype() {
548061da546Spatrick   Bookmark start_position = SetBookmark();
549061da546Spatrick   if (!ConsumeToken(tok::kw_decltype))
550061da546Spatrick     return false;
551061da546Spatrick 
552061da546Spatrick   if (!ConsumeArguments())
553061da546Spatrick     return false;
554061da546Spatrick 
555061da546Spatrick   start_position.Remove();
556061da546Spatrick   return true;
557061da546Spatrick }
558061da546Spatrick 
ConsumeTypename()559061da546Spatrick bool CPlusPlusNameParser::ConsumeTypename() {
560061da546Spatrick   Bookmark start_position = SetBookmark();
561061da546Spatrick   SkipTypeQualifiers();
562061da546Spatrick   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
563061da546Spatrick     if (!ParseFullNameImpl())
564061da546Spatrick       return false;
565061da546Spatrick   }
566061da546Spatrick   SkipPtrsAndRefs();
567061da546Spatrick   start_position.Remove();
568061da546Spatrick   return true;
569061da546Spatrick }
570061da546Spatrick 
571*f6aab3d8Srobert std::optional<CPlusPlusNameParser::ParsedNameRanges>
ParseFullNameImpl()572061da546Spatrick CPlusPlusNameParser::ParseFullNameImpl() {
573061da546Spatrick   // Name parsing state machine.
574061da546Spatrick   enum class State {
575061da546Spatrick     Beginning,       // start of the name
576061da546Spatrick     AfterTwoColons,  // right after ::
577061da546Spatrick     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
578061da546Spatrick     AfterTemplate,   // right after template brackets (<something>)
579061da546Spatrick     AfterOperator,   // right after name of C++ operator
580061da546Spatrick   };
581061da546Spatrick 
582061da546Spatrick   Bookmark start_position = SetBookmark();
583061da546Spatrick   State state = State::Beginning;
584061da546Spatrick   bool continue_parsing = true;
585*f6aab3d8Srobert   std::optional<size_t> last_coloncolon_position;
586061da546Spatrick 
587061da546Spatrick   while (continue_parsing && HasMoreTokens()) {
588061da546Spatrick     const auto &token = Peek();
589061da546Spatrick     switch (token.getKind()) {
590061da546Spatrick     case tok::raw_identifier: // Just a name.
591061da546Spatrick       if (state != State::Beginning && state != State::AfterTwoColons) {
592061da546Spatrick         continue_parsing = false;
593061da546Spatrick         break;
594061da546Spatrick       }
595061da546Spatrick       Advance();
596061da546Spatrick       state = State::AfterIdentifier;
597061da546Spatrick       break;
598*f6aab3d8Srobert     case tok::l_square: {
599*f6aab3d8Srobert       // Handles types or functions that were tagged
600*f6aab3d8Srobert       // with, e.g.,
601*f6aab3d8Srobert       //   [[gnu::abi_tag("tag1","tag2")]] func()
602*f6aab3d8Srobert       // and demangled/prettified into:
603*f6aab3d8Srobert       //   func[abi:tag1][abi:tag2]()
604*f6aab3d8Srobert 
605*f6aab3d8Srobert       // ABI tags only appear after a method or type name
606*f6aab3d8Srobert       const bool valid_state =
607*f6aab3d8Srobert           state == State::AfterIdentifier || state == State::AfterOperator;
608*f6aab3d8Srobert       if (!valid_state || !ConsumeAbiTag()) {
609*f6aab3d8Srobert         continue_parsing = false;
610*f6aab3d8Srobert       }
611*f6aab3d8Srobert 
612*f6aab3d8Srobert       break;
613*f6aab3d8Srobert     }
614061da546Spatrick     case tok::l_paren: {
615061da546Spatrick       if (state == State::Beginning || state == State::AfterTwoColons) {
616061da546Spatrick         // (anonymous namespace)
617061da546Spatrick         if (ConsumeAnonymousNamespace()) {
618061da546Spatrick           state = State::AfterIdentifier;
619061da546Spatrick           break;
620061da546Spatrick         }
621061da546Spatrick       }
622061da546Spatrick 
623061da546Spatrick       // Type declared inside a function 'func()::Type'
624061da546Spatrick       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
625061da546Spatrick           state != State::AfterOperator) {
626061da546Spatrick         continue_parsing = false;
627061da546Spatrick         break;
628061da546Spatrick       }
629061da546Spatrick       Bookmark l_paren_position = SetBookmark();
630061da546Spatrick       // Consume the '(' ... ') [const]'.
631061da546Spatrick       if (!ConsumeArguments()) {
632061da546Spatrick         continue_parsing = false;
633061da546Spatrick         break;
634061da546Spatrick       }
635061da546Spatrick       SkipFunctionQualifiers();
636061da546Spatrick 
637061da546Spatrick       // Consume '::'
638061da546Spatrick       size_t coloncolon_position = GetCurrentPosition();
639061da546Spatrick       if (!ConsumeToken(tok::coloncolon)) {
640061da546Spatrick         continue_parsing = false;
641061da546Spatrick         break;
642061da546Spatrick       }
643061da546Spatrick       l_paren_position.Remove();
644061da546Spatrick       last_coloncolon_position = coloncolon_position;
645061da546Spatrick       state = State::AfterTwoColons;
646061da546Spatrick       break;
647061da546Spatrick     }
648061da546Spatrick     case tok::l_brace:
649061da546Spatrick       if (state == State::Beginning || state == State::AfterTwoColons) {
650061da546Spatrick         if (ConsumeLambda()) {
651061da546Spatrick           state = State::AfterIdentifier;
652061da546Spatrick           break;
653061da546Spatrick         }
654061da546Spatrick       }
655061da546Spatrick       continue_parsing = false;
656061da546Spatrick       break;
657061da546Spatrick     case tok::coloncolon: // Type nesting delimiter.
658061da546Spatrick       if (state != State::Beginning && state != State::AfterIdentifier &&
659061da546Spatrick           state != State::AfterTemplate) {
660061da546Spatrick         continue_parsing = false;
661061da546Spatrick         break;
662061da546Spatrick       }
663061da546Spatrick       last_coloncolon_position = GetCurrentPosition();
664061da546Spatrick       Advance();
665061da546Spatrick       state = State::AfterTwoColons;
666061da546Spatrick       break;
667061da546Spatrick     case tok::less: // Template brackets.
668061da546Spatrick       if (state != State::AfterIdentifier && state != State::AfterOperator) {
669061da546Spatrick         continue_parsing = false;
670061da546Spatrick         break;
671061da546Spatrick       }
672061da546Spatrick       if (!ConsumeTemplateArgs()) {
673061da546Spatrick         continue_parsing = false;
674061da546Spatrick         break;
675061da546Spatrick       }
676061da546Spatrick       state = State::AfterTemplate;
677061da546Spatrick       break;
678061da546Spatrick     case tok::kw_operator: // C++ operator overloading.
679061da546Spatrick       if (state != State::Beginning && state != State::AfterTwoColons) {
680061da546Spatrick         continue_parsing = false;
681061da546Spatrick         break;
682061da546Spatrick       }
683061da546Spatrick       if (!ConsumeOperator()) {
684061da546Spatrick         continue_parsing = false;
685061da546Spatrick         break;
686061da546Spatrick       }
687061da546Spatrick       state = State::AfterOperator;
688061da546Spatrick       break;
689061da546Spatrick     case tok::tilde: // Destructor.
690061da546Spatrick       if (state != State::Beginning && state != State::AfterTwoColons) {
691061da546Spatrick         continue_parsing = false;
692061da546Spatrick         break;
693061da546Spatrick       }
694061da546Spatrick       Advance();
695061da546Spatrick       if (ConsumeToken(tok::raw_identifier)) {
696061da546Spatrick         state = State::AfterIdentifier;
697061da546Spatrick       } else {
698061da546Spatrick         TakeBack();
699061da546Spatrick         continue_parsing = false;
700061da546Spatrick       }
701061da546Spatrick       break;
702061da546Spatrick     default:
703061da546Spatrick       continue_parsing = false;
704061da546Spatrick       break;
705061da546Spatrick     }
706061da546Spatrick   }
707061da546Spatrick 
708061da546Spatrick   if (state == State::AfterIdentifier || state == State::AfterOperator ||
709061da546Spatrick       state == State::AfterTemplate) {
710061da546Spatrick     ParsedNameRanges result;
711061da546Spatrick     if (last_coloncolon_position) {
712*f6aab3d8Srobert       result.context_range =
713*f6aab3d8Srobert           Range(start_position.GetSavedPosition(), *last_coloncolon_position);
714061da546Spatrick       result.basename_range =
715*f6aab3d8Srobert           Range(*last_coloncolon_position + 1, GetCurrentPosition());
716061da546Spatrick     } else {
717061da546Spatrick       result.basename_range =
718061da546Spatrick           Range(start_position.GetSavedPosition(), GetCurrentPosition());
719061da546Spatrick     }
720061da546Spatrick     start_position.Remove();
721061da546Spatrick     return result;
722061da546Spatrick   } else {
723*f6aab3d8Srobert     return std::nullopt;
724061da546Spatrick   }
725061da546Spatrick }
726061da546Spatrick 
GetTextForRange(const Range & range)727061da546Spatrick llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
728061da546Spatrick   if (range.empty())
729061da546Spatrick     return llvm::StringRef();
730061da546Spatrick   assert(range.begin_index < range.end_index);
731061da546Spatrick   assert(range.begin_index < m_tokens.size());
732061da546Spatrick   assert(range.end_index <= m_tokens.size());
733061da546Spatrick   clang::Token &first_token = m_tokens[range.begin_index];
734061da546Spatrick   clang::Token &last_token = m_tokens[range.end_index - 1];
735061da546Spatrick   clang::SourceLocation start_loc = first_token.getLocation();
736061da546Spatrick   clang::SourceLocation end_loc = last_token.getLocation();
737061da546Spatrick   unsigned start_pos = start_loc.getRawEncoding();
738061da546Spatrick   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
739061da546Spatrick   return m_text.take_front(end_pos).drop_front(start_pos);
740061da546Spatrick }
741061da546Spatrick 
GetLangOptions()742061da546Spatrick static const clang::LangOptions &GetLangOptions() {
743061da546Spatrick   static clang::LangOptions g_options;
744061da546Spatrick   static llvm::once_flag g_once_flag;
745061da546Spatrick   llvm::call_once(g_once_flag, []() {
746061da546Spatrick     g_options.LineComment = true;
747061da546Spatrick     g_options.C99 = true;
748061da546Spatrick     g_options.C11 = true;
749061da546Spatrick     g_options.CPlusPlus = true;
750061da546Spatrick     g_options.CPlusPlus11 = true;
751061da546Spatrick     g_options.CPlusPlus14 = true;
752061da546Spatrick     g_options.CPlusPlus17 = true;
753061da546Spatrick   });
754061da546Spatrick   return g_options;
755061da546Spatrick }
756061da546Spatrick 
GetKeywordsMap()757061da546Spatrick static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
758061da546Spatrick   static llvm::StringMap<tok::TokenKind> g_map{
759061da546Spatrick #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
760061da546Spatrick #include "clang/Basic/TokenKinds.def"
761061da546Spatrick #undef KEYWORD
762061da546Spatrick   };
763061da546Spatrick   return g_map;
764061da546Spatrick }
765061da546Spatrick 
ExtractTokens()766061da546Spatrick void CPlusPlusNameParser::ExtractTokens() {
767061da546Spatrick   if (m_text.empty())
768061da546Spatrick     return;
769061da546Spatrick   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
770061da546Spatrick                      m_text.data(), m_text.data() + m_text.size());
771061da546Spatrick   const auto &kw_map = GetKeywordsMap();
772061da546Spatrick   clang::Token token;
773061da546Spatrick   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
774061da546Spatrick        lexer.LexFromRawLexer(token)) {
775061da546Spatrick     if (token.is(clang::tok::raw_identifier)) {
776061da546Spatrick       auto it = kw_map.find(token.getRawIdentifier());
777061da546Spatrick       if (it != kw_map.end()) {
778061da546Spatrick         token.setKind(it->getValue());
779061da546Spatrick       }
780061da546Spatrick     }
781061da546Spatrick 
782061da546Spatrick     m_tokens.push_back(token);
783061da546Spatrick   }
784061da546Spatrick }
785