xref: /llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision 3cc9884500ad53e878045bc1d119d8a6b326f274)
1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "CPlusPlusNameParser.h"
10 
11 #include "clang/Basic/IdentifierTable.h"
12 #include "clang/Basic/TokenKinds.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified e.g.
29   // main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type e.g.
38   // void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   if (HasMoreTokens())
47     return None;
48   return result;
49 }
50 
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52   m_next_token_index = 0;
53   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54   if (!name_ranges)
55     return None;
56   if (HasMoreTokens())
57     return None;
58   ParsedName result;
59   result.basename = GetTextForRange(name_ranges.value().basename_range);
60   result.context = GetTextForRange(name_ranges.value().context_range);
61   return result;
62 }
63 
64 bool CPlusPlusNameParser::HasMoreTokens() {
65   return m_next_token_index < m_tokens.size();
66 }
67 
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69 
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71 
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73   if (!HasMoreTokens())
74     return false;
75 
76   if (!Peek().is(kind))
77     return false;
78 
79   Advance();
80   return true;
81 }
82 
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84   if (!HasMoreTokens())
85     return false;
86 
87   if (!Peek().isOneOf(kinds...))
88     return false;
89 
90   Advance();
91   return true;
92 }
93 
94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95   return Bookmark(m_next_token_index);
96 }
97 
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99 
100 clang::Token &CPlusPlusNameParser::Peek() {
101   assert(HasMoreTokens());
102   return m_tokens[m_next_token_index];
103 }
104 
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107   Bookmark start_position = SetBookmark();
108   if (expect_return_type) {
109     // Consume return type if it's expected.
110     if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
111       return None;
112   }
113 
114   auto maybe_name = ParseFullNameImpl();
115   if (!maybe_name) {
116     return None;
117   }
118 
119   size_t argument_start = GetCurrentPosition();
120   if (!ConsumeArguments()) {
121     return None;
122   }
123 
124   size_t qualifiers_start = GetCurrentPosition();
125   SkipFunctionQualifiers();
126   size_t end_position = GetCurrentPosition();
127 
128   ParsedFunction result;
129   result.name.basename = GetTextForRange(maybe_name.value().basename_range);
130   result.name.context = GetTextForRange(maybe_name.value().context_range);
131   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133   start_position.Remove();
134   return result;
135 }
136 
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139   Bookmark start_position = SetBookmark();
140   if (expect_return_type) {
141     // Consume return type.
142     if (!ConsumeTypename())
143       return None;
144   }
145 
146   if (!ConsumeToken(tok::l_paren))
147     return None;
148   if (!ConsumePtrsAndRefs())
149     return None;
150 
151   {
152     Bookmark before_inner_function_pos = SetBookmark();
153     auto maybe_inner_function_name = ParseFunctionImpl(false);
154     if (maybe_inner_function_name)
155       if (ConsumeToken(tok::r_paren))
156         if (ConsumeArguments()) {
157           SkipFunctionQualifiers();
158           start_position.Remove();
159           before_inner_function_pos.Remove();
160           return maybe_inner_function_name;
161         }
162   }
163 
164   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165   if (maybe_inner_function_ptr_name)
166     if (ConsumeToken(tok::r_paren))
167       if (ConsumeArguments()) {
168         SkipFunctionQualifiers();
169         start_position.Remove();
170         return maybe_inner_function_ptr_name;
171       }
172   return None;
173 }
174 
175 bool CPlusPlusNameParser::ConsumeArguments() {
176   return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178 
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180   Bookmark start_position = SetBookmark();
181   if (!HasMoreTokens() || Peek().getKind() != tok::less)
182     return false;
183   Advance();
184 
185   // Consuming template arguments is a bit trickier than consuming function
186   // arguments, because '<' '>' brackets are not always trivially balanced. In
187   // some rare cases tokens '<' and '>' can appear inside template arguments as
188   // arithmetic or shift operators not as template brackets. Examples:
189   // std::enable_if<(10u)<(64), bool>
190   //           f<A<operator<(X,Y)::Subclass>>
191   // Good thing that compiler makes sure that really ambiguous cases of '>'
192   // usage should be enclosed within '()' brackets.
193   int template_counter = 1;
194   bool can_open_template = false;
195   while (HasMoreTokens() && template_counter > 0) {
196     tok::TokenKind kind = Peek().getKind();
197     switch (kind) {
198     case tok::greatergreater:
199       template_counter -= 2;
200       can_open_template = false;
201       Advance();
202       break;
203     case tok::greater:
204       --template_counter;
205       can_open_template = false;
206       Advance();
207       break;
208     case tok::less:
209       // '<' is an attempt to open a subteamplte
210       // check if parser is at the point where it's actually possible,
211       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
212       // need to do the same for '>' because compiler actually makes sure that
213       // '>' always surrounded by brackets to avoid ambiguity.
214       if (can_open_template)
215         ++template_counter;
216       can_open_template = false;
217       Advance();
218       break;
219     case tok::kw_operator: // C++ operator overloading.
220       if (!ConsumeOperator())
221         return false;
222       can_open_template = true;
223       break;
224     case tok::raw_identifier:
225       can_open_template = true;
226       Advance();
227       break;
228     case tok::l_square:
229       // Handle templates tagged with an ABI tag.
230       // An example demangled/prettified version is:
231       //   func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
232       if (ConsumeAbiTag())
233         can_open_template = true;
234       else if (ConsumeBrackets(tok::l_square, tok::r_square))
235         can_open_template = false;
236       else
237         return false;
238       break;
239     case tok::l_paren:
240       if (!ConsumeArguments())
241         return false;
242       can_open_template = false;
243       break;
244     default:
245       can_open_template = false;
246       Advance();
247       break;
248     }
249   }
250 
251   if (template_counter != 0) {
252     return false;
253   }
254   start_position.Remove();
255   return true;
256 }
257 
258 bool CPlusPlusNameParser::ConsumeAbiTag() {
259   Bookmark start_position = SetBookmark();
260   if (!ConsumeToken(tok::l_square))
261     return false;
262 
263   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
264       Peek().getRawIdentifier() == "abi")
265     Advance();
266   else
267     return false;
268 
269   if (!ConsumeToken(tok::colon))
270     return false;
271 
272   // Consume the actual tag string (and allow some special characters)
273   while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
274                       tok::numeric_constant))
275     ;
276 
277   if (!ConsumeToken(tok::r_square))
278     return false;
279 
280   start_position.Remove();
281   return true;
282 }
283 
284 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
285   Bookmark start_position = SetBookmark();
286   if (!ConsumeToken(tok::l_paren)) {
287     return false;
288   }
289   constexpr llvm::StringLiteral g_anonymous("anonymous");
290   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
291       Peek().getRawIdentifier() == g_anonymous) {
292     Advance();
293   } else {
294     return false;
295   }
296 
297   if (!ConsumeToken(tok::kw_namespace)) {
298     return false;
299   }
300 
301   if (!ConsumeToken(tok::r_paren)) {
302     return false;
303   }
304   start_position.Remove();
305   return true;
306 }
307 
308 bool CPlusPlusNameParser::ConsumeLambda() {
309   Bookmark start_position = SetBookmark();
310   if (!ConsumeToken(tok::l_brace)) {
311     return false;
312   }
313   constexpr llvm::StringLiteral g_lambda("lambda");
314   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
315       Peek().getRawIdentifier() == g_lambda) {
316     // Put the matched brace back so we can use ConsumeBrackets
317     TakeBack();
318   } else {
319     return false;
320   }
321 
322   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
323     return false;
324   }
325 
326   start_position.Remove();
327   return true;
328 }
329 
330 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
331                                           tok::TokenKind right) {
332   Bookmark start_position = SetBookmark();
333   if (!HasMoreTokens() || Peek().getKind() != left)
334     return false;
335   Advance();
336 
337   int counter = 1;
338   while (HasMoreTokens() && counter > 0) {
339     tok::TokenKind kind = Peek().getKind();
340     if (kind == right)
341       --counter;
342     else if (kind == left)
343       ++counter;
344     Advance();
345   }
346 
347   assert(counter >= 0);
348   if (counter > 0) {
349     return false;
350   }
351   start_position.Remove();
352   return true;
353 }
354 
355 bool CPlusPlusNameParser::ConsumeOperator() {
356   Bookmark start_position = SetBookmark();
357   if (!ConsumeToken(tok::kw_operator))
358     return false;
359 
360   if (!HasMoreTokens()) {
361     return false;
362   }
363 
364   const auto &token = Peek();
365 
366   // When clang generates debug info it adds template parameters to names.
367   // Since clang doesn't add a space between the name and the template parameter
368   // in some cases we are not generating valid C++ names e.g.:
369   //
370   //   operator<<A::B>
371   //
372   // In some of these cases we will not parse them correctly. This fixes the
373   // issue by detecting this case and inserting tok::less in place of
374   // tok::lessless and returning successfully that we consumed the operator.
375   if (token.getKind() == tok::lessless) {
376     // Make sure we have more tokens before attempting to look ahead one more.
377     if (m_next_token_index + 1 < m_tokens.size()) {
378       // Look ahead two tokens.
379       clang::Token n_token = m_tokens[m_next_token_index + 1];
380       // If we find ( or < then this is indeed operator<< no need for fix.
381       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
382         clang::Token tmp_tok;
383         tmp_tok.startToken();
384         tmp_tok.setLength(1);
385         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
386         tmp_tok.setKind(tok::less);
387 
388         m_tokens[m_next_token_index] = tmp_tok;
389 
390         start_position.Remove();
391         return true;
392       }
393     }
394   }
395 
396   switch (token.getKind()) {
397   case tok::kw_new:
398   case tok::kw_delete:
399     // This is 'new' or 'delete' operators.
400     Advance();
401     // Check for array new/delete.
402     if (HasMoreTokens() && Peek().is(tok::l_square)) {
403       // Consume the '[' and ']'.
404       if (!ConsumeBrackets(tok::l_square, tok::r_square))
405         return false;
406     }
407     break;
408 
409 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
410   case tok::Token:                                                             \
411     Advance();                                                                 \
412     break;
413 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
414 #include "clang/Basic/OperatorKinds.def"
415 #undef OVERLOADED_OPERATOR
416 #undef OVERLOADED_OPERATOR_MULTI
417 
418   case tok::l_paren:
419     // Call operator consume '(' ... ')'.
420     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
421       break;
422     return false;
423 
424   case tok::l_square:
425     // This is a [] operator.
426     // Consume the '[' and ']'.
427     if (ConsumeBrackets(tok::l_square, tok::r_square))
428       break;
429     return false;
430 
431   default:
432     // This might be a cast operator.
433     if (ConsumeTypename())
434       break;
435     return false;
436   }
437   start_position.Remove();
438   return true;
439 }
440 
441 void CPlusPlusNameParser::SkipTypeQualifiers() {
442   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
443     ;
444 }
445 
446 void CPlusPlusNameParser::SkipFunctionQualifiers() {
447   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
448     ;
449 }
450 
451 bool CPlusPlusNameParser::ConsumeBuiltinType() {
452   bool result = false;
453   bool continue_parsing = true;
454   // Built-in types can be made of a few keywords like 'unsigned long long
455   // int'. This function consumes all built-in type keywords without checking
456   // if they make sense like 'unsigned char void'.
457   while (continue_parsing && HasMoreTokens()) {
458     switch (Peek().getKind()) {
459     case tok::kw_short:
460     case tok::kw_long:
461     case tok::kw___int64:
462     case tok::kw___int128:
463     case tok::kw_signed:
464     case tok::kw_unsigned:
465     case tok::kw_void:
466     case tok::kw_char:
467     case tok::kw_int:
468     case tok::kw_half:
469     case tok::kw_float:
470     case tok::kw_double:
471     case tok::kw___float128:
472     case tok::kw_wchar_t:
473     case tok::kw_bool:
474     case tok::kw_char16_t:
475     case tok::kw_char32_t:
476       result = true;
477       Advance();
478       break;
479     default:
480       continue_parsing = false;
481       break;
482     }
483   }
484   return result;
485 }
486 
487 void CPlusPlusNameParser::SkipPtrsAndRefs() {
488   // Ignoring result.
489   ConsumePtrsAndRefs();
490 }
491 
492 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
493   bool found = false;
494   SkipTypeQualifiers();
495   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
496                       tok::kw_volatile)) {
497     found = true;
498     SkipTypeQualifiers();
499   }
500   return found;
501 }
502 
503 bool CPlusPlusNameParser::ConsumeDecltype() {
504   Bookmark start_position = SetBookmark();
505   if (!ConsumeToken(tok::kw_decltype))
506     return false;
507 
508   if (!ConsumeArguments())
509     return false;
510 
511   start_position.Remove();
512   return true;
513 }
514 
515 bool CPlusPlusNameParser::ConsumeTypename() {
516   Bookmark start_position = SetBookmark();
517   SkipTypeQualifiers();
518   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
519     if (!ParseFullNameImpl())
520       return false;
521   }
522   SkipPtrsAndRefs();
523   start_position.Remove();
524   return true;
525 }
526 
527 Optional<CPlusPlusNameParser::ParsedNameRanges>
528 CPlusPlusNameParser::ParseFullNameImpl() {
529   // Name parsing state machine.
530   enum class State {
531     Beginning,       // start of the name
532     AfterTwoColons,  // right after ::
533     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
534     AfterTemplate,   // right after template brackets (<something>)
535     AfterOperator,   // right after name of C++ operator
536   };
537 
538   Bookmark start_position = SetBookmark();
539   State state = State::Beginning;
540   bool continue_parsing = true;
541   Optional<size_t> last_coloncolon_position;
542 
543   while (continue_parsing && HasMoreTokens()) {
544     const auto &token = Peek();
545     switch (token.getKind()) {
546     case tok::raw_identifier: // Just a name.
547       if (state != State::Beginning && state != State::AfterTwoColons) {
548         continue_parsing = false;
549         break;
550       }
551       Advance();
552       state = State::AfterIdentifier;
553       break;
554     case tok::l_square: {
555       // Handles types or functions that were tagged
556       // with, e.g.,
557       //   [[gnu::abi_tag("tag1","tag2")]] func()
558       // and demangled/prettified into:
559       //   func[abi:tag1][abi:tag2]()
560 
561       // ABI tags only appear after a method or type name
562       const bool valid_state =
563           state == State::AfterIdentifier || state == State::AfterOperator;
564       if (!valid_state || !ConsumeAbiTag()) {
565         continue_parsing = false;
566       }
567 
568       break;
569     }
570     case tok::l_paren: {
571       if (state == State::Beginning || state == State::AfterTwoColons) {
572         // (anonymous namespace)
573         if (ConsumeAnonymousNamespace()) {
574           state = State::AfterIdentifier;
575           break;
576         }
577       }
578 
579       // Type declared inside a function 'func()::Type'
580       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
581           state != State::AfterOperator) {
582         continue_parsing = false;
583         break;
584       }
585       Bookmark l_paren_position = SetBookmark();
586       // Consume the '(' ... ') [const]'.
587       if (!ConsumeArguments()) {
588         continue_parsing = false;
589         break;
590       }
591       SkipFunctionQualifiers();
592 
593       // Consume '::'
594       size_t coloncolon_position = GetCurrentPosition();
595       if (!ConsumeToken(tok::coloncolon)) {
596         continue_parsing = false;
597         break;
598       }
599       l_paren_position.Remove();
600       last_coloncolon_position = coloncolon_position;
601       state = State::AfterTwoColons;
602       break;
603     }
604     case tok::l_brace:
605       if (state == State::Beginning || state == State::AfterTwoColons) {
606         if (ConsumeLambda()) {
607           state = State::AfterIdentifier;
608           break;
609         }
610       }
611       continue_parsing = false;
612       break;
613     case tok::coloncolon: // Type nesting delimiter.
614       if (state != State::Beginning && state != State::AfterIdentifier &&
615           state != State::AfterTemplate) {
616         continue_parsing = false;
617         break;
618       }
619       last_coloncolon_position = GetCurrentPosition();
620       Advance();
621       state = State::AfterTwoColons;
622       break;
623     case tok::less: // Template brackets.
624       if (state != State::AfterIdentifier && state != State::AfterOperator) {
625         continue_parsing = false;
626         break;
627       }
628       if (!ConsumeTemplateArgs()) {
629         continue_parsing = false;
630         break;
631       }
632       state = State::AfterTemplate;
633       break;
634     case tok::kw_operator: // C++ operator overloading.
635       if (state != State::Beginning && state != State::AfterTwoColons) {
636         continue_parsing = false;
637         break;
638       }
639       if (!ConsumeOperator()) {
640         continue_parsing = false;
641         break;
642       }
643       state = State::AfterOperator;
644       break;
645     case tok::tilde: // Destructor.
646       if (state != State::Beginning && state != State::AfterTwoColons) {
647         continue_parsing = false;
648         break;
649       }
650       Advance();
651       if (ConsumeToken(tok::raw_identifier)) {
652         state = State::AfterIdentifier;
653       } else {
654         TakeBack();
655         continue_parsing = false;
656       }
657       break;
658     default:
659       continue_parsing = false;
660       break;
661     }
662   }
663 
664   if (state == State::AfterIdentifier || state == State::AfterOperator ||
665       state == State::AfterTemplate) {
666     ParsedNameRanges result;
667     if (last_coloncolon_position) {
668       result.context_range = Range(start_position.GetSavedPosition(),
669                                    last_coloncolon_position.value());
670       result.basename_range =
671           Range(last_coloncolon_position.value() + 1, GetCurrentPosition());
672     } else {
673       result.basename_range =
674           Range(start_position.GetSavedPosition(), GetCurrentPosition());
675     }
676     start_position.Remove();
677     return result;
678   } else {
679     return None;
680   }
681 }
682 
683 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
684   if (range.empty())
685     return llvm::StringRef();
686   assert(range.begin_index < range.end_index);
687   assert(range.begin_index < m_tokens.size());
688   assert(range.end_index <= m_tokens.size());
689   clang::Token &first_token = m_tokens[range.begin_index];
690   clang::Token &last_token = m_tokens[range.end_index - 1];
691   clang::SourceLocation start_loc = first_token.getLocation();
692   clang::SourceLocation end_loc = last_token.getLocation();
693   unsigned start_pos = start_loc.getRawEncoding();
694   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
695   return m_text.take_front(end_pos).drop_front(start_pos);
696 }
697 
698 static const clang::LangOptions &GetLangOptions() {
699   static clang::LangOptions g_options;
700   static llvm::once_flag g_once_flag;
701   llvm::call_once(g_once_flag, []() {
702     g_options.LineComment = true;
703     g_options.C99 = true;
704     g_options.C11 = true;
705     g_options.CPlusPlus = true;
706     g_options.CPlusPlus11 = true;
707     g_options.CPlusPlus14 = true;
708     g_options.CPlusPlus17 = true;
709   });
710   return g_options;
711 }
712 
713 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
714   static llvm::StringMap<tok::TokenKind> g_map{
715 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
716 #include "clang/Basic/TokenKinds.def"
717 #undef KEYWORD
718   };
719   return g_map;
720 }
721 
722 void CPlusPlusNameParser::ExtractTokens() {
723   if (m_text.empty())
724     return;
725   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
726                      m_text.data(), m_text.data() + m_text.size());
727   const auto &kw_map = GetKeywordsMap();
728   clang::Token token;
729   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
730        lexer.LexFromRawLexer(token)) {
731     if (token.is(clang::tok::raw_identifier)) {
732       auto it = kw_map.find(token.getRawIdentifier());
733       if (it != kw_map.end()) {
734         token.setKind(it->getValue());
735       }
736     }
737 
738     m_tokens.push_back(token);
739   }
740 }
741