xref: /llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision a4561d934877fbba5cfb3cac3195a41707ba6043)
1 //===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "CPlusPlusNameParser.h"
10 
11 #include "clang/Basic/IdentifierTable.h"
12 #include "clang/Basic/TokenKinds.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified e.g.
29   // main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type e.g.
38   // void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   if (HasMoreTokens())
47     return None;
48   return result;
49 }
50 
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52   m_next_token_index = 0;
53   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54   if (!name_ranges)
55     return None;
56   if (HasMoreTokens())
57     return None;
58   ParsedName result;
59   result.basename = GetTextForRange(name_ranges.value().basename_range);
60   result.context = GetTextForRange(name_ranges.value().context_range);
61   return result;
62 }
63 
64 bool CPlusPlusNameParser::HasMoreTokens() {
65   return m_next_token_index < m_tokens.size();
66 }
67 
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69 
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71 
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73   if (!HasMoreTokens())
74     return false;
75 
76   if (!Peek().is(kind))
77     return false;
78 
79   Advance();
80   return true;
81 }
82 
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84   if (!HasMoreTokens())
85     return false;
86 
87   if (!Peek().isOneOf(kinds...))
88     return false;
89 
90   Advance();
91   return true;
92 }
93 
// Creates a Bookmark tied to the current cursor (m_next_token_index).
// NOTE(review): Bookmark is declared outside this file; judging by how it is
// used here, it presumably rewinds the cursor when it goes out of scope unless
// Remove() was called on it -- confirm against the header.
CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
  return Bookmark(m_next_token_index);
}
97 
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99 
100 clang::Token &CPlusPlusNameParser::Peek() {
101   assert(HasMoreTokens());
102   return m_tokens[m_next_token_index];
103 }
104 
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107   Bookmark start_position = SetBookmark();
108   if (expect_return_type) {
109     // Consume return type if it's expected.
110     if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
111       return None;
112   }
113 
114   auto maybe_name = ParseFullNameImpl();
115   if (!maybe_name) {
116     return None;
117   }
118 
119   size_t argument_start = GetCurrentPosition();
120   if (!ConsumeArguments()) {
121     return None;
122   }
123 
124   size_t qualifiers_start = GetCurrentPosition();
125   SkipFunctionQualifiers();
126   size_t end_position = GetCurrentPosition();
127 
128   ParsedFunction result;
129   result.name.basename = GetTextForRange(maybe_name.value().basename_range);
130   result.name.context = GetTextForRange(maybe_name.value().context_range);
131   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133   start_position.Remove();
134   return result;
135 }
136 
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139   Bookmark start_position = SetBookmark();
140   if (expect_return_type) {
141     // Consume return type.
142     if (!ConsumeTypename())
143       return None;
144   }
145 
146   if (!ConsumeToken(tok::l_paren))
147     return None;
148   if (!ConsumePtrsAndRefs())
149     return None;
150 
151   {
152     Bookmark before_inner_function_pos = SetBookmark();
153     auto maybe_inner_function_name = ParseFunctionImpl(false);
154     if (maybe_inner_function_name)
155       if (ConsumeToken(tok::r_paren))
156         if (ConsumeArguments()) {
157           SkipFunctionQualifiers();
158           start_position.Remove();
159           before_inner_function_pos.Remove();
160           return maybe_inner_function_name;
161         }
162   }
163 
164   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165   if (maybe_inner_function_ptr_name)
166     if (ConsumeToken(tok::r_paren))
167       if (ConsumeArguments()) {
168         SkipFunctionQualifiers();
169         start_position.Remove();
170         return maybe_inner_function_ptr_name;
171       }
172   return None;
173 }
174 
175 bool CPlusPlusNameParser::ConsumeArguments() {
176   return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178 
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180   Bookmark start_position = SetBookmark();
181   if (!HasMoreTokens() || Peek().getKind() != tok::less)
182     return false;
183   Advance();
184 
185   // Consuming template arguments is a bit trickier than consuming function
186   // arguments, because '<' '>' brackets are not always trivially balanced. In
187   // some rare cases tokens '<' and '>' can appear inside template arguments as
188   // arithmetic or shift operators not as template brackets. Examples:
189   // std::enable_if<(10u)<(64), bool>
190   //           f<A<operator<(X,Y)::Subclass>>
191   // Good thing that compiler makes sure that really ambiguous cases of '>'
192   // usage should be enclosed within '()' brackets.
193   int template_counter = 1;
194   bool can_open_template = false;
195   while (HasMoreTokens() && template_counter > 0) {
196     tok::TokenKind kind = Peek().getKind();
197     switch (kind) {
198     case tok::greatergreater:
199       template_counter -= 2;
200       can_open_template = false;
201       Advance();
202       break;
203     case tok::greater:
204       --template_counter;
205       can_open_template = false;
206       Advance();
207       break;
208     case tok::less:
209       // '<' is an attempt to open a subteamplte
210       // check if parser is at the point where it's actually possible,
211       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
212       // need to do the same for '>' because compiler actually makes sure that
213       // '>' always surrounded by brackets to avoid ambiguity.
214       if (can_open_template)
215         ++template_counter;
216       can_open_template = false;
217       Advance();
218       break;
219     case tok::kw_operator: // C++ operator overloading.
220       if (!ConsumeOperator())
221         return false;
222       can_open_template = true;
223       break;
224     case tok::raw_identifier:
225       can_open_template = true;
226       Advance();
227       break;
228     case tok::l_square:
229       if (!ConsumeBrackets(tok::l_square, tok::r_square))
230         return false;
231       can_open_template = false;
232       break;
233     case tok::l_paren:
234       if (!ConsumeArguments())
235         return false;
236       can_open_template = false;
237       break;
238     default:
239       can_open_template = false;
240       Advance();
241       break;
242     }
243   }
244 
245   if (template_counter != 0) {
246     return false;
247   }
248   start_position.Remove();
249   return true;
250 }
251 
252 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
253   Bookmark start_position = SetBookmark();
254   if (!ConsumeToken(tok::l_paren)) {
255     return false;
256   }
257   constexpr llvm::StringLiteral g_anonymous("anonymous");
258   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
259       Peek().getRawIdentifier() == g_anonymous) {
260     Advance();
261   } else {
262     return false;
263   }
264 
265   if (!ConsumeToken(tok::kw_namespace)) {
266     return false;
267   }
268 
269   if (!ConsumeToken(tok::r_paren)) {
270     return false;
271   }
272   start_position.Remove();
273   return true;
274 }
275 
276 bool CPlusPlusNameParser::ConsumeLambda() {
277   Bookmark start_position = SetBookmark();
278   if (!ConsumeToken(tok::l_brace)) {
279     return false;
280   }
281   constexpr llvm::StringLiteral g_lambda("lambda");
282   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
283       Peek().getRawIdentifier() == g_lambda) {
284     // Put the matched brace back so we can use ConsumeBrackets
285     TakeBack();
286   } else {
287     return false;
288   }
289 
290   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
291     return false;
292   }
293 
294   start_position.Remove();
295   return true;
296 }
297 
298 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
299                                           tok::TokenKind right) {
300   Bookmark start_position = SetBookmark();
301   if (!HasMoreTokens() || Peek().getKind() != left)
302     return false;
303   Advance();
304 
305   int counter = 1;
306   while (HasMoreTokens() && counter > 0) {
307     tok::TokenKind kind = Peek().getKind();
308     if (kind == right)
309       --counter;
310     else if (kind == left)
311       ++counter;
312     Advance();
313   }
314 
315   assert(counter >= 0);
316   if (counter > 0) {
317     return false;
318   }
319   start_position.Remove();
320   return true;
321 }
322 
323 bool CPlusPlusNameParser::ConsumeOperator() {
324   Bookmark start_position = SetBookmark();
325   if (!ConsumeToken(tok::kw_operator))
326     return false;
327 
328   if (!HasMoreTokens()) {
329     return false;
330   }
331 
332   const auto &token = Peek();
333 
334   // When clang generates debug info it adds template parameters to names.
335   // Since clang doesn't add a space between the name and the template parameter
336   // in some cases we are not generating valid C++ names e.g.:
337   //
338   //   operator<<A::B>
339   //
340   // In some of these cases we will not parse them correctly. This fixes the
341   // issue by detecting this case and inserting tok::less in place of
342   // tok::lessless and returning successfully that we consumed the operator.
343   if (token.getKind() == tok::lessless) {
344     // Make sure we have more tokens before attempting to look ahead one more.
345     if (m_next_token_index + 1 < m_tokens.size()) {
346       // Look ahead two tokens.
347       clang::Token n_token = m_tokens[m_next_token_index + 1];
348       // If we find ( or < then this is indeed operator<< no need for fix.
349       if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
350         clang::Token tmp_tok;
351         tmp_tok.startToken();
352         tmp_tok.setLength(1);
353         tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
354         tmp_tok.setKind(tok::less);
355 
356         m_tokens[m_next_token_index] = tmp_tok;
357 
358         start_position.Remove();
359         return true;
360       }
361     }
362   }
363 
364   switch (token.getKind()) {
365   case tok::kw_new:
366   case tok::kw_delete:
367     // This is 'new' or 'delete' operators.
368     Advance();
369     // Check for array new/delete.
370     if (HasMoreTokens() && Peek().is(tok::l_square)) {
371       // Consume the '[' and ']'.
372       if (!ConsumeBrackets(tok::l_square, tok::r_square))
373         return false;
374     }
375     break;
376 
377 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
378   case tok::Token:                                                             \
379     Advance();                                                                 \
380     break;
381 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
382 #include "clang/Basic/OperatorKinds.def"
383 #undef OVERLOADED_OPERATOR
384 #undef OVERLOADED_OPERATOR_MULTI
385 
386   case tok::l_paren:
387     // Call operator consume '(' ... ')'.
388     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
389       break;
390     return false;
391 
392   case tok::l_square:
393     // This is a [] operator.
394     // Consume the '[' and ']'.
395     if (ConsumeBrackets(tok::l_square, tok::r_square))
396       break;
397     return false;
398 
399   default:
400     // This might be a cast operator.
401     if (ConsumeTypename())
402       break;
403     return false;
404   }
405   start_position.Remove();
406   return true;
407 }
408 
409 void CPlusPlusNameParser::SkipTypeQualifiers() {
410   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
411     ;
412 }
413 
414 void CPlusPlusNameParser::SkipFunctionQualifiers() {
415   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
416     ;
417 }
418 
419 bool CPlusPlusNameParser::ConsumeBuiltinType() {
420   bool result = false;
421   bool continue_parsing = true;
422   // Built-in types can be made of a few keywords like 'unsigned long long
423   // int'. This function consumes all built-in type keywords without checking
424   // if they make sense like 'unsigned char void'.
425   while (continue_parsing && HasMoreTokens()) {
426     switch (Peek().getKind()) {
427     case tok::kw_short:
428     case tok::kw_long:
429     case tok::kw___int64:
430     case tok::kw___int128:
431     case tok::kw_signed:
432     case tok::kw_unsigned:
433     case tok::kw_void:
434     case tok::kw_char:
435     case tok::kw_int:
436     case tok::kw_half:
437     case tok::kw_float:
438     case tok::kw_double:
439     case tok::kw___float128:
440     case tok::kw_wchar_t:
441     case tok::kw_bool:
442     case tok::kw_char16_t:
443     case tok::kw_char32_t:
444       result = true;
445       Advance();
446       break;
447     default:
448       continue_parsing = false;
449       break;
450     }
451   }
452   return result;
453 }
454 
455 void CPlusPlusNameParser::SkipPtrsAndRefs() {
456   // Ignoring result.
457   ConsumePtrsAndRefs();
458 }
459 
460 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
461   bool found = false;
462   SkipTypeQualifiers();
463   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
464                       tok::kw_volatile)) {
465     found = true;
466     SkipTypeQualifiers();
467   }
468   return found;
469 }
470 
471 bool CPlusPlusNameParser::ConsumeDecltype() {
472   Bookmark start_position = SetBookmark();
473   if (!ConsumeToken(tok::kw_decltype))
474     return false;
475 
476   if (!ConsumeArguments())
477     return false;
478 
479   start_position.Remove();
480   return true;
481 }
482 
483 bool CPlusPlusNameParser::ConsumeTypename() {
484   Bookmark start_position = SetBookmark();
485   SkipTypeQualifiers();
486   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
487     if (!ParseFullNameImpl())
488       return false;
489   }
490   SkipPtrsAndRefs();
491   start_position.Remove();
492   return true;
493 }
494 
// Parses a (possibly qualified) C++ name starting at the cursor with a small
// state machine, consuming tokens for as long as they can extend the name.
//
// On success the returned ranges are token-index ranges: basename_range covers
// everything after the last '::' that was accepted, and context_range covers
// everything before it (context_range is left as default-constructed when no
// '::' was seen). Returns None when parsing stops in a state that cannot end a
// name (at the very beginning or right after '::').
Optional<CPlusPlusNameParser::ParsedNameRanges>
CPlusPlusNameParser::ParseFullNameImpl() {
  // Name parsing state machine.
  enum class State {
    Beginning,       // start of the name
    AfterTwoColons,  // right after ::
    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
    AfterTemplate,   // right after template brackets (<something>)
    AfterOperator,   // right after name of C++ operator
  };

  Bookmark start_position = SetBookmark();
  State state = State::Beginning;
  bool continue_parsing = true;
  // Token index of the most recent '::' accepted as part of the name, if any.
  Optional<size_t> last_coloncolon_position;

  // Each iteration either consumes tokens and moves to a new state, or clears
  // continue_parsing to stop at the current token without consuming it.
  while (continue_parsing && HasMoreTokens()) {
    const auto &token = Peek();
    switch (token.getKind()) {
    case tok::raw_identifier: // Just a name.
      // An identifier may only start the name or follow '::'.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      state = State::AfterIdentifier;
      break;
    case tok::l_paren: {
      if (state == State::Beginning || state == State::AfterTwoColons) {
        // (anonymous namespace)
        if (ConsumeAnonymousNamespace()) {
          // Treated like an ordinary identifier component.
          state = State::AfterIdentifier;
          break;
        }
      }

      // Type declared inside a function 'func()::Type'
      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
          state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      // This bookmark is released only once the '::' after the argument list
      // has been found; the early exits below leave it in place.
      Bookmark l_paren_position = SetBookmark();
      // Consume the '(' ... ') [const]'.
      if (!ConsumeArguments()) {
        continue_parsing = false;
        break;
      }
      SkipFunctionQualifiers();

      // Consume '::'
      size_t coloncolon_position = GetCurrentPosition();
      if (!ConsumeToken(tok::coloncolon)) {
        continue_parsing = false;
        break;
      }
      l_paren_position.Remove();
      last_coloncolon_position = coloncolon_position;
      state = State::AfterTwoColons;
      break;
    }
    case tok::l_brace:
      // A '{lambda...}' component is accepted where an identifier would be.
      if (state == State::Beginning || state == State::AfterTwoColons) {
        if (ConsumeLambda()) {
          state = State::AfterIdentifier;
          break;
        }
      }
      continue_parsing = false;
      break;
    case tok::coloncolon: // Type nesting delimiter.
      // '::' may lead the name or follow an identifier or template argument
      // list; it is rejected after another '::' or after an operator name.
      if (state != State::Beginning && state != State::AfterIdentifier &&
          state != State::AfterTemplate) {
        continue_parsing = false;
        break;
      }
      last_coloncolon_position = GetCurrentPosition();
      Advance();
      state = State::AfterTwoColons;
      break;
    case tok::less: // Template brackets.
      // Template arguments can only follow an identifier or an operator name.
      if (state != State::AfterIdentifier && state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeTemplateArgs()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterTemplate;
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeOperator()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterOperator;
      break;
    case tok::tilde: // Destructor.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      if (ConsumeToken(tok::raw_identifier)) {
        state = State::AfterIdentifier;
      } else {
        // '~' not followed by an identifier: put the '~' back and stop.
        TakeBack();
        continue_parsing = false;
      }
      break;
    default:
      // Any other token cannot be part of a name.
      continue_parsing = false;
      break;
    }
  }

  // Only these three states can end a complete name.
  if (state == State::AfterIdentifier || state == State::AfterOperator ||
      state == State::AfterTemplate) {
    ParsedNameRanges result;
    if (last_coloncolon_position) {
      // Split around the last '::': the context ends just before it and the
      // basename starts just after it.
      result.context_range = Range(start_position.GetSavedPosition(),
                                   last_coloncolon_position.value());
      result.basename_range =
          Range(last_coloncolon_position.value() + 1, GetCurrentPosition());
    } else {
      result.basename_range =
          Range(start_position.GetSavedPosition(), GetCurrentPosition());
    }
    start_position.Remove();
    return result;
  } else {
    return None;
  }
}
634 
635 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
636   if (range.empty())
637     return llvm::StringRef();
638   assert(range.begin_index < range.end_index);
639   assert(range.begin_index < m_tokens.size());
640   assert(range.end_index <= m_tokens.size());
641   clang::Token &first_token = m_tokens[range.begin_index];
642   clang::Token &last_token = m_tokens[range.end_index - 1];
643   clang::SourceLocation start_loc = first_token.getLocation();
644   clang::SourceLocation end_loc = last_token.getLocation();
645   unsigned start_pos = start_loc.getRawEncoding();
646   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
647   return m_text.take_front(end_pos).drop_front(start_pos);
648 }
649 
650 static const clang::LangOptions &GetLangOptions() {
651   static clang::LangOptions g_options;
652   static llvm::once_flag g_once_flag;
653   llvm::call_once(g_once_flag, []() {
654     g_options.LineComment = true;
655     g_options.C99 = true;
656     g_options.C11 = true;
657     g_options.CPlusPlus = true;
658     g_options.CPlusPlus11 = true;
659     g_options.CPlusPlus14 = true;
660     g_options.CPlusPlus17 = true;
661   });
662   return g_options;
663 }
664 
665 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
666   static llvm::StringMap<tok::TokenKind> g_map{
667 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
668 #include "clang/Basic/TokenKinds.def"
669 #undef KEYWORD
670   };
671   return g_map;
672 }
673 
674 void CPlusPlusNameParser::ExtractTokens() {
675   if (m_text.empty())
676     return;
677   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
678                      m_text.data(), m_text.data() + m_text.size());
679   const auto &kw_map = GetKeywordsMap();
680   clang::Token token;
681   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
682        lexer.LexFromRawLexer(token)) {
683     if (token.is(clang::tok::raw_identifier)) {
684       auto it = kw_map.find(token.getRawIdentifier());
685       if (it != kw_map.end()) {
686         token.setKind(it->getValue());
687       }
688     }
689 
690     m_tokens.push_back(token);
691   }
692 }
693