xref: /llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision 9e916e5e0e05f293b85cd97c03a7e38d28bb74d2)
1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "CPlusPlusNameParser.h"
11 
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified
29   // e.g. main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type
38   // e.g. void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   if (HasMoreTokens())
47     return None;
48   return result;
49 }
50 
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52   m_next_token_index = 0;
53   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54   if (!name_ranges)
55     return None;
56   if (HasMoreTokens())
57     return None;
58   ParsedName result;
59   result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60   result.context = GetTextForRange(name_ranges.getValue().context_range);
61   return result;
62 }
63 
64 bool CPlusPlusNameParser::HasMoreTokens() {
65   return m_next_token_index < m_tokens.size();
66 }
67 
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69 
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71 
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73   if (!HasMoreTokens())
74     return false;
75 
76   if (!Peek().is(kind))
77     return false;
78 
79   Advance();
80   return true;
81 }
82 
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84   if (!HasMoreTokens())
85     return false;
86 
87   if (!Peek().isOneOf(kinds...))
88     return false;
89 
90   Advance();
91   return true;
92 }
93 
94 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
95   return Bookmark(m_next_token_index);
96 }
97 
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99 
100 clang::Token &CPlusPlusNameParser::Peek() {
101   assert(HasMoreTokens());
102   return m_tokens[m_next_token_index];
103 }
104 
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107   Bookmark start_position = SetBookmark();
108   if (expect_return_type) {
109     // Consume return type if it's expected.
110     if (!ConsumeTypename())
111       return None;
112   }
113 
114   auto maybe_name = ParseFullNameImpl();
115   if (!maybe_name) {
116     return None;
117   }
118 
119   size_t argument_start = GetCurrentPosition();
120   if (!ConsumeArguments()) {
121     return None;
122   }
123 
124   size_t qualifiers_start = GetCurrentPosition();
125   SkipFunctionQualifiers();
126   size_t end_position = GetCurrentPosition();
127 
128   ParsedFunction result;
129   result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130   result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133   start_position.Remove();
134   return result;
135 }
136 
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139   Bookmark start_position = SetBookmark();
140   if (expect_return_type) {
141     // Consume return type.
142     if (!ConsumeTypename())
143       return None;
144   }
145 
146   if (!ConsumeToken(tok::l_paren))
147     return None;
148   if (!ConsumePtrsAndRefs())
149     return None;
150 
151   {
152     Bookmark before_inner_function_pos = SetBookmark();
153     auto maybe_inner_function_name = ParseFunctionImpl(false);
154     if (maybe_inner_function_name)
155       if (ConsumeToken(tok::r_paren))
156         if (ConsumeArguments()) {
157           SkipFunctionQualifiers();
158           start_position.Remove();
159           before_inner_function_pos.Remove();
160           return maybe_inner_function_name;
161         }
162   }
163 
164   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165   if (maybe_inner_function_ptr_name)
166     if (ConsumeToken(tok::r_paren))
167       if (ConsumeArguments()) {
168         SkipFunctionQualifiers();
169         start_position.Remove();
170         return maybe_inner_function_ptr_name;
171       }
172   return None;
173 }
174 
175 bool CPlusPlusNameParser::ConsumeArguments() {
176   return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178 
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180   Bookmark start_position = SetBookmark();
181   if (!HasMoreTokens() || Peek().getKind() != tok::less)
182     return false;
183   Advance();
184 
185   // Consuming template arguments is a bit trickier than consuming function
186   // arguments, because '<' '>' brackets are not always trivially balanced.
187   // In some rare cases tokens '<' and '>' can appear inside template arguments
188   // as arithmetic or shift operators not as template brackets.
189   // Examples: std::enable_if<(10u)<(64), bool>
190   //           f<A<operator<(X,Y)::Subclass>>
191   // Good thing that compiler makes sure that really ambiguous cases of
192   // '>' usage should be enclosed within '()' brackets.
193   int template_counter = 1;
194   bool can_open_template = false;
195   while (HasMoreTokens() && template_counter > 0) {
196     tok::TokenKind kind = Peek().getKind();
197     switch (kind) {
198     case tok::greatergreater:
199       template_counter -= 2;
200       can_open_template = false;
201       Advance();
202       break;
203     case tok::greater:
204       --template_counter;
205       can_open_template = false;
206       Advance();
207       break;
208     case tok::less:
209       // '<' is an attempt to open a subteamplte
210       // check if parser is at the point where it's actually possible,
211       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
212       // No need to do the same for '>' because compiler actually makes sure
213       // that '>' always surrounded by brackets to avoid ambiguity.
214       if (can_open_template)
215         ++template_counter;
216       can_open_template = false;
217       Advance();
218       break;
219     case tok::kw_operator: // C++ operator overloading.
220       if (!ConsumeOperator())
221         return false;
222       can_open_template = true;
223       break;
224     case tok::raw_identifier:
225       can_open_template = true;
226       Advance();
227       break;
228     case tok::l_square:
229       if (!ConsumeBrackets(tok::l_square, tok::r_square))
230         return false;
231       can_open_template = false;
232       break;
233     case tok::l_paren:
234       if (!ConsumeArguments())
235         return false;
236       can_open_template = false;
237       break;
238     default:
239       can_open_template = false;
240       Advance();
241       break;
242     }
243   }
244 
245   if (template_counter != 0) {
246     return false;
247   }
248   start_position.Remove();
249   return true;
250 }
251 
252 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
253   Bookmark start_position = SetBookmark();
254   if (!ConsumeToken(tok::l_paren)) {
255     return false;
256   }
257   constexpr llvm::StringLiteral g_anonymous("anonymous");
258   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
259       Peek().getRawIdentifier() == g_anonymous) {
260     Advance();
261   } else {
262     return false;
263   }
264 
265   if (!ConsumeToken(tok::kw_namespace)) {
266     return false;
267   }
268 
269   if (!ConsumeToken(tok::r_paren)) {
270     return false;
271   }
272   start_position.Remove();
273   return true;
274 }
275 
276 bool CPlusPlusNameParser::ConsumeLambda() {
277   Bookmark start_position = SetBookmark();
278   if (!ConsumeToken(tok::l_brace)) {
279     return false;
280   }
281   constexpr llvm::StringLiteral g_lambda("lambda");
282   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
283       Peek().getRawIdentifier() == g_lambda) {
284     // Put the matched brace back so we can use ConsumeBrackets
285     TakeBack();
286   } else {
287     return false;
288   }
289 
290   if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
291     return false;
292   }
293 
294   start_position.Remove();
295   return true;
296 }
297 
298 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
299                                           tok::TokenKind right) {
300   Bookmark start_position = SetBookmark();
301   if (!HasMoreTokens() || Peek().getKind() != left)
302     return false;
303   Advance();
304 
305   int counter = 1;
306   while (HasMoreTokens() && counter > 0) {
307     tok::TokenKind kind = Peek().getKind();
308     if (kind == right)
309       --counter;
310     else if (kind == left)
311       ++counter;
312     Advance();
313   }
314 
315   assert(counter >= 0);
316   if (counter > 0) {
317     return false;
318   }
319   start_position.Remove();
320   return true;
321 }
322 
323 bool CPlusPlusNameParser::ConsumeOperator() {
324   Bookmark start_position = SetBookmark();
325   if (!ConsumeToken(tok::kw_operator))
326     return false;
327 
328   if (!HasMoreTokens()) {
329     return false;
330   }
331 
332   const auto &token = Peek();
333   switch (token.getKind()) {
334   case tok::kw_new:
335   case tok::kw_delete:
336     // This is 'new' or 'delete' operators.
337     Advance();
338     // Check for array new/delete.
339     if (HasMoreTokens() && Peek().is(tok::l_square)) {
340       // Consume the '[' and ']'.
341       if (!ConsumeBrackets(tok::l_square, tok::r_square))
342         return false;
343     }
344     break;
345 
346 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
347   case tok::Token:                                                             \
348     Advance();                                                                 \
349     break;
350 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
351 #include "clang/Basic/OperatorKinds.def"
352 #undef OVERLOADED_OPERATOR
353 #undef OVERLOADED_OPERATOR_MULTI
354 
355   case tok::l_paren:
356     // Call operator consume '(' ... ')'.
357     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
358       break;
359     return false;
360 
361   case tok::l_square:
362     // This is a [] operator.
363     // Consume the '[' and ']'.
364     if (ConsumeBrackets(tok::l_square, tok::r_square))
365       break;
366     return false;
367 
368   default:
369     // This might be a cast operator.
370     if (ConsumeTypename())
371       break;
372     return false;
373   }
374   start_position.Remove();
375   return true;
376 }
377 
378 void CPlusPlusNameParser::SkipTypeQualifiers() {
379   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
380     ;
381 }
382 
383 void CPlusPlusNameParser::SkipFunctionQualifiers() {
384   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
385     ;
386 }
387 
388 bool CPlusPlusNameParser::ConsumeBuiltinType() {
389   bool result = false;
390   bool continue_parsing = true;
391   // Built-in types can be made of a few keywords
392   // like 'unsigned long long int'. This function
393   // consumes all built-in type keywords without
394   // checking if they make sense like 'unsigned char void'.
395   while (continue_parsing && HasMoreTokens()) {
396     switch (Peek().getKind()) {
397     case tok::kw_short:
398     case tok::kw_long:
399     case tok::kw___int64:
400     case tok::kw___int128:
401     case tok::kw_signed:
402     case tok::kw_unsigned:
403     case tok::kw_void:
404     case tok::kw_char:
405     case tok::kw_int:
406     case tok::kw_half:
407     case tok::kw_float:
408     case tok::kw_double:
409     case tok::kw___float128:
410     case tok::kw_wchar_t:
411     case tok::kw_bool:
412     case tok::kw_char16_t:
413     case tok::kw_char32_t:
414       result = true;
415       Advance();
416       break;
417     default:
418       continue_parsing = false;
419       break;
420     }
421   }
422   return result;
423 }
424 
425 void CPlusPlusNameParser::SkipPtrsAndRefs() {
426   // Ignoring result.
427   ConsumePtrsAndRefs();
428 }
429 
430 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
431   bool found = false;
432   SkipTypeQualifiers();
433   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
434                       tok::kw_volatile)) {
435     found = true;
436     SkipTypeQualifiers();
437   }
438   return found;
439 }
440 
441 bool CPlusPlusNameParser::ConsumeDecltype() {
442   Bookmark start_position = SetBookmark();
443   if (!ConsumeToken(tok::kw_decltype))
444     return false;
445 
446   if (!ConsumeArguments())
447     return false;
448 
449   start_position.Remove();
450   return true;
451 }
452 
453 bool CPlusPlusNameParser::ConsumeTypename() {
454   Bookmark start_position = SetBookmark();
455   SkipTypeQualifiers();
456   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
457     if (!ParseFullNameImpl())
458       return false;
459   }
460   SkipPtrsAndRefs();
461   start_position.Remove();
462   return true;
463 }
464 
465 Optional<CPlusPlusNameParser::ParsedNameRanges>
466 CPlusPlusNameParser::ParseFullNameImpl() {
467   // Name parsing state machine.
468   enum class State {
469     Beginning,       // start of the name
470     AfterTwoColons,  // right after ::
471     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
472     AfterTemplate,   // right after template brackets (<something>)
473     AfterOperator,   // right after name of C++ operator
474   };
475 
476   Bookmark start_position = SetBookmark();
477   State state = State::Beginning;
478   bool continue_parsing = true;
479   Optional<size_t> last_coloncolon_position = None;
480 
481   while (continue_parsing && HasMoreTokens()) {
482     const auto &token = Peek();
483     switch (token.getKind()) {
484     case tok::raw_identifier: // Just a name.
485       if (state != State::Beginning && state != State::AfterTwoColons) {
486         continue_parsing = false;
487         break;
488       }
489       Advance();
490       state = State::AfterIdentifier;
491       break;
492     case tok::l_paren: {
493       if (state == State::Beginning || state == State::AfterTwoColons) {
494         // (anonymous namespace)
495         if (ConsumeAnonymousNamespace()) {
496           state = State::AfterIdentifier;
497           break;
498         }
499       }
500 
501       // Type declared inside a function 'func()::Type'
502       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
503           state != State::AfterOperator) {
504         continue_parsing = false;
505         break;
506       }
507       Bookmark l_paren_position = SetBookmark();
508       // Consume the '(' ... ') [const]'.
509       if (!ConsumeArguments()) {
510         continue_parsing = false;
511         break;
512       }
513       SkipFunctionQualifiers();
514 
515       // Consume '::'
516       size_t coloncolon_position = GetCurrentPosition();
517       if (!ConsumeToken(tok::coloncolon)) {
518         continue_parsing = false;
519         break;
520       }
521       l_paren_position.Remove();
522       last_coloncolon_position = coloncolon_position;
523       state = State::AfterTwoColons;
524       break;
525     }
526     case tok::l_brace:
527       if (state == State::Beginning || state == State::AfterTwoColons) {
528         if (ConsumeLambda()) {
529           state = State::AfterIdentifier;
530           break;
531         }
532       }
533       continue_parsing = false;
534       break;
535     case tok::coloncolon: // Type nesting delimiter.
536       if (state != State::Beginning && state != State::AfterIdentifier &&
537           state != State::AfterTemplate) {
538         continue_parsing = false;
539         break;
540       }
541       last_coloncolon_position = GetCurrentPosition();
542       Advance();
543       state = State::AfterTwoColons;
544       break;
545     case tok::less: // Template brackets.
546       if (state != State::AfterIdentifier && state != State::AfterOperator) {
547         continue_parsing = false;
548         break;
549       }
550       if (!ConsumeTemplateArgs()) {
551         continue_parsing = false;
552         break;
553       }
554       state = State::AfterTemplate;
555       break;
556     case tok::kw_operator: // C++ operator overloading.
557       if (state != State::Beginning && state != State::AfterTwoColons) {
558         continue_parsing = false;
559         break;
560       }
561       if (!ConsumeOperator()) {
562         continue_parsing = false;
563         break;
564       }
565       state = State::AfterOperator;
566       break;
567     case tok::tilde: // Destructor.
568       if (state != State::Beginning && state != State::AfterTwoColons) {
569         continue_parsing = false;
570         break;
571       }
572       Advance();
573       if (ConsumeToken(tok::raw_identifier)) {
574         state = State::AfterIdentifier;
575       } else {
576         TakeBack();
577         continue_parsing = false;
578       }
579       break;
580     default:
581       continue_parsing = false;
582       break;
583     }
584   }
585 
586   if (state == State::AfterIdentifier || state == State::AfterOperator ||
587       state == State::AfterTemplate) {
588     ParsedNameRanges result;
589     if (last_coloncolon_position) {
590       result.context_range = Range(start_position.GetSavedPosition(),
591                                    last_coloncolon_position.getValue());
592       result.basename_range =
593           Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
594     } else {
595       result.basename_range =
596           Range(start_position.GetSavedPosition(), GetCurrentPosition());
597     }
598     start_position.Remove();
599     return result;
600   } else {
601     return None;
602   }
603 }
604 
605 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
606   if (range.empty())
607     return llvm::StringRef();
608   assert(range.begin_index < range.end_index);
609   assert(range.begin_index < m_tokens.size());
610   assert(range.end_index <= m_tokens.size());
611   clang::Token &first_token = m_tokens[range.begin_index];
612   clang::Token &last_token = m_tokens[range.end_index - 1];
613   clang::SourceLocation start_loc = first_token.getLocation();
614   clang::SourceLocation end_loc = last_token.getLocation();
615   unsigned start_pos = start_loc.getRawEncoding();
616   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
617   return m_text.take_front(end_pos).drop_front(start_pos);
618 }
619 
620 static const clang::LangOptions &GetLangOptions() {
621   static clang::LangOptions g_options;
622   static llvm::once_flag g_once_flag;
623   llvm::call_once(g_once_flag, []() {
624     g_options.LineComment = true;
625     g_options.C99 = true;
626     g_options.C11 = true;
627     g_options.CPlusPlus = true;
628     g_options.CPlusPlus11 = true;
629     g_options.CPlusPlus14 = true;
630     g_options.CPlusPlus17 = true;
631   });
632   return g_options;
633 }
634 
635 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
636   static llvm::StringMap<tok::TokenKind> g_map{
637 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
638 #include "clang/Basic/TokenKinds.def"
639 #undef KEYWORD
640   };
641   return g_map;
642 }
643 
644 void CPlusPlusNameParser::ExtractTokens() {
645   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
646                      m_text.data(), m_text.data() + m_text.size());
647   const auto &kw_map = GetKeywordsMap();
648   clang::Token token;
649   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
650        lexer.LexFromRawLexer(token)) {
651     if (token.is(clang::tok::raw_identifier)) {
652       auto it = kw_map.find(token.getRawIdentifier());
653       if (it != kw_map.end()) {
654         token.setKind(it->getValue());
655       }
656     }
657 
658     m_tokens.push_back(token);
659   }
660 }
661