xref: /llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision a633ee6e4a02e7326ebca396eb88024c009eab0d)
1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "CPlusPlusNameParser.h"
11 
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified
29   // e.g. main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type
38   // e.g. void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   if (HasMoreTokens())
47     return None;
48   return result;
49 }
50 
51 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52   m_next_token_index = 0;
53   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
54   if (!name_ranges)
55     return None;
56   if (HasMoreTokens())
57     return None;
58   ParsedName result;
59   result.basename = GetTextForRange(name_ranges.getValue().basename_range);
60   result.context = GetTextForRange(name_ranges.getValue().context_range);
61   return result;
62 }
63 
64 bool CPlusPlusNameParser::HasMoreTokens() {
65   return m_next_token_index < m_tokens.size();
66 }
67 
68 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
69 
70 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
71 
72 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
73   if (!HasMoreTokens())
74     return false;
75 
76   if (!Peek().is(kind))
77     return false;
78 
79   Advance();
80   return true;
81 }
82 
83 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
84   if (!HasMoreTokens())
85     return false;
86 
87   if (!Peek().isOneOf(kinds...))
88     return false;
89 
90   Advance();
91   return true;
92 }
93 
// Creates a Bookmark from the current read position.
// NOTE(review): callers in this file call Remove() on the bookmark on success
// and simply let it go out of scope on failure, so it presumably rewinds
// m_next_token_index on destruction unless removed -- confirm against the
// Bookmark class in the header.
CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
  return Bookmark(m_next_token_index);
}
97 
98 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
99 
100 clang::Token &CPlusPlusNameParser::Peek() {
101   assert(HasMoreTokens());
102   return m_tokens[m_next_token_index];
103 }
104 
105 Optional<ParsedFunction>
106 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
107   Bookmark start_position = SetBookmark();
108   if (expect_return_type) {
109     // Consume return type if it's expected.
110     if (!ConsumeTypename())
111       return None;
112   }
113 
114   auto maybe_name = ParseFullNameImpl();
115   if (!maybe_name) {
116     return None;
117   }
118 
119   size_t argument_start = GetCurrentPosition();
120   if (!ConsumeArguments()) {
121     return None;
122   }
123 
124   size_t qualifiers_start = GetCurrentPosition();
125   SkipFunctionQualifiers();
126   size_t end_position = GetCurrentPosition();
127 
128   ParsedFunction result;
129   result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
130   result.name.context = GetTextForRange(maybe_name.getValue().context_range);
131   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
132   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
133   start_position.Remove();
134   return result;
135 }
136 
137 Optional<ParsedFunction>
138 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
139   Bookmark start_position = SetBookmark();
140   if (expect_return_type) {
141     // Consume return type.
142     if (!ConsumeTypename())
143       return None;
144   }
145 
146   if (!ConsumeToken(tok::l_paren))
147     return None;
148   if (!ConsumePtrsAndRefs())
149     return None;
150 
151   {
152     Bookmark before_inner_function_pos = SetBookmark();
153     auto maybe_inner_function_name = ParseFunctionImpl(false);
154     if (maybe_inner_function_name)
155       if (ConsumeToken(tok::r_paren))
156         if (ConsumeArguments()) {
157           SkipFunctionQualifiers();
158           start_position.Remove();
159           before_inner_function_pos.Remove();
160           return maybe_inner_function_name;
161         }
162   }
163 
164   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
165   if (maybe_inner_function_ptr_name)
166     if (ConsumeToken(tok::r_paren))
167       if (ConsumeArguments()) {
168         SkipFunctionQualifiers();
169         start_position.Remove();
170         return maybe_inner_function_ptr_name;
171       }
172   return None;
173 }
174 
175 bool CPlusPlusNameParser::ConsumeArguments() {
176   return ConsumeBrackets(tok::l_paren, tok::r_paren);
177 }
178 
179 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
180   Bookmark start_position = SetBookmark();
181   if (!HasMoreTokens() || Peek().getKind() != tok::less)
182     return false;
183   Advance();
184 
185   // Consuming template arguments is a bit trickier than consuming function
186   // arguments, because '<' '>' brackets are not always trivially balanced.
187   // In some rare cases tokens '<' and '>' can appear inside template arguments
188   // as arithmetic or shift operators not as template brackets.
189   // Examples: std::enable_if<(10u)<(64), bool>
190   //           f<A<operator<(X,Y)::Subclass>>
191   // Good thing that compiler makes sure that really ambiguous cases of
192   // '>' usage should be enclosed within '()' brackets.
193   int template_counter = 1;
194   bool can_open_template = false;
195   while (HasMoreTokens() && template_counter > 0) {
196     tok::TokenKind kind = Peek().getKind();
197     switch (kind) {
198     case tok::greatergreater:
199       template_counter -= 2;
200       can_open_template = false;
201       Advance();
202       break;
203     case tok::greater:
204       --template_counter;
205       can_open_template = false;
206       Advance();
207       break;
208     case tok::less:
209       // '<' is an attempt to open a subteamplte
210       // check if parser is at the point where it's actually possible,
211       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
212       // No need to do the same for '>' because compiler actually makes sure
213       // that '>' always surrounded by brackets to avoid ambiguity.
214       if (can_open_template)
215         ++template_counter;
216       can_open_template = false;
217       Advance();
218       break;
219     case tok::kw_operator: // C++ operator overloading.
220       if (!ConsumeOperator())
221         return false;
222       can_open_template = true;
223       break;
224     case tok::raw_identifier:
225       can_open_template = true;
226       Advance();
227       break;
228     case tok::l_square:
229       if (!ConsumeBrackets(tok::l_square, tok::r_square))
230         return false;
231       can_open_template = false;
232       break;
233     case tok::l_paren:
234       if (!ConsumeArguments())
235         return false;
236       can_open_template = false;
237       break;
238     default:
239       can_open_template = false;
240       Advance();
241       break;
242     }
243   }
244 
245   assert(template_counter >= 0);
246   if (template_counter > 0) {
247     return false;
248   }
249   start_position.Remove();
250   return true;
251 }
252 
253 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
254   Bookmark start_position = SetBookmark();
255   if (!ConsumeToken(tok::l_paren)) {
256     return false;
257   }
258   constexpr llvm::StringLiteral g_anonymous("anonymous");
259   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
260       Peek().getRawIdentifier() == g_anonymous) {
261     Advance();
262   } else {
263     return false;
264   }
265 
266   if (!ConsumeToken(tok::kw_namespace)) {
267     return false;
268   }
269 
270   if (!ConsumeToken(tok::r_paren)) {
271     return false;
272   }
273   start_position.Remove();
274   return true;
275 }
276 
277 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
278                                           tok::TokenKind right) {
279   Bookmark start_position = SetBookmark();
280   if (!HasMoreTokens() || Peek().getKind() != left)
281     return false;
282   Advance();
283 
284   int counter = 1;
285   while (HasMoreTokens() && counter > 0) {
286     tok::TokenKind kind = Peek().getKind();
287     if (kind == right)
288       --counter;
289     else if (kind == left)
290       ++counter;
291     Advance();
292   }
293 
294   assert(counter >= 0);
295   if (counter > 0) {
296     return false;
297   }
298   start_position.Remove();
299   return true;
300 }
301 
// Consumes an operator name: the `operator` keyword followed by one of
//   - `new` / `delete`, optionally followed by a '[' ... ']' group,
//   - any single-token operator listed in clang's OperatorKinds.def,
//   - a '(' ... ')' group (call operator),
//   - a '[' ... ']' group (subscript operator),
//   - a type name (conversion operator).
// Returns true and removes the entry bookmark on success; returns false
// otherwise, leaving the bookmark in place.
bool CPlusPlusNameParser::ConsumeOperator() {
  Bookmark start_position = SetBookmark();
  if (!ConsumeToken(tok::kw_operator))
    return false;

  // A lone `operator` keyword at the end of input is not an operator name.
  if (!HasMoreTokens()) {
    return false;
  }

  const auto &token = Peek();
  switch (token.getKind()) {
  case tok::kw_new:
  case tok::kw_delete:
    // This is 'new' or 'delete' operators.
    Advance();
    // Check for array new/delete.
    if (HasMoreTokens() && Peek().is(tok::l_square)) {
      // Consume the '[' and ']'.
      if (!ConsumeBrackets(tok::l_square, tok::r_square))
        return false;
    }
    break;

// Generate one `case` per OVERLOADED_OPERATOR entry of OperatorKinds.def; each
// such operator is consumed as a single token. OVERLOADED_OPERATOR_MULTI
// entries expand to nothing here.
// NOTE(review): presumably the MULTI entries are the multi-token operators
// handled by the explicit cases around this block -- confirm against
// OperatorKinds.def.
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
  case tok::Token:                                                             \
    Advance();                                                                 \
    break;
#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
#include "clang/Basic/OperatorKinds.def"
#undef OVERLOADED_OPERATOR
#undef OVERLOADED_OPERATOR_MULTI

  case tok::l_paren:
    // Call operator consume '(' ... ')'.
    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
      break;
    return false;

  case tok::l_square:
    // This is a [] operator.
    // Consume the '[' and ']'.
    if (ConsumeBrackets(tok::l_square, tok::r_square))
      break;
    return false;

  default:
    // This might be a cast operator.
    if (ConsumeTypename())
      break;
    return false;
  }
  // Every `break` out of the switch means an operator name was consumed.
  start_position.Remove();
  return true;
}
356 
357 void CPlusPlusNameParser::SkipTypeQualifiers() {
358   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
359     ;
360 }
361 
362 void CPlusPlusNameParser::SkipFunctionQualifiers() {
363   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
364     ;
365 }
366 
367 bool CPlusPlusNameParser::ConsumeBuiltinType() {
368   bool result = false;
369   bool continue_parsing = true;
370   // Built-in types can be made of a few keywords
371   // like 'unsigned long long int'. This function
372   // consumes all built-in type keywords without
373   // checking if they make sense like 'unsigned char void'.
374   while (continue_parsing && HasMoreTokens()) {
375     switch (Peek().getKind()) {
376     case tok::kw_short:
377     case tok::kw_long:
378     case tok::kw___int64:
379     case tok::kw___int128:
380     case tok::kw_signed:
381     case tok::kw_unsigned:
382     case tok::kw_void:
383     case tok::kw_char:
384     case tok::kw_int:
385     case tok::kw_half:
386     case tok::kw_float:
387     case tok::kw_double:
388     case tok::kw___float128:
389     case tok::kw_wchar_t:
390     case tok::kw_bool:
391     case tok::kw_char16_t:
392     case tok::kw_char32_t:
393       result = true;
394       Advance();
395       break;
396     default:
397       continue_parsing = false;
398       break;
399     }
400   }
401   return result;
402 }
403 
404 void CPlusPlusNameParser::SkipPtrsAndRefs() {
405   // Ignoring result.
406   ConsumePtrsAndRefs();
407 }
408 
409 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
410   bool found = false;
411   SkipTypeQualifiers();
412   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
413                       tok::kw_volatile)) {
414     found = true;
415     SkipTypeQualifiers();
416   }
417   return found;
418 }
419 
420 bool CPlusPlusNameParser::ConsumeDecltype() {
421   Bookmark start_position = SetBookmark();
422   if (!ConsumeToken(tok::kw_decltype))
423     return false;
424 
425   if (!ConsumeArguments())
426     return false;
427 
428   start_position.Remove();
429   return true;
430 }
431 
432 bool CPlusPlusNameParser::ConsumeTypename() {
433   Bookmark start_position = SetBookmark();
434   SkipTypeQualifiers();
435   if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
436     if (!ParseFullNameImpl())
437       return false;
438   }
439   SkipPtrsAndRefs();
440   start_position.Remove();
441   return true;
442 }
443 
// Parses a (possibly qualified) C++ name starting at the current position and
// returns the token ranges of its context and basename.
//
// The name is recognized by a small state machine that consumes identifiers,
// '::' separators, template argument lists, operator names, destructor names,
// '(anonymous namespace)' and function-local scopes of the form 'func()::'.
// Parsing stops at the first token that is not valid in the current state.
//
// Returns None unless parsing stopped right after an identifier, an operator
// name or a template argument list. On success the context range covers the
// tokens from the start of the name up to (not including) the last '::', and
// the basename range covers the tokens after it; with no '::' the context
// range is left default-constructed and the basename covers the whole name.
Optional<CPlusPlusNameParser::ParsedNameRanges>
CPlusPlusNameParser::ParseFullNameImpl() {
  // Name parsing state machine.
  enum class State {
    Beginning,       // start of the name
    AfterTwoColons,  // right after ::
    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
    AfterTemplate,   // right after template brackets (<something>)
    AfterOperator,   // right after name of C++ operator
  };

  Bookmark start_position = SetBookmark();
  State state = State::Beginning;
  bool continue_parsing = true;
  // Token index of the most recent '::' that was accepted, if any.
  Optional<size_t> last_coloncolon_position = None;

  while (continue_parsing && HasMoreTokens()) {
    const auto &token = Peek();
    switch (token.getKind()) {
    case tok::raw_identifier: // Just a name.
      // An identifier may only start the name or follow '::'.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      state = State::AfterIdentifier;
      break;
    case tok::l_paren: {
      if (state == State::Beginning || state == State::AfterTwoColons) {
        // (anonymous namespace)
        if (ConsumeAnonymousNamespace()) {
          state = State::AfterIdentifier;
          break;
        }
      }

      // Type declared inside a function 'func()::Type'
      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
          state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      // This bookmark is removed only when the argument list is followed by
      // '::'; on the other paths it is left in place when this case's scope
      // ends (presumably rewinding to the '(' -- see the Bookmark class).
      Bookmark l_paren_position = SetBookmark();
      // Consume the '(' ... ') [const]'.
      if (!ConsumeArguments()) {
        continue_parsing = false;
        break;
      }
      SkipFunctionQualifiers();

      // Consume '::'
      size_t coloncolon_position = GetCurrentPosition();
      if (!ConsumeToken(tok::coloncolon)) {
        continue_parsing = false;
        break;
      }
      l_paren_position.Remove();
      last_coloncolon_position = coloncolon_position;
      state = State::AfterTwoColons;
      break;
    }
    case tok::coloncolon: // Type nesting delimiter.
      // '::' may lead the name (global scope) or follow an identifier or a
      // template argument list.
      if (state != State::Beginning && state != State::AfterIdentifier &&
          state != State::AfterTemplate) {
        continue_parsing = false;
        break;
      }
      last_coloncolon_position = GetCurrentPosition();
      Advance();
      state = State::AfterTwoColons;
      break;
    case tok::less: // Template brackets.
      // Template arguments may follow an identifier or an operator name.
      if (state != State::AfterIdentifier && state != State::AfterOperator) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeTemplateArgs()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterTemplate;
      break;
    case tok::kw_operator: // C++ operator overloading.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      if (!ConsumeOperator()) {
        continue_parsing = false;
        break;
      }
      state = State::AfterOperator;
      break;
    case tok::tilde: // Destructor.
      if (state != State::Beginning && state != State::AfterTwoColons) {
        continue_parsing = false;
        break;
      }
      Advance();
      if (ConsumeToken(tok::raw_identifier)) {
        state = State::AfterIdentifier;
      } else {
        // '~' not followed by an identifier: un-consume the '~' and stop.
        TakeBack();
        continue_parsing = false;
      }
      break;
    default:
      continue_parsing = false;
      break;
    }
  }

  // Only these three states mean that a complete name has been read; e.g.
  // stopping right after '::' is a failure.
  if (state == State::AfterIdentifier || state == State::AfterOperator ||
      state == State::AfterTemplate) {
    ParsedNameRanges result;
    if (last_coloncolon_position) {
      // Context is everything before the last '::'; basename is everything
      // after it (the '::' token itself belongs to neither range).
      result.context_range = Range(start_position.GetSavedPosition(),
                                   last_coloncolon_position.getValue());
      result.basename_range =
          Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
    } else {
      result.basename_range =
          Range(start_position.GetSavedPosition(), GetCurrentPosition());
    }
    start_position.Remove();
    return result;
  } else {
    return None;
  }
}
574 
575 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
576   if (range.empty())
577     return llvm::StringRef();
578   assert(range.begin_index < range.end_index);
579   assert(range.begin_index < m_tokens.size());
580   assert(range.end_index <= m_tokens.size());
581   clang::Token &first_token = m_tokens[range.begin_index];
582   clang::Token &last_token = m_tokens[range.end_index - 1];
583   clang::SourceLocation start_loc = first_token.getLocation();
584   clang::SourceLocation end_loc = last_token.getLocation();
585   unsigned start_pos = start_loc.getRawEncoding();
586   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
587   return m_text.take_front(end_pos).drop_front(start_pos);
588 }
589 
590 static const clang::LangOptions &GetLangOptions() {
591   static clang::LangOptions g_options;
592   static llvm::once_flag g_once_flag;
593   llvm::call_once(g_once_flag, []() {
594     g_options.LineComment = true;
595     g_options.C99 = true;
596     g_options.C11 = true;
597     g_options.CPlusPlus = true;
598     g_options.CPlusPlus11 = true;
599     g_options.CPlusPlus14 = true;
600     g_options.CPlusPlus1z = true;
601   });
602   return g_options;
603 }
604 
// Returns a map from keyword spelling to the corresponding clang token kind.
// The map is a function-local static built once from the KEYWORD entries of
// clang's TokenKinds.def: each KEYWORD(Name, Flags) contributes the pair
// {"Name", tok::kw_Name}.
// NOTE(review): other entry kinds in TokenKinds.def are presumably either
// empty by default or routed through KEYWORD -- confirm against the .def file.
static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
  static llvm::StringMap<tok::TokenKind> g_map{
#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
#include "clang/Basic/TokenKinds.def"
#undef KEYWORD
  };
  return g_map;
}
613 
614 void CPlusPlusNameParser::ExtractTokens() {
615   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
616                      m_text.data(), m_text.data() + m_text.size());
617   const auto &kw_map = GetKeywordsMap();
618   clang::Token token;
619   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
620        lexer.LexFromRawLexer(token)) {
621     if (token.is(clang::tok::raw_identifier)) {
622       auto it = kw_map.find(token.getRawIdentifier());
623       if (it != kw_map.end()) {
624         token.setKind(it->getValue());
625       }
626     }
627 
628     m_tokens.push_back(token);
629   }
630 }
631