xref: /llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp (revision 699a748893d6b96a21d235aa109a6465e8af9f83)
1 //===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "CPlusPlusNameParser.h"
11 
12 #include "clang/Basic/IdentifierTable.h"
13 #include "llvm/ADT/StringMap.h"
14 #include "llvm/Support/Threading.h"
15 
16 using namespace lldb;
17 using namespace lldb_private;
18 using llvm::Optional;
19 using llvm::None;
20 using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
21 using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
22 namespace tok = clang::tok;
23 
24 Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25   m_next_token_index = 0;
26   Optional<ParsedFunction> result(None);
27 
28   // Try to parse the name as function without a return type specified
29   // e.g. main(int, char*[])
30   {
31     Bookmark start_position = SetBookmark();
32     result = ParseFunctionImpl(false);
33     if (result && !HasMoreTokens())
34       return result;
35   }
36 
37   // Try to parse the name as function with function pointer return type
38   // e.g. void (*get_func(const char*))()
39   result = ParseFuncPtr(true);
40   if (result)
41     return result;
42 
43   // Finally try to parse the name as a function with non-function return type
44   // e.g. int main(int, char*[])
45   result = ParseFunctionImpl(true);
46   return result;
47 }
48 
49 Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
50   m_next_token_index = 0;
51   Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
52   if (!name_ranges)
53     return None;
54   ParsedName result;
55   result.basename = GetTextForRange(name_ranges.getValue().basename_range);
56   result.context = GetTextForRange(name_ranges.getValue().context_range);
57   return result;
58 }
59 
60 bool CPlusPlusNameParser::HasMoreTokens() {
61   return m_next_token_index < m_tokens.size();
62 }
63 
64 void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
65 
66 void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
67 
68 bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
69   if (!HasMoreTokens())
70     return false;
71 
72   if (!Peek().is(kind))
73     return false;
74 
75   Advance();
76   return true;
77 }
78 
79 template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
80   if (!HasMoreTokens())
81     return false;
82 
83   if (!Peek().isOneOf(kinds...))
84     return false;
85 
86   Advance();
87   return true;
88 }
89 
90 CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
91   return Bookmark(m_next_token_index);
92 }
93 
94 size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
95 
96 clang::Token &CPlusPlusNameParser::Peek() {
97   assert(HasMoreTokens());
98   return m_tokens[m_next_token_index];
99 }
100 
101 Optional<ParsedFunction>
102 CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
103   Bookmark start_position = SetBookmark();
104   if (expect_return_type) {
105     // Consume return type if it's expected.
106     if (!ConsumeTypename())
107       return None;
108   }
109 
110   auto maybe_name = ParseFullNameImpl();
111   if (!maybe_name) {
112     return None;
113   }
114 
115   size_t argument_start = GetCurrentPosition();
116   if (!ConsumeArguments()) {
117     return None;
118   }
119 
120   size_t qualifiers_start = GetCurrentPosition();
121   SkipFunctionQualifiers();
122   size_t end_position = GetCurrentPosition();
123 
124   ParsedFunction result;
125   result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
126   result.name.context = GetTextForRange(maybe_name.getValue().context_range);
127   result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
128   result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
129   start_position.Remove();
130   return result;
131 }
132 
133 Optional<ParsedFunction>
134 CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
135   Bookmark start_position = SetBookmark();
136   if (expect_return_type) {
137     // Consume return type.
138     if (!ConsumeTypename())
139       return None;
140   }
141 
142   if (!ConsumeToken(tok::l_paren))
143     return None;
144   if (!ConsumePtrsAndRefs())
145     return None;
146 
147   {
148     Bookmark before_inner_function_pos = SetBookmark();
149     auto maybe_inner_function_name = ParseFunctionImpl(false);
150     if (maybe_inner_function_name)
151       if (ConsumeToken(tok::r_paren))
152         if (ConsumeArguments()) {
153           SkipFunctionQualifiers();
154           start_position.Remove();
155           before_inner_function_pos.Remove();
156           return maybe_inner_function_name;
157         }
158   }
159 
160   auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
161   if (maybe_inner_function_ptr_name)
162     if (ConsumeToken(tok::r_paren))
163       if (ConsumeArguments()) {
164         SkipFunctionQualifiers();
165         start_position.Remove();
166         return maybe_inner_function_ptr_name;
167       }
168   return None;
169 }
170 
171 bool CPlusPlusNameParser::ConsumeArguments() {
172   return ConsumeBrackets(tok::l_paren, tok::r_paren);
173 }
174 
175 bool CPlusPlusNameParser::ConsumeTemplateArgs() {
176   Bookmark start_position = SetBookmark();
177   if (!HasMoreTokens() || Peek().getKind() != tok::less)
178     return false;
179   Advance();
180 
181   // Consuming template arguments is a bit trickier than consuming function
182   // arguments, because '<' '>' brackets are not always trivially balanced.
183   // In some rare cases tokens '<' and '>' can appear inside template arguments
184   // as arithmetic or shift operators not as template brackets.
185   // Examples: std::enable_if<(10u)<(64), bool>
186   //           f<A<operator<(X,Y)::Subclass>>
187   // Good thing that compiler makes sure that really ambiguous cases of
188   // '>' usage should be enclosed within '()' brackets.
189   int template_counter = 1;
190   bool can_open_template = false;
191   while (HasMoreTokens() && template_counter > 0) {
192     tok::TokenKind kind = Peek().getKind();
193     switch (kind) {
194     case tok::greatergreater:
195       template_counter -= 2;
196       can_open_template = false;
197       Advance();
198       break;
199     case tok::greater:
200       --template_counter;
201       can_open_template = false;
202       Advance();
203       break;
204     case tok::less:
205       // '<' is an attempt to open a subteamplte
206       // check if parser is at the point where it's actually possible,
207       // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
208       // No need to do the same for '>' because compiler actually makes sure
209       // that '>' always surrounded by brackets to avoid ambiguity.
210       if (can_open_template)
211         ++template_counter;
212       can_open_template = false;
213       Advance();
214       break;
215     case tok::kw_operator: // C++ operator overloading.
216       if (!ConsumeOperator())
217         return false;
218       can_open_template = true;
219       break;
220     case tok::raw_identifier:
221       can_open_template = true;
222       Advance();
223       break;
224     case tok::l_square:
225       if (!ConsumeBrackets(tok::l_square, tok::r_square))
226         return false;
227       can_open_template = false;
228       break;
229     case tok::l_paren:
230       if (!ConsumeArguments())
231         return false;
232       can_open_template = false;
233       break;
234     default:
235       can_open_template = false;
236       Advance();
237       break;
238     }
239   }
240 
241   assert(template_counter >= 0);
242   if (template_counter > 0) {
243     return false;
244   }
245   start_position.Remove();
246   return true;
247 }
248 
249 bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
250   Bookmark start_position = SetBookmark();
251   if (!ConsumeToken(tok::l_paren)) {
252     return false;
253   }
254   constexpr llvm::StringLiteral g_anonymous("anonymous");
255   if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
256       Peek().getRawIdentifier() == g_anonymous) {
257     Advance();
258   } else {
259     return false;
260   }
261 
262   if (!ConsumeToken(tok::kw_namespace)) {
263     return false;
264   }
265 
266   if (!ConsumeToken(tok::r_paren)) {
267     return false;
268   }
269   start_position.Remove();
270   return true;
271 }
272 
273 bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
274                                           tok::TokenKind right) {
275   Bookmark start_position = SetBookmark();
276   if (!HasMoreTokens() || Peek().getKind() != left)
277     return false;
278   Advance();
279 
280   int counter = 1;
281   while (HasMoreTokens() && counter > 0) {
282     tok::TokenKind kind = Peek().getKind();
283     if (kind == right)
284       --counter;
285     else if (kind == left)
286       ++counter;
287     Advance();
288   }
289 
290   assert(counter >= 0);
291   if (counter > 0) {
292     return false;
293   }
294   start_position.Remove();
295   return true;
296 }
297 
298 bool CPlusPlusNameParser::ConsumeOperator() {
299   Bookmark start_position = SetBookmark();
300   if (!ConsumeToken(tok::kw_operator))
301     return false;
302 
303   if (!HasMoreTokens()) {
304     return false;
305   }
306 
307   const auto &token = Peek();
308   switch (token.getKind()) {
309   case tok::kw_new:
310   case tok::kw_delete:
311     // This is 'new' or 'delete' operators.
312     Advance();
313     // Check for array new/delete.
314     if (HasMoreTokens() && Peek().is(tok::l_square)) {
315       // Consume the '[' and ']'.
316       if (!ConsumeBrackets(tok::l_square, tok::r_square))
317         return false;
318     }
319     break;
320 
321 #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
322   case tok::Token:                                                             \
323     Advance();                                                                 \
324     break;
325 #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
326 #include "clang/Basic/OperatorKinds.def"
327 #undef OVERLOADED_OPERATOR
328 #undef OVERLOADED_OPERATOR_MULTI
329 
330   case tok::l_paren:
331     // Call operator consume '(' ... ')'.
332     if (ConsumeBrackets(tok::l_paren, tok::r_paren))
333       break;
334     return false;
335 
336   case tok::l_square:
337     // This is a [] operator.
338     // Consume the '[' and ']'.
339     if (ConsumeBrackets(tok::l_square, tok::r_square))
340       break;
341     return false;
342 
343   default:
344     // This might be a cast operator.
345     if (ConsumeTypename())
346       break;
347     return false;
348   }
349   start_position.Remove();
350   return true;
351 }
352 
353 void CPlusPlusNameParser::SkipTypeQualifiers() {
354   while (ConsumeToken(tok::kw_const, tok::kw_volatile))
355     ;
356 }
357 
358 void CPlusPlusNameParser::SkipFunctionQualifiers() {
359   while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
360     ;
361 }
362 
363 bool CPlusPlusNameParser::ConsumeBuiltinType() {
364   bool result = false;
365   bool continue_parsing = true;
366   // Built-in types can be made of a few keywords
367   // like 'unsigned long long int'. This function
368   // consumes all built-in type keywords without
369   // checking if they make sense like 'unsigned char void'.
370   while (continue_parsing && HasMoreTokens()) {
371     switch (Peek().getKind()) {
372     case tok::kw_short:
373     case tok::kw_long:
374     case tok::kw___int64:
375     case tok::kw___int128:
376     case tok::kw_signed:
377     case tok::kw_unsigned:
378     case tok::kw_void:
379     case tok::kw_char:
380     case tok::kw_int:
381     case tok::kw_half:
382     case tok::kw_float:
383     case tok::kw_double:
384     case tok::kw___float128:
385     case tok::kw_wchar_t:
386     case tok::kw_bool:
387     case tok::kw_char16_t:
388     case tok::kw_char32_t:
389       result = true;
390       Advance();
391       break;
392     default:
393       continue_parsing = false;
394       break;
395     }
396   }
397   return result;
398 }
399 
400 void CPlusPlusNameParser::SkipPtrsAndRefs() {
401   // Ignoring result.
402   ConsumePtrsAndRefs();
403 }
404 
405 bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
406   bool found = false;
407   SkipTypeQualifiers();
408   while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
409                       tok::kw_volatile)) {
410     found = true;
411     SkipTypeQualifiers();
412   }
413   return found;
414 }
415 
416 bool CPlusPlusNameParser::ConsumeTypename() {
417   Bookmark start_position = SetBookmark();
418   SkipTypeQualifiers();
419   if (!ConsumeBuiltinType()) {
420     if (!ParseFullNameImpl())
421       return false;
422   }
423   SkipPtrsAndRefs();
424   start_position.Remove();
425   return true;
426 }
427 
428 Optional<CPlusPlusNameParser::ParsedNameRanges>
429 CPlusPlusNameParser::ParseFullNameImpl() {
430   // Name parsing state machine.
431   enum class State {
432     Beginning,       // start of the name
433     AfterTwoColons,  // right after ::
434     AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
435     AfterTemplate,   // right after template brackets (<something>)
436     AfterOperator,   // right after name of C++ operator
437   };
438 
439   Bookmark start_position = SetBookmark();
440   State state = State::Beginning;
441   bool continue_parsing = true;
442   Optional<size_t> last_coloncolon_position = None;
443 
444   while (continue_parsing && HasMoreTokens()) {
445     const auto &token = Peek();
446     switch (token.getKind()) {
447     case tok::raw_identifier: // Just a name.
448       if (state != State::Beginning && state != State::AfterTwoColons) {
449         continue_parsing = false;
450         break;
451       }
452       Advance();
453       state = State::AfterIdentifier;
454       break;
455     case tok::l_paren: {
456       if (state == State::Beginning || state == State::AfterTwoColons) {
457         // (anonymous namespace)
458         if (ConsumeAnonymousNamespace()) {
459           state = State::AfterIdentifier;
460           break;
461         }
462       }
463 
464       // Type declared inside a function 'func()::Type'
465       if (state != State::AfterIdentifier && state != State::AfterTemplate &&
466           state != State::AfterOperator) {
467         continue_parsing = false;
468         break;
469       }
470       Bookmark l_paren_position = SetBookmark();
471       // Consume the '(' ... ') [const]'.
472       if (!ConsumeArguments()) {
473         continue_parsing = false;
474         break;
475       }
476       SkipFunctionQualifiers();
477 
478       // Consume '::'
479       size_t coloncolon_position = GetCurrentPosition();
480       if (!ConsumeToken(tok::coloncolon)) {
481         continue_parsing = false;
482         break;
483       }
484       l_paren_position.Remove();
485       last_coloncolon_position = coloncolon_position;
486       state = State::AfterTwoColons;
487       break;
488     }
489     case tok::coloncolon: // Type nesting delimiter.
490       if (state != State::Beginning && state != State::AfterIdentifier &&
491           state != State::AfterTemplate) {
492         continue_parsing = false;
493         break;
494       }
495       last_coloncolon_position = GetCurrentPosition();
496       Advance();
497       state = State::AfterTwoColons;
498       break;
499     case tok::less: // Template brackets.
500       if (state != State::AfterIdentifier && state != State::AfterOperator) {
501         continue_parsing = false;
502         break;
503       }
504       if (!ConsumeTemplateArgs()) {
505         continue_parsing = false;
506         break;
507       }
508       state = State::AfterTemplate;
509       break;
510     case tok::kw_operator: // C++ operator overloading.
511       if (state != State::Beginning && state != State::AfterTwoColons) {
512         continue_parsing = false;
513         break;
514       }
515       if (!ConsumeOperator()) {
516         continue_parsing = false;
517         break;
518       }
519       state = State::AfterOperator;
520       break;
521     case tok::tilde: // Destructor.
522       if (state != State::Beginning && state != State::AfterTwoColons) {
523         continue_parsing = false;
524         break;
525       }
526       Advance();
527       if (ConsumeToken(tok::raw_identifier)) {
528         state = State::AfterIdentifier;
529       } else {
530         TakeBack();
531         continue_parsing = false;
532       }
533       break;
534     default:
535       continue_parsing = false;
536       break;
537     }
538   }
539 
540   if (state == State::AfterIdentifier || state == State::AfterOperator ||
541       state == State::AfterTemplate) {
542     ParsedNameRanges result;
543     if (last_coloncolon_position) {
544       result.context_range = Range(start_position.GetSavedPosition(),
545                                    last_coloncolon_position.getValue());
546       result.basename_range =
547           Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
548     } else {
549       result.basename_range =
550           Range(start_position.GetSavedPosition(), GetCurrentPosition());
551     }
552     start_position.Remove();
553     return result;
554   } else {
555     return None;
556   }
557 }
558 
559 llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
560   if (range.empty())
561     return llvm::StringRef();
562   assert(range.begin_index < range.end_index);
563   assert(range.begin_index < m_tokens.size());
564   assert(range.end_index <= m_tokens.size());
565   clang::Token &first_token = m_tokens[range.begin_index];
566   clang::Token &last_token = m_tokens[range.end_index - 1];
567   clang::SourceLocation start_loc = first_token.getLocation();
568   clang::SourceLocation end_loc = last_token.getLocation();
569   unsigned start_pos = start_loc.getRawEncoding();
570   unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
571   return m_text.take_front(end_pos).drop_front(start_pos);
572 }
573 
574 static const clang::LangOptions &GetLangOptions() {
575   static clang::LangOptions g_options;
576   static llvm::once_flag g_once_flag;
577   llvm::call_once(g_once_flag, []() {
578     g_options.LineComment = true;
579     g_options.C99 = true;
580     g_options.C11 = true;
581     g_options.CPlusPlus = true;
582     g_options.CPlusPlus11 = true;
583     g_options.CPlusPlus14 = true;
584     g_options.CPlusPlus1z = true;
585   });
586   return g_options;
587 }
588 
589 static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
590   static llvm::StringMap<tok::TokenKind> g_map{
591 #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
592 #include "clang/Basic/TokenKinds.def"
593 #undef KEYWORD
594   };
595   return g_map;
596 }
597 
598 void CPlusPlusNameParser::ExtractTokens() {
599   clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
600                      m_text.data(), m_text.data() + m_text.size());
601   const auto &kw_map = GetKeywordsMap();
602   clang::Token token;
603   for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
604        lexer.LexFromRawLexer(token)) {
605     if (token.is(clang::tok::raw_identifier)) {
606       auto it = kw_map.find(token.getRawIdentifier());
607       if (it != kw_map.end()) {
608         token.setKind(it->getValue());
609       }
610     }
611 
612     m_tokens.push_back(token);
613   }
614 }
615