1 /* Rust expression parsing for GDB, the GNU debugger. 2 3 Copyright (C) 2016-2023 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include "defs.h" 21 22 #include "block.h" 23 #include "charset.h" 24 #include "cp-support.h" 25 #include "gdbsupport/gdb_obstack.h" 26 #include "gdbsupport/gdb_regex.h" 27 #include "rust-lang.h" 28 #include "parser-defs.h" 29 #include "gdbsupport/selftest.h" 30 #include "value.h" 31 #include "gdbarch.h" 32 #include "rust-exp.h" 33 34 using namespace expr; 35 36 /* A regular expression for matching Rust numbers. This is split up 37 since it is very long and this gives us a way to comment the 38 sections. */ 39 40 static const char number_regex_text[] = 41 /* subexpression 1: allows use of alternation, otherwise uninteresting */ 42 "^(" 43 /* First comes floating point. */ 44 /* Recognize number after the decimal point, with optional 45 exponent and optional type suffix. 46 subexpression 2: allows "?", otherwise uninteresting 47 subexpression 3: if present, type suffix 48 */ 49 "[0-9][0-9_]*\\.[0-9][0-9_]*([eE][-+]?[0-9][0-9_]*)?(f32|f64)?" 50 #define FLOAT_TYPE1 3 51 "|" 52 /* Recognize exponent without decimal point, with optional type 53 suffix. 54 subexpression 4: if present, type suffix 55 */ 56 #define FLOAT_TYPE2 4 57 "[0-9][0-9_]*[eE][-+]?[0-9][0-9_]*(f32|f64)?" 
58 "|" 59 /* "23." is a valid floating point number, but "23.e5" and 60 "23.f32" are not. So, handle the trailing-. case 61 separately. */ 62 "[0-9][0-9_]*\\." 63 "|" 64 /* Finally come integers. 65 subexpression 5: text of integer 66 subexpression 6: if present, type suffix 67 subexpression 7: allows use of alternation, otherwise uninteresting 68 */ 69 #define INT_TEXT 5 70 #define INT_TYPE 6 71 "(0x[a-fA-F0-9_]+|0o[0-7_]+|0b[01_]+|[0-9][0-9_]*)" 72 "([iu](size|8|16|32|64))?" 73 ")"; 74 /* The number of subexpressions to allocate space for, including the 75 "0th" whole match subexpression. */ 76 #define NUM_SUBEXPRESSIONS 8 77 78 /* The compiled number-matching regex. */ 79 80 static regex_t number_regex; 81 82 /* The kinds of tokens. Note that single-character tokens are 83 represented by themselves, so for instance '[' is a token. */ 84 enum token_type : int 85 { 86 /* Make sure to start after any ASCII character. */ 87 GDBVAR = 256, 88 IDENT, 89 COMPLETE, 90 INTEGER, 91 DECIMAL_INTEGER, 92 STRING, 93 BYTESTRING, 94 FLOAT, 95 COMPOUND_ASSIGN, 96 97 /* Keyword tokens. */ 98 KW_AS, 99 KW_IF, 100 KW_TRUE, 101 KW_FALSE, 102 KW_SUPER, 103 KW_SELF, 104 KW_MUT, 105 KW_EXTERN, 106 KW_CONST, 107 KW_FN, 108 KW_SIZEOF, 109 110 /* Operator tokens. */ 111 DOTDOT, 112 DOTDOTEQ, 113 OROR, 114 ANDAND, 115 EQEQ, 116 NOTEQ, 117 LTEQ, 118 GTEQ, 119 LSH, 120 RSH, 121 COLONCOLON, 122 ARROW, 123 }; 124 125 /* A typed integer constant. */ 126 127 struct typed_val_int 128 { 129 ULONGEST val; 130 struct type *type; 131 }; 132 133 /* A typed floating point constant. */ 134 135 struct typed_val_float 136 { 137 float_data val; 138 struct type *type; 139 }; 140 141 /* A struct of this type is used to describe a token. */ 142 143 struct token_info 144 { 145 const char *name; 146 int value; 147 enum exp_opcode opcode; 148 }; 149 150 /* Identifier tokens. 
*/ 151 152 static const struct token_info identifier_tokens[] = 153 { 154 { "as", KW_AS, OP_NULL }, 155 { "false", KW_FALSE, OP_NULL }, 156 { "if", 0, OP_NULL }, 157 { "mut", KW_MUT, OP_NULL }, 158 { "const", KW_CONST, OP_NULL }, 159 { "self", KW_SELF, OP_NULL }, 160 { "super", KW_SUPER, OP_NULL }, 161 { "true", KW_TRUE, OP_NULL }, 162 { "extern", KW_EXTERN, OP_NULL }, 163 { "fn", KW_FN, OP_NULL }, 164 { "sizeof", KW_SIZEOF, OP_NULL }, 165 }; 166 167 /* Operator tokens, sorted longest first. */ 168 169 static const struct token_info operator_tokens[] = 170 { 171 { ">>=", COMPOUND_ASSIGN, BINOP_RSH }, 172 { "<<=", COMPOUND_ASSIGN, BINOP_LSH }, 173 174 { "<<", LSH, OP_NULL }, 175 { ">>", RSH, OP_NULL }, 176 { "&&", ANDAND, OP_NULL }, 177 { "||", OROR, OP_NULL }, 178 { "==", EQEQ, OP_NULL }, 179 { "!=", NOTEQ, OP_NULL }, 180 { "<=", LTEQ, OP_NULL }, 181 { ">=", GTEQ, OP_NULL }, 182 { "+=", COMPOUND_ASSIGN, BINOP_ADD }, 183 { "-=", COMPOUND_ASSIGN, BINOP_SUB }, 184 { "*=", COMPOUND_ASSIGN, BINOP_MUL }, 185 { "/=", COMPOUND_ASSIGN, BINOP_DIV }, 186 { "%=", COMPOUND_ASSIGN, BINOP_REM }, 187 { "&=", COMPOUND_ASSIGN, BINOP_BITWISE_AND }, 188 { "|=", COMPOUND_ASSIGN, BINOP_BITWISE_IOR }, 189 { "^=", COMPOUND_ASSIGN, BINOP_BITWISE_XOR }, 190 { "..=", DOTDOTEQ, OP_NULL }, 191 192 { "::", COLONCOLON, OP_NULL }, 193 { "..", DOTDOT, OP_NULL }, 194 { "->", ARROW, OP_NULL } 195 }; 196 197 /* An instance of this is created before parsing, and destroyed when 198 parsing is finished. */ 199 200 struct rust_parser 201 { 202 explicit rust_parser (struct parser_state *state) 203 : pstate (state) 204 { 205 } 206 207 DISABLE_COPY_AND_ASSIGN (rust_parser); 208 209 /* Return the parser's language. */ 210 const struct language_defn *language () const 211 { 212 return pstate->language (); 213 } 214 215 /* Return the parser's gdbarch. */ 216 struct gdbarch *arch () const 217 { 218 return pstate->gdbarch (); 219 } 220 221 /* A helper to look up a Rust type, or fail. 
This only works for 222 types defined by rust_language_arch_info. */ 223 224 struct type *get_type (const char *name) 225 { 226 struct type *type; 227 228 type = language_lookup_primitive_type (language (), arch (), name); 229 if (type == NULL) 230 error (_("Could not find Rust type %s"), name); 231 return type; 232 } 233 234 std::string crate_name (const std::string &name); 235 std::string super_name (const std::string &ident, unsigned int n_supers); 236 237 int lex_character (); 238 int lex_number (); 239 int lex_string (); 240 int lex_identifier (); 241 uint32_t lex_hex (int min, int max); 242 uint32_t lex_escape (int is_byte); 243 int lex_operator (); 244 int lex_one_token (); 245 void push_back (char c); 246 247 /* The main interface to lexing. Lexes one token and updates the 248 internal state. */ 249 void lex () 250 { 251 current_token = lex_one_token (); 252 } 253 254 /* Assuming the current token is TYPE, lex the next token. */ 255 void assume (int type) 256 { 257 gdb_assert (current_token == type); 258 lex (); 259 } 260 261 /* Require the single-character token C, and lex the next token; or 262 throw an exception. */ 263 void require (char type) 264 { 265 if (current_token != type) 266 error (_("'%c' expected"), type); 267 lex (); 268 } 269 270 /* Entry point for all parsing. 
*/ 271 operation_up parse_entry_point () 272 { 273 lex (); 274 operation_up result = parse_expr (); 275 if (current_token != 0) 276 error (_("Syntax error near '%s'"), pstate->prev_lexptr); 277 return result; 278 } 279 280 operation_up parse_tuple (); 281 operation_up parse_array (); 282 operation_up name_to_operation (const std::string &name); 283 operation_up parse_struct_expr (struct type *type); 284 operation_up parse_binop (bool required); 285 operation_up parse_range (); 286 operation_up parse_expr (); 287 operation_up parse_sizeof (); 288 operation_up parse_addr (); 289 operation_up parse_field (operation_up &&); 290 operation_up parse_index (operation_up &&); 291 std::vector<operation_up> parse_paren_args (); 292 operation_up parse_call (operation_up &&); 293 std::vector<struct type *> parse_type_list (); 294 std::vector<struct type *> parse_maybe_type_list (); 295 struct type *parse_array_type (); 296 struct type *parse_slice_type (); 297 struct type *parse_pointer_type (); 298 struct type *parse_function_type (); 299 struct type *parse_tuple_type (); 300 struct type *parse_type (); 301 std::string parse_path (bool for_expr); 302 operation_up parse_string (); 303 operation_up parse_tuple_struct (struct type *type); 304 operation_up parse_path_expr (); 305 operation_up parse_atom (bool required); 306 307 void update_innermost_block (struct block_symbol sym); 308 struct block_symbol lookup_symbol (const char *name, 309 const struct block *block, 310 const domain_enum domain); 311 struct type *rust_lookup_type (const char *name); 312 313 /* Clear some state. This is only used for testing. 
*/ 314 #if GDB_SELF_TEST 315 void reset (const char *input) 316 { 317 pstate->prev_lexptr = nullptr; 318 pstate->lexptr = input; 319 paren_depth = 0; 320 current_token = 0; 321 current_int_val = {}; 322 current_float_val = {}; 323 current_string_val = {}; 324 current_opcode = OP_NULL; 325 } 326 #endif /* GDB_SELF_TEST */ 327 328 /* Return the token's string value as a string. */ 329 std::string get_string () const 330 { 331 return std::string (current_string_val.ptr, current_string_val.length); 332 } 333 334 /* A pointer to this is installed globally. */ 335 auto_obstack obstack; 336 337 /* The parser state gdb gave us. */ 338 struct parser_state *pstate; 339 340 /* Depth of parentheses. */ 341 int paren_depth = 0; 342 343 /* The current token's type. */ 344 int current_token = 0; 345 /* The current token's payload, if any. */ 346 typed_val_int current_int_val {}; 347 typed_val_float current_float_val {}; 348 struct stoken current_string_val {}; 349 enum exp_opcode current_opcode = OP_NULL; 350 351 /* When completing, this may be set to the field operation to 352 complete. */ 353 operation_up completion_op; 354 }; 355 356 /* Return an string referring to NAME, but relative to the crate's 357 name. */ 358 359 std::string 360 rust_parser::crate_name (const std::string &name) 361 { 362 std::string crate = rust_crate_for_block (pstate->expression_context_block); 363 364 if (crate.empty ()) 365 error (_("Could not find crate for current location")); 366 return "::" + crate + "::" + name; 367 } 368 369 /* Return a string referring to a "super::" qualified name. IDENT is 370 the base name and N_SUPERS is how many "super::"s were provided. 371 N_SUPERS can be zero. 
*/ 372 373 std::string 374 rust_parser::super_name (const std::string &ident, unsigned int n_supers) 375 { 376 const char *scope = block_scope (pstate->expression_context_block); 377 int offset; 378 379 if (scope[0] == '\0') 380 error (_("Couldn't find namespace scope for self::")); 381 382 if (n_supers > 0) 383 { 384 int len; 385 std::vector<int> offsets; 386 unsigned int current_len; 387 388 current_len = cp_find_first_component (scope); 389 while (scope[current_len] != '\0') 390 { 391 offsets.push_back (current_len); 392 gdb_assert (scope[current_len] == ':'); 393 /* The "::". */ 394 current_len += 2; 395 current_len += cp_find_first_component (scope 396 + current_len); 397 } 398 399 len = offsets.size (); 400 if (n_supers >= len) 401 error (_("Too many super:: uses from '%s'"), scope); 402 403 offset = offsets[len - n_supers]; 404 } 405 else 406 offset = strlen (scope); 407 408 return "::" + std::string (scope, offset) + "::" + ident; 409 } 410 411 /* A helper to appropriately munge NAME and BLOCK depending on the 412 presence of a leading "::". */ 413 414 static void 415 munge_name_and_block (const char **name, const struct block **block) 416 { 417 /* If it is a global reference, skip the current block in favor of 418 the static block. */ 419 if (startswith (*name, "::")) 420 { 421 *name += 2; 422 *block = block_static_block (*block); 423 } 424 } 425 426 /* Like lookup_symbol, but handles Rust namespace conventions, and 427 doesn't require field_of_this_result. */ 428 429 struct block_symbol 430 rust_parser::lookup_symbol (const char *name, const struct block *block, 431 const domain_enum domain) 432 { 433 struct block_symbol result; 434 435 munge_name_and_block (&name, &block); 436 437 result = ::lookup_symbol (name, block, domain, NULL); 438 if (result.symbol != NULL) 439 update_innermost_block (result); 440 return result; 441 } 442 443 /* Look up a type, following Rust namespace conventions. 
*/ 444 445 struct type * 446 rust_parser::rust_lookup_type (const char *name) 447 { 448 struct block_symbol result; 449 struct type *type; 450 451 const struct block *block = pstate->expression_context_block; 452 munge_name_and_block (&name, &block); 453 454 result = ::lookup_symbol (name, block, STRUCT_DOMAIN, NULL); 455 if (result.symbol != NULL) 456 { 457 update_innermost_block (result); 458 return result.symbol->type (); 459 } 460 461 type = lookup_typename (language (), name, NULL, 1); 462 if (type != NULL) 463 return type; 464 465 /* Last chance, try a built-in type. */ 466 return language_lookup_primitive_type (language (), arch (), name); 467 } 468 469 /* A helper that updates the innermost block as appropriate. */ 470 471 void 472 rust_parser::update_innermost_block (struct block_symbol sym) 473 { 474 if (symbol_read_needs_frame (sym.symbol)) 475 pstate->block_tracker->update (sym); 476 } 477 478 /* Lex a hex number with at least MIN digits and at most MAX 479 digits. */ 480 481 uint32_t 482 rust_parser::lex_hex (int min, int max) 483 { 484 uint32_t result = 0; 485 int len = 0; 486 /* We only want to stop at MAX if we're lexing a byte escape. */ 487 int check_max = min == max; 488 489 while ((check_max ? 
len <= max : 1) 490 && ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f') 491 || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F') 492 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))) 493 { 494 result *= 16; 495 if (pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'f') 496 result = result + 10 + pstate->lexptr[0] - 'a'; 497 else if (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'F') 498 result = result + 10 + pstate->lexptr[0] - 'A'; 499 else 500 result = result + pstate->lexptr[0] - '0'; 501 ++pstate->lexptr; 502 ++len; 503 } 504 505 if (len < min) 506 error (_("Not enough hex digits seen")); 507 if (len > max) 508 { 509 gdb_assert (min != max); 510 error (_("Overlong hex escape")); 511 } 512 513 return result; 514 } 515 516 /* Lex an escape. IS_BYTE is true if we're lexing a byte escape; 517 otherwise we're lexing a character escape. */ 518 519 uint32_t 520 rust_parser::lex_escape (int is_byte) 521 { 522 uint32_t result; 523 524 gdb_assert (pstate->lexptr[0] == '\\'); 525 ++pstate->lexptr; 526 switch (pstate->lexptr[0]) 527 { 528 case 'x': 529 ++pstate->lexptr; 530 result = lex_hex (2, 2); 531 break; 532 533 case 'u': 534 if (is_byte) 535 error (_("Unicode escape in byte literal")); 536 ++pstate->lexptr; 537 if (pstate->lexptr[0] != '{') 538 error (_("Missing '{' in Unicode escape")); 539 ++pstate->lexptr; 540 result = lex_hex (1, 6); 541 /* Could do range checks here. 
*/ 542 if (pstate->lexptr[0] != '}') 543 error (_("Missing '}' in Unicode escape")); 544 ++pstate->lexptr; 545 break; 546 547 case 'n': 548 result = '\n'; 549 ++pstate->lexptr; 550 break; 551 case 'r': 552 result = '\r'; 553 ++pstate->lexptr; 554 break; 555 case 't': 556 result = '\t'; 557 ++pstate->lexptr; 558 break; 559 case '\\': 560 result = '\\'; 561 ++pstate->lexptr; 562 break; 563 case '0': 564 result = '\0'; 565 ++pstate->lexptr; 566 break; 567 case '\'': 568 result = '\''; 569 ++pstate->lexptr; 570 break; 571 case '"': 572 result = '"'; 573 ++pstate->lexptr; 574 break; 575 576 default: 577 error (_("Invalid escape \\%c in literal"), pstate->lexptr[0]); 578 } 579 580 return result; 581 } 582 583 /* A helper for lex_character. Search forward for the closing single 584 quote, then convert the bytes from the host charset to UTF-32. */ 585 586 static uint32_t 587 lex_multibyte_char (const char *text, int *len) 588 { 589 /* Only look a maximum of 5 bytes for the closing quote. This is 590 the maximum for UTF-8. */ 591 int quote; 592 gdb_assert (text[0] != '\''); 593 for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote) 594 ; 595 *len = quote; 596 /* The caller will issue an error. */ 597 if (text[quote] == '\0') 598 return 0; 599 600 auto_obstack result; 601 convert_between_encodings (host_charset (), HOST_UTF32, 602 (const gdb_byte *) text, 603 quote, 1, &result, translit_none); 604 605 int size = obstack_object_size (&result); 606 if (size > 4) 607 error (_("overlong character literal")); 608 uint32_t value; 609 memcpy (&value, obstack_finish (&result), size); 610 return value; 611 } 612 613 /* Lex a character constant. 
*/ 614 615 int 616 rust_parser::lex_character () 617 { 618 int is_byte = 0; 619 uint32_t value; 620 621 if (pstate->lexptr[0] == 'b') 622 { 623 is_byte = 1; 624 ++pstate->lexptr; 625 } 626 gdb_assert (pstate->lexptr[0] == '\''); 627 ++pstate->lexptr; 628 if (pstate->lexptr[0] == '\'') 629 error (_("empty character literal")); 630 else if (pstate->lexptr[0] == '\\') 631 value = lex_escape (is_byte); 632 else 633 { 634 int len; 635 value = lex_multibyte_char (&pstate->lexptr[0], &len); 636 pstate->lexptr += len; 637 } 638 639 if (pstate->lexptr[0] != '\'') 640 error (_("Unterminated character literal")); 641 ++pstate->lexptr; 642 643 current_int_val.val = value; 644 current_int_val.type = get_type (is_byte ? "u8" : "char"); 645 646 return INTEGER; 647 } 648 649 /* Return the offset of the double quote if STR looks like the start 650 of a raw string, or 0 if STR does not start a raw string. */ 651 652 static int 653 starts_raw_string (const char *str) 654 { 655 const char *save = str; 656 657 if (str[0] != 'r') 658 return 0; 659 ++str; 660 while (str[0] == '#') 661 ++str; 662 if (str[0] == '"') 663 return str - save; 664 return 0; 665 } 666 667 /* Return true if STR looks like the end of a raw string that had N 668 hashes at the start. */ 669 670 static bool 671 ends_raw_string (const char *str, int n) 672 { 673 int i; 674 675 gdb_assert (str[0] == '"'); 676 for (i = 0; i < n; ++i) 677 if (str[i + 1] != '#') 678 return false; 679 return true; 680 } 681 682 /* Lex a string constant. 
*/

int
rust_parser::lex_string ()
{
  /* A leading "b" marks a byte string.  */
  const int is_byte = pstate->lexptr[0] == 'b';
  if (is_byte)
    ++pstate->lexptr;

  /* RAW_LENGTH is nonzero for a raw string; it is then the distance
     from the "r" to the opening quote, i.e. one more than the number
     of hashes.  */
  const int raw_length = starts_raw_string (pstate->lexptr);
  pstate->lexptr += raw_length;
  gdb_assert (pstate->lexptr[0] == '"');
  ++pstate->lexptr;

  for (;;)
    {
      const char c = pstate->lexptr[0];
      uint32_t value;

      if (raw_length > 0)
	{
	  /* Raw strings have no escapes; only a quote followed by the
	     right number of hashes ends them.  */
	  if (c == '"' && ends_raw_string (pstate->lexptr, raw_length - 1))
	    {
	      /* Exit with lexptr pointing after the final "#".  */
	      pstate->lexptr += raw_length;
	      break;
	    }
	  if (c == '\0')
	    error (_("Unexpected EOF in string"));

	  value = c & 0xff;
	  if (is_byte && value > 127)
	    error (_("Non-ASCII value in raw byte string"));
	  obstack_1grow (&obstack, value);

	  ++pstate->lexptr;
	  continue;
	}

      if (c == '"')
	{
	  /* Make sure to skip the quote.  */
	  ++pstate->lexptr;
	  break;
	}

      if (c == '\\')
	{
	  /* lex_escape advances lexptr itself.  */
	  value = lex_escape (is_byte);

	  if (is_byte)
	    obstack_1grow (&obstack, value);
	  else
	    convert_between_encodings (HOST_UTF32, "UTF-8",
				       (gdb_byte *) &value,
				       sizeof (value), sizeof (value),
				       &obstack, translit_none);
	  continue;
	}

      if (c == '\0')
	error (_("Unexpected EOF in string"));

      value = c & 0xff;
      if (is_byte && value > 127)
	error (_("Non-ASCII value in byte string"));
      obstack_1grow (&obstack, value);
      ++pstate->lexptr;
    }

  /* The bytes accumulated on the obstack are the token's payload.  */
  current_string_val.length = obstack_object_size (&obstack);
  current_string_val.ptr = (const char *) obstack_finish (&obstack);
  return is_byte ? BYTESTRING : STRING;
}

/* Return true if STRING starts with whitespace followed by a digit.  
*/ 756 757 static bool 758 space_then_number (const char *string) 759 { 760 const char *p = string; 761 762 while (p[0] == ' ' || p[0] == '\t') 763 ++p; 764 if (p == string) 765 return false; 766 767 return *p >= '0' && *p <= '9'; 768 } 769 770 /* Return true if C can start an identifier. */ 771 772 static bool 773 rust_identifier_start_p (char c) 774 { 775 return ((c >= 'a' && c <= 'z') 776 || (c >= 'A' && c <= 'Z') 777 || c == '_' 778 || c == '$' 779 /* Allow any non-ASCII character as an identifier. There 780 doesn't seem to be a need to be picky about this. */ 781 || (c & 0x80) != 0); 782 } 783 784 /* Lex an identifier. */ 785 786 int 787 rust_parser::lex_identifier () 788 { 789 unsigned int length; 790 const struct token_info *token; 791 int is_gdb_var = pstate->lexptr[0] == '$'; 792 793 bool is_raw = false; 794 if (pstate->lexptr[0] == 'r' 795 && pstate->lexptr[1] == '#' 796 && rust_identifier_start_p (pstate->lexptr[2])) 797 { 798 is_raw = true; 799 pstate->lexptr += 2; 800 } 801 802 const char *start = pstate->lexptr; 803 gdb_assert (rust_identifier_start_p (pstate->lexptr[0])); 804 805 ++pstate->lexptr; 806 807 /* Allow any non-ASCII character here. This "handles" UTF-8 by 808 passing it through. */ 809 while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z') 810 || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z') 811 || pstate->lexptr[0] == '_' 812 || (is_gdb_var && pstate->lexptr[0] == '$') 813 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') 814 || (pstate->lexptr[0] & 0x80) != 0) 815 ++pstate->lexptr; 816 817 818 length = pstate->lexptr - start; 819 token = NULL; 820 if (!is_raw) 821 { 822 for (const auto &candidate : identifier_tokens) 823 { 824 if (length == strlen (candidate.name) 825 && strncmp (candidate.name, start, length) == 0) 826 { 827 token = &candidate; 828 break; 829 } 830 } 831 } 832 833 if (token != NULL) 834 { 835 if (token->value == 0) 836 { 837 /* Leave the terminating token alone. 
*/ 838 pstate->lexptr = start; 839 return 0; 840 } 841 } 842 else if (token == NULL 843 && !is_raw 844 && (strncmp (start, "thread", length) == 0 845 || strncmp (start, "task", length) == 0) 846 && space_then_number (pstate->lexptr)) 847 { 848 /* "task" or "thread" followed by a number terminates the 849 parse, per gdb rules. */ 850 pstate->lexptr = start; 851 return 0; 852 } 853 854 if (token == NULL || (pstate->parse_completion && pstate->lexptr[0] == '\0')) 855 { 856 current_string_val.length = length; 857 current_string_val.ptr = start; 858 } 859 860 if (pstate->parse_completion && pstate->lexptr[0] == '\0') 861 { 862 /* Prevent rustyylex from returning two COMPLETE tokens. */ 863 pstate->prev_lexptr = pstate->lexptr; 864 return COMPLETE; 865 } 866 867 if (token != NULL) 868 return token->value; 869 if (is_gdb_var) 870 return GDBVAR; 871 return IDENT; 872 } 873 874 /* Lex an operator. */ 875 876 int 877 rust_parser::lex_operator () 878 { 879 const struct token_info *token = NULL; 880 881 for (const auto &candidate : operator_tokens) 882 { 883 if (strncmp (candidate.name, pstate->lexptr, 884 strlen (candidate.name)) == 0) 885 { 886 pstate->lexptr += strlen (candidate.name); 887 token = &candidate; 888 break; 889 } 890 } 891 892 if (token != NULL) 893 { 894 current_opcode = token->opcode; 895 return token->value; 896 } 897 898 return *pstate->lexptr++; 899 } 900 901 /* Lex a number. */ 902 903 int 904 rust_parser::lex_number () 905 { 906 regmatch_t subexps[NUM_SUBEXPRESSIONS]; 907 int match; 908 int is_integer = 0; 909 int could_be_decimal = 1; 910 int implicit_i32 = 0; 911 const char *type_name = NULL; 912 struct type *type; 913 int end_index; 914 int type_index = -1; 915 int i; 916 917 match = regexec (&number_regex, pstate->lexptr, ARRAY_SIZE (subexps), 918 subexps, 0); 919 /* Failure means the regexp is broken. */ 920 gdb_assert (match == 0); 921 922 if (subexps[INT_TEXT].rm_so != -1) 923 { 924 /* Integer part matched. 
*/ 925 is_integer = 1; 926 end_index = subexps[INT_TEXT].rm_eo; 927 if (subexps[INT_TYPE].rm_so == -1) 928 { 929 type_name = "i32"; 930 implicit_i32 = 1; 931 } 932 else 933 { 934 type_index = INT_TYPE; 935 could_be_decimal = 0; 936 } 937 } 938 else if (subexps[FLOAT_TYPE1].rm_so != -1) 939 { 940 /* Found floating point type suffix. */ 941 end_index = subexps[FLOAT_TYPE1].rm_so; 942 type_index = FLOAT_TYPE1; 943 } 944 else if (subexps[FLOAT_TYPE2].rm_so != -1) 945 { 946 /* Found floating point type suffix. */ 947 end_index = subexps[FLOAT_TYPE2].rm_so; 948 type_index = FLOAT_TYPE2; 949 } 950 else 951 { 952 /* Any other floating point match. */ 953 end_index = subexps[0].rm_eo; 954 type_name = "f64"; 955 } 956 957 /* We need a special case if the final character is ".". In this 958 case we might need to parse an integer. For example, "23.f()" is 959 a request for a trait method call, not a syntax error involving 960 the floating point number "23.". */ 961 gdb_assert (subexps[0].rm_eo > 0); 962 if (pstate->lexptr[subexps[0].rm_eo - 1] == '.') 963 { 964 const char *next = skip_spaces (&pstate->lexptr[subexps[0].rm_eo]); 965 966 if (rust_identifier_start_p (*next) || *next == '.') 967 { 968 --subexps[0].rm_eo; 969 is_integer = 1; 970 end_index = subexps[0].rm_eo; 971 type_name = "i32"; 972 could_be_decimal = 1; 973 implicit_i32 = 1; 974 } 975 } 976 977 /* Compute the type name if we haven't already. */ 978 std::string type_name_holder; 979 if (type_name == NULL) 980 { 981 gdb_assert (type_index != -1); 982 type_name_holder = std::string ((pstate->lexptr 983 + subexps[type_index].rm_so), 984 (subexps[type_index].rm_eo 985 - subexps[type_index].rm_so)); 986 type_name = type_name_holder.c_str (); 987 } 988 989 /* Look up the type. */ 990 type = get_type (type_name); 991 992 /* Copy the text of the number and remove the "_"s. 
*/ 993 std::string number; 994 for (i = 0; i < end_index && pstate->lexptr[i]; ++i) 995 { 996 if (pstate->lexptr[i] == '_') 997 could_be_decimal = 0; 998 else 999 number.push_back (pstate->lexptr[i]); 1000 } 1001 1002 /* Advance past the match. */ 1003 pstate->lexptr += subexps[0].rm_eo; 1004 1005 /* Parse the number. */ 1006 if (is_integer) 1007 { 1008 uint64_t value; 1009 int radix = 10; 1010 int offset = 0; 1011 1012 if (number[0] == '0') 1013 { 1014 if (number[1] == 'x') 1015 radix = 16; 1016 else if (number[1] == 'o') 1017 radix = 8; 1018 else if (number[1] == 'b') 1019 radix = 2; 1020 if (radix != 10) 1021 { 1022 offset = 2; 1023 could_be_decimal = 0; 1024 } 1025 } 1026 1027 const char *trailer; 1028 value = strtoulst (number.c_str () + offset, &trailer, radix); 1029 if (*trailer != '\0') 1030 error (_("Integer literal is too large")); 1031 if (implicit_i32 && value >= ((uint64_t) 1) << 31) 1032 type = get_type ("i64"); 1033 1034 current_int_val.val = value; 1035 current_int_val.type = type; 1036 } 1037 else 1038 { 1039 current_float_val.type = type; 1040 bool parsed = parse_float (number.c_str (), number.length (), 1041 current_float_val.type, 1042 current_float_val.val.data ()); 1043 gdb_assert (parsed); 1044 } 1045 1046 return is_integer ? (could_be_decimal ? DECIMAL_INTEGER : INTEGER) : FLOAT; 1047 } 1048 1049 /* The lexer. */ 1050 1051 int 1052 rust_parser::lex_one_token () 1053 { 1054 /* Skip all leading whitespace. */ 1055 while (pstate->lexptr[0] == ' ' 1056 || pstate->lexptr[0] == '\t' 1057 || pstate->lexptr[0] == '\r' 1058 || pstate->lexptr[0] == '\n') 1059 ++pstate->lexptr; 1060 1061 /* If we hit EOF and we're completing, then return COMPLETE -- maybe 1062 we're completing an empty string at the end of a field_expr. 1063 But, we don't want to return two COMPLETE tokens in a row. 
*/ 1064 if (pstate->lexptr[0] == '\0' && pstate->lexptr == pstate->prev_lexptr) 1065 return 0; 1066 pstate->prev_lexptr = pstate->lexptr; 1067 if (pstate->lexptr[0] == '\0') 1068 { 1069 if (pstate->parse_completion) 1070 { 1071 current_string_val.length =0; 1072 current_string_val.ptr = ""; 1073 return COMPLETE; 1074 } 1075 return 0; 1076 } 1077 1078 if (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') 1079 return lex_number (); 1080 else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '\'') 1081 return lex_character (); 1082 else if (pstate->lexptr[0] == 'b' && pstate->lexptr[1] == '"') 1083 return lex_string (); 1084 else if (pstate->lexptr[0] == 'b' && starts_raw_string (pstate->lexptr + 1)) 1085 return lex_string (); 1086 else if (starts_raw_string (pstate->lexptr)) 1087 return lex_string (); 1088 else if (rust_identifier_start_p (pstate->lexptr[0])) 1089 return lex_identifier (); 1090 else if (pstate->lexptr[0] == '"') 1091 return lex_string (); 1092 else if (pstate->lexptr[0] == '\'') 1093 return lex_character (); 1094 else if (pstate->lexptr[0] == '}' || pstate->lexptr[0] == ']') 1095 { 1096 /* Falls through to lex_operator. */ 1097 --paren_depth; 1098 } 1099 else if (pstate->lexptr[0] == '(' || pstate->lexptr[0] == '{') 1100 { 1101 /* Falls through to lex_operator. */ 1102 ++paren_depth; 1103 } 1104 else if (pstate->lexptr[0] == ',' && pstate->comma_terminates 1105 && paren_depth == 0) 1106 return 0; 1107 1108 return lex_operator (); 1109 } 1110 1111 /* Push back a single character to be re-lexed. */ 1112 1113 void 1114 rust_parser::push_back (char c) 1115 { 1116 /* Can't be called before any lexing. */ 1117 gdb_assert (pstate->prev_lexptr != NULL); 1118 1119 --pstate->lexptr; 1120 gdb_assert (*pstate->lexptr == c); 1121 } 1122 1123 1124 1125 /* Parse a tuple or paren expression. 
*/ 1126 1127 operation_up 1128 rust_parser::parse_tuple () 1129 { 1130 assume ('('); 1131 1132 if (current_token == ')') 1133 { 1134 lex (); 1135 struct type *unit = get_type ("()"); 1136 return make_operation<long_const_operation> (unit, 0); 1137 } 1138 1139 operation_up expr = parse_expr (); 1140 if (current_token == ')') 1141 { 1142 /* Parenthesized expression. */ 1143 lex (); 1144 return make_operation<rust_parenthesized_operation> (std::move (expr)); 1145 } 1146 1147 std::vector<operation_up> ops; 1148 ops.push_back (std::move (expr)); 1149 while (current_token != ')') 1150 { 1151 if (current_token != ',') 1152 error (_("',' or ')' expected")); 1153 lex (); 1154 1155 /* A trailing "," is ok. */ 1156 if (current_token != ')') 1157 ops.push_back (parse_expr ()); 1158 } 1159 1160 assume (')'); 1161 1162 error (_("Tuple expressions not supported yet")); 1163 } 1164 1165 /* Parse an array expression. */ 1166 1167 operation_up 1168 rust_parser::parse_array () 1169 { 1170 assume ('['); 1171 1172 if (current_token == KW_MUT) 1173 lex (); 1174 1175 operation_up result; 1176 operation_up expr = parse_expr (); 1177 if (current_token == ';') 1178 { 1179 lex (); 1180 operation_up rhs = parse_expr (); 1181 result = make_operation<rust_array_operation> (std::move (expr), 1182 std::move (rhs)); 1183 } 1184 else if (current_token == ',') 1185 { 1186 std::vector<operation_up> ops; 1187 ops.push_back (std::move (expr)); 1188 while (current_token != ']') 1189 { 1190 if (current_token != ',') 1191 error (_("',' or ']' expected")); 1192 lex (); 1193 ops.push_back (parse_expr ()); 1194 } 1195 ops.shrink_to_fit (); 1196 int len = ops.size () - 1; 1197 result = make_operation<array_operation> (0, len, std::move (ops)); 1198 } 1199 else if (current_token != ']') 1200 error (_("',', ';', or ']' expected")); 1201 1202 require (']'); 1203 1204 return result; 1205 } 1206 1207 /* Turn a name into an operation. 
*/ 1208 1209 operation_up 1210 rust_parser::name_to_operation (const std::string &name) 1211 { 1212 struct block_symbol sym = lookup_symbol (name.c_str (), 1213 pstate->expression_context_block, 1214 VAR_DOMAIN); 1215 if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF) 1216 return make_operation<var_value_operation> (sym); 1217 1218 struct type *type = nullptr; 1219 1220 if (sym.symbol != nullptr) 1221 { 1222 gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF); 1223 type = sym.symbol->type (); 1224 } 1225 if (type == nullptr) 1226 type = rust_lookup_type (name.c_str ()); 1227 if (type == nullptr) 1228 error (_("No symbol '%s' in current context"), name.c_str ()); 1229 1230 if (type->code () == TYPE_CODE_STRUCT && type->num_fields () == 0) 1231 { 1232 /* A unit-like struct. */ 1233 operation_up result (new rust_aggregate_operation (type, {}, {})); 1234 return result; 1235 } 1236 else 1237 return make_operation<type_operation> (type); 1238 } 1239 1240 /* Parse a struct expression. */ 1241 1242 operation_up 1243 rust_parser::parse_struct_expr (struct type *type) 1244 { 1245 assume ('{'); 1246 1247 if (type->code () != TYPE_CODE_STRUCT 1248 || rust_tuple_type_p (type) 1249 || rust_tuple_struct_type_p (type)) 1250 error (_("Struct expression applied to non-struct type")); 1251 1252 std::vector<std::pair<std::string, operation_up>> field_v; 1253 while (current_token != '}' && current_token != DOTDOT) 1254 { 1255 if (current_token != IDENT) 1256 error (_("'}', '..', or identifier expected")); 1257 1258 std::string name = get_string (); 1259 lex (); 1260 1261 operation_up expr; 1262 if (current_token == ',' || current_token == '}' 1263 || current_token == DOTDOT) 1264 expr = name_to_operation (name); 1265 else 1266 { 1267 require (':'); 1268 expr = parse_expr (); 1269 } 1270 field_v.emplace_back (std::move (name), std::move (expr)); 1271 1272 /* A trailing "," is ok. 
*/ 1273 if (current_token == ',') 1274 lex (); 1275 } 1276 1277 operation_up others; 1278 if (current_token == DOTDOT) 1279 { 1280 lex (); 1281 others = parse_expr (); 1282 } 1283 1284 require ('}'); 1285 1286 return make_operation<rust_aggregate_operation> (type, 1287 std::move (others), 1288 std::move (field_v)); 1289 } 1290 1291 /* Used by the operator precedence parser. */ 1292 struct rustop_item 1293 { 1294 rustop_item (int token_, int precedence_, enum exp_opcode opcode_, 1295 operation_up &&op_) 1296 : token (token_), 1297 precedence (precedence_), 1298 opcode (opcode_), 1299 op (std::move (op_)) 1300 { 1301 } 1302 1303 /* The token value. */ 1304 int token; 1305 /* Precedence of this operator. */ 1306 int precedence; 1307 /* This is used only for assign-modify. */ 1308 enum exp_opcode opcode; 1309 /* The right hand side of this operation. */ 1310 operation_up op; 1311 }; 1312 1313 /* An operator precedence parser for binary operations, including 1314 "as". */ 1315 1316 operation_up 1317 rust_parser::parse_binop (bool required) 1318 { 1319 /* All the binary operators. Each one is of the form 1320 OPERATION(TOKEN, PRECEDENCE, TYPE) 1321 TOKEN is the corresponding operator token. 1322 PRECEDENCE is a value indicating relative precedence. 1323 TYPE is the operation type corresponding to the operator. 1324 Assignment operations are handled specially, not via this 1325 table; they have precedence 0. 
*/ 1326 #define ALL_OPS \ 1327 OPERATION ('*', 10, mul_operation) \ 1328 OPERATION ('/', 10, div_operation) \ 1329 OPERATION ('%', 10, rem_operation) \ 1330 OPERATION ('@', 9, repeat_operation) \ 1331 OPERATION ('+', 8, add_operation) \ 1332 OPERATION ('-', 8, sub_operation) \ 1333 OPERATION (LSH, 7, lsh_operation) \ 1334 OPERATION (RSH, 7, rsh_operation) \ 1335 OPERATION ('&', 6, bitwise_and_operation) \ 1336 OPERATION ('^', 5, bitwise_xor_operation) \ 1337 OPERATION ('|', 4, bitwise_ior_operation) \ 1338 OPERATION (EQEQ, 3, equal_operation) \ 1339 OPERATION (NOTEQ, 3, notequal_operation) \ 1340 OPERATION ('<', 3, less_operation) \ 1341 OPERATION (LTEQ, 3, leq_operation) \ 1342 OPERATION ('>', 3, gtr_operation) \ 1343 OPERATION (GTEQ, 3, geq_operation) \ 1344 OPERATION (ANDAND, 2, logical_and_operation) \ 1345 OPERATION (OROR, 1, logical_or_operation) 1346 1347 #define ASSIGN_PREC 0 1348 1349 operation_up start = parse_atom (required); 1350 if (start == nullptr) 1351 { 1352 gdb_assert (!required); 1353 return start; 1354 } 1355 1356 std::vector<rustop_item> operator_stack; 1357 operator_stack.emplace_back (0, -1, OP_NULL, std::move (start)); 1358 1359 while (true) 1360 { 1361 int this_token = current_token; 1362 enum exp_opcode compound_assign_op = OP_NULL; 1363 int precedence = -2; 1364 1365 switch (this_token) 1366 { 1367 #define OPERATION(TOKEN, PRECEDENCE, TYPE) \ 1368 case TOKEN: \ 1369 precedence = PRECEDENCE; \ 1370 lex (); \ 1371 break; 1372 1373 ALL_OPS 1374 1375 #undef OPERATION 1376 1377 case COMPOUND_ASSIGN: 1378 compound_assign_op = current_opcode; 1379 /* FALLTHROUGH */ 1380 case '=': 1381 precedence = ASSIGN_PREC; 1382 lex (); 1383 break; 1384 1385 /* "as" must be handled specially. */ 1386 case KW_AS: 1387 { 1388 lex (); 1389 rustop_item &lhs = operator_stack.back (); 1390 struct type *type = parse_type (); 1391 lhs.op = make_operation<unop_cast_operation> (std::move (lhs.op), 1392 type); 1393 } 1394 /* Bypass the rest of the loop. 
*/ 1395 continue; 1396 1397 default: 1398 /* Arrange to pop the entire stack. */ 1399 precedence = -2; 1400 break; 1401 } 1402 1403 /* Make sure that assignments are right-associative while other 1404 operations are left-associative. */ 1405 while ((precedence == ASSIGN_PREC 1406 ? precedence < operator_stack.back ().precedence 1407 : precedence <= operator_stack.back ().precedence) 1408 && operator_stack.size () > 1) 1409 { 1410 rustop_item rhs = std::move (operator_stack.back ()); 1411 operator_stack.pop_back (); 1412 1413 rustop_item &lhs = operator_stack.back (); 1414 1415 switch (rhs.token) 1416 { 1417 #define OPERATION(TOKEN, PRECEDENCE, TYPE) \ 1418 case TOKEN: \ 1419 lhs.op = make_operation<TYPE> (std::move (lhs.op), \ 1420 std::move (rhs.op)); \ 1421 break; 1422 1423 ALL_OPS 1424 1425 #undef OPERATION 1426 1427 case '=': 1428 case COMPOUND_ASSIGN: 1429 { 1430 if (rhs.token == '=') 1431 lhs.op = (make_operation<assign_operation> 1432 (std::move (lhs.op), std::move (rhs.op))); 1433 else 1434 lhs.op = (make_operation<assign_modify_operation> 1435 (rhs.opcode, std::move (lhs.op), 1436 std::move (rhs.op))); 1437 1438 struct type *unit_type = get_type ("()"); 1439 1440 operation_up nil (new long_const_operation (unit_type, 0)); 1441 lhs.op = (make_operation<comma_operation> 1442 (std::move (lhs.op), std::move (nil))); 1443 } 1444 break; 1445 1446 default: 1447 gdb_assert_not_reached ("bad binary operator"); 1448 } 1449 } 1450 1451 if (precedence == -2) 1452 break; 1453 1454 operator_stack.emplace_back (this_token, precedence, compound_assign_op, 1455 parse_atom (true)); 1456 } 1457 1458 gdb_assert (operator_stack.size () == 1); 1459 return std::move (operator_stack[0].op); 1460 #undef ALL_OPS 1461 } 1462 1463 /* Parse a range expression. 
*/ 1464 1465 operation_up 1466 rust_parser::parse_range () 1467 { 1468 enum range_flag kind = (RANGE_HIGH_BOUND_DEFAULT 1469 | RANGE_LOW_BOUND_DEFAULT); 1470 1471 operation_up lhs; 1472 if (current_token != DOTDOT && current_token != DOTDOTEQ) 1473 { 1474 lhs = parse_binop (true); 1475 kind &= ~RANGE_LOW_BOUND_DEFAULT; 1476 } 1477 1478 if (current_token == DOTDOT) 1479 kind |= RANGE_HIGH_BOUND_EXCLUSIVE; 1480 else if (current_token != DOTDOTEQ) 1481 return lhs; 1482 lex (); 1483 1484 /* A "..=" range requires a high bound, but otherwise it is 1485 optional. */ 1486 operation_up rhs = parse_binop ((kind & RANGE_HIGH_BOUND_EXCLUSIVE) == 0); 1487 if (rhs != nullptr) 1488 kind &= ~RANGE_HIGH_BOUND_DEFAULT; 1489 1490 return make_operation<rust_range_operation> (kind, 1491 std::move (lhs), 1492 std::move (rhs)); 1493 } 1494 1495 /* Parse an expression. */ 1496 1497 operation_up 1498 rust_parser::parse_expr () 1499 { 1500 return parse_range (); 1501 } 1502 1503 /* Parse a sizeof expression. */ 1504 1505 operation_up 1506 rust_parser::parse_sizeof () 1507 { 1508 assume (KW_SIZEOF); 1509 1510 require ('('); 1511 operation_up result = make_operation<unop_sizeof_operation> (parse_expr ()); 1512 require (')'); 1513 return result; 1514 } 1515 1516 /* Parse an address-of operation. */ 1517 1518 operation_up 1519 rust_parser::parse_addr () 1520 { 1521 assume ('&'); 1522 1523 if (current_token == KW_MUT) 1524 lex (); 1525 1526 return make_operation<rust_unop_addr_operation> (parse_atom (true)); 1527 } 1528 1529 /* Parse a field expression. 
*/ 1530 1531 operation_up 1532 rust_parser::parse_field (operation_up &&lhs) 1533 { 1534 assume ('.'); 1535 1536 operation_up result; 1537 switch (current_token) 1538 { 1539 case IDENT: 1540 case COMPLETE: 1541 { 1542 bool is_complete = current_token == COMPLETE; 1543 auto struct_op = new rust_structop (std::move (lhs), get_string ()); 1544 lex (); 1545 if (is_complete) 1546 { 1547 completion_op.reset (struct_op); 1548 pstate->mark_struct_expression (struct_op); 1549 /* Throw to the outermost level of the parser. */ 1550 error (_("not really an error")); 1551 } 1552 result.reset (struct_op); 1553 } 1554 break; 1555 1556 case DECIMAL_INTEGER: 1557 result = make_operation<rust_struct_anon> (current_int_val.val, 1558 std::move (lhs)); 1559 lex (); 1560 break; 1561 1562 case INTEGER: 1563 error (_("'_' not allowed in integers in anonymous field references")); 1564 1565 default: 1566 error (_("field name expected")); 1567 } 1568 1569 return result; 1570 } 1571 1572 /* Parse an index expression. */ 1573 1574 operation_up 1575 rust_parser::parse_index (operation_up &&lhs) 1576 { 1577 assume ('['); 1578 operation_up rhs = parse_expr (); 1579 require (']'); 1580 1581 return make_operation<rust_subscript_operation> (std::move (lhs), 1582 std::move (rhs)); 1583 } 1584 1585 /* Parse a sequence of comma-separated expressions in parens. */ 1586 1587 std::vector<operation_up> 1588 rust_parser::parse_paren_args () 1589 { 1590 assume ('('); 1591 1592 std::vector<operation_up> args; 1593 while (current_token != ')') 1594 { 1595 if (!args.empty ()) 1596 { 1597 if (current_token != ',') 1598 error (_("',' or ')' expected")); 1599 lex (); 1600 } 1601 1602 args.push_back (parse_expr ()); 1603 } 1604 1605 assume (')'); 1606 1607 return args; 1608 } 1609 1610 /* Parse the parenthesized part of a function call. 
*/ 1611 1612 operation_up 1613 rust_parser::parse_call (operation_up &&lhs) 1614 { 1615 std::vector<operation_up> args = parse_paren_args (); 1616 1617 return make_operation<funcall_operation> (std::move (lhs), 1618 std::move (args)); 1619 } 1620 1621 /* Parse a list of types. */ 1622 1623 std::vector<struct type *> 1624 rust_parser::parse_type_list () 1625 { 1626 std::vector<struct type *> result; 1627 result.push_back (parse_type ()); 1628 while (current_token == ',') 1629 { 1630 lex (); 1631 result.push_back (parse_type ()); 1632 } 1633 return result; 1634 } 1635 1636 /* Parse a possibly-empty list of types, surrounded in parens. */ 1637 1638 std::vector<struct type *> 1639 rust_parser::parse_maybe_type_list () 1640 { 1641 assume ('('); 1642 std::vector<struct type *> types; 1643 if (current_token != ')') 1644 types = parse_type_list (); 1645 require (')'); 1646 return types; 1647 } 1648 1649 /* Parse an array type. */ 1650 1651 struct type * 1652 rust_parser::parse_array_type () 1653 { 1654 assume ('['); 1655 struct type *elt_type = parse_type (); 1656 require (';'); 1657 1658 if (current_token != INTEGER && current_token != DECIMAL_INTEGER) 1659 error (_("integer expected")); 1660 ULONGEST val = current_int_val.val; 1661 lex (); 1662 require (']'); 1663 1664 return lookup_array_range_type (elt_type, 0, val - 1); 1665 } 1666 1667 /* Parse a slice type. */ 1668 1669 struct type * 1670 rust_parser::parse_slice_type () 1671 { 1672 assume ('&'); 1673 1674 bool is_slice = current_token == '['; 1675 if (is_slice) 1676 lex (); 1677 1678 struct type *target = parse_type (); 1679 1680 if (is_slice) 1681 { 1682 require (']'); 1683 return rust_slice_type ("&[*gdb*]", target, get_type ("usize")); 1684 } 1685 1686 /* For now we treat &x and *x identically. */ 1687 return lookup_pointer_type (target); 1688 } 1689 1690 /* Parse a pointer type. 
*/ 1691 1692 struct type * 1693 rust_parser::parse_pointer_type () 1694 { 1695 assume ('*'); 1696 1697 if (current_token == KW_MUT || current_token == KW_CONST) 1698 lex (); 1699 1700 struct type *target = parse_type (); 1701 /* For the time being we ignore mut/const. */ 1702 return lookup_pointer_type (target); 1703 } 1704 1705 /* Parse a function type. */ 1706 1707 struct type * 1708 rust_parser::parse_function_type () 1709 { 1710 assume (KW_FN); 1711 1712 if (current_token != '(') 1713 error (_("'(' expected")); 1714 1715 std::vector<struct type *> types = parse_maybe_type_list (); 1716 1717 if (current_token != ARROW) 1718 error (_("'->' expected")); 1719 lex (); 1720 1721 struct type *result_type = parse_type (); 1722 1723 struct type **argtypes = nullptr; 1724 if (!types.empty ()) 1725 argtypes = types.data (); 1726 1727 result_type = lookup_function_type_with_arguments (result_type, 1728 types.size (), 1729 argtypes); 1730 return lookup_pointer_type (result_type); 1731 } 1732 1733 /* Parse a tuple type. */ 1734 1735 struct type * 1736 rust_parser::parse_tuple_type () 1737 { 1738 std::vector<struct type *> types = parse_maybe_type_list (); 1739 1740 auto_obstack obstack; 1741 obstack_1grow (&obstack, '('); 1742 for (int i = 0; i < types.size (); ++i) 1743 { 1744 std::string type_name = type_to_string (types[i]); 1745 1746 if (i > 0) 1747 obstack_1grow (&obstack, ','); 1748 obstack_grow_str (&obstack, type_name.c_str ()); 1749 } 1750 1751 obstack_grow_str0 (&obstack, ")"); 1752 const char *name = (const char *) obstack_finish (&obstack); 1753 1754 /* We don't allow creating new tuple types (yet), but we do allow 1755 looking up existing tuple types. */ 1756 struct type *result = rust_lookup_type (name); 1757 if (result == nullptr) 1758 error (_("could not find tuple type '%s'"), name); 1759 1760 return result; 1761 } 1762 1763 /* Parse a type. 
*/ 1764 1765 struct type * 1766 rust_parser::parse_type () 1767 { 1768 switch (current_token) 1769 { 1770 case '[': 1771 return parse_array_type (); 1772 case '&': 1773 return parse_slice_type (); 1774 case '*': 1775 return parse_pointer_type (); 1776 case KW_FN: 1777 return parse_function_type (); 1778 case '(': 1779 return parse_tuple_type (); 1780 case KW_SELF: 1781 case KW_SUPER: 1782 case COLONCOLON: 1783 case KW_EXTERN: 1784 case IDENT: 1785 { 1786 std::string path = parse_path (false); 1787 struct type *result = rust_lookup_type (path.c_str ()); 1788 if (result == nullptr) 1789 error (_("No type name '%s' in current context"), path.c_str ()); 1790 return result; 1791 } 1792 default: 1793 error (_("type expected")); 1794 } 1795 } 1796 1797 /* Parse a path. */ 1798 1799 std::string 1800 rust_parser::parse_path (bool for_expr) 1801 { 1802 unsigned n_supers = 0; 1803 int first_token = current_token; 1804 1805 switch (current_token) 1806 { 1807 case KW_SELF: 1808 lex (); 1809 if (current_token != COLONCOLON) 1810 return "self"; 1811 lex (); 1812 /* FALLTHROUGH */ 1813 case KW_SUPER: 1814 while (current_token == KW_SUPER) 1815 { 1816 ++n_supers; 1817 lex (); 1818 if (current_token != COLONCOLON) 1819 error (_("'::' expected")); 1820 lex (); 1821 } 1822 break; 1823 1824 case COLONCOLON: 1825 lex (); 1826 break; 1827 1828 case KW_EXTERN: 1829 /* This is a gdb extension to make it possible to refer to items 1830 in other crates. It just bypasses adding the current crate 1831 to the front of the name. */ 1832 lex (); 1833 break; 1834 } 1835 1836 if (current_token != IDENT) 1837 error (_("identifier expected")); 1838 std::string path = get_string (); 1839 bool saw_ident = true; 1840 lex (); 1841 1842 /* The condition here lets us enter the loop even if we see 1843 "ident<...>". 
*/ 1844 while (current_token == COLONCOLON || current_token == '<') 1845 { 1846 if (current_token == COLONCOLON) 1847 { 1848 lex (); 1849 saw_ident = false; 1850 1851 if (current_token == IDENT) 1852 { 1853 path = path + "::" + get_string (); 1854 lex (); 1855 saw_ident = true; 1856 } 1857 else if (current_token == COLONCOLON) 1858 { 1859 /* The code below won't detect this scenario. */ 1860 error (_("unexpected '::'")); 1861 } 1862 } 1863 1864 if (current_token != '<') 1865 continue; 1866 1867 /* Expression use name::<...>, whereas types use name<...>. */ 1868 if (for_expr) 1869 { 1870 /* Expressions use "name::<...>", so if we saw an identifier 1871 after the "::", we ignore the "<" here. */ 1872 if (saw_ident) 1873 break; 1874 } 1875 else 1876 { 1877 /* Types use "name<...>", so we need to have seen the 1878 identifier. */ 1879 if (!saw_ident) 1880 break; 1881 } 1882 1883 lex (); 1884 std::vector<struct type *> types = parse_type_list (); 1885 if (current_token == '>') 1886 lex (); 1887 else if (current_token == RSH) 1888 { 1889 push_back ('>'); 1890 lex (); 1891 } 1892 else 1893 error (_("'>' expected")); 1894 1895 path += "<"; 1896 for (int i = 0; i < types.size (); ++i) 1897 { 1898 if (i > 0) 1899 path += ","; 1900 path += type_to_string (types[i]); 1901 } 1902 path += ">"; 1903 break; 1904 } 1905 1906 switch (first_token) 1907 { 1908 case KW_SELF: 1909 case KW_SUPER: 1910 return super_name (path, n_supers); 1911 1912 case COLONCOLON: 1913 return crate_name (path); 1914 1915 case KW_EXTERN: 1916 return "::" + path; 1917 1918 case IDENT: 1919 return path; 1920 1921 default: 1922 gdb_assert_not_reached ("missing case in path parsing"); 1923 } 1924 } 1925 1926 /* Handle the parsing for a string expression. */ 1927 1928 operation_up 1929 rust_parser::parse_string () 1930 { 1931 gdb_assert (current_token == STRING); 1932 1933 /* Wrap the raw string in the &str struct. 
*/ 1934 struct type *type = rust_lookup_type ("&str"); 1935 if (type == nullptr) 1936 error (_("Could not find type '&str'")); 1937 1938 std::vector<std::pair<std::string, operation_up>> field_v; 1939 1940 size_t len = current_string_val.length; 1941 operation_up str = make_operation<string_operation> (get_string ()); 1942 operation_up addr 1943 = make_operation<rust_unop_addr_operation> (std::move (str)); 1944 field_v.emplace_back ("data_ptr", std::move (addr)); 1945 1946 struct type *valtype = get_type ("usize"); 1947 operation_up lenop = make_operation<long_const_operation> (valtype, len); 1948 field_v.emplace_back ("length", std::move (lenop)); 1949 1950 return make_operation<rust_aggregate_operation> (type, 1951 operation_up (), 1952 std::move (field_v)); 1953 } 1954 1955 /* Parse a tuple struct expression. */ 1956 1957 operation_up 1958 rust_parser::parse_tuple_struct (struct type *type) 1959 { 1960 std::vector<operation_up> args = parse_paren_args (); 1961 1962 std::vector<std::pair<std::string, operation_up>> field_v (args.size ()); 1963 for (int i = 0; i < args.size (); ++i) 1964 field_v[i] = { string_printf ("__%d", i), std::move (args[i]) }; 1965 1966 return (make_operation<rust_aggregate_operation> 1967 (type, operation_up (), std::move (field_v))); 1968 } 1969 1970 /* Parse a path expression. */ 1971 1972 operation_up 1973 rust_parser::parse_path_expr () 1974 { 1975 std::string path = parse_path (true); 1976 1977 if (current_token == '{') 1978 { 1979 struct type *type = rust_lookup_type (path.c_str ()); 1980 if (type == nullptr) 1981 error (_("Could not find type '%s'"), path.c_str ()); 1982 1983 return parse_struct_expr (type); 1984 } 1985 else if (current_token == '(') 1986 { 1987 struct type *type = rust_lookup_type (path.c_str ()); 1988 /* If this is actually a tuple struct expression, handle it 1989 here. If it is a call, it will be handled elsewhere. 
*/ 1990 if (type != nullptr) 1991 { 1992 if (!rust_tuple_struct_type_p (type)) 1993 error (_("Type %s is not a tuple struct"), path.c_str ()); 1994 return parse_tuple_struct (type); 1995 } 1996 } 1997 1998 return name_to_operation (path); 1999 } 2000 2001 /* Parse an atom. "Atom" isn't a Rust term, but this refers to a 2002 single unitary item in the grammar; but here including some unary 2003 prefix and postfix expressions. */ 2004 2005 operation_up 2006 rust_parser::parse_atom (bool required) 2007 { 2008 operation_up result; 2009 2010 switch (current_token) 2011 { 2012 case '(': 2013 result = parse_tuple (); 2014 break; 2015 2016 case '[': 2017 result = parse_array (); 2018 break; 2019 2020 case INTEGER: 2021 case DECIMAL_INTEGER: 2022 result = make_operation<long_const_operation> (current_int_val.type, 2023 current_int_val.val); 2024 lex (); 2025 break; 2026 2027 case FLOAT: 2028 result = make_operation<float_const_operation> (current_float_val.type, 2029 current_float_val.val); 2030 lex (); 2031 break; 2032 2033 case STRING: 2034 result = parse_string (); 2035 lex (); 2036 break; 2037 2038 case BYTESTRING: 2039 result = make_operation<string_operation> (get_string ()); 2040 lex (); 2041 break; 2042 2043 case KW_TRUE: 2044 case KW_FALSE: 2045 result = make_operation<bool_operation> (current_token == KW_TRUE); 2046 lex (); 2047 break; 2048 2049 case GDBVAR: 2050 /* This is kind of a hacky approach. 
*/ 2051 { 2052 pstate->push_dollar (current_string_val); 2053 result = pstate->pop (); 2054 lex (); 2055 } 2056 break; 2057 2058 case KW_SELF: 2059 case KW_SUPER: 2060 case COLONCOLON: 2061 case KW_EXTERN: 2062 case IDENT: 2063 result = parse_path_expr (); 2064 break; 2065 2066 case '*': 2067 lex (); 2068 result = make_operation<rust_unop_ind_operation> (parse_atom (true)); 2069 break; 2070 case '+': 2071 lex (); 2072 result = make_operation<unary_plus_operation> (parse_atom (true)); 2073 break; 2074 case '-': 2075 lex (); 2076 result = make_operation<unary_neg_operation> (parse_atom (true)); 2077 break; 2078 case '!': 2079 lex (); 2080 result = make_operation<rust_unop_compl_operation> (parse_atom (true)); 2081 break; 2082 case KW_SIZEOF: 2083 result = parse_sizeof (); 2084 break; 2085 case '&': 2086 result = parse_addr (); 2087 break; 2088 2089 default: 2090 if (!required) 2091 return {}; 2092 error (_("unexpected token")); 2093 } 2094 2095 /* Now parse suffixes. */ 2096 while (true) 2097 { 2098 switch (current_token) 2099 { 2100 case '.': 2101 result = parse_field (std::move (result)); 2102 break; 2103 2104 case '[': 2105 result = parse_index (std::move (result)); 2106 break; 2107 2108 case '(': 2109 result = parse_call (std::move (result)); 2110 break; 2111 2112 default: 2113 return result; 2114 } 2115 } 2116 } 2117 2118 2119 2120 /* The parser as exposed to gdb. 
*/ 2121 2122 int 2123 rust_language::parser (struct parser_state *state) const 2124 { 2125 rust_parser parser (state); 2126 2127 operation_up result; 2128 try 2129 { 2130 result = parser.parse_entry_point (); 2131 } 2132 catch (const gdb_exception &exc) 2133 { 2134 if (state->parse_completion) 2135 { 2136 result = std::move (parser.completion_op); 2137 if (result == nullptr) 2138 throw; 2139 } 2140 else 2141 throw; 2142 } 2143 2144 state->set_operation (std::move (result)); 2145 2146 return 0; 2147 } 2148 2149 2150 2151 #if GDB_SELF_TEST 2152 2153 /* A test helper that lexes a string, expecting a single token. */ 2154 2155 static void 2156 rust_lex_test_one (rust_parser *parser, const char *input, int expected) 2157 { 2158 int token; 2159 2160 parser->reset (input); 2161 2162 token = parser->lex_one_token (); 2163 SELF_CHECK (token == expected); 2164 2165 if (token) 2166 { 2167 token = parser->lex_one_token (); 2168 SELF_CHECK (token == 0); 2169 } 2170 } 2171 2172 /* Test that INPUT lexes as the integer VALUE. */ 2173 2174 static void 2175 rust_lex_int_test (rust_parser *parser, const char *input, 2176 ULONGEST value, int kind) 2177 { 2178 rust_lex_test_one (parser, input, kind); 2179 SELF_CHECK (parser->current_int_val.val == value); 2180 } 2181 2182 /* Test that INPUT throws an exception with text ERR. */ 2183 2184 static void 2185 rust_lex_exception_test (rust_parser *parser, const char *input, 2186 const char *err) 2187 { 2188 try 2189 { 2190 /* The "kind" doesn't matter. */ 2191 rust_lex_test_one (parser, input, DECIMAL_INTEGER); 2192 SELF_CHECK (0); 2193 } 2194 catch (const gdb_exception_error &except) 2195 { 2196 SELF_CHECK (strcmp (except.what (), err) == 0); 2197 } 2198 } 2199 2200 /* Test that INPUT lexes as the identifier, string, or byte-string 2201 VALUE. KIND holds the expected token kind. 
*/ 2202 2203 static void 2204 rust_lex_stringish_test (rust_parser *parser, const char *input, 2205 const char *value, int kind) 2206 { 2207 rust_lex_test_one (parser, input, kind); 2208 SELF_CHECK (parser->get_string () == value); 2209 } 2210 2211 /* Helper to test that a string parses as a given token sequence. */ 2212 2213 static void 2214 rust_lex_test_sequence (rust_parser *parser, const char *input, int len, 2215 const int expected[]) 2216 { 2217 int i; 2218 2219 parser->reset (input); 2220 2221 for (i = 0; i < len; ++i) 2222 { 2223 int token = parser->lex_one_token (); 2224 SELF_CHECK (token == expected[i]); 2225 } 2226 } 2227 2228 /* Tests for an integer-parsing corner case. */ 2229 2230 static void 2231 rust_lex_test_trailing_dot (rust_parser *parser) 2232 { 2233 const int expected1[] = { DECIMAL_INTEGER, '.', IDENT, '(', ')', 0 }; 2234 const int expected2[] = { INTEGER, '.', IDENT, '(', ')', 0 }; 2235 const int expected3[] = { FLOAT, EQEQ, '(', ')', 0 }; 2236 const int expected4[] = { DECIMAL_INTEGER, DOTDOT, DECIMAL_INTEGER, 0 }; 2237 2238 rust_lex_test_sequence (parser, "23.g()", ARRAY_SIZE (expected1), expected1); 2239 rust_lex_test_sequence (parser, "23_0.g()", ARRAY_SIZE (expected2), 2240 expected2); 2241 rust_lex_test_sequence (parser, "23.==()", ARRAY_SIZE (expected3), 2242 expected3); 2243 rust_lex_test_sequence (parser, "23..25", ARRAY_SIZE (expected4), expected4); 2244 } 2245 2246 /* Tests of completion. */ 2247 2248 static void 2249 rust_lex_test_completion (rust_parser *parser) 2250 { 2251 const int expected[] = { IDENT, '.', COMPLETE, 0 }; 2252 2253 parser->pstate->parse_completion = 1; 2254 2255 rust_lex_test_sequence (parser, "something.wha", ARRAY_SIZE (expected), 2256 expected); 2257 rust_lex_test_sequence (parser, "something.", ARRAY_SIZE (expected), 2258 expected); 2259 2260 parser->pstate->parse_completion = 0; 2261 } 2262 2263 /* Test pushback. 
*/ 2264 2265 static void 2266 rust_lex_test_push_back (rust_parser *parser) 2267 { 2268 int token; 2269 2270 parser->reset (">>="); 2271 2272 token = parser->lex_one_token (); 2273 SELF_CHECK (token == COMPOUND_ASSIGN); 2274 SELF_CHECK (parser->current_opcode == BINOP_RSH); 2275 2276 parser->push_back ('='); 2277 2278 token = parser->lex_one_token (); 2279 SELF_CHECK (token == '='); 2280 2281 token = parser->lex_one_token (); 2282 SELF_CHECK (token == 0); 2283 } 2284 2285 /* Unit test the lexer. */ 2286 2287 static void 2288 rust_lex_tests (void) 2289 { 2290 /* Set up dummy "parser", so that rust_type works. */ 2291 struct parser_state ps (language_def (language_rust), target_gdbarch (), 2292 nullptr, 0, 0, nullptr, 0, nullptr, false); 2293 rust_parser parser (&ps); 2294 2295 rust_lex_test_one (&parser, "", 0); 2296 rust_lex_test_one (&parser, " \t \n \r ", 0); 2297 rust_lex_test_one (&parser, "thread 23", 0); 2298 rust_lex_test_one (&parser, "task 23", 0); 2299 rust_lex_test_one (&parser, "th 104", 0); 2300 rust_lex_test_one (&parser, "ta 97", 0); 2301 2302 rust_lex_int_test (&parser, "'z'", 'z', INTEGER); 2303 rust_lex_int_test (&parser, "'\\xff'", 0xff, INTEGER); 2304 rust_lex_int_test (&parser, "'\\u{1016f}'", 0x1016f, INTEGER); 2305 rust_lex_int_test (&parser, "b'z'", 'z', INTEGER); 2306 rust_lex_int_test (&parser, "b'\\xfe'", 0xfe, INTEGER); 2307 rust_lex_int_test (&parser, "b'\\xFE'", 0xfe, INTEGER); 2308 rust_lex_int_test (&parser, "b'\\xfE'", 0xfe, INTEGER); 2309 2310 /* Test all escapes in both modes. 
*/ 2311 rust_lex_int_test (&parser, "'\\n'", '\n', INTEGER); 2312 rust_lex_int_test (&parser, "'\\r'", '\r', INTEGER); 2313 rust_lex_int_test (&parser, "'\\t'", '\t', INTEGER); 2314 rust_lex_int_test (&parser, "'\\\\'", '\\', INTEGER); 2315 rust_lex_int_test (&parser, "'\\0'", '\0', INTEGER); 2316 rust_lex_int_test (&parser, "'\\''", '\'', INTEGER); 2317 rust_lex_int_test (&parser, "'\\\"'", '"', INTEGER); 2318 2319 rust_lex_int_test (&parser, "b'\\n'", '\n', INTEGER); 2320 rust_lex_int_test (&parser, "b'\\r'", '\r', INTEGER); 2321 rust_lex_int_test (&parser, "b'\\t'", '\t', INTEGER); 2322 rust_lex_int_test (&parser, "b'\\\\'", '\\', INTEGER); 2323 rust_lex_int_test (&parser, "b'\\0'", '\0', INTEGER); 2324 rust_lex_int_test (&parser, "b'\\''", '\'', INTEGER); 2325 rust_lex_int_test (&parser, "b'\\\"'", '"', INTEGER); 2326 2327 rust_lex_exception_test (&parser, "'z", "Unterminated character literal"); 2328 rust_lex_exception_test (&parser, "b'\\x0'", "Not enough hex digits seen"); 2329 rust_lex_exception_test (&parser, "b'\\u{0}'", 2330 "Unicode escape in byte literal"); 2331 rust_lex_exception_test (&parser, "'\\x0'", "Not enough hex digits seen"); 2332 rust_lex_exception_test (&parser, "'\\u0'", "Missing '{' in Unicode escape"); 2333 rust_lex_exception_test (&parser, "'\\u{0", "Missing '}' in Unicode escape"); 2334 rust_lex_exception_test (&parser, "'\\u{0000007}", "Overlong hex escape"); 2335 rust_lex_exception_test (&parser, "'\\u{}", "Not enough hex digits seen"); 2336 rust_lex_exception_test (&parser, "'\\Q'", "Invalid escape \\Q in literal"); 2337 rust_lex_exception_test (&parser, "b'\\Q'", "Invalid escape \\Q in literal"); 2338 2339 rust_lex_int_test (&parser, "23", 23, DECIMAL_INTEGER); 2340 rust_lex_int_test (&parser, "2_344__29", 234429, INTEGER); 2341 rust_lex_int_test (&parser, "0x1f", 0x1f, INTEGER); 2342 rust_lex_int_test (&parser, "23usize", 23, INTEGER); 2343 rust_lex_int_test (&parser, "23i32", 23, INTEGER); 2344 rust_lex_int_test (&parser, 
"0x1_f", 0x1f, INTEGER); 2345 rust_lex_int_test (&parser, "0b1_101011__", 0x6b, INTEGER); 2346 rust_lex_int_test (&parser, "0o001177i64", 639, INTEGER); 2347 rust_lex_int_test (&parser, "0x123456789u64", 0x123456789ull, INTEGER); 2348 2349 rust_lex_test_trailing_dot (&parser); 2350 2351 rust_lex_test_one (&parser, "23.", FLOAT); 2352 rust_lex_test_one (&parser, "23.99f32", FLOAT); 2353 rust_lex_test_one (&parser, "23e7", FLOAT); 2354 rust_lex_test_one (&parser, "23E-7", FLOAT); 2355 rust_lex_test_one (&parser, "23e+7", FLOAT); 2356 rust_lex_test_one (&parser, "23.99e+7f64", FLOAT); 2357 rust_lex_test_one (&parser, "23.82f32", FLOAT); 2358 2359 rust_lex_stringish_test (&parser, "hibob", "hibob", IDENT); 2360 rust_lex_stringish_test (&parser, "hibob__93", "hibob__93", IDENT); 2361 rust_lex_stringish_test (&parser, "thread", "thread", IDENT); 2362 rust_lex_stringish_test (&parser, "r#true", "true", IDENT); 2363 2364 const int expected1[] = { IDENT, DECIMAL_INTEGER, 0 }; 2365 rust_lex_test_sequence (&parser, "r#thread 23", ARRAY_SIZE (expected1), 2366 expected1); 2367 const int expected2[] = { IDENT, '#', 0 }; 2368 rust_lex_test_sequence (&parser, "r#", ARRAY_SIZE (expected2), expected2); 2369 2370 rust_lex_stringish_test (&parser, "\"string\"", "string", STRING); 2371 rust_lex_stringish_test (&parser, "\"str\\ting\"", "str\ting", STRING); 2372 rust_lex_stringish_test (&parser, "\"str\\\"ing\"", "str\"ing", STRING); 2373 rust_lex_stringish_test (&parser, "r\"str\\ing\"", "str\\ing", STRING); 2374 rust_lex_stringish_test (&parser, "r#\"str\\ting\"#", "str\\ting", STRING); 2375 rust_lex_stringish_test (&parser, "r###\"str\\\"ing\"###", "str\\\"ing", 2376 STRING); 2377 2378 rust_lex_stringish_test (&parser, "b\"string\"", "string", BYTESTRING); 2379 rust_lex_stringish_test (&parser, "b\"\x73tring\"", "string", BYTESTRING); 2380 rust_lex_stringish_test (&parser, "b\"str\\\"ing\"", "str\"ing", BYTESTRING); 2381 rust_lex_stringish_test (&parser, "br####\"\\x73tring\"####", 
"\\x73tring", 2382 BYTESTRING); 2383 2384 for (const auto &candidate : identifier_tokens) 2385 rust_lex_test_one (&parser, candidate.name, candidate.value); 2386 2387 for (const auto &candidate : operator_tokens) 2388 rust_lex_test_one (&parser, candidate.name, candidate.value); 2389 2390 rust_lex_test_completion (&parser); 2391 rust_lex_test_push_back (&parser); 2392 } 2393 2394 #endif /* GDB_SELF_TEST */ 2395 2396 2397 2398 void _initialize_rust_exp (); 2399 void 2400 _initialize_rust_exp () 2401 { 2402 int code = regcomp (&number_regex, number_regex_text, REG_EXTENDED); 2403 /* If the regular expression was incorrect, it was a programming 2404 error. */ 2405 gdb_assert (code == 0); 2406 2407 #if GDB_SELF_TEST 2408 selftests::register_test ("rust-lex", rust_lex_tests); 2409 #endif 2410 } 2411