1 /* YACC parser for Go expressions, for GDB. 2 3 Copyright (C) 2012-2023 Free Software Foundation, Inc. 4 5 This file is part of GDB. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 /* This file is derived from c-exp.y, p-exp.y. */ 21 22 /* Parse a Go expression from text in a string, 23 and return the result as a struct expression pointer. 24 That structure contains arithmetic operations in reverse polish, 25 with constants represented by operations that are followed by special data. 26 See expression.h for the details of the format. 27 What is important here is that it can be built up sequentially 28 during the process of parsing; the lower levels of the tree always 29 come first in the result. 30 31 Note that malloc's and realloc's in this file are transformed to 32 xmalloc and xrealloc respectively by the same sed command in the 33 makefile that remaps any other malloc/realloc inserted by the parser 34 generator. Doing this with #defines and trying to control the interaction 35 with include files (<malloc.h> and <stdlib.h> for example) just became 36 too messy, particularly when such includes can be inserted at random 37 times by the parser generator. */ 38 39 /* Known bugs or limitations: 40 41 - Unicode 42 - &^ 43 - '_' (blank identifier) 44 - automatic deref of pointers 45 - method expressions 46 - interfaces, channels, etc. 47 48 And lots of other things. 
49 I'm sure there's some cleanup to do. 50 */ 51 52 %{ 53 54 #include "defs.h" 55 #include <ctype.h> 56 #include "expression.h" 57 #include "value.h" 58 #include "parser-defs.h" 59 #include "language.h" 60 #include "c-lang.h" 61 #include "go-lang.h" 62 #include "bfd.h" /* Required by objfiles.h. */ 63 #include "symfile.h" /* Required by objfiles.h. */ 64 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */ 65 #include "charset.h" 66 #include "block.h" 67 #include "expop.h" 68 69 #define parse_type(ps) builtin_type (ps->gdbarch ()) 70 71 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror, 72 etc). */ 73 #define GDB_YY_REMAP_PREFIX go_ 74 #include "yy-remap.h" 75 76 /* The state of the parser, used internally when we are parsing the 77 expression. */ 78 79 static struct parser_state *pstate = NULL; 80 81 int yyparse (void); 82 83 static int yylex (void); 84 85 static void yyerror (const char *); 86 87 %} 88 89 /* Although the yacc "value" of an expression is not used, 90 since the result is stored in the structure being created, 91 other node types do have values. */ 92 93 %union 94 { 95 LONGEST lval; 96 struct { 97 LONGEST val; 98 struct type *type; 99 } typed_val_int; 100 struct { 101 gdb_byte val[16]; 102 struct type *type; 103 } typed_val_float; 104 struct stoken sval; 105 struct symtoken ssym; 106 struct type *tval; 107 struct typed_stoken tsval; 108 struct ttype tsym; 109 int voidval; 110 enum exp_opcode opcode; 111 struct internalvar *ivar; 112 struct stoken_vector svec; 113 } 114 115 %{ 116 /* YYSTYPE gets defined by %union. 
*/ 117 static int parse_number (struct parser_state *, 118 const char *, int, int, YYSTYPE *); 119 120 using namespace expr; 121 %} 122 123 %type <voidval> exp exp1 type_exp start variable lcurly 124 %type <lval> rcurly 125 %type <tval> type 126 127 %token <typed_val_int> INT 128 %token <typed_val_float> FLOAT 129 130 /* Both NAME and TYPENAME tokens represent symbols in the input, 131 and both convey their data as strings. 132 But a TYPENAME is a string that happens to be defined as a type 133 or builtin type name (such as int or char) 134 and a NAME is any other symbol. 135 Contexts where this distinction is not important can use the 136 nonterminal "name", which matches either NAME or TYPENAME. */ 137 138 %token <tsval> RAW_STRING 139 %token <tsval> STRING 140 %token <tsval> CHAR 141 %token <ssym> NAME 142 %token <tsym> TYPENAME /* Not TYPE_NAME cus already taken. */ 143 %token <voidval> COMPLETE 144 /*%type <sval> name*/ 145 %type <svec> string_exp 146 %type <ssym> name_not_typename 147 148 /* A NAME_OR_INT is a symbol which is not known in the symbol table, 149 but which would parse as a valid number in the current input radix. 150 E.g. "c" when input_radix==16. Depending on the parse, it will be 151 turned into a name or into a number. */ 152 %token <ssym> NAME_OR_INT 153 154 %token <lval> TRUE_KEYWORD FALSE_KEYWORD 155 %token STRUCT_KEYWORD INTERFACE_KEYWORD TYPE_KEYWORD CHAN_KEYWORD 156 %token SIZEOF_KEYWORD 157 %token LEN_KEYWORD CAP_KEYWORD 158 %token NEW_KEYWORD 159 %token IOTA_KEYWORD NIL_KEYWORD 160 %token CONST_KEYWORD 161 %token DOTDOTDOT 162 %token ENTRY 163 %token ERROR 164 165 /* Special type cases. */ 166 %token BYTE_KEYWORD /* An alias of uint8. */ 167 168 %token <sval> DOLLAR_VARIABLE 169 170 %token <opcode> ASSIGN_MODIFY 171 172 %left ',' 173 %left ABOVE_COMMA 174 %right '=' ASSIGN_MODIFY 175 %right '?' 
/* Remaining operator precedence levels.  Bison gives declarations that
   appear later a higher precedence, so the levels below run from the
   loosest binding (OROR) to the tightest (postfix '.', '[' and '(').  */
%left OROR
%left ANDAND
%left '|'
%left '^'
%left '&'
%left ANDNOT
%left EQUAL NOTEQUAL
%left '<' '>' LEQ GEQ
%left LSH RSH
%left '@'
%left '+' '-'
%left '*' '/' '%'
%right UNARY INCREMENT DECREMENT
%right LEFT_ARROW '.' '[' '('


%%

/* Entry point: the input is either an expression list or a bare type.  */
start   :       exp1
        |       type_exp
        ;

/* A type on its own is pushed as a type_operation.  */
type_exp:       type
                        { pstate->push_new<type_operation> ($1); }
        ;

/* Expressions, including the comma operator.  */
exp1    :       exp
        |       exp1 ',' exp
                        { pstate->wrap2<comma_operation> (); }
        ;

/* Expressions, not including the comma operator.  */

/* Prefix unary operators.  Each one wraps the operand that is already
   on top of the parser's operation stack, and each is forced to UNARY
   precedence so that it binds tighter than the binary use of the same
   character.  */
exp     :       '*' exp    %prec UNARY
                        { pstate->wrap<unop_ind_operation> (); }
        ;

exp     :       '&' exp    %prec UNARY
                        { pstate->wrap<unop_addr_operation> (); }
        ;

exp     :       '-' exp    %prec UNARY
                        { pstate->wrap<unary_neg_operation> (); }
        ;

exp     :       '+' exp    %prec UNARY
                        { pstate->wrap<unary_plus_operation> (); }
        ;

exp     :       '!' exp    %prec UNARY
                        { pstate->wrap<unary_logical_not_operation> (); }
        ;

exp     :       '^' exp    %prec UNARY
                        { pstate->wrap<unary_complement_operation> (); }
        ;

/* Postfix increment and decrement.  */
exp     :       exp INCREMENT    %prec UNARY
                        { pstate->wrap<postinc_operation> (); }
        ;

exp     :       exp DECREMENT    %prec UNARY
                        { pstate->wrap<postdec_operation> (); }
        ;

/* foo->bar is not in Go.  May want as a gdb extension.  Later.  */

/* Member access: pop the object expression and push a structop naming
   the selected field.  */
exp     :       exp '.' name_not_typename
                        {
                          pstate->push_new<structop_operation>
                            (pstate->pop (), copy_name ($3.stoken));
                        }
        ;

/* Member access followed by the COMPLETE token.  The operation is
   built by hand so that it can be handed to mark_struct_expression
   before it is pushed.  */
exp     :       exp '.' name_not_typename COMPLETE
                        {
                          structop_base_operation *op
                            = new structop_operation (pstate->pop (),
                                                      copy_name ($3.stoken));
                          pstate->mark_struct_expression (op);
                          pstate->push (operation_up (op));
                        }
        ;

/* As above, but COMPLETE arrives directly after the '.', so the field
   name is the empty string.  */
exp     :       exp '.' COMPLETE
                        {
                          structop_base_operation *op
                            = new structop_operation (pstate->pop (), "");
                          pstate->mark_struct_expression (op);
                          pstate->push (operation_up (op));
                        }
        ;

/* Subscripting: wraps the array expression and the index expression.  */
exp     :       exp '[' exp1 ']'
                        { pstate->wrap2<subscript_operation> (); }
        ;

/* Function call.  The mid-rule action opens a new argument list; the
   final action pops as many operations as end_arglist reports, then
   pops the callee that lies beneath them.  */
exp     :       exp '('
                        /* This is to save the value of arglist_len
                           being accumulated by an outer function call.  */
                        { pstate->start_arglist (); }
                arglist ')'     %prec LEFT_ARROW
                        {
                          std::vector<operation_up> args
                            = pstate->pop_vector (pstate->end_arglist ());
                          pstate->push_new<funcall_operation>
                            (pstate->pop (), std::move (args));
                        }
        ;

/* '{' reuses the argument-list bookkeeping; see rcurly.  */
lcurly  :       '{'
                        { pstate->start_arglist (); }
        ;

/* An argument list may be empty ...  */
arglist :
        ;

/* ... a single expression, which sets the count to one ...  */
arglist :       exp
                        { pstate->arglist_len = 1; }
        ;

/* ... or a longer comma-separated list, counted one element at a time.  */
arglist :       arglist ',' exp   %prec ABOVE_COMMA
                        { pstate->arglist_len++; }
        ;

/* '}' closes the list opened by lcurly; its value is the count
   returned by end_arglist, minus one.  */
rcurly  :       '}'
                        { $$ = pstate->end_arglist () - 1; }
        ;

/* "{type} exp": pop the operand and push it wrapped in a
   unop_memval_operation carrying the parsed type.  */
exp     :       lcurly type rcurly exp  %prec UNARY
                        {
                          pstate->push_new<unop_memval_operation>
                            (pstate->pop (), $2);
                        }
        ;

/* "type (exp)": pop the operand and push a cast to the parsed type.  */
exp     :       type '(' exp ')'  %prec UNARY
                        {
                          pstate->push_new<unop_cast_operation>
                            (pstate->pop (), $1);
                        }
        ;

/* Parentheses only group; the inner expression is already on the
   stack, so the action is empty.  */
exp     :       '(' exp1 ')'
                        { }
        ;

/* Binary operators in order of decreasing precedence.
*/

/* Every simple binary operator wraps the two operand expressions that
   are on top of the parser's operation stack.  */

exp     :       exp '@' exp
                        { pstate->wrap2<repeat_operation> (); }
        ;

exp     :       exp '*' exp
                        { pstate->wrap2<mul_operation> (); }
        ;

exp     :       exp '/' exp
                        { pstate->wrap2<div_operation> (); }
        ;

exp     :       exp '%' exp
                        { pstate->wrap2<rem_operation> (); }
        ;

exp     :       exp '+' exp
                        { pstate->wrap2<add_operation> (); }
        ;

exp     :       exp '-' exp
                        { pstate->wrap2<sub_operation> (); }
        ;

exp     :       exp LSH exp
                        { pstate->wrap2<lsh_operation> (); }
        ;

exp     :       exp RSH exp
                        { pstate->wrap2<rsh_operation> (); }
        ;

exp     :       exp EQUAL exp
                        { pstate->wrap2<equal_operation> (); }
        ;

exp     :       exp NOTEQUAL exp
                        { pstate->wrap2<notequal_operation> (); }
        ;

exp     :       exp LEQ exp
                        { pstate->wrap2<leq_operation> (); }
        ;

exp     :       exp GEQ exp
                        { pstate->wrap2<geq_operation> (); }
        ;

exp     :       exp '<' exp
                        { pstate->wrap2<less_operation> (); }
        ;

exp     :       exp '>' exp
                        { pstate->wrap2<gtr_operation> (); }
        ;

exp     :       exp '&' exp
                        { pstate->wrap2<bitwise_and_operation> (); }
        ;

exp     :       exp '^' exp
                        { pstate->wrap2<bitwise_xor_operation> (); }
        ;

exp     :       exp '|' exp
                        { pstate->wrap2<bitwise_ior_operation> (); }
        ;

exp     :       exp ANDAND exp
                        { pstate->wrap2<logical_and_operation> (); }
        ;

exp     :       exp OROR exp
                        { pstate->wrap2<logical_or_operation> (); }
        ;

/* Conditional.  The operands come off the stack in reverse order of
   appearance, so they are popped last-first and handed over in source
   order.  */
exp     :       exp '?' exp ':' exp     %prec '?'
                        {
                          operation_up last = pstate->pop ();
                          operation_up mid = pstate->pop ();
                          operation_up first = pstate->pop ();
                          pstate->push_new<ternop_cond_operation>
                            (std::move (first), std::move (mid),
                             std::move (last));
                        }
        ;

exp     :       exp '=' exp
                        { pstate->wrap2<assign_operation> (); }
        ;

/* Compound assignment; the token's value is the binary opcode that the
   lexer took from its operator table.  */
exp     :       exp ASSIGN_MODIFY exp
                        {
                          operation_up rhs = pstate->pop ();
                          operation_up lhs = pstate->pop ();
                          pstate->push_new<assign_modify_operation>
                            ($2, std::move (lhs), std::move (rhs));
                        }
        ;

exp     :       INT
                        {
                          pstate->push_new<long_const_operation>
                            ($1.type, $1.val);
                        }
        ;

/* A character constant is pushed as a one-element string vector.  */
exp     :       CHAR
                        {
                          struct stoken_vector vec;
                          vec.len = 1;
                          vec.tokens = &$1;
                          pstate->push_c_string ($1.type, &vec);
                        }
        ;

/* A name that also reads as a number in the current radix is treated
   as the number here.  */
exp     :       NAME_OR_INT
                        { YYSTYPE val;
                          parse_number (pstate, $1.stoken.ptr,
                                        $1.stoken.length, 0, &val);
                          pstate->push_new<long_const_operation>
                            (val.typed_val_int.type,
                             val.typed_val_int.val);
                        }
        ;


exp     :       FLOAT
                        {
                          float_data data;
                          std::copy (std::begin ($1.val), std::end ($1.val),
                                     std::begin (data));
                          pstate->push_new<float_const_operation> ($1.type, data);
                        }
        ;

exp     :       variable
        ;

exp     :       DOLLAR_VARIABLE
                        {
                          pstate->push_dollar ($1);
                        }
        ;

/* sizeof applied to a type folds to an integer constant right away.  */
exp     :       SIZEOF_KEYWORD '(' type ')'  %prec UNARY
                        {
                          /* TODO(dje): Go objects in structs.  */
                          /* TODO(dje): What's the right type here?  */
                          struct type *size_type
                            = parse_type (pstate)->builtin_unsigned_int;
                          $3 = check_typedef ($3);
                          pstate->push_new<long_const_operation>
                            (size_type, (LONGEST) $3->length ());
                        }
        ;

/* sizeof applied to an expression wraps the operand instead.  */
exp     :       SIZEOF_KEYWORD  '(' exp ')'  %prec UNARY
                        {
                          /* TODO(dje): Go objects in structs.  */
                          pstate->wrap<unop_sizeof_operation> ();
                        }
        ;

/* One or more string literals joined with '+'.  The value is a
   heap-allocated vector of heap-allocated copies of the pieces; the
   rule that consumes a string_exp frees all of them.  */
string_exp:
                STRING
                        {
                          /* We copy the string here, and not in the
                             lexer, to guarantee that we do not leak a
                             string.  */
                          /* Note that we NUL-terminate here, but just
                             for convenience.  */
                          struct typed_stoken *vec = XNEW (struct typed_stoken);
                          $$.len = 1;
                          $$.tokens = vec;

                          vec->type = $1.type;
                          vec->length = $1.length;
                          vec->ptr = (char *) malloc ($1.length + 1);
                          memcpy (vec->ptr, $1.ptr, $1.length + 1);
                        }

        |       string_exp '+' STRING
                        {
                          /* Note that we NUL-terminate here, but just
                             for convenience.  */
                          char *p;
                          ++$$.len;
                          $$.tokens = XRESIZEVEC (struct typed_stoken,
                                                  $$.tokens, $$.len);

                          p = (char *) malloc ($3.length + 1);
                          memcpy (p, $3.ptr, $3.length + 1);

                          $$.tokens[$$.len - 1].type = $3.type;
                          $$.tokens[$$.len - 1].length = $3.length;
                          $$.tokens[$$.len - 1].ptr = p;
                        }
        ;

/* Push the accumulated string, then release every copy made by
   string_exp.  */
exp     :       string_exp  %prec ABOVE_COMMA
                        {
                          int i;

                          /* Always utf8.  */
                          pstate->push_c_string (0, &$1);
                          for (i = 0; i < $1.len; ++i)
                            free ($1.tokens[i].ptr);
                          free ($1.tokens);
                        }
        ;

/* Boolean literals.  For keyword tokens the lexer only stores an
   opcode (OP_NULL) in the semantic value, on top of the name it stored
   earlier, so the integer view of the token's value does not tell
   "true" and "false" apart.  Push the constant that the keyword
   denotes instead of reading the token's value.  */
exp     :       TRUE_KEYWORD
                        { pstate->push_new<bool_operation> (true); }
        ;

exp     :       FALSE_KEYWORD
                        { pstate->push_new<bool_operation> (false); }
        ;

/* "name@entry": only accepted for a symbol that is an argument and
   whose read needs a frame; anything else is reported as an error.  */
variable:       name_not_typename ENTRY
                        { struct symbol *sym = $1.sym.symbol;

                          if (sym == NULL
                              || !sym->is_argument ()
                              || !symbol_read_needs_frame (sym))
                            error (_("@entry can be used only for function "
                                     "parameters, not for \"%s\""),
                                   copy_name ($1.stoken).c_str ());

                          pstate->push_new<var_entry_value_operation> (sym);
                        }
        ;

/* A plain name.  A full symbol is used when the lexer found one;
   otherwise the name is looked up as a minimal symbol, and if that
   fails too an error is reported.  */
variable:       name_not_typename
                        { struct block_symbol sym = $1.sym;

                          if (sym.symbol)
                            {
                              if (symbol_read_needs_frame (sym.symbol))
                                pstate->block_tracker->update (sym);

                              pstate->push_new<var_value_operation> (sym);
                            }
                          else if ($1.is_a_field_of_this)
                            {
                              /* TODO(dje): Can we get here?
                                 E.g., via a mix of c++ and go?  */
                              gdb_assert_not_reached ("go with `this' field");
                            }
                          else
                            {
                              struct bound_minimal_symbol msymbol;
                              std::string arg = copy_name ($1.stoken);

                              msymbol =
                                lookup_bound_minimal_symbol (arg.c_str ());
                              if (msymbol.minsym != NULL)
                                pstate->push_new<var_msym_value_operation>
                                  (msymbol);
                              else if (!have_full_symbols ()
                                       && !have_partial_symbols ())
                                error (_("No symbol table is loaded. "
                                       "Use the \"file\" command."));
                              else
                                error (_("No symbol \"%s\" in current context."),
                                       arg.c_str ());
                            }
                        }
        ;

/* TODO
method_exp: PACKAGENAME '.' name '.' name
                        {
                        }
        ;
*/

type  /* Implements (approximately): [*] type-specifier */
        :       '*' type
                        { $$ = lookup_pointer_type ($2); }
        |       TYPENAME
                        { $$ = $1.type; }
/*
        |       STRUCT_KEYWORD name
                        { $$ = lookup_struct (copy_name ($2),
                                              expression_context_block); }
*/
        |       BYTE_KEYWORD
                        { $$ = builtin_go_type (pstate->gdbarch ())
                            ->builtin_uint8; }
        ;

/* TODO
name    :       NAME { $$ = $1.stoken; }
        |       TYPENAME { $$ = $1.stoken; }
        |       NAME_OR_INT  { $$ = $1.stoken; }
        ;
*/

name_not_typename
        :       NAME
/* These would be useful if name_not_typename was useful, but it is just
   a fake for "variable", so these cause reduce/reduce conflicts because
   the parser can't tell whether NAME_OR_INT is a name_not_typename (=variable,
   =exp) or just an exp.  If name_not_typename was ever used in an lvalue
   context where only a name could occur, this might be useful.
        |       NAME_OR_INT
*/
        ;

%%

/* Take care of parsing a number (anything that starts with a digit).
   Set yylval and return the token type; update lexptr.
   LEN is the number of characters in it.  */

/* FIXME: Needs some error checking for the float case.  */
/* FIXME(dje): IWBN to use c-exp.y's parse_number if we could.
643 That will require moving the guts into a function that we both call 644 as our YYSTYPE is different than c-exp.y's */ 645 646 static int 647 parse_number (struct parser_state *par_state, 648 const char *p, int len, int parsed_float, YYSTYPE *putithere) 649 { 650 ULONGEST n = 0; 651 ULONGEST prevn = 0; 652 653 int i = 0; 654 int c; 655 int base = input_radix; 656 int unsigned_p = 0; 657 658 /* Number of "L" suffixes encountered. */ 659 int long_p = 0; 660 661 /* We have found a "L" or "U" suffix. */ 662 int found_suffix = 0; 663 664 if (parsed_float) 665 { 666 const struct builtin_go_type *builtin_go_types 667 = builtin_go_type (par_state->gdbarch ()); 668 669 /* Handle suffixes: 'f' for float32, 'l' for long double. 670 FIXME: This appears to be an extension -- do we want this? */ 671 if (len >= 1 && tolower (p[len - 1]) == 'f') 672 { 673 putithere->typed_val_float.type 674 = builtin_go_types->builtin_float32; 675 len--; 676 } 677 else if (len >= 1 && tolower (p[len - 1]) == 'l') 678 { 679 putithere->typed_val_float.type 680 = parse_type (par_state)->builtin_long_double; 681 len--; 682 } 683 /* Default type for floating-point literals is float64. */ 684 else 685 { 686 putithere->typed_val_float.type 687 = builtin_go_types->builtin_float64; 688 } 689 690 if (!parse_float (p, len, 691 putithere->typed_val_float.type, 692 putithere->typed_val_float.val)) 693 return ERROR; 694 return FLOAT; 695 } 696 697 /* Handle base-switching prefixes 0x, 0t, 0d, 0. 
*/ 698 if (p[0] == '0' && len > 1) 699 switch (p[1]) 700 { 701 case 'x': 702 case 'X': 703 if (len >= 3) 704 { 705 p += 2; 706 base = 16; 707 len -= 2; 708 } 709 break; 710 711 case 'b': 712 case 'B': 713 if (len >= 3) 714 { 715 p += 2; 716 base = 2; 717 len -= 2; 718 } 719 break; 720 721 case 't': 722 case 'T': 723 case 'd': 724 case 'D': 725 if (len >= 3) 726 { 727 p += 2; 728 base = 10; 729 len -= 2; 730 } 731 break; 732 733 default: 734 base = 8; 735 break; 736 } 737 738 while (len-- > 0) 739 { 740 c = *p++; 741 if (c >= 'A' && c <= 'Z') 742 c += 'a' - 'A'; 743 if (c != 'l' && c != 'u') 744 n *= base; 745 if (c >= '0' && c <= '9') 746 { 747 if (found_suffix) 748 return ERROR; 749 n += i = c - '0'; 750 } 751 else 752 { 753 if (base > 10 && c >= 'a' && c <= 'f') 754 { 755 if (found_suffix) 756 return ERROR; 757 n += i = c - 'a' + 10; 758 } 759 else if (c == 'l') 760 { 761 ++long_p; 762 found_suffix = 1; 763 } 764 else if (c == 'u') 765 { 766 unsigned_p = 1; 767 found_suffix = 1; 768 } 769 else 770 return ERROR; /* Char not a digit */ 771 } 772 if (i >= base) 773 return ERROR; /* Invalid digit in this base. */ 774 775 if (c != 'l' && c != 'u') 776 { 777 /* Test for overflow. */ 778 if (n == 0 && prevn == 0) 779 ; 780 else if (prevn >= n) 781 error (_("Numeric constant too large.")); 782 } 783 prevn = n; 784 } 785 786 /* An integer constant is an int, a long, or a long long. An L 787 suffix forces it to be long; an LL suffix forces it to be long 788 long. If not forced to a larger size, it gets the first type of 789 the above that it fits in. To figure out whether it fits, we 790 shift it right and see whether anything remains. Note that we 791 can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or more in one 792 operation, because many compilers will warn about such a shift 793 (which always produces a zero result). Sometimes gdbarch_int_bit 794 or gdbarch_long_bit will be that big, sometimes not. 
To deal with 795 the case where it is we just always shift the value more than 796 once, with fewer bits each time. */ 797 798 int int_bits = gdbarch_int_bit (par_state->gdbarch ()); 799 int long_bits = gdbarch_long_bit (par_state->gdbarch ()); 800 int long_long_bits = gdbarch_long_long_bit (par_state->gdbarch ()); 801 bool have_signed = !unsigned_p; 802 bool have_int = long_p == 0; 803 bool have_long = long_p <= 1; 804 if (have_int && have_signed && fits_in_type (1, n, int_bits, true)) 805 putithere->typed_val_int.type = parse_type (par_state)->builtin_int; 806 else if (have_int && fits_in_type (1, n, int_bits, false)) 807 putithere->typed_val_int.type 808 = parse_type (par_state)->builtin_unsigned_int; 809 else if (have_long && have_signed && fits_in_type (1, n, long_bits, true)) 810 putithere->typed_val_int.type = parse_type (par_state)->builtin_long; 811 else if (have_long && fits_in_type (1, n, long_bits, false)) 812 putithere->typed_val_int.type 813 = parse_type (par_state)->builtin_unsigned_long; 814 else if (have_signed && fits_in_type (1, n, long_long_bits, true)) 815 putithere->typed_val_int.type 816 = parse_type (par_state)->builtin_long_long; 817 else if (fits_in_type (1, n, long_long_bits, false)) 818 putithere->typed_val_int.type 819 = parse_type (par_state)->builtin_unsigned_long_long; 820 else 821 error (_("Numeric constant too large.")); 822 putithere->typed_val_int.val = n; 823 824 return INT; 825 } 826 827 /* Temporary obstack used for holding strings. */ 828 static struct obstack tempbuf; 829 static int tempbuf_init; 830 831 /* Parse a string or character literal from TOKPTR. The string or 832 character may be wide or unicode. *OUTPTR is set to just after the 833 end of the literal in the input string. The resulting token is 834 stored in VALUE. This returns a token value, either STRING or 835 CHAR, depending on what was parsed. *HOST_CHARS is set to the 836 number of host characters in the literal. 
*/ 837 838 static int 839 parse_string_or_char (const char *tokptr, const char **outptr, 840 struct typed_stoken *value, int *host_chars) 841 { 842 int quote; 843 844 /* Build the gdb internal form of the input string in tempbuf. Note 845 that the buffer is null byte terminated *only* for the 846 convenience of debugging gdb itself and printing the buffer 847 contents when the buffer contains no embedded nulls. Gdb does 848 not depend upon the buffer being null byte terminated, it uses 849 the length string instead. This allows gdb to handle C strings 850 (as well as strings in other languages) with embedded null 851 bytes */ 852 853 if (!tempbuf_init) 854 tempbuf_init = 1; 855 else 856 obstack_free (&tempbuf, NULL); 857 obstack_init (&tempbuf); 858 859 /* Skip the quote. */ 860 quote = *tokptr; 861 ++tokptr; 862 863 *host_chars = 0; 864 865 while (*tokptr) 866 { 867 char c = *tokptr; 868 if (c == '\\') 869 { 870 ++tokptr; 871 *host_chars += c_parse_escape (&tokptr, &tempbuf); 872 } 873 else if (c == quote) 874 break; 875 else 876 { 877 obstack_1grow (&tempbuf, c); 878 ++tokptr; 879 /* FIXME: this does the wrong thing with multi-byte host 880 characters. We could use mbrlen here, but that would 881 make "set host-charset" a bit less useful. */ 882 ++*host_chars; 883 } 884 } 885 886 if (*tokptr != quote) 887 { 888 if (quote == '"') 889 error (_("Unterminated string in expression.")); 890 else 891 error (_("Unmatched single quote.")); 892 } 893 ++tokptr; 894 895 value->type = (int) C_STRING | (quote == '\'' ? C_CHAR : 0); /*FIXME*/ 896 value->ptr = (char *) obstack_base (&tempbuf); 897 value->length = obstack_object_size (&tempbuf); 898 899 *outptr = tokptr; 900 901 return quote == '\'' ? 
CHAR : STRING; 902 } 903 904 struct token 905 { 906 const char *oper; 907 int token; 908 enum exp_opcode opcode; 909 }; 910 911 static const struct token tokentab3[] = 912 { 913 {">>=", ASSIGN_MODIFY, BINOP_RSH}, 914 {"<<=", ASSIGN_MODIFY, BINOP_LSH}, 915 /*{"&^=", ASSIGN_MODIFY, BINOP_BITWISE_ANDNOT}, TODO */ 916 {"...", DOTDOTDOT, OP_NULL}, 917 }; 918 919 static const struct token tokentab2[] = 920 { 921 {"+=", ASSIGN_MODIFY, BINOP_ADD}, 922 {"-=", ASSIGN_MODIFY, BINOP_SUB}, 923 {"*=", ASSIGN_MODIFY, BINOP_MUL}, 924 {"/=", ASSIGN_MODIFY, BINOP_DIV}, 925 {"%=", ASSIGN_MODIFY, BINOP_REM}, 926 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR}, 927 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND}, 928 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR}, 929 {"++", INCREMENT, OP_NULL}, 930 {"--", DECREMENT, OP_NULL}, 931 /*{"->", RIGHT_ARROW, OP_NULL}, Doesn't exist in Go. */ 932 {"<-", LEFT_ARROW, OP_NULL}, 933 {"&&", ANDAND, OP_NULL}, 934 {"||", OROR, OP_NULL}, 935 {"<<", LSH, OP_NULL}, 936 {">>", RSH, OP_NULL}, 937 {"==", EQUAL, OP_NULL}, 938 {"!=", NOTEQUAL, OP_NULL}, 939 {"<=", LEQ, OP_NULL}, 940 {">=", GEQ, OP_NULL}, 941 /*{"&^", ANDNOT, OP_NULL}, TODO */ 942 }; 943 944 /* Identifier-like tokens. */ 945 static const struct token ident_tokens[] = 946 { 947 {"true", TRUE_KEYWORD, OP_NULL}, 948 {"false", FALSE_KEYWORD, OP_NULL}, 949 {"nil", NIL_KEYWORD, OP_NULL}, 950 {"const", CONST_KEYWORD, OP_NULL}, 951 {"struct", STRUCT_KEYWORD, OP_NULL}, 952 {"type", TYPE_KEYWORD, OP_NULL}, 953 {"interface", INTERFACE_KEYWORD, OP_NULL}, 954 {"chan", CHAN_KEYWORD, OP_NULL}, 955 {"byte", BYTE_KEYWORD, OP_NULL}, /* An alias of uint8. */ 956 {"len", LEN_KEYWORD, OP_NULL}, 957 {"cap", CAP_KEYWORD, OP_NULL}, 958 {"new", NEW_KEYWORD, OP_NULL}, 959 {"iota", IOTA_KEYWORD, OP_NULL}, 960 }; 961 962 /* This is set if a NAME token appeared at the very end of the input 963 string, with no whitespace separating the name from the EOF. This 964 is used only when parsing to do field name completion. 
*/ 965 static int saw_name_at_eof; 966 967 /* This is set if the previously-returned token was a structure 968 operator -- either '.' or ARROW. This is used only when parsing to 969 do field name completion. */ 970 static int last_was_structop; 971 972 /* Depth of parentheses. */ 973 static int paren_depth; 974 975 /* Read one token, getting characters through lexptr. */ 976 977 static int 978 lex_one_token (struct parser_state *par_state) 979 { 980 int c; 981 int namelen; 982 const char *tokstart; 983 int saw_structop = last_was_structop; 984 985 last_was_structop = 0; 986 987 retry: 988 989 par_state->prev_lexptr = par_state->lexptr; 990 991 tokstart = par_state->lexptr; 992 /* See if it is a special token of length 3. */ 993 for (const auto &token : tokentab3) 994 if (strncmp (tokstart, token.oper, 3) == 0) 995 { 996 par_state->lexptr += 3; 997 yylval.opcode = token.opcode; 998 return token.token; 999 } 1000 1001 /* See if it is a special token of length 2. */ 1002 for (const auto &token : tokentab2) 1003 if (strncmp (tokstart, token.oper, 2) == 0) 1004 { 1005 par_state->lexptr += 2; 1006 yylval.opcode = token.opcode; 1007 /* NOTE: -> doesn't exist in Go, so we don't need to watch for 1008 setting last_was_structop here. 
*/ 1009 return token.token; 1010 } 1011 1012 switch (c = *tokstart) 1013 { 1014 case 0: 1015 if (saw_name_at_eof) 1016 { 1017 saw_name_at_eof = 0; 1018 return COMPLETE; 1019 } 1020 else if (saw_structop) 1021 return COMPLETE; 1022 else 1023 return 0; 1024 1025 case ' ': 1026 case '\t': 1027 case '\n': 1028 par_state->lexptr++; 1029 goto retry; 1030 1031 case '[': 1032 case '(': 1033 paren_depth++; 1034 par_state->lexptr++; 1035 return c; 1036 1037 case ']': 1038 case ')': 1039 if (paren_depth == 0) 1040 return 0; 1041 paren_depth--; 1042 par_state->lexptr++; 1043 return c; 1044 1045 case ',': 1046 if (pstate->comma_terminates 1047 && paren_depth == 0) 1048 return 0; 1049 par_state->lexptr++; 1050 return c; 1051 1052 case '.': 1053 /* Might be a floating point number. */ 1054 if (par_state->lexptr[1] < '0' || par_state->lexptr[1] > '9') 1055 { 1056 if (pstate->parse_completion) 1057 last_was_structop = 1; 1058 goto symbol; /* Nope, must be a symbol. */ 1059 } 1060 /* FALL THRU. */ 1061 1062 case '0': 1063 case '1': 1064 case '2': 1065 case '3': 1066 case '4': 1067 case '5': 1068 case '6': 1069 case '7': 1070 case '8': 1071 case '9': 1072 { 1073 /* It's a number. */ 1074 int got_dot = 0, got_e = 0, toktype; 1075 const char *p = tokstart; 1076 int hex = input_radix > 10; 1077 1078 if (c == '0' && (p[1] == 'x' || p[1] == 'X')) 1079 { 1080 p += 2; 1081 hex = 1; 1082 } 1083 1084 for (;; ++p) 1085 { 1086 /* This test includes !hex because 'e' is a valid hex digit 1087 and thus does not indicate a floating point number when 1088 the radix is hex. */ 1089 if (!hex && !got_e && (*p == 'e' || *p == 'E')) 1090 got_dot = got_e = 1; 1091 /* This test does not include !hex, because a '.' always indicates 1092 a decimal floating point number regardless of the radix. 
*/ 1093 else if (!got_dot && *p == '.') 1094 got_dot = 1; 1095 else if (got_e && (p[-1] == 'e' || p[-1] == 'E') 1096 && (*p == '-' || *p == '+')) 1097 /* This is the sign of the exponent, not the end of the 1098 number. */ 1099 continue; 1100 /* We will take any letters or digits. parse_number will 1101 complain if past the radix, or if L or U are not final. */ 1102 else if ((*p < '0' || *p > '9') 1103 && ((*p < 'a' || *p > 'z') 1104 && (*p < 'A' || *p > 'Z'))) 1105 break; 1106 } 1107 toktype = parse_number (par_state, tokstart, p - tokstart, 1108 got_dot|got_e, &yylval); 1109 if (toktype == ERROR) 1110 { 1111 char *err_copy = (char *) alloca (p - tokstart + 1); 1112 1113 memcpy (err_copy, tokstart, p - tokstart); 1114 err_copy[p - tokstart] = 0; 1115 error (_("Invalid number \"%s\"."), err_copy); 1116 } 1117 par_state->lexptr = p; 1118 return toktype; 1119 } 1120 1121 case '@': 1122 { 1123 const char *p = &tokstart[1]; 1124 size_t len = strlen ("entry"); 1125 1126 while (isspace (*p)) 1127 p++; 1128 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len]) 1129 && p[len] != '_') 1130 { 1131 par_state->lexptr = &p[len]; 1132 return ENTRY; 1133 } 1134 } 1135 /* FALLTHRU */ 1136 case '+': 1137 case '-': 1138 case '*': 1139 case '/': 1140 case '%': 1141 case '|': 1142 case '&': 1143 case '^': 1144 case '~': 1145 case '!': 1146 case '<': 1147 case '>': 1148 case '?': 1149 case ':': 1150 case '=': 1151 case '{': 1152 case '}': 1153 symbol: 1154 par_state->lexptr++; 1155 return c; 1156 1157 case '\'': 1158 case '"': 1159 case '`': 1160 { 1161 int host_len; 1162 int result = parse_string_or_char (tokstart, &par_state->lexptr, 1163 &yylval.tsval, &host_len); 1164 if (result == CHAR) 1165 { 1166 if (host_len == 0) 1167 error (_("Empty character constant.")); 1168 else if (host_len > 2 && c == '\'') 1169 { 1170 ++tokstart; 1171 namelen = par_state->lexptr - tokstart - 1; 1172 goto tryname; 1173 } 1174 else if (host_len > 1) 1175 error (_("Invalid character constant.")); 1176 } 
1177 return result; 1178 } 1179 } 1180 1181 if (!(c == '_' || c == '$' 1182 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))) 1183 /* We must have come across a bad character (e.g. ';'). */ 1184 error (_("Invalid character '%c' in expression."), c); 1185 1186 /* It's a name. See how long it is. */ 1187 namelen = 0; 1188 for (c = tokstart[namelen]; 1189 (c == '_' || c == '$' || (c >= '0' && c <= '9') 1190 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));) 1191 { 1192 c = tokstart[++namelen]; 1193 } 1194 1195 /* The token "if" terminates the expression and is NOT removed from 1196 the input stream. It doesn't count if it appears in the 1197 expansion of a macro. */ 1198 if (namelen == 2 1199 && tokstart[0] == 'i' 1200 && tokstart[1] == 'f') 1201 { 1202 return 0; 1203 } 1204 1205 /* For the same reason (breakpoint conditions), "thread N" 1206 terminates the expression. "thread" could be an identifier, but 1207 an identifier is never followed by a number without intervening 1208 punctuation. 1209 Handle abbreviations of these, similarly to 1210 breakpoint.c:find_condition_and_thread. 1211 TODO: Watch for "goroutine" here? */ 1212 if (namelen >= 1 1213 && strncmp (tokstart, "thread", namelen) == 0 1214 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t')) 1215 { 1216 const char *p = tokstart + namelen + 1; 1217 1218 while (*p == ' ' || *p == '\t') 1219 p++; 1220 if (*p >= '0' && *p <= '9') 1221 return 0; 1222 } 1223 1224 par_state->lexptr += namelen; 1225 1226 tryname: 1227 1228 yylval.sval.ptr = tokstart; 1229 yylval.sval.length = namelen; 1230 1231 /* Catch specific keywords. */ 1232 std::string copy = copy_name (yylval.sval); 1233 for (const auto &token : ident_tokens) 1234 if (copy == token.oper) 1235 { 1236 /* It is ok to always set this, even though we don't always 1237 strictly need to. 
*/ 1238 yylval.opcode = token.opcode; 1239 return token.token; 1240 } 1241 1242 if (*tokstart == '$') 1243 return DOLLAR_VARIABLE; 1244 1245 if (pstate->parse_completion && *par_state->lexptr == '\0') 1246 saw_name_at_eof = 1; 1247 return NAME; 1248 } 1249 1250 /* An object of this type is pushed on a FIFO by the "outer" lexer. */ 1251 struct token_and_value 1252 { 1253 int token; 1254 YYSTYPE value; 1255 }; 1256 1257 /* A FIFO of tokens that have been read but not yet returned to the 1258 parser. */ 1259 static std::vector<token_and_value> token_fifo; 1260 1261 /* Non-zero if the lexer should return tokens from the FIFO. */ 1262 static int popping; 1263 1264 /* Temporary storage for yylex; this holds symbol names as they are 1265 built up. */ 1266 static auto_obstack name_obstack; 1267 1268 /* Build "package.name" in name_obstack. 1269 For convenience of the caller, the name is NUL-terminated, 1270 but the NUL is not included in the recorded length. */ 1271 1272 static struct stoken 1273 build_packaged_name (const char *package, int package_len, 1274 const char *name, int name_len) 1275 { 1276 struct stoken result; 1277 1278 name_obstack.clear (); 1279 obstack_grow (&name_obstack, package, package_len); 1280 obstack_grow_str (&name_obstack, "."); 1281 obstack_grow (&name_obstack, name, name_len); 1282 obstack_grow (&name_obstack, "", 1); 1283 result.ptr = (char *) obstack_base (&name_obstack); 1284 result.length = obstack_object_size (&name_obstack) - 1; 1285 1286 return result; 1287 } 1288 1289 /* Return non-zero if NAME is a package name. 1290 BLOCK is the scope in which to interpret NAME; this can be NULL 1291 to mean the global scope. 
*/ 1292 1293 static int 1294 package_name_p (const char *name, const struct block *block) 1295 { 1296 struct symbol *sym; 1297 struct field_of_this_result is_a_field_of_this; 1298 1299 sym = lookup_symbol (name, block, STRUCT_DOMAIN, &is_a_field_of_this).symbol; 1300 1301 if (sym 1302 && sym->aclass () == LOC_TYPEDEF 1303 && sym->type ()->code () == TYPE_CODE_MODULE) 1304 return 1; 1305 1306 return 0; 1307 } 1308 1309 /* Classify a (potential) function in the "unsafe" package. 1310 We fold these into "keywords" to keep things simple, at least until 1311 something more complex is warranted. */ 1312 1313 static int 1314 classify_unsafe_function (struct stoken function_name) 1315 { 1316 std::string copy = copy_name (function_name); 1317 1318 if (copy == "Sizeof") 1319 { 1320 yylval.sval = function_name; 1321 return SIZEOF_KEYWORD; 1322 } 1323 1324 error (_("Unknown function in `unsafe' package: %s"), copy.c_str ()); 1325 } 1326 1327 /* Classify token(s) "name1.name2" where name1 is known to be a package. 1328 The contents of the token are in `yylval'. 1329 Updates yylval and returns the new token type. 1330 1331 The result is one of NAME, NAME_OR_INT, or TYPENAME. */ 1332 1333 static int 1334 classify_packaged_name (const struct block *block) 1335 { 1336 struct block_symbol sym; 1337 struct field_of_this_result is_a_field_of_this; 1338 1339 std::string copy = copy_name (yylval.sval); 1340 1341 sym = lookup_symbol (copy.c_str (), block, VAR_DOMAIN, &is_a_field_of_this); 1342 1343 if (sym.symbol) 1344 { 1345 yylval.ssym.sym = sym; 1346 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL; 1347 } 1348 1349 return NAME; 1350 } 1351 1352 /* Classify a NAME token. 1353 The contents of the token are in `yylval'. 1354 Updates yylval and returns the new token type. 1355 BLOCK is the block in which lookups start; this can be NULL 1356 to mean the global scope. 1357 1358 The result is one of NAME, NAME_OR_INT, or TYPENAME. 
*/ 1359 1360 static int 1361 classify_name (struct parser_state *par_state, const struct block *block) 1362 { 1363 struct type *type; 1364 struct block_symbol sym; 1365 struct field_of_this_result is_a_field_of_this; 1366 1367 std::string copy = copy_name (yylval.sval); 1368 1369 /* Try primitive types first so they win over bad/weird debug info. */ 1370 type = language_lookup_primitive_type (par_state->language (), 1371 par_state->gdbarch (), 1372 copy.c_str ()); 1373 if (type != NULL) 1374 { 1375 /* NOTE: We take advantage of the fact that yylval coming in was a 1376 NAME, and that struct ttype is a compatible extension of struct 1377 stoken, so yylval.tsym.stoken is already filled in. */ 1378 yylval.tsym.type = type; 1379 return TYPENAME; 1380 } 1381 1382 /* TODO: What about other types? */ 1383 1384 sym = lookup_symbol (copy.c_str (), block, VAR_DOMAIN, &is_a_field_of_this); 1385 1386 if (sym.symbol) 1387 { 1388 yylval.ssym.sym = sym; 1389 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL; 1390 return NAME; 1391 } 1392 1393 /* If we didn't find a symbol, look again in the current package. 1394 This is to, e.g., make "p global_var" work without having to specify 1395 the package name. We intentionally only looks for objects in the 1396 current package. 
*/ 1397 1398 { 1399 char *current_package_name = go_block_package_name (block); 1400 1401 if (current_package_name != NULL) 1402 { 1403 struct stoken sval = 1404 build_packaged_name (current_package_name, 1405 strlen (current_package_name), 1406 copy.c_str (), copy.size ()); 1407 1408 xfree (current_package_name); 1409 sym = lookup_symbol (sval.ptr, block, VAR_DOMAIN, 1410 &is_a_field_of_this); 1411 if (sym.symbol) 1412 { 1413 yylval.ssym.stoken = sval; 1414 yylval.ssym.sym = sym; 1415 yylval.ssym.is_a_field_of_this = is_a_field_of_this.type != NULL; 1416 return NAME; 1417 } 1418 } 1419 } 1420 1421 /* Input names that aren't symbols but ARE valid hex numbers, when 1422 the input radix permits them, can be names or numbers depending 1423 on the parse. Note we support radixes > 16 here. */ 1424 if ((copy[0] >= 'a' && copy[0] < 'a' + input_radix - 10) 1425 || (copy[0] >= 'A' && copy[0] < 'A' + input_radix - 10)) 1426 { 1427 YYSTYPE newlval; /* Its value is ignored. */ 1428 int hextype = parse_number (par_state, copy.c_str (), 1429 yylval.sval.length, 0, &newlval); 1430 if (hextype == INT) 1431 { 1432 yylval.ssym.sym.symbol = NULL; 1433 yylval.ssym.sym.block = NULL; 1434 yylval.ssym.is_a_field_of_this = 0; 1435 return NAME_OR_INT; 1436 } 1437 } 1438 1439 yylval.ssym.sym.symbol = NULL; 1440 yylval.ssym.sym.block = NULL; 1441 yylval.ssym.is_a_field_of_this = 0; 1442 return NAME; 1443 } 1444 1445 /* This is taken from c-exp.y mostly to get something working. 1446 The basic structure has been kept because we may yet need some of it. */ 1447 1448 static int 1449 yylex (void) 1450 { 1451 token_and_value current, next; 1452 1453 if (popping && !token_fifo.empty ()) 1454 { 1455 token_and_value tv = token_fifo[0]; 1456 token_fifo.erase (token_fifo.begin ()); 1457 yylval = tv.value; 1458 /* There's no need to fall through to handle package.name 1459 as that can never happen here. In theory. 
*/ 1460 return tv.token; 1461 } 1462 popping = 0; 1463 1464 current.token = lex_one_token (pstate); 1465 1466 /* TODO: Need a way to force specifying name1 as a package. 1467 .name1.name2 ? */ 1468 1469 if (current.token != NAME) 1470 return current.token; 1471 1472 /* See if we have "name1 . name2". */ 1473 1474 current.value = yylval; 1475 next.token = lex_one_token (pstate); 1476 next.value = yylval; 1477 1478 if (next.token == '.') 1479 { 1480 token_and_value name2; 1481 1482 name2.token = lex_one_token (pstate); 1483 name2.value = yylval; 1484 1485 if (name2.token == NAME) 1486 { 1487 /* Ok, we have "name1 . name2". */ 1488 std::string copy = copy_name (current.value.sval); 1489 1490 if (copy == "unsafe") 1491 { 1492 popping = 1; 1493 return classify_unsafe_function (name2.value.sval); 1494 } 1495 1496 if (package_name_p (copy.c_str (), pstate->expression_context_block)) 1497 { 1498 popping = 1; 1499 yylval.sval = build_packaged_name (current.value.sval.ptr, 1500 current.value.sval.length, 1501 name2.value.sval.ptr, 1502 name2.value.sval.length); 1503 return classify_packaged_name (pstate->expression_context_block); 1504 } 1505 } 1506 1507 token_fifo.push_back (next); 1508 token_fifo.push_back (name2); 1509 } 1510 else 1511 token_fifo.push_back (next); 1512 1513 /* If we arrive here we don't have a package-qualified name. */ 1514 1515 popping = 1; 1516 yylval = current.value; 1517 return classify_name (pstate, pstate->expression_context_block); 1518 } 1519 1520 /* See language.h. */ 1521 1522 int 1523 go_language::parser (struct parser_state *par_state) const 1524 { 1525 /* Setting up the parser state. */ 1526 scoped_restore pstate_restore = make_scoped_restore (&pstate); 1527 gdb_assert (par_state != NULL); 1528 pstate = par_state; 1529 1530 scoped_restore restore_yydebug = make_scoped_restore (&yydebug, 1531 parser_debug); 1532 1533 /* Initialize some state used by the lexer. 
*/ 1534 last_was_structop = 0; 1535 saw_name_at_eof = 0; 1536 paren_depth = 0; 1537 1538 token_fifo.clear (); 1539 popping = 0; 1540 name_obstack.clear (); 1541 1542 int result = yyparse (); 1543 if (!result) 1544 pstate->set_operation (pstate->pop ()); 1545 return result; 1546 } 1547 1548 static void 1549 yyerror (const char *msg) 1550 { 1551 if (pstate->prev_lexptr) 1552 pstate->lexptr = pstate->prev_lexptr; 1553 1554 error (_("A %s in expression, near `%s'."), msg, pstate->lexptr); 1555 } 1556