1 /* $NetBSD: lex.c,v 1.232 2024/12/08 17:12:01 rillig Exp $ */ 2 3 /* 4 * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 5 * Copyright (c) 1994, 1995 Jochen Pohl 6 * All Rights Reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Jochen Pohl for 19 * The NetBSD Project. 20 * 4. The name of the author may not be used to endorse or promote products 21 * derived from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
33 */ 34 35 #if HAVE_NBTOOL_CONFIG_H 36 #include "nbtool_config.h" 37 #endif 38 39 #include <sys/cdefs.h> 40 #if defined(__RCSID) 41 __RCSID("$NetBSD: lex.c,v 1.232 2024/12/08 17:12:01 rillig Exp $"); 42 #endif 43 44 #include <ctype.h> 45 #include <errno.h> 46 #include <float.h> 47 #include <limits.h> 48 #include <math.h> 49 #include <stdlib.h> 50 #include <string.h> 51 52 #include "lint1.h" 53 #include "cgram.h" 54 55 #define CHAR_MASK ((1U << CHAR_SIZE) - 1) 56 57 58 /* Current position (it's also updated when an included file is parsed) */ 59 pos_t curr_pos = { "", 1, 0 }; 60 61 /* 62 * Current position in C source (not updated when an included file is 63 * parsed). 64 */ 65 pos_t csrc_pos = { "", 1, 0 }; 66 67 bool in_gcc_attribute; 68 bool in_system_header; 69 70 /* 71 * Define a keyword that cannot be overridden by identifiers. 72 * 73 * Valid values for 'since' are 78, 90, 99, 11, 23. 74 * 75 * The C11 keywords are all taken from the reserved namespace. They are added 76 * in C99 mode as well, to make the parse error messages more useful. For 77 * example, if the keyword '_Generic' were not defined, it would be interpreted 78 * as an implicit function call, leading to a parse error. 79 * 80 * The C23 keywords are not made available in earlier modes, as they may 81 * conflict with user-defined identifiers. 
82 */ 83 #define kwdef(name, token, detail, since, gcc, deco) \ 84 { /* CONSTCOND */ \ 85 name, token, detail, \ 86 (since) == 90, \ 87 (since) == 99 || (since) == 11, \ 88 (since) == 23, \ 89 (gcc) > 0, \ 90 ((deco) & 1) != 0, ((deco) & 2) != 0, ((deco) & 4) != 0, \ 91 } 92 #define kwdef_token(name, token, since, gcc, deco) \ 93 kwdef(name, token, {false}, since, gcc, deco) 94 #define kwdef_sclass(name, sclass, since, gcc, deco) \ 95 kwdef(name, T_SCLASS, .u.kw_scl = (sclass), since, gcc, deco) 96 #define kwdef_type(name, tspec, since) \ 97 kwdef(name, T_TYPE, .u.kw_tspec = (tspec), since, 0, 1) 98 #define kwdef_tqual(name, tqual, since, gcc, deco) \ 99 kwdef(name, T_QUAL, .u.kw_tqual = {.tqual = true}, since, gcc, deco) 100 #define kwdef_const(name, named_constant, since, gcc, deco) \ 101 kwdef(name, T_NAMED_CONSTANT, \ 102 .u.kw_named_constant = (named_constant), since, gcc, deco) 103 #define kwdef_keyword(name, token) \ 104 kwdef(name, token, {false}, 78, 0, 1) 105 106 /* During initialization, these keywords are written to the symbol table. */ 107 static const struct keyword { 108 const char kw_name[20]; 109 int kw_token; /* token to be returned by yylex() */ 110 union { 111 bool kw_dummy; 112 scl_t kw_scl; /* if kw_token is T_SCLASS */ 113 tspec_t kw_tspec; /* if kw_token is T_TYPE or 114 * T_STRUCT_OR_UNION */ 115 type_qualifiers kw_tqual; /* if kw_token is T_QUAL */ 116 function_specifier kw_fs; /* if kw_token is 117 * T_FUNCTION_SPECIFIER */ 118 named_constant kw_named_constant; 119 } u; 120 bool kw_added_in_c90:1; 121 bool kw_added_in_c99_or_c11:1; 122 bool kw_added_in_c23:1; 123 bool kw_gcc:1; /* available in GCC mode */ 124 bool kw_plain:1; /* 'name' */ 125 bool kw_leading:1; /* '__name' */ 126 bool kw_both:1; /* '__name__' */ 127 } keywords[] = { 128 // TODO: _Alignas is not available in C99. 129 kwdef_keyword( "_Alignas", T_ALIGNAS), 130 // TODO: _Alignof is not available in C99. 
131 kwdef_keyword( "_Alignof", T_ALIGNOF), 132 // TODO: alignof is not available in C99. 133 kwdef_token( "alignof", T_ALIGNOF, 78,0,6), 134 kwdef_token( "asm", T_ASM, 78,1,7), 135 kwdef_token( "_Atomic", T_ATOMIC, 11,0,1), 136 kwdef("__auto_type", T_TYPE, .u.kw_tspec = AUTO_TYPE, 99,1,1), 137 kwdef_token( "attribute", T_ATTRIBUTE, 78,1,6), 138 kwdef_sclass( "auto", AUTO, 78,0,1), 139 kwdef_type( "_Bool", BOOL, 99), 140 kwdef_type( "bool", BOOL, 23), 141 kwdef_keyword( "break", T_BREAK), 142 kwdef_token( "__builtin_offsetof", T_BUILTIN_OFFSETOF, 78,1,1), 143 kwdef_keyword( "case", T_CASE), 144 kwdef_type( "char", CHAR, 78), 145 kwdef_type( "_Complex", COMPLEX, 99), 146 kwdef_tqual( "const", tq_const, 90,0,7), 147 kwdef_keyword( "continue", T_CONTINUE), 148 kwdef_keyword( "default", T_DEFAULT), 149 kwdef_keyword( "do", T_DO), 150 kwdef_type( "double", DOUBLE, 78), 151 kwdef_keyword( "else", T_ELSE), 152 // XXX: enum is not available in traditional C. 153 kwdef_keyword( "enum", T_ENUM), 154 kwdef_token( "__extension__",T_EXTENSION, 78,1,1), 155 kwdef_sclass( "extern", EXTERN, 78,0,1), 156 kwdef_const( "false", NC_FALSE, 23,0,1), 157 kwdef_type( "float", FLOAT, 78), 158 kwdef_keyword( "for", T_FOR), 159 kwdef_token( "_Generic", T_GENERIC, 11,0,1), 160 kwdef_keyword( "goto", T_GOTO), 161 kwdef_keyword( "if", T_IF), 162 kwdef_token( "__imag__", T_IMAG, 78,1,1), 163 kwdef("inline", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_INLINE, 99,0,7), 164 kwdef_type( "int", INT, 78), 165 #ifdef INT128_SIZE 166 kwdef_type( "__int128_t", INT128, 99), 167 #endif 168 kwdef_type( "long", LONG, 78), 169 kwdef("_Noreturn", T_FUNCTION_SPECIFIER, .u.kw_fs = FS_NORETURN, 11,0,1), 170 kwdef_const( "nullptr", NC_NULLPTR, 23,0,1), 171 // XXX: __packed is GCC-specific. 
172 kwdef_token( "__packed", T_PACKED, 78,0,1), 173 kwdef_token( "__real__", T_REAL, 78,1,1), 174 kwdef_sclass( "register", REG, 78,0,1), 175 kwdef_tqual( "restrict", tq_restrict, 99,0,7), 176 kwdef_keyword( "return", T_RETURN), 177 kwdef_type( "short", SHORT, 78), 178 kwdef( "signed", T_TYPE, .u.kw_tspec = SIGNED, 90,0,3), 179 kwdef_keyword( "sizeof", T_SIZEOF), 180 kwdef_sclass( "static", STATIC, 78,0,1), 181 // XXX: _Static_assert was added in C11. 182 kwdef_keyword( "_Static_assert", T_STATIC_ASSERT), 183 kwdef("struct", T_STRUCT_OR_UNION, .u.kw_tspec = STRUCT, 78,0,1), 184 kwdef_keyword( "switch", T_SWITCH), 185 kwdef_token( "__symbolrename", T_SYMBOLRENAME, 78,0,1), 186 kwdef_sclass( "__thread", THREAD_LOCAL, 78,1,1), 187 kwdef_sclass( "_Thread_local", THREAD_LOCAL, 11,0,1), 188 kwdef_sclass( "thread_local", THREAD_LOCAL, 23,0,1), 189 kwdef_const( "true", NC_TRUE, 23,0,1), 190 kwdef_sclass( "typedef", TYPEDEF, 78,0,1), 191 kwdef_token( "typeof", T_TYPEOF, 78,1,7), 192 #ifdef INT128_SIZE 193 kwdef_type( "__uint128_t", UINT128, 99), 194 #endif 195 kwdef("union", T_STRUCT_OR_UNION, .u.kw_tspec = UNION, 78,0,1), 196 kwdef_type( "unsigned", UNSIGN, 78), 197 // XXX: void is not available in traditional C. 198 kwdef_type( "void", VOID, 78), 199 kwdef_tqual( "volatile", tq_volatile, 90,0,7), 200 kwdef_keyword( "while", T_WHILE), 201 #undef kwdef 202 #undef kwdef_token 203 #undef kwdef_sclass 204 #undef kwdef_type 205 #undef kwdef_tqual 206 #undef kwdef_keyword 207 }; 208 209 /* 210 * The symbol table containing all keywords, identifiers and labels. The hash 211 * entries are linked via sym_t.s_symtab_next. 212 */ 213 static sym_t *symtab[503]; 214 215 /* 216 * The kind of the next expected symbol, to distinguish the namespaces of 217 * members, labels, type tags and other identifiers. 
218 */ 219 symbol_kind sym_kind; 220 221 222 static unsigned int 223 hash(const char *s) 224 { 225 unsigned int v = 0; 226 for (const char *p = s; *p != '\0'; p++) { 227 v = (v << 4) + (unsigned char)*p; 228 v ^= v >> 28; 229 } 230 return v % (sizeof(symtab) / sizeof(symtab[0])); 231 } 232 233 static void 234 symtab_add(sym_t *sym) 235 { 236 unsigned int h = hash(sym->s_name); 237 if ((sym->s_symtab_next = symtab[h]) != NULL) 238 symtab[h]->s_symtab_ref = &sym->s_symtab_next; 239 sym->s_symtab_ref = &symtab[h]; 240 symtab[h] = sym; 241 } 242 243 static sym_t * 244 symtab_search(const char *name) 245 { 246 247 unsigned int h = hash(name); 248 for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { 249 if (strcmp(sym->s_name, name) != 0) 250 continue; 251 if (sym->s_keyword != NULL || 252 sym->s_kind == sym_kind || 253 in_gcc_attribute) 254 return sym; 255 } 256 257 return NULL; 258 } 259 260 static void 261 symtab_remove(sym_t *sym) 262 { 263 264 if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) 265 sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; 266 sym->s_symtab_next = NULL; 267 } 268 269 static void 270 symtab_remove_locals(void) 271 { 272 273 for (size_t i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++) { 274 for (sym_t *sym = symtab[i]; sym != NULL; ) { 275 sym_t *next = sym->s_symtab_next; 276 if (sym->s_block_level >= 1) 277 symtab_remove(sym); 278 sym = next; 279 } 280 } 281 } 282 283 #ifdef DEBUG 284 static int 285 sym_by_name(const void *va, const void *vb) 286 { 287 const sym_t *a = *(const sym_t *const *)va; 288 const sym_t *b = *(const sym_t *const *)vb; 289 290 return strcmp(a->s_name, b->s_name); 291 } 292 293 struct syms { 294 const sym_t **items; 295 size_t len; 296 size_t cap; 297 }; 298 299 static void 300 syms_add(struct syms *syms, const sym_t *sym) 301 { 302 if (syms->len >= syms->cap) { 303 syms->cap *= 2; 304 syms->items = xrealloc(syms->items, 305 syms->cap * sizeof(syms->items[0])); 306 } 307 
syms->items[syms->len++] = sym; 308 } 309 310 void 311 debug_symtab(void) 312 { 313 struct syms syms = { xcalloc(64, sizeof(syms.items[0])), 0, 64 }; 314 315 debug_enter(); 316 for (int level = -1;; level++) { 317 bool more = false; 318 size_t n = sizeof(symtab) / sizeof(symtab[0]); 319 320 syms.len = 0; 321 for (size_t i = 0; i < n; i++) { 322 for (sym_t *sym = symtab[i]; sym != NULL;) { 323 if (sym->s_block_level == level && 324 sym->s_keyword == NULL) 325 syms_add(&syms, sym); 326 if (sym->s_block_level > level) 327 more = true; 328 sym = sym->s_symtab_next; 329 } 330 } 331 332 if (syms.len > 0) { 333 debug_step("symbol table level %d", level); 334 debug_indent_inc(); 335 qsort(syms.items, syms.len, sizeof(syms.items[0]), 336 sym_by_name); 337 for (size_t i = 0; i < syms.len; i++) 338 debug_sym("", syms.items[i], "\n"); 339 debug_indent_dec(); 340 341 lint_assert(level != -1); 342 } 343 344 if (!more) 345 break; 346 } 347 debug_leave(); 348 349 free(syms.items); 350 } 351 #endif 352 353 static void 354 register_keyword(const struct keyword *kw, bool leading, bool trailing) 355 { 356 357 const char *name; 358 if (!leading && !trailing) { 359 name = kw->kw_name; 360 } else { 361 char buf[256]; 362 (void)snprintf(buf, sizeof(buf), "%s%s%s", 363 leading ? "__" : "", kw->kw_name, trailing ? 
"__" : ""); 364 name = xstrdup(buf); 365 } 366 367 sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 368 sym->s_name = name; 369 sym->s_keyword = kw; 370 int tok = kw->kw_token; 371 sym->u.s_keyword.sk_token = tok; 372 if (tok == T_TYPE || tok == T_STRUCT_OR_UNION) 373 sym->u.s_keyword.u.sk_tspec = kw->u.kw_tspec; 374 if (tok == T_SCLASS) 375 sym->s_scl = kw->u.kw_scl; 376 if (tok == T_QUAL) 377 sym->u.s_keyword.u.sk_type_qualifier = kw->u.kw_tqual; 378 if (tok == T_FUNCTION_SPECIFIER) 379 sym->u.s_keyword.u.function_specifier = kw->u.kw_fs; 380 if (tok == T_NAMED_CONSTANT) 381 sym->u.s_keyword.u.named_constant = kw->u.kw_named_constant; 382 383 symtab_add(sym); 384 } 385 386 static bool 387 is_keyword_known(const struct keyword *kw) 388 { 389 390 if (kw->kw_added_in_c23 && !allow_c23) 391 return false; 392 if ((kw->kw_added_in_c90 || kw->kw_added_in_c99_or_c11) && !allow_c90) 393 return false; 394 395 /* 396 * In the 1990s, GCC defined several keywords that were later 397 * incorporated into C99, therefore in GCC mode, all C99 keywords are 398 * made available. The C11 keywords are made available as well, but 399 * there are so few that they don't matter practically. 400 */ 401 if (allow_gcc) 402 return true; 403 if (kw->kw_gcc) 404 return false; 405 406 if (kw->kw_added_in_c99_or_c11 && !allow_c99) 407 return false; 408 return true; 409 } 410 411 /* Write all keywords to the symbol table. */ 412 void 413 init_lex(void) 414 { 415 416 size_t n = sizeof(keywords) / sizeof(keywords[0]); 417 for (size_t i = 0; i < n; i++) { 418 const struct keyword *kw = keywords + i; 419 if (!is_keyword_known(kw)) 420 continue; 421 if (kw->kw_plain) 422 register_keyword(kw, false, false); 423 if (kw->kw_leading) 424 register_keyword(kw, true, false); 425 if (kw->kw_both) 426 register_keyword(kw, true, true); 427 } 428 } 429 430 /* 431 * When scanning the remainder of a long token (see lex_input), read a byte 432 * and return it as an unsigned char or as EOF. 
*
 * Increment the line counts if necessary.
 */
static int
read_byte(void)
{
	int c = lex_input();

	if (c == '\n')
		lex_next_line();
	return c == '\0' ? EOF : c;	/* lex returns 0 on EOF. */
}

/*
 * Store the payload of a keyword symbol in the yylval member that belongs to
 * its token kind, and return the token.
 */
static int
lex_keyword(sym_t *sym)
{
	int tok = sym->u.s_keyword.sk_token;

	if (tok == T_SCLASS)
		yylval.y_scl = sym->s_scl;
	if (tok == T_TYPE || tok == T_STRUCT_OR_UNION)
		yylval.y_tspec = sym->u.s_keyword.u.sk_tspec;
	if (tok == T_QUAL)
		yylval.y_type_qualifiers =
		    sym->u.s_keyword.u.sk_type_qualifier;
	if (tok == T_FUNCTION_SPECIFIER)
		yylval.y_function_specifier =
		    sym->u.s_keyword.u.function_specifier;
	if (tok == T_NAMED_CONSTANT)
		yylval.y_named_constant = sym->u.s_keyword.u.named_constant;
	return tok;
}

/*
 * Look up the definition of a name in the symbol table. This symbol must
 * either be a keyword or a symbol of the type required by sym_kind (label,
 * member, tag, ...).
 *
 * For a keyword, return its token.  Otherwise store a newly allocated sbuf_t
 * in yylval.y_name and return T_TYPENAME for a known typedef name, or T_NAME
 * for everything else.  'text' must be null-terminated at text[len], as
 * len + 1 bytes are copied for an unknown name.
 */
extern int
lex_name(const char *text, size_t len)
{

	sym_t *sym = symtab_search(text);
	if (sym != NULL && sym->s_keyword != NULL)
		return lex_keyword(sym);

	sbuf_t *sb = xmalloc(sizeof(*sb));
	sb->sb_len = len;
	sb->sb_sym = sym;
	yylval.y_name = sb;

	if (sym != NULL) {
		lint_assert(block_level >= sym->s_block_level);
		sb->sb_name = sym->s_name;
		return sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME;
	}

	/* Unknown name: keep a private copy of the spelling. */
	char *name = block_zero_alloc(len + 1, "string");
	(void)memcpy(name, text, len + 1);
	sb->sb_name = name;
	return T_NAME;
}

/*
 * Choose the type of an integer constant that has no 'u' suffix.
 *
 * ls is the number of 'l' suffixes, ui the value, base the radix of the
 * constant.  If 'warned' is set, the out-of-range warning has already been
 * given and is not repeated.
 */
static tspec_t
integer_constant_type_signed(unsigned ls, uint64_t ui, int base, bool warned)
{
	if (ls == 0 && ui <= TARG_INT_MAX)
		return INT;
	if (ls == 0 && ui <= TARG_UINT_MAX && base != 10 && allow_c90)
		return UINT;
	if (ls == 0 && ui <= TARG_LONG_MAX)
		return LONG;

	if (ls <= 1 && ui <= TARG_LONG_MAX)
		return LONG;
	if (ls <= 1 && ui <= TARG_ULONG_MAX && base != 10)
		return allow_c90 ? ULONG : LONG;
	if (ls <= 1 && !allow_c99) {
		if (!warned)
			/* integer constant out of range */
			warning(252);
		return allow_c90 ? ULONG : LONG;
	}

	if (ui <= TARG_LLONG_MAX)
		return LLONG;
	if (ui <= TARG_ULLONG_MAX && base != 10)
		return allow_c90 ? ULLONG : LLONG;
	if (!warned)
		/* integer constant out of range */
		warning(252);
	return allow_c90 ? ULLONG : LLONG;
}

/*
 * Choose the type of an integer constant that has a 'u' suffix.
 *
 * l is the number of 'l' suffixes, ui the value.  If 'warned' is set, the
 * out-of-range warning has already been given and is not repeated.
 */
static tspec_t
integer_constant_type_unsigned(unsigned l, uint64_t ui, bool warned)
{
	if (l == 0 && ui <= TARG_UINT_MAX)
		return UINT;

	if (l <= 1 && ui <= TARG_ULONG_MAX)
		return ULONG;
	if (l <= 1 && !allow_c99) {
		if (!warned)
			/* integer constant out of range */
			warning(252);
		return ULONG;
	}

	if (ui <= TARG_ULLONG_MAX)
		return ULLONG;
	if (!warned)
		/* integer constant out of range */
		warning(252);
	return ULLONG;
}

/*
 * Convert the spelling of an integer constant to its value and type, store
 * them in a newly allocated yylval.y_val and return T_CON.
 *
 * text/len is the complete spelling, including a 0x or 0b prefix for base 16
 * or 2 and including any 'l' and 'u' suffixes.
 */
int
lex_integer_constant(const char *text, size_t len, int base)
{
	const char *cp = text;

	/* skip 0[xX] or 0[bB] */
	if (base == 16 || base == 2) {
		cp += 2;
		len -= 2;
	}

	/* read suffixes */
	unsigned l_suffix = 0, u_suffix = 0;
	for (;; len--) {
		char c = cp[len - 1];
		if (c == 'l' || c == 'L')
			l_suffix++;
		else if (c == 'u' || c == 'U')
			u_suffix++;
		else
			break;
	}
	if (l_suffix > 2 || u_suffix > 1) {
		/* malformed integer constant */
		warning(251);
		if (l_suffix > 2)
			l_suffix = 2;
		if (u_suffix > 1)
			u_suffix = 1;
	}
	if (!allow_c90 && u_suffix > 0)
		/* suffix 'U' is illegal in traditional C */
		warning(97);

	bool warned = false;
	errno = 0;
	char *eptr;
	uint64_t ui = (uint64_t)strtoull(cp, &eptr, base);
	/* The digits must end exactly where the suffixes begin. */
	lint_assert(eptr == cp + len);
	if (errno != 0) {
		/* integer constant out of range */
		warning(252);
		warned = true;
	}

	if (base == 8 && len > 1)
		/* octal number '%.*s' */
		query_message(8, (int)len, cp);

	bool unsigned_since_c90 = allow_trad && allow_c90 && u_suffix == 0
	    && ui > TARG_INT_MAX
	    && ((l_suffix == 0 && base != 10 && ui <= TARG_UINT_MAX)
		|| (l_suffix <= 1 && ui > TARG_LONG_MAX));

	tspec_t t = u_suffix > 0
	    ? integer_constant_type_unsigned(l_suffix, ui, warned)
	    : integer_constant_type_signed(l_suffix, ui, base, warned);
	ui = (uint64_t)convert_integer((int64_t)ui, t, size_in_bits(t));

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = t;
	yylval.y_val->v_unsigned_since_c90 = unsigned_since_c90;
	yylval.y_val->u.integer = (int64_t)ui;

	return T_CON;
}

/* Extend or truncate si to match t. If t is signed, sign-extend. */
int64_t
convert_integer(int64_t si, tspec_t t, unsigned int bits)
{

	uint64_t vbits = value_bits(bits);
	uint64_t ui = (uint64_t)si;
	return t == PTR || is_uinteger(t) || ((ui & bit(bits - 1)) == 0)
	    ? (int64_t)(ui & vbits)
	    : (int64_t)(ui | ~vbits);
}

/*
 * Convert the spelling of a floating-point constant to its value and type,
 * store them in a newly allocated yylval.y_val and return T_CON.
 *
 * A trailing 'i' selects the complex variant of the type; before it, a
 * suffix 'f' or 'F' selects float and 'l' or 'L' selects long double.
 */
int
lex_floating_constant(const char *text, size_t len)
{
	const char *cp = text;

	bool imaginary = cp[len - 1] == 'i';
	if (imaginary)
		len--;

	char c = cp[len - 1];
	tspec_t t;
	if (c == 'f' || c == 'F') {
		t = imaginary ? FCOMPLEX : FLOAT;
		len--;
	} else if (c == 'l' || c == 'L') {
		t = imaginary ? LCOMPLEX : LDOUBLE;
		len--;
	} else
		t = imaginary ? DCOMPLEX : DOUBLE;

	if (!allow_c90 && t != DOUBLE)
		/* suffixes 'F' and 'L' are illegal in traditional C */
		warning(98);

	errno = 0;
	char *eptr;
	long double ld = strtold(cp, &eptr);
	/* The number must end exactly where the suffixes begin. */
	lint_assert(eptr == cp + len);
	if (errno != 0)
		/* floating-point constant out of range */
		warning(248);
	else if (t == FLOAT) {
		/* Narrow to the target type to detect overflow. */
		ld = (float)ld;
		if (isfinite(ld) == 0) {
			/* floating-point constant out of range */
			warning(248);
			ld = ld > 0 ? FLT_MAX : -FLT_MAX;
		}
	} else if (t == DOUBLE
	    || /* CONSTCOND */ LDOUBLE_SIZE == DOUBLE_SIZE) {
		ld = (double)ld;
		if (isfinite(ld) == 0) {
			/* floating-point constant out of range */
			warning(248);
			ld = ld > 0 ? DBL_MAX : -DBL_MAX;
		}
	}

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = t;
	yylval.y_val->u.floating = ld;

	return T_CON;
}

/* Store the operator in yylval.y_op and return the token unchanged. */
int
lex_operator(int t, op_t o)
{

	yylval.y_op = o;
	return t;
}

/*
 * Read the remainder of a character constant or string literal, after the
 * opening delimiter has been consumed.
 *
 * The returned buffer starts with 'L' if wide, followed by the delimiter and
 * the bytes that were read.  Reading stops after the closing delimiter, or
 * after a newline or at the end of the input; in the latter two cases,
 * *complete is set to false and a closing delimiter is appended.  The byte
 * after a backslash is copied without being interpreted.
 */
static buffer
read_quoted(bool *complete, char delim, bool wide)
{
	buffer buf;
	buf_init(&buf);
	if (wide)
		buf_add_char(&buf, 'L');
	buf_add_char(&buf, delim);

	for (;;) {
		int c = read_byte();
		if (c <= 0)
			break;
		buf_add_char(&buf, (char)c);
		if (c == '\n')
			break;
		if (c == delim) {
			*complete = true;
			return buf;
		}
		if (c == '\\') {
			c = read_byte();
			/* At the end of the input, pad the escape. */
			buf_add_char(&buf, (char)(c <= 0 ? ' ' : c));
			if (c <= 0)
				break;
		}
	}
	*complete = false;
	buf_add_char(&buf, delim);
	return buf;
}

/*
 * Analyze the lexical representation of the next character in the string
 * literal list. At the end, only update the position information.
 *
 * The iterator must start with 'end' set to 0.  Return true if a character
 * was found, with its details in *it; return false at the end of the literal
 * or at an unescaped newline.
 */
bool
quoted_next(const buffer *lit, quoted_iterator *it)
{
	const char *s = lit->data;

	/* Continue after the previous character, clear everything else. */
	*it = (quoted_iterator){ .start = it->end };

	char delim = s[s[0] == 'L' ? 1 : 0];

	bool in_the_middle = it->start > 0;
	if (!in_the_middle) {
		/* Skip the optional 'L' and the opening delimiter. */
		it->start = s[0] == 'L' ? 2 : 1;
		it->end = it->start;
	}

	while (s[it->start] == delim) {
		if (it->start + 1 == lit->len) {
			it->end = it->start;
			return false;
		}
		it->next_literal = in_the_middle;
		it->start += 2;
	}
	it->end = it->start;

again:
	switch (s[it->end]) {
	case '\\':
		it->end++;
		goto backslash;
	case '\n':
		it->unescaped_newline = true;
		return false;
	default:
		it->value = (unsigned char)s[it->end++];
		return true;
	}

backslash:
	it->escaped = true;
	if ('0' <= s[it->end] && s[it->end] <= '7')
		goto octal_escape;
	switch (s[it->end++]) {
	case '\n':
		/* A backslash-newline does not contribute a character. */
		goto again;
	case 'a':
		it->named_escape = true;
		it->value = '\a';
		it->invalid_escape = !allow_c90;
		return true;
	case 'b':
		it->named_escape = true;
		it->value = '\b';
		return true;
	case 'e':
		it->named_escape = true;
		it->value = '\033';
		it->invalid_escape = !allow_gcc;
		return true;
	case 'f':
		it->named_escape = true;
		it->value = '\f';
		return true;
	case 'n':
		it->named_escape = true;
		it->value = '\n';
		return true;
	case 'r':
		it->named_escape = true;
		it->value = '\r';
		return true;
	case 't':
		it->named_escape = true;
		it->value = '\t';
		return true;
	case 'v':
		it->named_escape = true;
		it->value = '\v';
		it->invalid_escape = !allow_c90;
		return true;
	case 'x':
		goto hex_escape;
	case '"':
		it->literal_escape = true;
		it->value = '"';
		it->invalid_escape = !allow_c90 && delim == '\'';
		return true;
	case '?':
		it->literal_escape = true;
		it->value = '?';
		it->invalid_escape = !allow_c90;
		return true;
	default:
		it->invalid_escape = true;
		/* FALLTHROUGH */
	case '\'':
	case '\\':
		it->literal_escape = true;
		it->value = (unsigned char)s[it->end - 1];
		return true;
	}

octal_escape:
	/* Up to 3 octal digits; only 3 digits can exceed the range. */
	it->octal_digits++;
	it->value = s[it->end++] - '0';
	if ('0' <= s[it->end] && s[it->end] <= '7') {
		it->octal_digits++;
		it->value = 8 * it->value + (s[it->end++] - '0');
		if ('0' <= s[it->end] && s[it->end] <= '7') {
			it->octal_digits++;
			it->value = 8 * it->value + (s[it->end++] - '0');
			it->overflow = it->value > TARG_UCHAR_MAX
			    && s[0] != 'L';
		}
	}
	return true;

hex_escape:
	for (;;) {
		char ch = s[it->end];
		unsigned digit_value;
		if ('0' <= ch && ch <= '9')
			digit_value = ch - '0';
		else if ('A' <= ch && ch <= 'F')
			digit_value = 10 + (ch - 'A');
		else if ('a' <= ch && ch <= 'f')
			digit_value = 10 + (ch - 'a');
		else
			break;

		it->end++;
		it->value = 16 * it->value + digit_value;
		uint64_t limit = s[0] == 'L' ? TARG_UINT_MAX : TARG_UCHAR_MAX;
		if (it->value > limit)
			it->overflow = true;
		/* The digit count saturates at 3. */
		if (it->hex_digits < 3)
			it->hex_digits++;
	}
	it->missing_hex_digits = it->hex_digits == 0;
	return true;
}

/*
 * Emit the diagnostics for each character of a quoted literal as produced by
 * read_quoted, and for the literal as a whole if it is not terminated
 * properly.
 */
static void
check_quoted(const buffer *buf, bool complete, char delim)
{
	quoted_iterator it = { .end = 0 }, prev = it;
	for (; quoted_next(buf, &it); prev = it) {
		if (it.missing_hex_digits)
			/* no hex digits follow \x */
			error(74);
		if (it.hex_digits > 0 && !allow_c90)
			/* \x undefined in traditional C */
			warning(82);
		else if (!it.invalid_escape)
			;
		else if (it.value == '8' || it.value == '9')
			/* bad octal digit '%c' */
			warning(77, (int)it.value);
		else if (it.literal_escape && it.value == '?')
			/* \? undefined in traditional C */
			warning(263);
		else if (it.literal_escape && it.value == '"')
			/* \" inside character constants undefined in ... */
			warning(262);
		else if (it.named_escape && it.value == '\a')
			/* \a undefined in traditional C */
			warning(81);
		else if (it.named_escape && it.value == '\v')
			/* \v undefined in traditional C */
			warning(264);
		else {
			unsigned char ch = buf->data[it.end - 1];
			if (ch_isprint(ch))
				/* dubious escape \%c */
				warning(79, ch);
			else
				/* dubious escape \%o */
				warning(80, ch);
		}
		if (it.overflow && it.hex_digits > 0)
			/* overflow in hex escape */
			warning(75);
		if (it.overflow && it.octal_digits > 0)
			/* character escape does not fit in character */
			warning(76);
		if (it.value < ' ' && !it.escaped && complete)
			/* invisible character U+%04X in %s */
			query_message(17, (unsigned)it.value, delim == '"'
			    ? "string literal" : "character constant");
		if (prev.octal_digits > 0 && prev.octal_digits < 3
		    && !it.escaped && it.value >= '8' && it.value <= '9')
			/* short octal escape '%.*s' followed by digit '%c' */
			warning(356, (int)(prev.end - prev.start),
			    buf->data + prev.start, buf->data[it.start]);
	}
	if (it.unescaped_newline)
		/* newline in string or char constant */
		error(254);
	if (!complete && delim == '"')
		/* unterminated string constant */
		error(258);
	if (!complete && delim == '\'')
		/* unterminated character constant */
		error(253);
}

/*
 * Read the remainder of a quoted literal and check it.  The returned buffer
 * is the one from read_quoted; nothing in this function releases it.
 */
static buffer
lex_quoted(char delim, bool wide)
{
	bool complete;
	buffer buf = read_quoted(&complete, delim, wide);
	check_quoted(&buf, complete, delim);
	return buf;
}

/* Called if lex found a leading "'".
*/
/*
 * Read the remainder of a character constant, store its value in a newly
 * allocated yylval.y_val of type INT and return T_CON.
 *
 * The characters of a multi-character constant are combined by shifting the
 * previous value to the left by CHAR_SIZE bits.  A constant consisting of a
 * single character is converted the same way as a value of type CHAR.
 */
int
lex_character_constant(void)
{
	buffer buf = lex_quoted('\'', false);

	size_t n = 0;
	uint64_t val = 0;
	quoted_iterator it = { .end = 0 };
	while (quoted_next(&buf, &it)) {
		val = (val << CHAR_SIZE) + it.value;
		n++;
	}
	/*
	 * The iterator holds no pointer into the buffer, so the text can be
	 * released as soon as the iteration is done, just as
	 * lex_pp_character_constant does.
	 */
	free(buf.data);

	if (n > sizeof(int) || (n > 1 && (pflag || hflag))) {
		/*
		 * XXX: ^^ should rather be sizeof(TARG_INT). Luckily,
		 * sizeof(int) is the same on all supported platforms.
		 */
		/* too many characters in character constant */
		error(71);
	} else if (n > 1)
		/* multi-character character constant */
		warning(294);
	else if (n == 0 && !it.unescaped_newline)
		/* empty character constant */
		error(73);

	int64_t cval = n == 1
	    ? convert_integer((int64_t)val, CHAR, CHAR_SIZE)
	    : (int64_t)val;

	yylval.y_val = xcalloc(1, sizeof(*yylval.y_val));
	yylval.y_val->v_tspec = INT;
	yylval.y_val->v_char_constant = true;
	yylval.y_val->u.integer = cval;

	return T_CON;
}

/* Called if lex found a leading "L'".
*/ 983 int 984 lex_wide_character_constant(void) 985 { 986 buffer buf = lex_quoted('\'', true); 987 988 static char wbuf[MB_LEN_MAX + 1]; 989 size_t n = 0, nmax = MB_CUR_MAX; 990 991 quoted_iterator it = { .end = 0 }; 992 while (quoted_next(&buf, &it)) { 993 if (n < nmax) 994 wbuf[n] = (char)it.value; 995 n++; 996 } 997 998 wchar_t wc = 0; 999 if (n == 0) 1000 /* empty character constant */ 1001 error(73); 1002 else if (n > nmax) { 1003 n = nmax; 1004 /* too many characters in character constant */ 1005 error(71); 1006 } else { 1007 wbuf[n] = '\0'; 1008 (void)mbtowc(NULL, NULL, 0); 1009 if (mbtowc(&wc, wbuf, nmax) < 0) 1010 /* invalid multibyte character */ 1011 error(291); 1012 } 1013 1014 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 1015 yylval.y_val->v_tspec = WCHAR_TSPEC; 1016 yylval.y_val->v_char_constant = true; 1017 yylval.y_val->u.integer = wc; 1018 1019 return T_CON; 1020 } 1021 1022 /* See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html */ 1023 static void 1024 parse_line_directive_flags(const char *p, 1025 bool *is_begin, bool *is_end, bool *is_system) 1026 { 1027 1028 *is_begin = false; 1029 *is_end = false; 1030 *is_system = false; 1031 1032 while (*p != '\0') { 1033 while (ch_isspace(*p)) 1034 p++; 1035 1036 const char *word = p; 1037 while (*p != '\0' && !ch_isspace(*p)) 1038 p++; 1039 size_t len = (size_t)(p - word); 1040 1041 if (len == 1 && word[0] == '1') 1042 *is_begin = true; 1043 if (len == 1 && word[0] == '2') 1044 *is_end = true; 1045 if (len == 1 && word[0] == '3') 1046 *is_system = true; 1047 /* Flag '4' is only interesting for C++. */ 1048 } 1049 } 1050 1051 /* 1052 * The first directive of the preprocessed translation unit provides the name 1053 * of the C source file as specified at the command line. 
1054 */ 1055 static void 1056 set_csrc_pos(void) 1057 { 1058 static bool done; 1059 1060 if (done) 1061 return; 1062 done = true; 1063 csrc_pos.p_file = curr_pos.p_file; 1064 outsrc(transform_filename(curr_pos.p_file, strlen(curr_pos.p_file))); 1065 } 1066 1067 /* # lineno ["filename" [GCC-flag...]] */ 1068 static void 1069 set_location(const char *p) 1070 { 1071 char *end; 1072 long ln = strtol(--p, &end, 10); 1073 if (end == p) 1074 goto error; 1075 p = end; 1076 1077 if (*p != ' ' && *p != '\0') 1078 goto error; 1079 while (*p == ' ') 1080 p++; 1081 1082 if (*p != '\0') { 1083 if (*p != '"') 1084 goto error; 1085 const char *fn = ++p; 1086 while (*p != '"' && *p != '\0') 1087 p++; 1088 if (*p != '"') 1089 goto error; 1090 size_t fn_len = p++ - fn; 1091 if (fn_len > PATH_MAX) 1092 goto error; 1093 if (fn_len == 0) { 1094 fn = "{standard input}"; 1095 fn_len = strlen(fn); 1096 } 1097 curr_pos.p_file = record_filename(fn, fn_len); 1098 set_csrc_pos(); 1099 1100 bool is_begin, is_end, is_system; 1101 parse_line_directive_flags(p, &is_begin, &is_end, &is_system); 1102 update_location(curr_pos.p_file, (int)ln, is_begin, is_end); 1103 in_system_header = is_system; 1104 } 1105 curr_pos.p_line = (int)ln - 1; 1106 curr_pos.p_uniq = 0; 1107 if (curr_pos.p_file == csrc_pos.p_file) { 1108 csrc_pos.p_line = (int)ln - 1; 1109 csrc_pos.p_uniq = 0; 1110 } 1111 return; 1112 1113 error: 1114 /* undefined or invalid '#' directive */ 1115 warning(255); 1116 } 1117 1118 static void 1119 check_stmt_macro(const char *text) 1120 { 1121 const char *p = text; 1122 while (*p == ' ') 1123 p++; 1124 1125 const char *name_start = p; 1126 while (ch_isalnum(*p) || *p == '_') 1127 p++; 1128 const char *name_end = p; 1129 1130 if (*p == '(') { 1131 while (*p != '\0' && *p != ')') 1132 p++; 1133 if (*p == ')') 1134 p++; 1135 } 1136 1137 while (*p == ' ') 1138 p++; 1139 1140 if (strncmp(p, "do", 2) == 0 && !ch_isalnum(p[2])) 1141 /* do-while macro '%.*s' ends with semicolon */ 1142 warning(385, 
(int)(name_end - name_start), name_start); 1143 } 1144 1145 // Between lex_pp_begin and lex_pp_end, the current preprocessing line, 1146 // with comments and whitespace converted to a single space. 1147 static buffer pp_line; 1148 1149 void 1150 lex_pp_begin(void) 1151 { 1152 if (pp_line.data == NULL) 1153 buf_init(&pp_line); 1154 debug_step("%s", __func__); 1155 lint_assert(pp_line.len == 0); 1156 } 1157 1158 void 1159 lex_pp_identifier(const char *text) 1160 { 1161 debug_step("%s '%s'", __func__, text); 1162 buf_add(&pp_line, text); 1163 } 1164 1165 void 1166 lex_pp_number(const char *text) 1167 { 1168 debug_step("%s '%s'", __func__, text); 1169 buf_add(&pp_line, text); 1170 } 1171 1172 void 1173 lex_pp_character_constant(void) 1174 { 1175 buffer buf = lex_quoted('\'', false); 1176 debug_step("%s '%s'", __func__, buf.data); 1177 buf_add(&pp_line, buf.data); 1178 free(buf.data); 1179 } 1180 1181 void 1182 lex_pp_string_literal(void) 1183 { 1184 buffer buf = lex_quoted('"', false); 1185 debug_step("%s '%s'", __func__, buf.data); 1186 buf_add(&pp_line, buf.data); 1187 free(buf.data); 1188 } 1189 1190 void 1191 lex_pp_punctuator(const char *text) 1192 { 1193 debug_step("%s '%s'", __func__, text); 1194 buf_add(&pp_line, text); 1195 } 1196 1197 void 1198 lex_pp_comment(void) 1199 { 1200 int lc = -1, c; 1201 1202 for (;;) { 1203 if ((c = read_byte()) == EOF) { 1204 /* unterminated comment */ 1205 error(256); 1206 return; 1207 } 1208 if (lc == '*' && c == '/') 1209 break; 1210 lc = c; 1211 } 1212 1213 buf_add_char(&pp_line, ' '); 1214 } 1215 1216 void 1217 lex_pp_whitespace(void) 1218 { 1219 buf_add_char(&pp_line, ' '); 1220 } 1221 1222 void 1223 lex_pp_end(void) 1224 { 1225 const char *text = pp_line.data; 1226 size_t len = pp_line.len; 1227 while (len > 0 && text[len - 1] == ' ') 1228 len--; 1229 debug_step("%s '%.*s'", __func__, (int)len, text); 1230 1231 const char *p = text; 1232 while (*p == ' ') 1233 p++; 1234 1235 if (ch_isdigit(*p)) 1236 set_location(p); 1237 
else if (strncmp(p, "pragma ", 7) == 0) 1238 goto done; 1239 else if (strncmp(p, "define ", 7) == 0) { 1240 if (text[len - 1] == ';') 1241 check_stmt_macro(p + 7); 1242 } else if (strncmp(p, "undef ", 6) == 0) 1243 goto done; 1244 else 1245 /* undefined or invalid '#' directive */ 1246 warning(255); 1247 1248 done: 1249 pp_line.len = 0; 1250 pp_line.data[0] = '\0'; 1251 } 1252 1253 /* Handle lint comments such as ARGSUSED. */ 1254 void 1255 lex_comment(void) 1256 { 1257 int c; 1258 static const struct { 1259 const char name[18]; 1260 bool arg; 1261 lint_comment comment; 1262 } keywtab[] = { 1263 { "ARGSUSED", true, LC_ARGSUSED }, 1264 { "BITFIELDTYPE", false, LC_BITFIELDTYPE }, 1265 { "CONSTCOND", false, LC_CONSTCOND }, 1266 { "CONSTANTCOND", false, LC_CONSTCOND }, 1267 { "CONSTANTCONDITION", false, LC_CONSTCOND }, 1268 { "FALLTHRU", false, LC_FALLTHROUGH }, 1269 { "FALLTHROUGH", false, LC_FALLTHROUGH }, 1270 { "FALL THROUGH", false, LC_FALLTHROUGH }, 1271 { "fallthrough", false, LC_FALLTHROUGH }, 1272 { "LINTLIBRARY", false, LC_LINTLIBRARY }, 1273 { "LINTED", true, LC_LINTED }, 1274 { "LONGLONG", false, LC_LONGLONG }, 1275 { "NOSTRICT", true, LC_LINTED }, 1276 { "NOTREACHED", false, LC_NOTREACHED }, 1277 { "PRINTFLIKE", true, LC_PRINTFLIKE }, 1278 { "PROTOLIB", true, LC_PROTOLIB }, 1279 { "SCANFLIKE", true, LC_SCANFLIKE }, 1280 { "VARARGS", true, LC_VARARGS }, 1281 }; 1282 char keywd[32]; 1283 1284 bool seen_end_of_comment = false; 1285 1286 while (c = read_byte(), isspace(c) != 0) 1287 continue; 1288 1289 /* Read the potential keyword to keywd */ 1290 size_t l = 0; 1291 while (c != EOF && l < sizeof(keywd) - 1 && 1292 (isalpha(c) != 0 || isspace(c) != 0)) { 1293 if (islower(c) != 0 && l > 0 && ch_isupper(keywd[0])) 1294 break; 1295 keywd[l++] = (char)c; 1296 c = read_byte(); 1297 } 1298 while (l > 0 && ch_isspace(keywd[l - 1])) 1299 l--; 1300 keywd[l] = '\0'; 1301 1302 /* look for the keyword */ 1303 size_t i; 1304 for (i = 0; i < sizeof(keywtab) / 
sizeof(keywtab[0]); i++) 1305 if (strcmp(keywtab[i].name, keywd) == 0) 1306 goto found_keyword; 1307 goto skip_rest; 1308 1309 found_keyword: 1310 while (isspace(c) != 0) 1311 c = read_byte(); 1312 1313 /* read the argument, if the keyword accepts one and there is one */ 1314 char arg[32]; 1315 l = 0; 1316 if (keywtab[i].arg) { 1317 while (isdigit(c) != 0 && l < sizeof(arg) - 1) { 1318 arg[l++] = (char)c; 1319 c = read_byte(); 1320 } 1321 } 1322 arg[l] = '\0'; 1323 int a = l != 0 ? atoi(arg) : -1; 1324 1325 while (isspace(c) != 0) 1326 c = read_byte(); 1327 1328 seen_end_of_comment = c == '*' && (c = read_byte()) == '/'; 1329 if (!seen_end_of_comment && keywtab[i].comment != LC_LINTED) 1330 /* extra characters in lint comment */ 1331 warning(257); 1332 1333 handle_lint_comment(keywtab[i].comment, a); 1334 1335 skip_rest: 1336 while (!seen_end_of_comment) { 1337 int lc = c; 1338 if ((c = read_byte()) == EOF) { 1339 /* unterminated comment */ 1340 error(256); 1341 break; 1342 } 1343 if (lc == '*' && c == '/') 1344 seen_end_of_comment = true; 1345 } 1346 } 1347 1348 void 1349 lex_slash_slash_comment(void) 1350 { 1351 1352 if (!allow_c99 && !allow_gcc) 1353 /* %s does not support '//' comments */ 1354 gnuism(312, allow_c90 ? "C90" : "traditional C"); 1355 1356 for (int c; c = read_byte(), c != EOF && c != '\n';) 1357 continue; 1358 } 1359 1360 void 1361 reset_suppressions(void) 1362 { 1363 1364 lwarn = LWARN_ALL; 1365 suppress_longlong = false; 1366 suppress_constcond = false; 1367 } 1368 1369 int 1370 lex_string(void) 1371 { 1372 buffer *buf = xmalloc(sizeof(*buf)); 1373 *buf = lex_quoted('"', false); 1374 yylval.y_string = buf; 1375 return T_STRING; 1376 } 1377 1378 static size_t 1379 wide_length(const buffer *buf) 1380 { 1381 1382 (void)mblen(NULL, 0); 1383 size_t len = 0, i = 0; 1384 while (i < buf->len) { 1385 int n = mblen(buf->data + i, MB_CUR_MAX); 1386 if (n == -1) { 1387 /* invalid multibyte character */ 1388 error(291); 1389 break; 1390 } 1391 i += n > 1 ? 
n : 1; 1392 len++; 1393 } 1394 return len; 1395 } 1396 1397 int 1398 lex_wide_string(void) 1399 { 1400 buffer buf = lex_quoted('"', true); 1401 1402 buffer str; 1403 buf_init(&str); 1404 quoted_iterator it = { .end = 0 }; 1405 while (quoted_next(&buf, &it)) 1406 buf_add_char(&str, (char)it.value); 1407 1408 free(buf.data); 1409 1410 buffer *len_buf = xcalloc(1, sizeof(*len_buf)); 1411 len_buf->len = wide_length(&str); 1412 yylval.y_string = len_buf; 1413 return T_STRING; 1414 } 1415 1416 void 1417 lex_next_line(void) 1418 { 1419 curr_pos.p_line++; 1420 curr_pos.p_uniq = 0; 1421 debug_skip_indent(); 1422 debug_printf("parsing %s:%d\n", curr_pos.p_file, curr_pos.p_line); 1423 if (curr_pos.p_file == csrc_pos.p_file) { 1424 csrc_pos.p_line++; 1425 csrc_pos.p_uniq = 0; 1426 } 1427 } 1428 1429 void 1430 lex_unknown_character(int c) 1431 { 1432 1433 /* unknown character \%o */ 1434 error(250, c); 1435 } 1436 1437 /* 1438 * The scanner does not create new symbol table entries for symbols it cannot 1439 * find in the symbol table. This is to avoid putting undeclared symbols into 1440 * the symbol table if a syntax error occurs. 1441 * 1442 * getsym is called as soon as it is probably ok to put the symbol in the 1443 * symbol table. It is still possible that symbols are put in the symbol 1444 * table that are not completely declared due to syntax errors. To avoid too 1445 * many problems in this case, symbols get type 'int' in getsym. 1446 * 1447 * XXX calls to getsym should be delayed until declare_1_* is called. 1448 */ 1449 sym_t * 1450 getsym(sbuf_t *sb) 1451 { 1452 1453 sym_t *sym = sb->sb_sym; 1454 1455 /* 1456 * During member declaration it is possible that name() looked for 1457 * symbols of type SK_VCFT, although it should have looked for symbols 1458 * of type SK_TAG. Same can happen for labels. Both cases are 1459 * compensated here. 
1460 */ 1461 if (sym_kind == SK_MEMBER || sym_kind == SK_LABEL) { 1462 if (sym == NULL || sym->s_kind == SK_VCFT) 1463 sym = symtab_search(sb->sb_name); 1464 } 1465 1466 if (sym != NULL) { 1467 lint_assert(sym->s_kind == sym_kind); 1468 set_sym_kind(SK_VCFT); 1469 free(sb); 1470 return sym; 1471 } 1472 1473 /* create a new symbol table entry */ 1474 1475 decl_level *dl; 1476 if (sym_kind == SK_LABEL) { 1477 sym = level_zero_alloc(1, sizeof(*sym), "sym"); 1478 char *s = level_zero_alloc(1, sb->sb_len + 1, "string"); 1479 (void)memcpy(s, sb->sb_name, sb->sb_len + 1); 1480 sym->s_name = s; 1481 sym->s_block_level = 1; 1482 dl = dcs; 1483 while (dl->d_enclosing != NULL && 1484 dl->d_enclosing->d_enclosing != NULL) 1485 dl = dl->d_enclosing; 1486 lint_assert(dl->d_kind == DLK_AUTO); 1487 } else { 1488 sym = block_zero_alloc(sizeof(*sym), "sym"); 1489 sym->s_name = sb->sb_name; 1490 sym->s_block_level = block_level; 1491 dl = dcs; 1492 } 1493 1494 sym->s_def_pos = unique_curr_pos(); 1495 if ((sym->s_kind = sym_kind) != SK_LABEL) 1496 sym->s_type = gettyp(INT); 1497 1498 set_sym_kind(SK_VCFT); 1499 1500 if (!in_gcc_attribute) { 1501 debug_printf("%s: symtab_add ", __func__); 1502 debug_sym("", sym, "\n"); 1503 symtab_add(sym); 1504 1505 *dl->d_last_dlsym = sym; 1506 dl->d_last_dlsym = &sym->s_level_next; 1507 } 1508 1509 free(sb); 1510 return sym; 1511 } 1512 1513 /* 1514 * Construct a temporary symbol. The symbol name starts with a digit to avoid 1515 * name clashes with other identifiers. 1516 */ 1517 sym_t * 1518 mktempsym(type_t *tp) 1519 { 1520 static unsigned n = 0; 1521 char *s = level_zero_alloc((size_t)block_level, 64, "string"); 1522 sym_t *sym = block_zero_alloc(sizeof(*sym), "sym"); 1523 scl_t scl; 1524 1525 (void)snprintf(s, 64, "%.8u_tmp", n++); 1526 1527 scl = dcs->d_scl; 1528 if (scl == NO_SCL) 1529 scl = block_level > 0 ? 
AUTO : EXTERN; 1530 1531 sym->s_name = s; 1532 sym->s_type = tp; 1533 sym->s_block_level = block_level; 1534 sym->s_scl = scl; 1535 sym->s_kind = SK_VCFT; 1536 sym->s_used = true; 1537 sym->s_set = true; 1538 1539 symtab_add(sym); 1540 1541 *dcs->d_last_dlsym = sym; 1542 dcs->d_last_dlsym = &sym->s_level_next; 1543 1544 return sym; 1545 } 1546 1547 void 1548 symtab_remove_forever(sym_t *sym) 1549 { 1550 1551 debug_step("%s '%s' %s '%s'", __func__, 1552 sym->s_name, symbol_kind_name(sym->s_kind), 1553 type_name(sym->s_type)); 1554 symtab_remove(sym); 1555 1556 /* avoid that the symbol will later be put back to the symbol table */ 1557 sym->s_block_level = -1; 1558 } 1559 1560 /* 1561 * Remove all symbols from the symbol table that have the same level as the 1562 * given symbol. 1563 */ 1564 void 1565 symtab_remove_level(sym_t *syms) 1566 { 1567 1568 if (syms != NULL) 1569 debug_step("%s %d", __func__, syms->s_block_level); 1570 1571 /* Note the use of s_level_next instead of s_symtab_next. */ 1572 for (sym_t *sym = syms; sym != NULL; sym = sym->s_level_next) { 1573 if (sym->s_block_level != -1) { 1574 debug_step("%s '%s' %s '%s' %d", __func__, 1575 sym->s_name, symbol_kind_name(sym->s_kind), 1576 type_name(sym->s_type), sym->s_block_level); 1577 symtab_remove(sym); 1578 sym->s_symtab_ref = NULL; 1579 } 1580 } 1581 } 1582 1583 /* Put a symbol into the symbol table. */ 1584 void 1585 inssym(int level, sym_t *sym) 1586 { 1587 1588 debug_step("%s '%s' %s '%s' %d", __func__, 1589 sym->s_name, symbol_kind_name(sym->s_kind), 1590 type_name(sym->s_type), level); 1591 sym->s_block_level = level; 1592 symtab_add(sym); 1593 1594 const sym_t *next = sym->s_symtab_next; 1595 if (next != NULL) 1596 lint_assert(sym->s_block_level >= next->s_block_level); 1597 } 1598 1599 /* Called at level 0 after syntax errors. 
*/ 1600 void 1601 clean_up_after_error(void) 1602 { 1603 1604 symtab_remove_locals(); 1605 1606 while (mem_block_level > 0) 1607 level_free_all(mem_block_level--); 1608 } 1609 1610 /* Create a new symbol with the same name as an existing symbol. */ 1611 sym_t * 1612 pushdown(const sym_t *sym) 1613 { 1614 1615 debug_step("pushdown '%s' %s '%s'", 1616 sym->s_name, symbol_kind_name(sym->s_kind), 1617 type_name(sym->s_type)); 1618 1619 sym_t *nsym = block_zero_alloc(sizeof(*nsym), "sym"); 1620 lint_assert(sym->s_block_level <= block_level); 1621 nsym->s_name = sym->s_name; 1622 nsym->s_def_pos = unique_curr_pos(); 1623 nsym->s_kind = sym->s_kind; 1624 nsym->s_block_level = block_level; 1625 1626 symtab_add(nsym); 1627 1628 *dcs->d_last_dlsym = nsym; 1629 dcs->d_last_dlsym = &nsym->s_level_next; 1630 1631 return nsym; 1632 } 1633 1634 static void 1635 fill_token(int tk, const char *text, token *tok) 1636 { 1637 switch (tk) { 1638 case T_NAME: 1639 case T_TYPENAME: 1640 tok->kind = TK_IDENTIFIER; 1641 tok->u.identifier = xstrdup(yylval.y_name->sb_name); 1642 break; 1643 case T_CON: 1644 tok->kind = TK_CONSTANT; 1645 tok->u.constant = *yylval.y_val; 1646 break; 1647 case T_NAMED_CONSTANT: 1648 tok->kind = TK_IDENTIFIER; 1649 tok->u.identifier = xstrdup(text); 1650 break; 1651 case T_STRING:; 1652 tok->kind = TK_STRING_LITERALS; 1653 tok->u.string_literals.len = yylval.y_string->len; 1654 tok->u.string_literals.cap = yylval.y_string->cap; 1655 tok->u.string_literals.data = xstrdup(yylval.y_string->data); 1656 break; 1657 default: 1658 tok->kind = TK_PUNCTUATOR; 1659 tok->u.punctuator = xstrdup(text); 1660 } 1661 } 1662 1663 static void 1664 seq_reserve(balanced_token_sequence *seq) 1665 { 1666 if (seq->len >= seq->cap) { 1667 seq->cap = 16 + 2 * seq->cap; 1668 const balanced_token *old_tokens = seq->tokens; 1669 balanced_token *new_tokens = block_zero_alloc( 1670 seq->cap * sizeof(*seq->tokens), "balanced_token[]"); 1671 if (seq->len > 0) 1672 memcpy(new_tokens, 
old_tokens, 1673 seq->len * sizeof(*seq->tokens)); 1674 seq->tokens = new_tokens; 1675 } 1676 } 1677 1678 static balanced_token_sequence 1679 read_balanced(int opening) 1680 { 1681 int closing = opening == T_LPAREN ? T_RPAREN 1682 : opening == T_LBRACK ? T_RBRACK : T_RBRACE; 1683 balanced_token_sequence seq = { NULL, 0, 0 }; 1684 1685 int tok; 1686 while (tok = yylex(), tok > 0 && tok != closing) { 1687 seq_reserve(&seq); 1688 if (tok == T_LPAREN || tok == T_LBRACK || tok == T_LBRACE) { 1689 seq.tokens[seq.len].kind = tok == T_LPAREN ? '(' 1690 : tok == T_LBRACK ? '[' : '{'; 1691 seq.tokens[seq.len].u.tokens = read_balanced(tok); 1692 } else { 1693 fill_token(tok, yytext, &seq.tokens[seq.len].u.token); 1694 freeyyv(&yylval, tok); 1695 } 1696 seq.len++; 1697 } 1698 return seq; 1699 } 1700 1701 balanced_token_sequence 1702 lex_balanced(void) 1703 { 1704 return read_balanced(T_LPAREN); 1705 } 1706 1707 /* 1708 * Free any dynamically allocated memory referenced by 1709 * the value stack or yylval. 1710 * The type of information in yylval is described by tok. 1711 */ 1712 void 1713 freeyyv(void *sp, int tok) 1714 { 1715 if (tok == T_NAME || tok == T_TYPENAME) { 1716 sbuf_t *sb = *(sbuf_t **)sp; 1717 free(sb); 1718 } else if (tok == T_CON) { 1719 val_t *val = *(val_t **)sp; 1720 free(val); 1721 } else if (tok == T_STRING) { 1722 buffer *str = *(buffer **)sp; 1723 free(str->data); 1724 free(str); 1725 } 1726 } 1727