1 /* $NetBSD: lex.c,v 1.123 2022/04/16 20:08:35 rillig Exp $ */ 2 3 /* 4 * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. 5 * Copyright (c) 1994, 1995 Jochen Pohl 6 * All Rights Reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by Jochen Pohl for 19 * The NetBSD Project. 20 * 4. The name of the author may not be used to endorse or promote products 21 * derived from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #if HAVE_NBTOOL_CONFIG_H 36 #include "nbtool_config.h" 37 #endif 38 39 #include <sys/cdefs.h> 40 #if defined(__RCSID) && !defined(lint) 41 __RCSID("$NetBSD: lex.c,v 1.123 2022/04/16 20:08:35 rillig Exp $"); 42 #endif 43 44 #include <ctype.h> 45 #include <errno.h> 46 #include <float.h> 47 #include <limits.h> 48 #include <math.h> 49 #include <stdlib.h> 50 #include <string.h> 51 52 #include "lint1.h" 53 #include "cgram.h" 54 55 #define CHAR_MASK ((1U << CHAR_SIZE) - 1) 56 57 58 /* Current position (it's also updated when an included file is parsed) */ 59 pos_t curr_pos = { "", 1, 0 }; 60 61 /* 62 * Current position in C source (not updated when an included file is 63 * parsed). 64 */ 65 pos_t csrc_pos = { "", 1, 0 }; 66 67 bool in_gcc_attribute; 68 bool in_system_header; 69 70 /* Valid values for 'since' are 78, 90, 99, 11. */ 71 #define kwdef(name, token, scl, tspec, tqual, since, gcc, attr, deco) \ 72 { \ 73 name, token, scl, tspec, tqual, \ 74 (since) == 90, \ 75 /* CONSTCOND */ (since) == 99 || (since) == 11, \ 76 (gcc) > 0, (attr) > 0, \ 77 ((deco) & 1) != 0, ((deco) & 2) != 0, ((deco) & 4) != 0, \ 78 } 79 #define kwdef_token(name, token, since, gcc, deco) \ 80 kwdef(name, token, 0, 0, 0, since, gcc, 0, deco) 81 #define kwdef_sclass(name, sclass, since, gcc, deco) \ 82 kwdef(name, T_SCLASS, sclass, 0, 0, since, gcc, 0, deco) 83 #define kwdef_type(name, tspec, since, gcc, deco) \ 84 kwdef(name, T_TYPE, 0, tspec, 0, since, gcc, 0, deco) 85 #define kwdef_tqual(name, tqual, since, gcc, deco) \ 86 kwdef(name, T_QUAL, 0, 0, tqual, since, gcc, 0, deco) 87 #define kwdef_keyword(name, token) \ 88 kwdef(name, token, 0, 0, 0, 78, 0, 0, 1) 89 #define kwdef_gcc_attr(name, token) \ 90 kwdef(name, token, 0, 0, 0, 78, 1, 1, 5) 91 92 /* During initialization, these keywords are written to the symbol table. */ 93 static const struct keyword { 94 const char *kw_name; /* keyword */ 95 int kw_token; /* token returned by yylex() */ 96 scl_t kw_scl; /* storage class if kw_token T_SCLASS */ 97 tspec_t kw_tspec; /* type spec. if kw_token 98 * T_TYPE or T_STRUCT_OR_UNION */ 99 tqual_t kw_tqual; /* type qual. if kw_token T_QUAL */ 100 bool kw_c90:1; /* C90 keyword */ 101 bool kw_c99:1; /* C99 keyword */ 102 bool kw_gcc:1; /* GCC keyword */ 103 bool kw_attr:1; /* GCC attribute */ 104 bool kw_plain:1; /* 'name' */ 105 bool kw_leading:1; /* '__name' */ 106 bool kw_both:1; /* '__name__' */ 107 } keywords[] = { 108 kwdef_gcc_attr( "alias", T_AT_ALIAS), 109 kwdef_keyword( "_Alignas", T_ALIGNAS), 110 kwdef_keyword( "_Alignof", T_ALIGNOF), 111 kwdef_gcc_attr( "aligned", T_AT_ALIGNED), 112 kwdef_token( "__alignof__", T_ALIGNOF, 78,0,1), 113 kwdef_gcc_attr( "alloc_size", T_AT_ALLOC_SIZE), 114 kwdef_gcc_attr( "always_inline",T_AT_ALWAYS_INLINE), 115 kwdef_token( "asm", T_ASM, 78,1,7), 116 kwdef_token( "attribute", T_ATTRIBUTE, 78,1,6), 117 kwdef_sclass( "auto", AUTO, 78,0,1), 118 kwdef_type( "_Bool", BOOL, 99,0,1), 119 kwdef_gcc_attr( "bounded", T_AT_BOUNDED), 120 kwdef_keyword( "break", T_BREAK), 121 kwdef_gcc_attr( "buffer", T_AT_BUFFER), 122 kwdef_token( "__builtin_offsetof", T_BUILTIN_OFFSETOF, 78,1,1), 123 kwdef_keyword( "case", T_CASE), 124 kwdef_type( "char", CHAR, 78,0,1), 125 kwdef_gcc_attr( "cold", T_AT_COLD), 126 kwdef_gcc_attr( "common", T_AT_COMMON), 127 kwdef_type( "_Complex", COMPLEX, 99,0,1), 128 kwdef_tqual( "const", CONST, 90,0,7), 129 kwdef_gcc_attr( "constructor", T_AT_CONSTRUCTOR), 130 kwdef_keyword( "continue", T_CONTINUE), 131 kwdef_keyword( "default", T_DEFAULT), 132 kwdef_gcc_attr( "deprecated", T_AT_DEPRECATED), 133 kwdef_gcc_attr( "destructor", T_AT_DESTRUCTOR), 134 kwdef_gcc_attr( "disable_sanitizer_instrumentation", 135 T_AT_DISABLE_SANITIZER_INSTRUMENTATION), 136 kwdef_keyword( "do", T_DO), 137 kwdef_type( "double", DOUBLE, 78,0,1), 138 kwdef_keyword( "else", T_ELSE), 139 kwdef_keyword( "enum", T_ENUM), 140 kwdef_token( "__extension__",T_EXTENSION, 78,1,1), 141 kwdef_sclass( "extern", EXTERN, 78,0,1), 142 kwdef_gcc_attr( "fallthrough", T_AT_FALLTHROUGH), 143 kwdef_type( "float", FLOAT, 78,0,1), 144 kwdef_keyword( "for", T_FOR), 145 kwdef_gcc_attr( "format", T_AT_FORMAT), 146 kwdef_gcc_attr( "format_arg", T_AT_FORMAT_ARG), 147 kwdef_token( "_Generic", T_GENERIC, 11,0,1), 148 kwdef_gcc_attr( "gnu_inline", T_AT_GNU_INLINE), 149 kwdef_gcc_attr( "gnu_printf", T_AT_FORMAT_GNU_PRINTF), 150 kwdef_keyword( "goto", T_GOTO), 151 kwdef_gcc_attr( "hot", T_AT_HOT), 152 kwdef_keyword( "if", T_IF), 153 kwdef_token( "__imag__", T_IMAG, 78,1,1), 154 kwdef_sclass( "inline", INLINE, 99,0,7), 155 kwdef_type( "int", INT, 78,0,1), 156 #ifdef INT128_SIZE 157 kwdef_type( "__int128_t", INT128, 99,0,1), 158 #endif 159 kwdef_type( "long", LONG, 78,0,1), 160 kwdef_gcc_attr( "malloc", T_AT_MALLOC), 161 kwdef_gcc_attr( "may_alias", T_AT_MAY_ALIAS), 162 kwdef_gcc_attr( "minbytes", T_AT_MINBYTES), 163 kwdef_gcc_attr( "mode", T_AT_MODE), 164 kwdef_gcc_attr("no_instrument_function", 165 T_AT_NO_INSTRUMENT_FUNCTION), 166 kwdef_gcc_attr( "no_sanitize", T_AT_NO_SANITIZE), 167 kwdef_gcc_attr( "no_sanitize_thread", T_AT_NO_SANITIZE_THREAD), 168 kwdef_gcc_attr( "noinline", T_AT_NOINLINE), 169 kwdef_gcc_attr( "nonnull", T_AT_NONNULL), 170 kwdef_gcc_attr( "nonstring", T_AT_NONSTRING), 171 kwdef_token( "_Noreturn", T_NORETURN, 11,0,1), 172 kwdef_gcc_attr( "noreturn", T_AT_NORETURN), 173 kwdef_gcc_attr( "nothrow", T_AT_NOTHROW), 174 kwdef_gcc_attr( "optimize", T_AT_OPTIMIZE), 175 kwdef_gcc_attr( "optnone", T_AT_OPTNONE), 176 kwdef_gcc_attr( "packed", T_AT_PACKED), 177 kwdef_token( "__packed", T_PACKED, 78,0,1), 178 kwdef_gcc_attr( "pcs", T_AT_PCS), 179 kwdef_gcc_attr( "printf", T_AT_FORMAT_PRINTF), 180 kwdef_gcc_attr( "pure", T_AT_PURE), 181 kwdef_token( "__real__", T_REAL, 78,1,1), 182 kwdef_sclass( "register", REG, 78,0,1), 183 kwdef_gcc_attr( "regparm", T_AT_REGPARM), 184 kwdef_tqual( "restrict", RESTRICT, 99,0,7), 185 kwdef_keyword( "return", T_RETURN), 186 kwdef_gcc_attr( "returns_nonnull",T_AT_RETURNS_NONNULL), 187 kwdef_gcc_attr( "returns_twice",T_AT_RETURNS_TWICE), 188 kwdef_gcc_attr( "scanf", T_AT_FORMAT_SCANF), 189 kwdef( "section", T_AT_SECTION, 0,0,0, 78,1,1,7), 190 kwdef_gcc_attr( "sentinel", T_AT_SENTINEL), 191 kwdef_type( "short", SHORT, 78,0,1), 192 kwdef_type( "signed", SIGNED, 90,0,3), 193 kwdef_keyword( "sizeof", T_SIZEOF), 194 kwdef_sclass( "static", STATIC, 78,0,1), 195 kwdef_keyword( "_Static_assert", T_STATIC_ASSERT), 196 kwdef_gcc_attr( "strfmon", T_AT_FORMAT_STRFMON), 197 kwdef_gcc_attr( "strftime", T_AT_FORMAT_STRFTIME), 198 kwdef_gcc_attr( "string", T_AT_STRING), 199 kwdef("struct", T_STRUCT_OR_UNION, 0, STRUCT, 0, 78,0,0,1), 200 kwdef_keyword( "switch", T_SWITCH), 201 kwdef_token( "__symbolrename", T_SYMBOLRENAME, 78,0,1), 202 kwdef_gcc_attr( "syslog", T_AT_FORMAT_SYSLOG), 203 kwdef_gcc_attr( "target", T_AT_TARGET), 204 kwdef_tqual( "__thread", THREAD, 78,1,1), 205 kwdef_tqual( "_Thread_local", THREAD, 11,0,1), 206 kwdef_gcc_attr( "tls_model", T_AT_TLS_MODEL), 207 kwdef_gcc_attr( "transparent_union", T_AT_TUNION), 208 kwdef_sclass( "typedef", TYPEDEF, 78,0,1), 209 kwdef_token( "typeof", T_TYPEOF, 78,1,7), 210 #ifdef INT128_SIZE 211 kwdef_type( "__uint128_t", UINT128, 99,0,1), 212 #endif 213 kwdef("union", T_STRUCT_OR_UNION, 0, UNION, 0, 78,0,0,1), 214 kwdef_type( "unsigned", UNSIGN, 78,0,1), 215 kwdef_gcc_attr( "unused", T_AT_UNUSED), 216 kwdef_gcc_attr( "used", T_AT_USED), 217 kwdef_gcc_attr( "visibility", T_AT_VISIBILITY), 218 kwdef_type( "void", VOID, 78,0,1), 219 kwdef_tqual( "volatile", VOLATILE, 90,0,7), 220 kwdef_gcc_attr( "warn_unused_result", T_AT_WARN_UNUSED_RESULT), 221 kwdef_gcc_attr( "weak", T_AT_WEAK), 222 kwdef_keyword( "while", T_WHILE), 223 #undef kwdef 224 #undef kwdef_token 225 #undef kwdef_sclass 226 #undef kwdef_type 227 #undef kwdef_tqual 228 #undef kwdef_keyword 229 #undef kwdef_gcc_attr 230 }; 231 232 /* Symbol table */ 233 static sym_t *symtab[HSHSIZ1]; 234 235 /* type of next expected symbol */ 236 symt_t symtyp; 237 238 239 static int get_escaped_char(int); 240 241 242 static unsigned int 243 hash(const char *s) 244 { 245 unsigned int v; 246 const char *p; 247 248 v = 0; 249 for (p = s; *p != '\0'; p++) { 250 v = (v << 4) + (unsigned char)*p; 251 v ^= v >> 28; 252 } 253 return v % HSHSIZ1; 254 } 255 256 static void 257 symtab_add(sym_t *sym) 258 { 259 unsigned int h; 260 261 h = hash(sym->s_name); 262 if ((sym->s_symtab_next = symtab[h]) != NULL) 263 symtab[h]->s_symtab_ref = &sym->s_symtab_next; 264 sym->s_symtab_ref = &symtab[h]; 265 symtab[h] = sym; 266 } 267 268 static sym_t * 269 symtab_search(sbuf_t *sb) 270 { 271 272 unsigned int h = hash(sb->sb_name); 273 for (sym_t *sym = symtab[h]; sym != NULL; sym = sym->s_symtab_next) { 274 if (strcmp(sym->s_name, sb->sb_name) != 0) 275 continue; 276 277 const struct keyword *kw = sym->s_keyword; 278 if (kw != NULL && !kw->kw_attr) 279 return sym; 280 if (kw != NULL && in_gcc_attribute) 281 return sym; 282 if (kw == NULL && !in_gcc_attribute && sym->s_kind == symtyp) 283 return sym; 284 } 285 286 return NULL; 287 } 288 289 static void 290 symtab_remove(sym_t *sym) 291 { 292 293 if ((*sym->s_symtab_ref = sym->s_symtab_next) != NULL) 294 sym->s_symtab_next->s_symtab_ref = sym->s_symtab_ref; 295 sym->s_symtab_next = NULL; 296 } 297 298 static void 299 symtab_remove_locals(void) 300 { 301 302 for (size_t i = 0; i < HSHSIZ1; i++) { 303 for (sym_t *sym = symtab[i]; sym != NULL; ) { 304 sym_t *next = sym->s_symtab_next; 305 if (sym->s_block_level >= 1) 306 symtab_remove(sym); 307 sym = next; 308 } 309 } 310 } 311 312 #ifdef DEBUG 313 static int 314 sym_by_name(const void *va, const void *vb) 315 { 316 const sym_t *a = *(const sym_t *const *)va; 317 const sym_t *b = *(const sym_t *const *)vb; 318 319 return strcmp(a->s_name, b->s_name); 320 } 321 322 struct syms { 323 const sym_t **items; 324 size_t len; 325 size_t cap; 326 }; 327 328 static void 329 syms_add(struct syms *syms, const sym_t *sym) 330 { 331 while (syms->len >= syms->cap) { 332 syms->cap *= 2; 333 syms->items = xrealloc(syms->items, 334 syms->cap * sizeof(syms->items[0])); 335 } 336 syms->items[syms->len++] = sym; 337 } 338 339 void 340 debug_symtab(void) 341 { 342 struct syms syms = { xcalloc(64, sizeof(syms.items[0])), 0, 64 }; 343 344 for (int level = -1;; level++) { 345 bool more = false; 346 size_t n = sizeof(symtab) / sizeof(symtab[0]); 347 348 syms.len = 0; 349 for (size_t i = 0; i < n; i++) { 350 for (sym_t *sym = symtab[i]; sym != NULL;) { 351 if (sym->s_block_level == level && 352 sym->s_keyword == NULL) 353 syms_add(&syms, sym); 354 if (sym->s_block_level > level) 355 more = true; 356 sym = sym->s_symtab_next; 357 } 358 } 359 360 if (syms.len > 0) { 361 debug_printf("symbol table level %d\n", level); 362 debug_indent_inc(); 363 qsort(syms.items, syms.len, sizeof(syms.items[0]), 364 sym_by_name); 365 for (size_t i = 0; i < syms.len; i++) 366 debug_sym("", syms.items[i], "\n"); 367 debug_indent_dec(); 368 369 lint_assert(level != -1); 370 } 371 372 if (!more) 373 break; 374 } 375 376 free(syms.items); 377 } 378 #endif 379 380 static void 381 add_keyword(const struct keyword *kw, bool leading, bool trailing) 382 { 383 sym_t *sym; 384 char buf[256]; 385 const char *name; 386 387 if (!leading && !trailing) { 388 name = kw->kw_name; 389 } else { 390 (void)snprintf(buf, sizeof(buf), "%s%s%s", 391 leading ? "__" : "", kw->kw_name, trailing ? "__" : ""); 392 name = xstrdup(buf); 393 } 394 395 sym = block_zero_alloc(sizeof(*sym)); 396 sym->s_name = name; 397 sym->s_keyword = kw; 398 sym->u.s_keyword.sk_token = kw->kw_token; 399 if (kw->kw_token == T_TYPE || kw->kw_token == T_STRUCT_OR_UNION) { 400 sym->u.s_keyword.sk_tspec = kw->kw_tspec; 401 } else if (kw->kw_token == T_SCLASS) { 402 sym->s_scl = kw->kw_scl; 403 } else if (kw->kw_token == T_QUAL) { 404 sym->u.s_keyword.sk_qualifier = kw->kw_tqual; 405 } 406 407 symtab_add(sym); 408 } 409 410 /* 411 * All keywords are written to the symbol table. This saves us looking 412 * in an extra table for each name we found. 413 */ 414 void 415 initscan(void) 416 { 417 const struct keyword *kw, *end; 418 419 end = keywords + sizeof(keywords) / sizeof(keywords[0]); 420 for (kw = keywords; kw != end; kw++) { 421 if ((kw->kw_c90 || kw->kw_c99) && tflag) 422 continue; 423 /* FIXME: C99 and GCC are independent. */ 424 if (kw->kw_c99 && !(Sflag || allow_gcc)) 425 continue; 426 if (kw->kw_gcc && !allow_gcc) 427 continue; 428 if (kw->kw_plain) 429 add_keyword(kw, false, false); 430 if (kw->kw_leading) 431 add_keyword(kw, true, false); 432 if (kw->kw_both) 433 add_keyword(kw, true, true); 434 } 435 } 436 437 /* 438 * Read a character and ensure that it is positive (except EOF). 439 * Increment line count(s) if necessary. 440 */ 441 static int 442 inpc(void) 443 { 444 int c; 445 446 if ((c = lex_input()) == EOF) 447 return c; 448 c &= CHAR_MASK; 449 if (c == '\0') 450 return EOF; /* lex returns 0 on EOF. */ 451 if (c == '\n') 452 lex_next_line(); 453 return c; 454 } 455 456 static int 457 lex_keyword(sym_t *sym) 458 { 459 int t; 460 461 if ((t = sym->u.s_keyword.sk_token) == T_SCLASS) { 462 yylval.y_scl = sym->s_scl; 463 } else if (t == T_TYPE || t == T_STRUCT_OR_UNION) { 464 yylval.y_tspec = sym->u.s_keyword.sk_tspec; 465 } else if (t == T_QUAL) { 466 yylval.y_tqual = sym->u.s_keyword.sk_qualifier; 467 } 468 return t; 469 } 470 471 /* 472 * Lex has found a letter followed by zero or more letters or digits. 473 * It looks for a symbol in the symbol table with the same name. This 474 * symbol must either be a keyword or a symbol of the type required by 475 * symtyp (label, member, tag, ...). 476 * 477 * If it is a keyword, the token is returned. In some cases it is described 478 * more deeply by data written to yylval. 479 * 480 * If it is a symbol, T_NAME is returned and the name is stored in yylval. 481 * If there is already a symbol of the same name and type in the symbol 482 * table, yylval.y_name->sb_sym points there. 483 */ 484 extern int 485 lex_name(const char *yytext, size_t yyleng) 486 { 487 char *s; 488 sbuf_t *sb; 489 sym_t *sym; 490 int tok; 491 492 sb = xmalloc(sizeof(*sb)); 493 sb->sb_name = yytext; 494 sb->sb_len = yyleng; 495 if ((sym = symtab_search(sb)) != NULL && sym->s_keyword != NULL) { 496 free(sb); 497 return lex_keyword(sym); 498 } 499 500 sb->sb_sym = sym; 501 502 if (sym != NULL) { 503 lint_assert(block_level >= sym->s_block_level); 504 sb->sb_name = sym->s_name; 505 tok = sym->s_scl == TYPEDEF ? T_TYPENAME : T_NAME; 506 } else { 507 s = block_zero_alloc(yyleng + 1); 508 (void)memcpy(s, yytext, yyleng + 1); 509 sb->sb_name = s; 510 tok = T_NAME; 511 } 512 513 yylval.y_name = sb; 514 return tok; 515 } 516 517 /* 518 * Convert a string representing an integer into internal representation. 519 * Return T_CON, storing the numeric value in yylval, for yylex. 520 */ 521 int 522 lex_integer_constant(const char *yytext, size_t yyleng, int base) 523 { 524 int l_suffix, u_suffix; 525 size_t len; 526 const char *cp; 527 char c, *eptr; 528 tspec_t typ; 529 bool ansiu; 530 bool warned = false; 531 uint64_t uq = 0; 532 533 /* C11 6.4.4.1p5 */ 534 static const tspec_t suffix_type[2][3] = { 535 { INT, LONG, QUAD, }, 536 { UINT, ULONG, UQUAD, } 537 }; 538 539 cp = yytext; 540 len = yyleng; 541 542 /* skip 0[xX] or 0[bB] */ 543 if (base == 16 || base == 2) { 544 cp += 2; 545 len -= 2; 546 } 547 548 /* read suffixes */ 549 l_suffix = u_suffix = 0; 550 for (;;) { 551 if ((c = cp[len - 1]) == 'l' || c == 'L') { 552 l_suffix++; 553 } else if (c == 'u' || c == 'U') { 554 u_suffix++; 555 } else { 556 break; 557 } 558 len--; 559 } 560 if (l_suffix > 2 || u_suffix > 1) { 561 /* malformed integer constant */ 562 warning(251); 563 if (l_suffix > 2) 564 l_suffix = 2; 565 if (u_suffix > 1) 566 u_suffix = 1; 567 } 568 if (tflag && u_suffix != 0) { 569 /* suffix U is illegal in traditional C */ 570 warning(97); 571 } 572 typ = suffix_type[u_suffix][l_suffix]; 573 574 errno = 0; 575 576 uq = (uint64_t)strtoull(cp, &eptr, base); 577 lint_assert(eptr == cp + len); 578 if (errno != 0) { 579 /* integer constant out of range */ 580 warning(252); 581 warned = true; 582 } 583 584 /* 585 * If the value is too big for the current type, we must choose 586 * another type. 587 */ 588 ansiu = false; 589 switch (typ) { 590 case INT: 591 if (uq <= TARG_INT_MAX) { 592 /* ok */ 593 } else if (uq <= TARG_UINT_MAX && base != 10) { 594 typ = UINT; 595 } else if (uq <= TARG_LONG_MAX) { 596 typ = LONG; 597 } else { 598 typ = ULONG; 599 if (uq > TARG_ULONG_MAX && !warned) { 600 /* integer constant out of range */ 601 warning(252); 602 } 603 } 604 if (typ == UINT || typ == ULONG) { 605 if (tflag) { 606 typ = LONG; 607 } else if (!sflag) { 608 /* 609 * Remember that the constant is unsigned 610 * only in ANSI C 611 */ 612 ansiu = true; 613 } 614 } 615 break; 616 case UINT: 617 if (uq > TARG_UINT_MAX) { 618 typ = ULONG; 619 if (uq > TARG_ULONG_MAX && !warned) { 620 /* integer constant out of range */ 621 warning(252); 622 } 623 } 624 break; 625 case LONG: 626 if (uq > TARG_LONG_MAX && !tflag) { 627 typ = ULONG; 628 if (!sflag) 629 ansiu = true; 630 if (uq > TARG_ULONG_MAX && !warned) { 631 /* integer constant out of range */ 632 warning(252); 633 } 634 } 635 break; 636 case ULONG: 637 if (uq > TARG_ULONG_MAX && !warned) { 638 /* integer constant out of range */ 639 warning(252); 640 } 641 break; 642 case QUAD: 643 if (uq > TARG_QUAD_MAX && !tflag) { 644 typ = UQUAD; 645 if (!sflag) 646 ansiu = true; 647 } 648 break; 649 case UQUAD: 650 if (uq > TARG_UQUAD_MAX && !warned) { 651 /* integer constant out of range */ 652 warning(252); 653 } 654 break; 655 default: 656 break; 657 } 658 659 uq = (uint64_t)convert_integer((int64_t)uq, typ, 0); 660 661 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 662 yylval.y_val->v_tspec = typ; 663 yylval.y_val->v_unsigned_since_c90 = ansiu; 664 yylval.y_val->v_quad = (int64_t)uq; 665 666 return T_CON; 667 } 668 669 /* 670 * Extend or truncate q to match t. If t is signed, sign-extend. 671 * 672 * len is the number of significant bits. If len is -1, len is set 673 * to the width of type t. 674 */ 675 int64_t 676 convert_integer(int64_t q, tspec_t t, unsigned int len) 677 { 678 uint64_t vbits; 679 680 if (len == 0) 681 len = size_in_bits(t); 682 683 vbits = value_bits(len); 684 return t == PTR || is_uinteger(t) || ((q & bit(len - 1)) == 0) 685 ? (int64_t)(q & vbits) 686 : (int64_t)(q | ~vbits); 687 } 688 689 /* 690 * Convert a string representing a floating point value into its numerical 691 * representation. Type and value are returned in yylval. 692 * 693 * XXX Currently it is not possible to convert constants of type 694 * long double which are greater than DBL_MAX. 695 */ 696 int 697 lex_floating_constant(const char *yytext, size_t yyleng) 698 { 699 const char *cp; 700 size_t len; 701 tspec_t typ; 702 char c, *eptr; 703 double d; 704 float f = 0; 705 706 cp = yytext; 707 len = yyleng; 708 709 if (cp[len - 1] == 'i') 710 len--; /* imaginary, do nothing for now */ 711 712 if ((c = cp[len - 1]) == 'f' || c == 'F') { 713 typ = FLOAT; 714 len--; 715 } else if (c == 'l' || c == 'L') { 716 typ = LDOUBLE; 717 len--; 718 } else { 719 if (c == 'd' || c == 'D') 720 len--; 721 typ = DOUBLE; 722 } 723 724 if (tflag && typ != DOUBLE) { 725 /* suffixes F and L are illegal in traditional C */ 726 warning(98); 727 } 728 729 errno = 0; 730 d = strtod(cp, &eptr); 731 if (eptr != cp + len) { 732 switch (*eptr) { 733 /* 734 * XXX: non-native non-current strtod() may not handle hex 735 * floats, ignore the rest if we find traces of hex float 736 * syntax... 737 */ 738 case 'p': 739 case 'P': 740 case 'x': 741 case 'X': 742 d = 0; 743 errno = 0; 744 break; 745 default: 746 INTERNAL_ERROR("lex_floating_constant(%s->%s)", 747 cp, eptr); 748 } 749 } 750 if (errno != 0) 751 /* floating-point constant out of range */ 752 warning(248); 753 754 if (typ == FLOAT) { 755 f = (float)d; 756 if (isfinite(f) == 0) { 757 /* floating-point constant out of range */ 758 warning(248); 759 f = f > 0 ? FLT_MAX : -FLT_MAX; 760 } 761 } 762 763 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 764 yylval.y_val->v_tspec = typ; 765 if (typ == FLOAT) { 766 yylval.y_val->v_ldbl = f; 767 } else { 768 yylval.y_val->v_ldbl = d; 769 } 770 771 return T_CON; 772 } 773 774 int 775 lex_operator(int t, op_t o) 776 { 777 778 yylval.y_op = o; 779 return t; 780 } 781 782 /* Called if lex found a leading "'". */ 783 int 784 lex_character_constant(void) 785 { 786 size_t n; 787 int val, c; 788 789 n = 0; 790 val = 0; 791 while ((c = get_escaped_char('\'')) >= 0) { 792 val = (val << CHAR_SIZE) + c; 793 n++; 794 } 795 if (c == -2) { 796 /* unterminated character constant */ 797 error(253); 798 } else if (n > sizeof(int) || (n > 1 && (pflag || hflag))) { 799 /* XXX: should rather be sizeof(TARG_INT) */ 800 801 /* too many characters in character constant */ 802 error(71); 803 } else if (n > 1) { 804 /* multi-character character constant */ 805 warning(294); 806 } else if (n == 0) { 807 /* empty character constant */ 808 error(73); 809 } 810 if (n == 1) 811 val = (int)convert_integer(val, CHAR, CHAR_SIZE); 812 813 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 814 yylval.y_val->v_tspec = INT; 815 yylval.y_val->v_quad = val; 816 817 return T_CON; 818 } 819 820 /* 821 * Called if lex found a leading L\' 822 */ 823 int 824 lex_wide_character_constant(void) 825 { 826 static char buf[MB_LEN_MAX + 1]; 827 size_t n, nmax; 828 int c; 829 wchar_t wc; 830 831 nmax = MB_CUR_MAX; 832 833 n = 0; 834 while ((c = get_escaped_char('\'')) >= 0) { 835 if (n < nmax) 836 buf[n] = (char)c; 837 n++; 838 } 839 840 wc = 0; 841 842 if (c == -2) { 843 /* unterminated character constant */ 844 error(253); 845 } else if (n == 0) { 846 /* empty character constant */ 847 error(73); 848 } else if (n > nmax) { 849 n = nmax; 850 /* too many characters in character constant */ 851 error(71); 852 } else { 853 buf[n] = '\0'; 854 (void)mbtowc(NULL, NULL, 0); 855 if (mbtowc(&wc, buf, nmax) < 0) 856 /* invalid multibyte character */ 857 error(291); 858 } 859 860 yylval.y_val = xcalloc(1, sizeof(*yylval.y_val)); 861 yylval.y_val->v_tspec = WCHAR; 862 yylval.y_val->v_quad = wc; 863 864 return T_CON; 865 } 866 867 /* 868 * Read a character which is part of a character constant or of a string 869 * and handle escapes. 870 * 871 * The argument is the character which delimits the character constant or 872 * string. 873 * 874 * Returns -1 if the end of the character constant or string is reached, 875 * -2 if the EOF is reached, and the character otherwise. 876 */ 877 static int 878 get_escaped_char(int delim) 879 { 880 static int pbc = -1; 881 int n, c, v; 882 883 if (pbc == -1) { 884 c = inpc(); 885 } else { 886 c = pbc; 887 pbc = -1; 888 } 889 if (c == delim) 890 return -1; 891 switch (c) { 892 case '\n': 893 if (tflag) { 894 /* newline in string or char constant */ 895 error(254); 896 return -2; 897 } 898 return c; 899 case 0: 900 /* syntax error '%s' */ 901 error(249, "EOF or null byte in literal"); 902 return -2; 903 case EOF: 904 return -2; 905 case '\\': 906 switch (c = inpc()) { 907 case '"': 908 if (tflag && delim == '\'') 909 /* \" inside character constants undef... */ 910 warning(262); 911 return '"'; 912 case '\'': 913 return '\''; 914 case '?': 915 if (tflag) 916 /* \? undefined in traditional C */ 917 warning(263); 918 return '?'; 919 case '\\': 920 return '\\'; 921 case 'a': 922 if (tflag) 923 /* \a undefined in traditional C */ 924 warning(81); 925 return '\a'; 926 case 'b': 927 return '\b'; 928 case 'f': 929 return '\f'; 930 case 'n': 931 return '\n'; 932 case 'r': 933 return '\r'; 934 case 't': 935 return '\t'; 936 case 'v': 937 if (tflag) 938 /* \v undefined in traditional C */ 939 warning(264); 940 return '\v'; 941 case '8': case '9': 942 /* bad octal digit %c */ 943 warning(77, c); 944 /* FALLTHROUGH */ 945 case '0': case '1': case '2': case '3': 946 case '4': case '5': case '6': case '7': 947 n = 3; 948 v = 0; 949 do { 950 v = (v << 3) + (c - '0'); 951 c = inpc(); 952 } while (--n > 0 && '0' <= c && c <= '7'); 953 pbc = c; 954 if (v > TARG_UCHAR_MAX) { 955 /* character escape does not fit in character */ 956 warning(76); 957 v &= CHAR_MASK; 958 } 959 return v; 960 case 'x': 961 if (tflag) 962 /* \x undefined in traditional C */ 963 warning(82); 964 v = 0; 965 n = 0; 966 while (c = inpc(), isxdigit(c)) { 967 c = isdigit(c) ? 968 c - '0' : toupper(c) - 'A' + 10; 969 v = (v << 4) + c; 970 if (n >= 0) { 971 if ((v & ~CHAR_MASK) != 0) { 972 /* overflow in hex escape */ 973 warning(75); 974 n = -1; 975 } else { 976 n++; 977 } 978 } 979 } 980 pbc = c; 981 if (n == 0) { 982 /* no hex digits follow \x */ 983 error(74); 984 } if (n == -1) { 985 v &= CHAR_MASK; 986 } 987 return v; 988 case '\n': 989 return get_escaped_char(delim); 990 case EOF: 991 return -2; 992 default: 993 if (isprint(c)) { 994 /* dubious escape \%c */ 995 warning(79, c); 996 } else { 997 /* dubious escape \%o */ 998 warning(80, c); 999 } 1000 } 1001 } 1002 return c; 1003 } 1004 1005 /* See https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html */ 1006 static void 1007 parse_line_directive_flags(const char *p, 1008 bool *is_begin, bool *is_end, bool *is_system) 1009 { 1010 1011 *is_begin = false; 1012 *is_end = false; 1013 *is_system = false; 1014 1015 while (*p != '\0') { 1016 const char *word_start, *word_end; 1017 1018 while (ch_isspace(*p)) 1019 p++; 1020 1021 word_start = p; 1022 while (*p != '\0' && !ch_isspace(*p)) 1023 p++; 1024 word_end = p; 1025 1026 if (word_end - word_start == 1 && word_start[0] == '1') 1027 *is_begin = true; 1028 if (word_end - word_start == 1 && word_start[0] == '2') 1029 *is_end = true; 1030 if (word_end - word_start == 1 && word_start[0] == '3') 1031 *is_system = true; 1032 /* Flag '4' is only interesting for C++. */ 1033 } 1034 } 1035 1036 /* 1037 * Called for preprocessor directives. Currently implemented are: 1038 * # pragma [argument...] 1039 * # lineno 1040 * # lineno "filename" 1041 * # lineno "filename" GCC-flag... 1042 */ 1043 void 1044 lex_directive(const char *yytext) 1045 { 1046 const char *cp, *fn; 1047 char c, *eptr; 1048 size_t fnl; 1049 long ln; 1050 bool is_begin, is_end, is_system; 1051 1052 static bool first = true; 1053 1054 /* Go to first non-whitespace after # */ 1055 for (cp = yytext + 1; (c = *cp) == ' ' || c == '\t'; cp++) 1056 continue; 1057 1058 if (!ch_isdigit(c)) { 1059 if (strncmp(cp, "pragma", 6) == 0 && ch_isspace(cp[6])) 1060 return; 1061 error: 1062 /* undefined or invalid # directive */ 1063 warning(255); 1064 return; 1065 } 1066 ln = strtol(--cp, &eptr, 10); 1067 if (eptr == cp) 1068 goto error; 1069 if ((c = *(cp = eptr)) != ' ' && c != '\t' && c != '\0') 1070 goto error; 1071 while ((c = *cp++) == ' ' || c == '\t') 1072 continue; 1073 if (c != '\0') { 1074 if (c != '"') 1075 goto error; 1076 fn = cp; 1077 while ((c = *cp) != '"' && c != '\0') 1078 cp++; 1079 if (c != '"') 1080 goto error; 1081 if ((fnl = cp++ - fn) > PATH_MAX) 1082 goto error; 1083 /* empty string means stdin */ 1084 if (fnl == 0) { 1085 fn = "{standard input}"; 1086 fnl = 16; /* strlen (fn) */ 1087 } 1088 curr_pos.p_file = record_filename(fn, fnl); 1089 /* 1090 * If this is the first directive, the name is the name 1091 * of the C source file as specified at the command line. 1092 * It is written to the output file. 1093 */ 1094 if (first) { 1095 csrc_pos.p_file = curr_pos.p_file; 1096 outsrc(transform_filename(curr_pos.p_file, 1097 strlen(curr_pos.p_file))); 1098 first = false; 1099 } 1100 1101 parse_line_directive_flags(cp, &is_begin, &is_end, &is_system); 1102 update_location(curr_pos.p_file, (int)ln, is_begin, is_end); 1103 in_system_header = is_system; 1104 } 1105 curr_pos.p_line = (int)ln - 1; 1106 curr_pos.p_uniq = 0; 1107 if (curr_pos.p_file == csrc_pos.p_file) { 1108 csrc_pos.p_line = (int)ln - 1; 1109 csrc_pos.p_uniq = 0; 1110 } 1111 } 1112 1113 /* 1114 * Handle lint comments such as ARGSUSED. 1115 * 1116 * If one of these comments is recognized, the argument, if any, is 1117 * parsed and a function which handles this comment is called. 1118 */ 1119 void 1120 lex_comment(void) 1121 { 1122 int c, lc; 1123 static const struct { 1124 const char *keywd; 1125 bool arg; 1126 void (*func)(int); 1127 } keywtab[] = { 1128 { "ARGSUSED", true, argsused }, 1129 { "BITFIELDTYPE", false, bitfieldtype }, 1130 { "CONSTCOND", false, constcond }, 1131 { "CONSTANTCOND", false, constcond }, 1132 { "CONSTANTCONDITION", false, constcond }, 1133 { "FALLTHRU", false, fallthru }, 1134 { "FALLTHROUGH", false, fallthru }, 1135 { "FALL THROUGH", false, fallthru }, 1136 { "fallthrough", false, fallthru }, 1137 { "LINTLIBRARY", false, lintlib }, 1138 { "LINTED", true, linted }, 1139 { "LONGLONG", false, longlong }, 1140 { "NOSTRICT", true, linted }, 1141 { "NOTREACHED", false, not_reached }, 1142 { "PRINTFLIKE", true, printflike }, 1143 { "PROTOLIB", true, protolib }, 1144 { "SCANFLIKE", true, scanflike }, 1145 { "VARARGS", true, varargs }, 1146 }; 1147 char keywd[32]; 1148 char arg[32]; 1149 size_t l, i; 1150 int a; 1151 bool eoc; 1152 1153 eoc = false; 1154 1155 /* Skip whitespace after the start of the comment */ 1156 while (c = inpc(), isspace(c)) 1157 continue; 1158 1159 /* Read the potential keyword to keywd */ 1160 l = 0; 1161 while (c != EOF && l < sizeof(keywd) - 1 && 1162 (isalpha(c) || isspace(c))) { 1163 if (islower(c) && l > 0 && ch_isupper(keywd[0])) 1164 break; 1165 keywd[l++] = (char)c; 1166 c = inpc(); 1167 } 1168 while (l > 0 && ch_isspace(keywd[l - 1])) 1169 l--; 1170 keywd[l] = '\0'; 1171 1172 /* look for the keyword */ 1173 for (i = 0; i < sizeof(keywtab) / sizeof(keywtab[0]); i++) { 1174 if (strcmp(keywtab[i].keywd, keywd) == 0) 1175 break; 1176 } 1177 if (i == sizeof(keywtab) / sizeof(keywtab[0])) 1178 goto skip_rest; 1179 1180 /* skip whitespace after the keyword */ 1181 while (isspace(c)) 1182 c = inpc(); 1183 1184 /* read the argument, if the keyword accepts one and there is one */ 1185 l = 0; 1186 if (keywtab[i].arg) { 1187 while (isdigit(c) && l < sizeof(arg) - 1) { 1188 arg[l++] = (char)c; 1189 c = inpc(); 1190 } 1191 } 1192 arg[l] = '\0'; 1193 a = l != 0 ? atoi(arg) : -1; 1194 1195 /* skip whitespace after the argument */ 1196 while (isspace(c)) 1197 c = inpc(); 1198 1199 if (c != '*' || (c = inpc()) != '/') { 1200 if (keywtab[i].func != linted) 1201 /* extra characters in lint comment */ 1202 warning(257); 1203 } else { 1204 /* 1205 * remember that we have already found the end of the 1206 * comment 1207 */ 1208 eoc = true; 1209 } 1210 1211 if (keywtab[i].func != NULL) 1212 (*keywtab[i].func)(a); 1213 1214 skip_rest: 1215 while (!eoc) { 1216 lc = c; 1217 if ((c = inpc()) == EOF) { 1218 /* unterminated comment */ 1219 error(256); 1220 break; 1221 } 1222 if (lc == '*' && c == '/') 1223 eoc = true; 1224 } 1225 } 1226 1227 /* 1228 * Handle // style comments 1229 */ 1230 void 1231 lex_slash_slash_comment(void) 1232 { 1233 int c; 1234 1235 if (!allow_c99 && !allow_gcc) 1236 /* %s does not support // comments */ 1237 gnuism(312, allow_c90 ? "C90" : "traditional C"); 1238 1239 while ((c = inpc()) != EOF && c != '\n') 1240 continue; 1241 } 1242 1243 /* 1244 * Clear flags for lint comments LINTED, LONGLONG and CONSTCOND. 1245 * clear_warn_flags is called after function definitions and global and 1246 * local declarations and definitions. It is also called between 1247 * the controlling expression and the body of control statements 1248 * (if, switch, for, while). 1249 */ 1250 void 1251 clear_warn_flags(void) 1252 { 1253 1254 lwarn = LWARN_ALL; 1255 quadflg = false; 1256 constcond_flag = false; 1257 } 1258 1259 /* 1260 * Strings are stored in a dynamically allocated buffer and passed 1261 * in yylval.y_string to the parser. The parser or the routines called 1262 * by the parser are responsible for freeing this buffer. 1263 */ 1264 int 1265 lex_string(void) 1266 { 1267 unsigned char *s; 1268 int c; 1269 size_t len, max; 1270 strg_t *strg; 1271 1272 s = xmalloc(max = 64); 1273 1274 len = 0; 1275 while ((c = get_escaped_char('"')) >= 0) { 1276 /* +1 to reserve space for a trailing NUL character */ 1277 if (len + 1 == max) 1278 s = xrealloc(s, max *= 2); 1279 s[len++] = (char)c; 1280 } 1281 s[len] = '\0'; 1282 if (c == -2) 1283 /* unterminated string constant */ 1284 error(258); 1285 1286 strg = xcalloc(1, sizeof(*strg)); 1287 strg->st_char = true; 1288 strg->st_len = len; 1289 strg->st_mem = s; 1290 1291 yylval.y_string = strg; 1292 return T_STRING; 1293 } 1294 1295 int 1296 lex_wide_string(void) 1297 { 1298 char *s; 1299 int c, n; 1300 size_t i, wi; 1301 size_t len, max, wlen; 1302 wchar_t *ws; 1303 strg_t *strg; 1304 1305 s = xmalloc(max = 64); 1306 len = 0; 1307 while ((c = get_escaped_char('"')) >= 0) { 1308 /* +1 to save space for a trailing NUL character */ 1309 if (len + 1 >= max) 1310 s = xrealloc(s, max *= 2); 1311 s[len++] = (char)c; 1312 } 1313 s[len] = '\0'; 1314 if (c == -2) 1315 /* unterminated string constant */ 1316 error(258); 1317 1318 /* get length of wide-character string */ 1319 (void)mblen(NULL, 0); 1320 for (i = 0, wlen = 0; i < len; i += n, wlen++) { 1321 if ((n = mblen(&s[i], MB_CUR_MAX)) == -1) { 1322 /* invalid multibyte character */ 1323 error(291); 1324 break; 1325 } 1326 if (n == 0) 1327 n = 1; 1328 } 1329 1330 ws = xmalloc((wlen + 1) * sizeof(*ws)); 1331 1332 /* convert from multibyte to wide char */ 1333 (void)mbtowc(NULL, NULL, 0); 1334 for (i = 0, wi = 0; i < len; i += n, wi++) { 1335 if ((n = mbtowc(&ws[wi], &s[i], MB_CUR_MAX)) == -1) 1336 break; 1337 if (n == 0) 1338 n = 1; 1339 } 1340 ws[wi] = 0; 1341 free(s); 1342 1343 strg = xcalloc(1, sizeof(*strg)); 1344 strg->st_char = false; 1345 strg->st_len = wlen; 1346 strg->st_mem = ws; 1347 1348 yylval.y_string = strg; 1349 return T_STRING; 1350 } 1351 1352 void 1353 lex_next_line(void) 1354 { 1355 curr_pos.p_line++; 1356 curr_pos.p_uniq = 0; 1357 debug_step("parsing %s:%d", curr_pos.p_file, curr_pos.p_line); 1358 if (curr_pos.p_file == csrc_pos.p_file) { 1359 csrc_pos.p_line++; 1360 csrc_pos.p_uniq = 0; 1361 } 1362 } 1363 1364 void 1365 lex_unknown_character(int c) 1366 { 1367 1368 /* unknown character \%o */ 1369 error(250, c); 1370 } 1371 1372 /* 1373 * The scanner does not create new symbol table entries for symbols it cannot 1374 * find in the symbol table. This is to avoid putting undeclared symbols into 1375 * the symbol table if a syntax error occurs. 1376 * 1377 * getsym is called as soon as it is probably ok to put the symbol in the 1378 * symbol table. It is still possible that symbols are put in the symbol 1379 * table that are not completely declared due to syntax errors. To avoid too 1380 * many problems in this case, symbols get type 'int' in getsym. 1381 * 1382 * XXX calls to getsym should be delayed until declare_1_* is called. 1383 */ 1384 sym_t * 1385 getsym(sbuf_t *sb) 1386 { 1387 dinfo_t *di; 1388 char *s; 1389 sym_t *sym; 1390 1391 sym = sb->sb_sym; 1392 1393 /* 1394 * During member declaration it is possible that name() looked 1395 * for symbols of type FVFT, although it should have looked for 1396 * symbols of type FTAG. Same can happen for labels. Both cases 1397 * are compensated here. 1398 */ 1399 if (symtyp == FMEMBER || symtyp == FLABEL) { 1400 if (sym == NULL || sym->s_kind == FVFT) 1401 sym = symtab_search(sb); 1402 } 1403 1404 if (sym != NULL) { 1405 lint_assert(sym->s_kind == symtyp); 1406 symtyp = FVFT; 1407 free(sb); 1408 return sym; 1409 } 1410 1411 /* create a new symbol table entry */ 1412 1413 /* labels must always be allocated at level 1 (outermost block) */ 1414 if (symtyp == FLABEL) { 1415 sym = level_zero_alloc(1, sizeof(*sym)); 1416 s = level_zero_alloc(1, sb->sb_len + 1); 1417 (void)memcpy(s, sb->sb_name, sb->sb_len + 1); 1418 sym->s_name = s; 1419 sym->s_block_level = 1; 1420 di = dcs; 1421 while (di->d_enclosing != NULL && 1422 di->d_enclosing->d_enclosing != NULL) 1423 di = di->d_enclosing; 1424 lint_assert(di->d_kind == DK_AUTO); 1425 } else { 1426 sym = block_zero_alloc(sizeof(*sym)); 1427 sym->s_name = sb->sb_name; 1428 sym->s_block_level = block_level; 1429 di = dcs; 1430 } 1431 1432 UNIQUE_CURR_POS(sym->s_def_pos); 1433 if ((sym->s_kind = symtyp) != FLABEL) 1434 sym->s_type = gettyp(INT); 1435 1436 symtyp = FVFT; 1437 1438 symtab_add(sym); 1439 1440 *di->d_ldlsym = sym; 1441 di->d_ldlsym = &sym->s_level_next; 1442 1443 free(sb); 1444 return sym; 1445 } 1446 1447 /* 1448 * Construct a temporary symbol. The symbol name starts with a digit, making 1449 * the name illegal. 1450 */ 1451 sym_t * 1452 mktempsym(type_t *tp) 1453 { 1454 static unsigned n = 0; 1455 char *s = level_zero_alloc((size_t)block_level, 64); 1456 sym_t *sym = block_zero_alloc(sizeof(*sym)); 1457 scl_t scl; 1458 1459 (void)snprintf(s, 64, "%.8u_tmp", n++); 1460 1461 scl = dcs->d_scl; 1462 if (scl == NOSCL) 1463 scl = block_level > 0 ? AUTO : EXTERN; 1464 1465 sym->s_name = s; 1466 sym->s_type = tp; 1467 sym->s_block_level = block_level; 1468 sym->s_scl = scl; 1469 sym->s_kind = FVFT; 1470 sym->s_used = true; 1471 sym->s_set = true; 1472 1473 symtab_add(sym); 1474 1475 *dcs->d_ldlsym = sym; 1476 dcs->d_ldlsym = &sym->s_level_next; 1477 1478 return sym; 1479 } 1480 1481 /* Remove a symbol forever from the symbol table. */ 1482 void 1483 rmsym(sym_t *sym) 1484 { 1485 1486 debug_step("rmsym '%s' %s '%s'", 1487 sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type)); 1488 symtab_remove(sym); 1489 1490 /* avoid that the symbol will later be put back to the symbol table */ 1491 sym->s_block_level = -1; 1492 } 1493 1494 /* 1495 * Remove all symbols from the symbol table that have the same level as the 1496 * given symbol. 1497 */ 1498 void 1499 rmsyms(sym_t *syms) 1500 { 1501 sym_t *sym; 1502 1503 /* Note the use of s_level_next instead of s_symtab_next. */ 1504 for (sym = syms; sym != NULL; sym = sym->s_level_next) { 1505 if (sym->s_block_level != -1) { 1506 debug_step("rmsyms '%s' %s '%s'", 1507 sym->s_name, symt_name(sym->s_kind), 1508 type_name(sym->s_type)); 1509 symtab_remove(sym); 1510 sym->s_symtab_ref = NULL; 1511 } 1512 } 1513 } 1514 1515 /* 1516 * Put a symbol into the symbol table. 1517 */ 1518 void 1519 inssym(int level, sym_t *sym) 1520 { 1521 1522 debug_step("inssym '%s' %s '%s'", 1523 sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type)); 1524 symtab_add(sym); 1525 sym->s_block_level = level; 1526 1527 /* 1528 * Placing the inner symbols to the beginning of the list ensures 1529 * that these symbols are preferred over symbols from the outer 1530 * blocks that happen to have the same name. 1531 */ 1532 lint_assert(sym->s_symtab_next != NULL 1533 ? sym->s_block_level >= sym->s_symtab_next->s_block_level 1534 : true); 1535 } 1536 1537 /* 1538 * Called at level 0 after syntax errors. 1539 * 1540 * Removes all symbols which are not declared at level 0 from the 1541 * symbol table. Also frees all memory which is not associated with 1542 * level 0. 1543 */ 1544 void 1545 clean_up_after_error(void) 1546 { 1547 1548 symtab_remove_locals(); 1549 1550 for (size_t i = mem_block_level; i > 0; i--) 1551 level_free_all(i); 1552 } 1553 1554 /* Create a new symbol with the same name as an existing symbol. */ 1555 sym_t * 1556 pushdown(const sym_t *sym) 1557 { 1558 sym_t *nsym; 1559 1560 debug_step("pushdown '%s' %s '%s'", 1561 sym->s_name, symt_name(sym->s_kind), type_name(sym->s_type)); 1562 nsym = block_zero_alloc(sizeof(*nsym)); 1563 lint_assert(sym->s_block_level <= block_level); 1564 nsym->s_name = sym->s_name; 1565 UNIQUE_CURR_POS(nsym->s_def_pos); 1566 nsym->s_kind = sym->s_kind; 1567 nsym->s_block_level = block_level; 1568 1569 symtab_add(nsym); 1570 1571 *dcs->d_ldlsym = nsym; 1572 dcs->d_ldlsym = &nsym->s_level_next; 1573 1574 return nsym; 1575 } 1576 1577 /* 1578 * Free any dynamically allocated memory referenced by 1579 * the value stack or yylval. 1580 * The type of information in yylval is described by tok. 1581 */ 1582 void 1583 freeyyv(void *sp, int tok) 1584 { 1585 if (tok == T_NAME || tok == T_TYPENAME) { 1586 sbuf_t *sb = *(sbuf_t **)sp; 1587 free(sb); 1588 } else if (tok == T_CON) { 1589 val_t *val = *(val_t **)sp; 1590 free(val); 1591 } else if (tok == T_STRING) { 1592 strg_t *strg = *(strg_t **)sp; 1593 free(strg->st_mem); 1594 free(strg); 1595 } 1596 } 1597