1 /* $NetBSD: lexi.c,v 1.172 2022/02/13 12:43:26 rillig Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1980, 1993 8 * The Regents of the University of California. All rights reserved. 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #if 0 41 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; 42 #endif 43 44 #include <sys/cdefs.h> 45 #if defined(__NetBSD__) 46 __RCSID("$NetBSD: lexi.c,v 1.172 2022/02/13 12:43:26 rillig Exp $"); 47 #elif defined(__FreeBSD__) 48 __FBSDID("$FreeBSD: head/usr.bin/indent/lexi.c 337862 2018-08-15 18:19:45Z pstef $"); 49 #endif 50 51 #include <stdlib.h> 52 #include <string.h> 53 54 #include "indent.h" 55 56 /* 57 * While inside lexi_alnum, this constant just marks a type, independently of 58 * the parentheses level. 59 */ 60 #define lsym_type lsym_type_outside_parentheses 61 62 /* must be sorted alphabetically, is used in binary search */ 63 static const struct keyword { 64 const char *name; 65 lexer_symbol lsym; 66 } keywords[] = { 67 {"_Bool", lsym_type}, 68 {"_Complex", lsym_type}, 69 {"_Imaginary", lsym_type}, 70 {"auto", lsym_storage_class}, 71 {"bool", lsym_type}, 72 {"break", lsym_word}, 73 {"case", lsym_case_label}, 74 {"char", lsym_type}, 75 {"complex", lsym_type}, 76 {"const", lsym_type}, 77 {"continue", lsym_word}, 78 {"default", lsym_case_label}, 79 {"do", lsym_do}, 80 {"double", lsym_type}, 81 {"else", lsym_else}, 82 {"enum", lsym_tag}, 83 {"extern", lsym_storage_class}, 84 {"float", lsym_type}, 85 {"for", lsym_for}, 86 {"goto", lsym_word}, 87 {"if", lsym_if}, 88 {"imaginary", lsym_type}, 89 {"inline", lsym_word}, 90 {"int", lsym_type}, 91 {"long", lsym_type}, 92 {"offsetof", lsym_offsetof}, 93 {"register", lsym_storage_class}, 94 {"restrict", lsym_word}, 95 {"return", lsym_return}, 96 {"short", lsym_type}, 97 {"signed", lsym_type}, 98 {"sizeof", lsym_sizeof}, 99 {"static", lsym_storage_class}, 100 {"struct", lsym_tag}, 101 {"switch", lsym_switch}, 102 {"typedef", lsym_typedef}, 103 {"union", lsym_tag}, 104 {"unsigned", lsym_type}, 105 {"void", lsym_type}, 106 {"volatile", lsym_type}, 107 {"while", lsym_while} 108 }; 109 110 static struct { 111 const char **items; 112 unsigned int len; 113 unsigned int cap; 114 } typenames; 115 116 /* 117 * The transition table below was rewritten by hand from lx's output, given 118 * the following definitions. lx is Katherine Flavel's lexer generator. 119 * 120 * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; 121 * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; 122 * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; 123 * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; 124 * 125 * D+ E FS? -> $float; 126 * D* "." D+ E? FS? -> $float; 127 * D+ "." E? FS? -> $float; HP H+ IS? -> $int; 128 * HP H+ P FS? -> $float; NZ D* IS? -> $int; 129 * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; 130 * HP H+ "." P FS -> $float; BP B+ IS? -> $int; 131 */ 132 /* INDENT OFF */ 133 static const unsigned char lex_number_state[][26] = { 134 /* examples: 135 00 136 s 0xx 137 t 00xaa 138 a 11 101100xxa.. 139 r 11ee0001101lbuuxx.a.pp 140 t.01.e+008bLuxll0Ll.aa.p+0 141 states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 142 [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", /* (other) */ 143 [1] = "CEIDEHHHIJQ U Q VUVVZZZ", /* 0 */ 144 [2] = "DEIDEHHHIJQ U Q VUVVZZZ", /* 1 */ 145 [3] = "DEIDEHHHIJ U VUVVZZZ", /* 2 3 4 5 6 7 */ 146 [4] = "DEJDEHHHJJ U VUVVZZZ", /* 8 9 */ 147 [5] = " U VUVV ", /* A a C c D d */ 148 [6] = " K U VUVV ", /* B b */ 149 [7] = " FFF FF U VUVV ", /* E e */ 150 [8] = " f f U VUVV f", /* F f */ 151 [9] = " LLf fL PR Li L f", /* L */ 152 [10] = " OOf fO S P O i O f", /* l */ 153 [11] = " FFX ", /* P p */ 154 [12] = " MM M i iiM M ", /* U u */ 155 [13] = " N ", /* X x */ 156 [14] = " G Y ", /* + - */ 157 [15] = "B EE EE T W ", /* . */ 158 /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ 159 }; 160 /* INDENT ON */ 161 162 static const unsigned char lex_number_row[] = { 163 ['0'] = 1, 164 ['1'] = 2, 165 ['2'] = 3, ['3'] = 3, ['4'] = 3, ['5'] = 3, ['6'] = 3, ['7'] = 3, 166 ['8'] = 4, ['9'] = 4, 167 ['A'] = 5, ['a'] = 5, ['C'] = 5, ['c'] = 5, ['D'] = 5, ['d'] = 5, 168 ['B'] = 6, ['b'] = 6, 169 ['E'] = 7, ['e'] = 7, 170 ['F'] = 8, ['f'] = 8, 171 ['L'] = 9, 172 ['l'] = 10, 173 ['P'] = 11, ['p'] = 11, 174 ['U'] = 12, ['u'] = 12, 175 ['X'] = 13, ['x'] = 13, 176 ['+'] = 14, ['-'] = 14, 177 ['.'] = 15, 178 }; 179 180 static void 181 check_size_token(size_t desired_size) 182 { 183 if (token.e + desired_size >= token.l) 184 buf_expand(&token, desired_size); 185 } 186 187 static void 188 token_add_char(char ch) 189 { 190 check_size_token(1); 191 *token.e++ = ch; 192 } 193 194 #ifdef debug 195 static const char * 196 lsym_name(lexer_symbol sym) 197 { 198 static const char *const name[] = { 199 "eof", 200 "preprocessing", 201 "newline", 202 "form_feed", 203 "comment", 204 "lparen_or_lbracket", 205 "rparen_or_rbracket", 206 "lbrace", 207 "rbrace", 208 "period", 209 "unary_op", 210 "binary_op", 211 "postfix_op", 212 "question", 213 "colon", 214 "comma", 215 "semicolon", 216 "typedef", 217 "storage_class", 218 "type_outside_parentheses", 219 "type_in_parentheses", 220 "tag", 221 "case_label", 222 "sizeof", 223 "offsetof", 224 "word", 225 "funcname", 226 "do", 227 "else", 228 "for", 229 "if", 230 "switch", 231 "while", 232 "return", 233 }; 234 235 return name[sym]; 236 } 237 238 static void 239 debug_print_buf(const char *name, const struct buffer *buf) 240 { 241 if (buf->s < buf->e) { 242 debug_printf("%s ", name); 243 debug_vis_range("\"", buf->s, buf->e, "\"\n"); 244 } 245 } 246 247 static bool 248 debug_full_parser_state(void) 249 { 250 return true; 251 } 252 253 #define debug_ps_bool(name) \ 254 if (ps.name != prev_ps.name) \ 255 debug_println("[%c] -> [%c] ps." #name, \ 256 prev_ps.name ? 'x' : ' ', ps.name ? 'x' : ' '); \ 257 else if (debug_full_parser_state()) \ 258 debug_println(" [%c] ps." #name, ps.name ? 'x' : ' ') 259 #define debug_ps_int(name) \ 260 if (ps.name != prev_ps.name) \ 261 debug_println("%3d -> %3d ps." #name, prev_ps.name, ps.name); \ 262 else if (debug_full_parser_state()) \ 263 debug_println(" %3d ps." #name, ps.name) 264 265 static bool 266 ps_paren_has_changed(const struct parser_state *prev_ps) 267 { 268 const paren_level_props *prev = prev_ps->paren, *curr = ps.paren; 269 270 if (prev_ps->nparen != ps.nparen) 271 return true; 272 273 for (int i = 0; i < ps.nparen; i++) { 274 if (curr[i].indent != prev[i].indent || 275 curr[i].maybe_cast != prev[i].maybe_cast || 276 curr[i].no_cast != prev[i].no_cast) 277 return true; 278 } 279 return false; 280 } 281 282 static void 283 debug_ps_paren(const struct parser_state *prev_ps) 284 { 285 if (!debug_full_parser_state() && !ps_paren_has_changed(prev_ps)) 286 return; 287 288 debug_printf(" ps.paren:"); 289 for (int i = 0; i < ps.nparen; i++) { 290 const paren_level_props *props = ps.paren + i; 291 const char *cast = props->no_cast ? "(no cast)" 292 : props->maybe_cast ? "(cast)" 293 : ""; 294 debug_printf(" %s%d", cast, props->indent); 295 } 296 if (ps.nparen == 0) 297 debug_printf(" none"); 298 debug_println(""); 299 } 300 301 static void 302 debug_lexi(lexer_symbol lsym) 303 { 304 /* 305 * Watch out for 'rolled back parser state' in the debug output; the 306 * differences around these are unreliable. 307 */ 308 static struct parser_state prev_ps; 309 310 debug_println(""); 311 debug_printf("line %d: %s", line_no, lsym_name(lsym)); 312 debug_vis_range(" \"", token.s, token.e, "\"\n"); 313 314 debug_print_buf("label", &lab); 315 debug_print_buf("code", &code); 316 debug_print_buf("comment", &com); 317 318 debug_println(" ps.prev_token = %s", lsym_name(ps.prev_token)); 319 debug_ps_bool(next_col_1); 320 debug_ps_bool(curr_col_1); 321 debug_ps_bool(next_unary); 322 debug_ps_bool(is_function_definition); 323 debug_ps_bool(want_blank); 324 debug_ps_int(line_start_nparen); 325 debug_ps_int(nparen); 326 debug_ps_paren(&prev_ps); 327 328 debug_ps_int(comment_delta); 329 debug_ps_int(n_comment_delta); 330 debug_ps_int(com_ind); 331 332 debug_ps_bool(block_init); 333 debug_ps_int(block_init_level); 334 debug_ps_bool(init_or_struct); 335 336 debug_ps_int(ind_level); 337 debug_ps_int(ind_level_follow); 338 339 debug_ps_int(decl_level); 340 debug_ps_bool(decl_on_line); 341 debug_ps_bool(in_decl); 342 debug_ps_int(just_saw_decl); 343 debug_ps_bool(in_func_def_params); 344 debug_ps_bool(decl_indent_done); 345 346 debug_ps_bool(in_stmt_or_decl); 347 debug_ps_bool(in_stmt_cont); 348 debug_ps_bool(is_case_label); 349 350 debug_ps_bool(search_stmt); 351 352 prev_ps = ps; 353 } 354 #endif 355 356 static lexer_symbol 357 lexi_end(lexer_symbol lsym) 358 { 359 #ifdef debug 360 debug_lexi(lsym); 361 #endif 362 return lsym; 363 } 364 365 static void 366 lex_number(void) 367 { 368 for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { 369 unsigned char ch = (unsigned char)inp_peek(); 370 if (ch >= array_length(lex_number_row) || lex_number_row[ch] == 0) 371 break; 372 373 unsigned char row = lex_number_row[ch]; 374 if (lex_number_state[row][s - 'A'] == ' ') { 375 /*- 376 * lex_number_state[0][s - 'A'] now indicates the type: 377 * f = floating, i = integer, u = unknown 378 */ 379 return; 380 } 381 382 s = lex_number_state[row][s - 'A']; 383 token_add_char(inp_next()); 384 } 385 } 386 387 static bool 388 is_identifier_start(char ch) 389 { 390 return ch_isalpha(ch) || ch == '_' || ch == '$'; 391 } 392 393 static bool 394 is_identifier_part(char ch) 395 { 396 return ch_isalnum(ch) || ch == '_' || ch == '$'; 397 } 398 399 static void 400 lex_word(void) 401 { 402 for (;;) { 403 if (is_identifier_part(inp_peek())) 404 token_add_char(inp_next()); 405 else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') { 406 inp_skip(); 407 inp_skip(); 408 } else 409 return; 410 } 411 } 412 413 static void 414 lex_char_or_string(void) 415 { 416 for (char delim = token.e[-1];;) { 417 if (inp_peek() == '\n') { 418 diag(1, "Unterminated literal"); 419 return; 420 } 421 422 token_add_char(inp_next()); 423 if (token.e[-1] == delim) 424 return; 425 426 if (token.e[-1] == '\\') { 427 if (inp_peek() == '\n') 428 ++line_no; 429 token_add_char(inp_next()); 430 } 431 } 432 } 433 434 /* Guess whether the current token is a declared type. */ 435 static bool 436 probably_typename(void) 437 { 438 if (ps.prev_token == lsym_storage_class) 439 return true; 440 if (ps.block_init) 441 return false; 442 if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ 443 return false; 444 if (inp_peek() == '*' && inp_lookahead(1) != '=') 445 goto maybe; 446 /* XXX: is_identifier_start */ 447 if (ch_isalpha(inp_peek())) 448 goto maybe; 449 return false; 450 maybe: 451 return ps.prev_token == lsym_semicolon || 452 ps.prev_token == lsym_lbrace || 453 ps.prev_token == lsym_rbrace; 454 } 455 456 static int 457 bsearch_typenames(const char *key) 458 { 459 const char **arr = typenames.items; 460 int lo = 0; 461 int hi = (int)typenames.len - 1; 462 463 while (lo <= hi) { 464 int mid = (int)((unsigned)(lo + hi) >> 1); 465 int cmp = strcmp(arr[mid], key); 466 if (cmp < 0) 467 lo = mid + 1; 468 else if (cmp > 0) 469 hi = mid - 1; 470 else 471 return mid; 472 } 473 return -(lo + 1); 474 } 475 476 static bool 477 is_typename(void) 478 { 479 if (opt.auto_typedefs && 480 token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0) 481 return true; 482 483 return bsearch_typenames(token.s) >= 0; 484 } 485 486 static int 487 cmp_keyword_by_name(const void *key, const void *elem) 488 { 489 return strcmp(key, ((const struct keyword *)elem)->name); 490 } 491 492 /* 493 * Looking at something like 'function_name(...)' in a line, guess whether 494 * this starts a function definition or a declaration. 495 */ 496 static bool 497 probably_looking_at_definition(void) 498 { 499 int paren_level = 0; 500 for (const char *p = inp_p(), *e = inp_line_end(); p < e; p++) { 501 if (*p == '(') 502 paren_level++; 503 if (*p == ')' && --paren_level == 0) { 504 p++; 505 506 while (p < e && (ch_isspace(*p) || is_identifier_part(*p))) 507 p++; /* '__dead' or '__unused' */ 508 509 if (p == e) /* func(...) */ 510 break; 511 if (*p == ';') /* func(...); */ 512 return false; 513 if (*p == ',') /* double abs(), pi; */ 514 return false; 515 if (*p == '(') /* func(...) __attribute__((...)) */ 516 paren_level++; /* func(...) __printflike(...) */ 517 else 518 break; /* func(...) { ... */ 519 } 520 } 521 522 /* 523 * To further reduce the cases where indent wrongly treats an incomplete 524 * function declaration as a function definition, thus adding a newline 525 * before the function name, it may be worth looking for parameter names, 526 * as these are often omitted in function declarations and only included 527 * in function definitions. Or just increase the lookahead to more than 528 * just the current line of input, until the next '{'. 529 */ 530 return true; 531 } 532 533 /* Read an alphanumeric token into 'token', or return lsym_eof. */ 534 static lexer_symbol 535 lexi_alnum(void) 536 { 537 if (ch_isdigit(inp_peek()) || 538 (inp_peek() == '.' && ch_isdigit(inp_lookahead(1)))) { 539 lex_number(); 540 } else if (is_identifier_start(inp_peek())) { 541 lex_word(); 542 543 if (token.s[0] == 'L' && token.e - token.s == 1 && 544 (inp_peek() == '"' || inp_peek() == '\'')) { 545 token_add_char(inp_next()); 546 lex_char_or_string(); 547 ps.next_unary = false; 548 549 check_size_token(1); 550 *token.e = '\0'; 551 552 return lsym_word; 553 } 554 } else 555 return lsym_eof; /* just as a placeholder */ 556 557 *token.e = '\0'; 558 559 while (ch_isblank(inp_peek())) 560 inp_skip(); 561 562 ps.next_unary = ps.prev_token == lsym_tag; /* for 'struct s *' */ 563 564 if (ps.prev_token == lsym_tag && ps.nparen == 0) 565 return lsym_type_outside_parentheses; 566 567 const struct keyword *kw = bsearch(token.s, keywords, 568 array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); 569 bool is_type = false; 570 if (kw == NULL) { 571 if (is_typename()) { 572 is_type = true; 573 ps.next_unary = true; 574 if (ps.in_enum == in_enum_enum) 575 ps.in_enum = in_enum_type; 576 goto found_typename; 577 } 578 579 } else { /* we have a keyword */ 580 is_type = kw->lsym == lsym_type; 581 ps.next_unary = true; 582 if (kw->lsym != lsym_tag && kw->lsym != lsym_type) 583 return kw->lsym; 584 585 found_typename: 586 if (ps.nparen > 0) { 587 /* inside parentheses: cast, param list, offsetof or sizeof */ 588 if (!ps.paren[ps.nparen - 1].no_cast) 589 ps.paren[ps.nparen - 1].maybe_cast = true; 590 } 591 if (ps.prev_token != lsym_period && ps.prev_token != lsym_unary_op) { 592 if (kw != NULL && kw->lsym == lsym_tag) { 593 if (token.s[0] == 'e' /* enum */) 594 ps.in_enum = in_enum_enum; 595 return lsym_tag; 596 } 597 if (ps.nparen == 0) 598 return lsym_type_outside_parentheses; 599 } 600 } 601 602 if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 && 603 !ps.in_func_def_params && !ps.block_init) { 604 605 if (ps.nparen == 0 && probably_looking_at_definition()) { 606 ps.is_function_definition = true; 607 if (ps.in_decl) 608 ps.in_func_def_params = true; 609 return lsym_funcname; 610 } 611 612 } else if (ps.nparen == 0 && probably_typename()) { 613 ps.next_unary = true; 614 return lsym_type_outside_parentheses; 615 } 616 617 return is_type ? lsym_type_in_parentheses : lsym_word; 618 } 619 620 static bool 621 is_asterisk_unary(void) 622 { 623 if (ps.next_unary || ps.in_func_def_params) 624 return true; 625 if (ps.prev_token == lsym_word || 626 ps.prev_token == lsym_rparen_or_rbracket) 627 return false; 628 return ps.in_decl && ps.nparen > 0; 629 } 630 631 static void 632 lex_asterisk_unary(void) 633 { 634 while (inp_peek() == '*' || ch_isspace(inp_peek())) { 635 if (inp_peek() == '*') 636 token_add_char('*'); 637 inp_skip(); 638 } 639 640 if (ps.in_decl) { 641 const char *tp = inp_p(), *e = inp_line_end(); 642 643 while (tp < e) { 644 if (ch_isspace(*tp)) 645 tp++; 646 else if (is_identifier_start(*tp)) { 647 tp++; 648 while (tp < e && is_identifier_part(*tp)) 649 tp++; 650 } else 651 break; 652 } 653 654 if (tp < e && *tp == '(') 655 ps.is_function_definition = true; 656 } 657 } 658 659 /* Reads the next token, placing it in the global variable "token". */ 660 lexer_symbol 661 lexi(void) 662 { 663 token.e = token.s; 664 ps.curr_col_1 = ps.next_col_1; 665 ps.next_col_1 = false; 666 667 while (ch_isblank(inp_peek())) { 668 ps.curr_col_1 = false; 669 inp_skip(); 670 } 671 672 lexer_symbol alnum_lsym = lexi_alnum(); 673 if (alnum_lsym != lsym_eof) 674 return lexi_end(alnum_lsym); 675 676 /* Scan a non-alphanumeric token */ 677 678 check_size_token(3); /* for things like "<<=" */ 679 *token.e++ = inp_next(); 680 *token.e = '\0'; 681 682 lexer_symbol lsym; 683 bool next_unary; 684 685 switch (token.e[-1]) { 686 687 /* INDENT OFF */ 688 case '(': 689 case '[': lsym = lsym_lparen_or_lbracket; next_unary = true; break; 690 case ')': 691 case ']': lsym = lsym_rparen_or_rbracket; next_unary = false; break; 692 case '?': lsym = lsym_question; next_unary = true; break; 693 case ':': lsym = lsym_colon; next_unary = true; break; 694 case ';': lsym = lsym_semicolon; next_unary = true; break; 695 case '{': lsym = lsym_lbrace; next_unary = true; break; 696 case '}': lsym = lsym_rbrace; next_unary = true; break; 697 case ',': lsym = lsym_comma; next_unary = true; break; 698 case '.': lsym = lsym_period; next_unary = false; break; 699 /* INDENT ON */ 700 701 case '\n': 702 /* if data has been exhausted, the '\n' is a dummy. */ 703 lsym = had_eof ? lsym_eof : lsym_newline; 704 next_unary = ps.next_unary; 705 ps.next_col_1 = true; 706 break; 707 708 case '\f': 709 lsym = lsym_form_feed; 710 next_unary = ps.next_unary; 711 ps.next_col_1 = true; 712 break; 713 714 case '#': 715 lsym = lsym_preprocessing; 716 next_unary = ps.next_unary; 717 break; 718 719 case '\'': 720 case '"': 721 lex_char_or_string(); 722 lsym = lsym_word; 723 next_unary = false; 724 break; 725 726 case '-': 727 case '+': 728 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 729 next_unary = true; 730 731 if (inp_peek() == token.e[-1]) { /* '++' or '--' */ 732 *token.e++ = inp_next(); 733 if (ps.prev_token == lsym_word || 734 ps.prev_token == lsym_rparen_or_rbracket) { 735 lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op; 736 next_unary = false; 737 } 738 739 } else if (inp_peek() == '=') { /* '+=' or '-=' */ 740 *token.e++ = inp_next(); 741 742 } else if (inp_peek() == '>') { /* '->' */ 743 *token.e++ = inp_next(); 744 lsym = lsym_unary_op; 745 next_unary = false; 746 ps.want_blank = false; 747 } 748 break; 749 750 case '=': 751 if (ps.init_or_struct) 752 ps.block_init = true; 753 if (inp_peek() == '=') { /* == */ 754 *token.e++ = inp_next(); 755 *token.e = '\0'; 756 } 757 lsym = lsym_binary_op; 758 next_unary = true; 759 break; 760 761 case '>': 762 case '<': 763 case '!': /* ops like <, <<, <=, !=, etc */ 764 if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=') 765 *token.e++ = inp_next(); 766 if (inp_peek() == '=') 767 *token.e++ = inp_next(); 768 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 769 next_unary = true; 770 break; 771 772 case '*': 773 if (is_asterisk_unary()) { 774 lex_asterisk_unary(); 775 lsym = lsym_unary_op; 776 next_unary = true; 777 } else { 778 if (inp_peek() == '=') 779 *token.e++ = inp_next(); 780 lsym = lsym_binary_op; 781 next_unary = true; 782 } 783 break; 784 785 default: 786 if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) { 787 *token.e++ = inp_next(); 788 lsym = lsym_comment; 789 next_unary = ps.next_unary; 790 break; 791 } 792 793 /* handle '||', '&&', etc., and also things as in 'int *****i' */ 794 while (token.e[-1] == inp_peek() || inp_peek() == '=') 795 token_add_char(inp_next()); 796 797 lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; 798 next_unary = true; 799 } 800 801 if (ps.in_enum == in_enum_enum || ps.in_enum == in_enum_type) 802 ps.in_enum = lsym == lsym_lbrace ? in_enum_brace : in_enum_no; 803 if (lsym == lsym_rbrace) 804 ps.in_enum = in_enum_no; 805 806 ps.next_unary = next_unary; 807 808 check_size_token(1); 809 *token.e = '\0'; 810 811 return lexi_end(lsym); 812 } 813 814 void 815 register_typename(const char *name) 816 { 817 if (typenames.len >= typenames.cap) { 818 typenames.cap = 16 + 2 * typenames.cap; 819 typenames.items = xrealloc(typenames.items, 820 sizeof(typenames.items[0]) * typenames.cap); 821 } 822 823 int pos = bsearch_typenames(name); 824 if (pos >= 0) 825 return; /* already in the list */ 826 827 pos = -(pos + 1); 828 memmove(typenames.items + pos + 1, typenames.items + pos, 829 sizeof(typenames.items[0]) * (typenames.len++ - (unsigned)pos)); 830 typenames.items[pos] = xstrdup(name); 831 } 832