1 /* $NetBSD: indent.c,v 1.310 2023/05/23 18:16:28 rillig Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 8 * Copyright (c) 1980, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #include <sys/cdefs.h> 41 __RCSID("$NetBSD: indent.c,v 1.310 2023/05/23 18:16:28 rillig Exp $"); 42 43 #include <sys/param.h> 44 #include <err.h> 45 #include <fcntl.h> 46 #include <stdarg.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 52 #include "indent.h" 53 54 struct options opt = { 55 .brace_same_line = true, 56 .comment_delimiter_on_blankline = true, 57 .cuddle_else = true, 58 .comment_column = 33, 59 .decl_indent = 16, 60 .else_if = true, 61 .function_brace_split = true, 62 .format_col1_comments = true, 63 .format_block_comments = true, 64 .indent_parameters = true, 65 .indent_size = 8, 66 .local_decl_indent = -1, 67 .lineup_to_parens = true, 68 .procnames_start_line = true, 69 .star_comment_cont = true, 70 .tabsize = 8, 71 .max_line_length = 78, 72 .use_tabs = true, 73 }; 74 75 struct parser_state ps; 76 77 struct buffer token; 78 79 struct buffer lab; 80 struct buffer code; 81 struct buffer com; 82 83 bool found_err; 84 bool break_comma; 85 float case_ind; 86 bool had_eof; 87 int line_no = 1; 88 enum indent_enabled indent_enabled; 89 90 static int ifdef_level; 91 static struct parser_state state_stack[5]; 92 93 FILE *input; 94 FILE *output; 95 96 static const char *in_name = "Standard Input"; 97 static const char *out_name = "Standard Output"; 98 static const char *backup_suffix = ".BAK"; 99 static char bakfile[MAXPATHLEN] = ""; 100 101 102 void * 103 nonnull(void *p) 104 { 105 if (p == NULL) 106 err(EXIT_FAILURE, NULL); 107 return p; 108 } 109 110 static void 111 buf_expand(struct buffer *buf, size_t add_size) 112 { 113 buf->cap = buf->cap + add_size + 400; 114 buf->mem = nonnull(realloc(buf->mem, buf->cap)); 115 buf->st = buf->mem; 116 } 117 118 void 119 buf_add_char(struct buffer *buf, char ch) 120 { 121 if (buf->len == buf->cap) 122 buf_expand(buf, 1); 123 buf->mem[buf->len++] = ch; 124 } 125 126 void 127 buf_add_chars(struct buffer *buf, const char *s, size_t len) 128 { 129 if (len == 0) 130 return; 131 if (len > buf->cap - buf->len) 132 buf_expand(buf, len); 133 memcpy(buf->mem + buf->len, s, len); 134 buf->len += len; 135 } 136 137 static void 138 buf_add_buf(struct buffer *buf, const struct buffer *add) 139 { 140 buf_add_chars(buf, add->st, add->len); 141 } 142 143 void 144 diag(int level, const char *msg, ...) 145 { 146 va_list ap; 147 148 if (level != 0) 149 found_err = true; 150 151 va_start(ap, msg); 152 fprintf(stderr, "%s: %s:%d: ", 153 level == 0 ? "warning" : "error", in_name, line_no); 154 vfprintf(stderr, msg, ap); 155 fprintf(stderr, "\n"); 156 va_end(ap); 157 } 158 159 /* 160 * Compute the indentation from starting at 'ind' and adding the text starting 161 * at 's'. 162 */ 163 int 164 ind_add(int ind, const char *s, size_t len) 165 { 166 for (const char *p = s; len > 0; p++, len--) { 167 if (*p == '\n') 168 ind = 0; 169 else if (*p == '\t') 170 ind = next_tab(ind); 171 else if (*p == '\b') 172 --ind; 173 else 174 ++ind; 175 } 176 return ind; 177 } 178 179 static void 180 init_globals(void) 181 { 182 ps.s_sym[0] = psym_stmt_list; 183 ps.prev_token = lsym_semicolon; 184 ps.next_col_1 = true; 185 186 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX"); 187 if (suffix != NULL) 188 backup_suffix = suffix; 189 } 190 191 /* 192 * Copy the input file to the backup file, then make the backup file the input 193 * and the original input file the output. 194 */ 195 static void 196 bakcopy(void) 197 { 198 ssize_t n; 199 int bak_fd; 200 char buff[8 * 1024]; 201 202 const char *last_slash = strrchr(in_name, '/'); 203 snprintf(bakfile, sizeof(bakfile), "%s%s", 204 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix); 205 206 /* copy in_name to backup file */ 207 bak_fd = creat(bakfile, 0600); 208 if (bak_fd < 0) 209 err(1, "%s", bakfile); 210 211 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 212 if (write(bak_fd, buff, (size_t)n) != n) 213 err(1, "%s", bakfile); 214 if (n < 0) 215 err(1, "%s", in_name); 216 217 close(bak_fd); 218 (void)fclose(input); 219 220 /* re-open backup file as the input file */ 221 input = fopen(bakfile, "r"); 222 if (input == NULL) 223 err(1, "%s", bakfile); 224 /* now the original input file will be the output */ 225 output = fopen(in_name, "w"); 226 if (output == NULL) { 227 unlink(bakfile); 228 err(1, "%s", in_name); 229 } 230 } 231 232 static void 233 load_profiles(int argc, char **argv) 234 { 235 const char *profile_name = NULL; 236 237 for (int i = 1; i < argc; ++i) { 238 const char *arg = argv[i]; 239 240 if (strcmp(arg, "-npro") == 0) 241 return; 242 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') 243 profile_name = arg + 2; 244 } 245 246 load_profile_files(profile_name); 247 } 248 249 static void 250 parse_command_line(int argc, char **argv) 251 { 252 for (int i = 1; i < argc; ++i) { 253 const char *arg = argv[i]; 254 255 if (arg[0] == '-') { 256 set_option(arg, "Command line"); 257 258 } else if (input == NULL) { 259 in_name = arg; 260 if ((input = fopen(in_name, "r")) == NULL) 261 err(1, "%s", in_name); 262 263 } else if (output == NULL) { 264 out_name = arg; 265 if (strcmp(in_name, out_name) == 0) 266 errx(1, "input and output files " 267 "must be different"); 268 if ((output = fopen(out_name, "w")) == NULL) 269 err(1, "%s", out_name); 270 271 } else 272 errx(1, "too many arguments: %s", arg); 273 } 274 275 if (input == NULL) { 276 input = stdin; 277 output = stdout; 278 } else if (output == NULL) { 279 out_name = in_name; 280 bakcopy(); 281 } 282 283 if (opt.comment_column <= 1) 284 opt.comment_column = 2; /* don't put normal comments in column 285 * 1, see opt.format_col1_comments */ 286 if (opt.block_comment_max_line_length <= 0) 287 opt.block_comment_max_line_length = opt.max_line_length; 288 if (opt.local_decl_indent < 0) 289 opt.local_decl_indent = opt.decl_indent; 290 if (opt.decl_comment_column <= 0) 291 opt.decl_comment_column = opt.ljust_decl 292 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8) 293 : opt.comment_column; 294 if (opt.continuation_indent == 0) 295 opt.continuation_indent = opt.indent_size; 296 } 297 298 static void 299 set_initial_indentation(void) 300 { 301 inp_read_line(); 302 303 int ind = 0; 304 for (const char *p = inp.st;; p++) { 305 if (*p == ' ') 306 ind++; 307 else if (*p == '\t') 308 ind = next_tab(ind); 309 else 310 break; 311 } 312 313 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size; 314 } 315 316 static void 317 code_add_decl_indent(int decl_ind, bool tabs_to_var) 318 { 319 int base = ps.ind_level * opt.indent_size; 320 int ind = base + (int)code.len; 321 int target = base + decl_ind; 322 size_t orig_code_len = code.len; 323 324 if (tabs_to_var) 325 for (int next; (next = next_tab(ind)) <= target; ind = next) 326 buf_add_char(&code, '\t'); 327 328 for (; ind < target; ind++) 329 buf_add_char(&code, ' '); 330 331 if (code.len == orig_code_len && ps.want_blank) { 332 buf_add_char(&code, ' '); 333 ps.want_blank = false; 334 } 335 } 336 337 static void 338 update_ps_decl_ptr(lexer_symbol lsym) 339 { 340 switch (ps.decl_ptr) { 341 case dp_start: 342 if (lsym == lsym_storage_class) 343 ps.decl_ptr = dp_start; 344 else if (lsym == lsym_type_outside_parentheses) 345 ps.decl_ptr = dp_word; 346 else if (lsym == lsym_word) 347 ps.decl_ptr = dp_word; 348 else 349 ps.decl_ptr = dp_other; 350 break; 351 case dp_word: 352 if (lsym == lsym_unary_op && token.st[0] == '*') 353 ps.decl_ptr = dp_word_asterisk; 354 else 355 ps.decl_ptr = dp_other; 356 break; 357 case dp_word_asterisk: 358 if (lsym == lsym_unary_op && token.st[0] == '*') 359 ps.decl_ptr = dp_word_asterisk; 360 else 361 ps.decl_ptr = dp_other; 362 break; 363 case dp_other: 364 if (lsym == lsym_semicolon || lsym == lsym_rbrace) 365 ps.decl_ptr = dp_start; 366 if (lsym == lsym_lparen_or_lbracket 367 && ps.prev_token == lsym_for) 368 ps.decl_ptr = dp_start; 369 if (lsym == lsym_comma && ps.in_decl) 370 ps.decl_ptr = dp_start; 371 break; 372 } 373 } 374 375 static void 376 update_ps_in_enum(lexer_symbol lsym) 377 { 378 switch (ps.in_enum) { 379 case in_enum_no: 380 if (lsym == lsym_tag && token.st[0] == 'e') 381 ps.in_enum = in_enum_enum; 382 break; 383 case in_enum_enum: 384 if (lsym == lsym_type_outside_parentheses 385 || lsym == lsym_type_in_parentheses) 386 ps.in_enum = in_enum_type; 387 else if (lsym == lsym_lbrace) 388 ps.in_enum = in_enum_brace; 389 else 390 ps.in_enum = in_enum_no; 391 break; 392 case in_enum_type: 393 if (lsym == lsym_lbrace) 394 ps.in_enum = in_enum_brace; 395 else 396 ps.in_enum = in_enum_no; 397 break; 398 case in_enum_brace: 399 if (lsym == lsym_rbrace) 400 ps.in_enum = in_enum_no; 401 break; 402 } 403 } 404 405 static int 406 process_eof(void) 407 { 408 if (lab.len > 0 || code.len > 0 || com.len > 0) 409 output_line(); 410 if (indent_enabled != indent_on) { 411 indent_enabled = indent_last_off_line; 412 output_line(); 413 } 414 415 if (ps.tos > 1) /* check for balanced braces */ 416 diag(1, "Stuff missing from end of file"); 417 418 fflush(output); 419 return found_err ? EXIT_FAILURE : EXIT_SUCCESS; 420 } 421 422 static void 423 maybe_break_line(lexer_symbol lsym) 424 { 425 if (!ps.force_nl) 426 return; 427 if (lsym == lsym_semicolon) 428 return; 429 if (lsym == lsym_lbrace && opt.brace_same_line) 430 return; 431 432 if (opt.verbose) 433 diag(0, "Line broken"); 434 output_line(); 435 ps.force_nl = false; 436 } 437 438 static void 439 move_com_to_code(lexer_symbol lsym) 440 { 441 if (ps.want_blank) 442 buf_add_char(&code, ' '); 443 buf_add_buf(&code, &com); 444 if (lsym != lsym_rparen_or_rbracket) 445 buf_add_char(&code, ' '); 446 com.len = 0; 447 ps.want_blank = false; 448 } 449 450 static void 451 process_newline(void) 452 { 453 if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init && 454 !opt.break_after_comma && break_comma && 455 com.len == 0) 456 goto stay_in_line; 457 458 output_line(); 459 460 stay_in_line: 461 ++line_no; 462 } 463 464 static bool 465 is_function_pointer_declaration(void) 466 { 467 return token.st[0] == '(' 468 && ps.in_decl 469 && !ps.block_init 470 && !ps.decl_indent_done 471 && !ps.is_function_definition 472 && ps.line_start_nparen == 0; 473 } 474 475 static bool 476 want_blank_before_lparen(void) 477 { 478 if (!ps.want_blank) 479 return false; 480 if (opt.proc_calls_space) 481 return true; 482 if (ps.prev_token == lsym_rparen_or_rbracket) 483 return false; 484 if (ps.prev_token == lsym_offsetof) 485 return false; 486 if (ps.prev_token == lsym_sizeof) 487 return opt.blank_after_sizeof; 488 if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname) 489 return false; 490 return true; 491 } 492 493 static bool 494 want_blank_before_lbracket(void) 495 { 496 if (code.len == 0) 497 return false; 498 if (ps.prev_token == lsym_comma) 499 return true; 500 if (ps.prev_token == lsym_binary_op) 501 return true; 502 return false; 503 } 504 505 static void 506 process_lparen_or_lbracket(void) 507 { 508 if (++ps.nparen == array_length(ps.paren)) { 509 diag(0, "Reached internal limit of %zu unclosed parentheses", 510 array_length(ps.paren)); 511 ps.nparen--; 512 } 513 514 if (is_function_pointer_declaration()) { 515 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var); 516 ps.decl_indent_done = true; 517 } else if (token.st[0] == '(' 518 ? want_blank_before_lparen() : want_blank_before_lbracket()) 519 buf_add_char(&code, ' '); 520 ps.want_blank = false; 521 buf_add_char(&code, token.st[0]); 522 523 int indent = ind_add(0, code.st, code.len); 524 enum paren_level_cast cast = cast_unknown; 525 526 if (opt.extra_expr_indent && !opt.lineup_to_parens 527 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1 528 && opt.continuation_indent == opt.indent_size) 529 ps.extra_expr_indent = eei_yes; 530 531 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0 532 && ps.nparen == 1 && indent < 2 * opt.indent_size) 533 indent = 2 * opt.indent_size; 534 535 if (ps.init_or_struct && *token.st == '(' && ps.tos <= 2) { 536 /* this is a kluge to make sure that declarations will be 537 * aligned right if proc decl has an explicit type on it, i.e. 538 * "int a(x) {..." */ 539 parse(psym_0); 540 ps.init_or_struct = false; 541 } 542 543 if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof 544 || ps.is_function_definition) 545 cast = cast_no; 546 547 ps.paren[ps.nparen - 1].indent = indent; 548 ps.paren[ps.nparen - 1].cast = cast; 549 debug_println("paren_indents[%d] is now %s%d", 550 ps.nparen - 1, paren_level_cast_name[cast], indent); 551 } 552 553 static void 554 process_rparen_or_rbracket(void) 555 { 556 if (ps.nparen == 0) { 557 diag(0, "Extra '%c'", *token.st); 558 goto unbalanced; 559 } 560 561 enum paren_level_cast cast = ps.paren[--ps.nparen].cast; 562 if (ps.decl_on_line && !ps.block_init) 563 cast = cast_no; 564 565 if (cast == cast_maybe) { 566 ps.next_unary = true; 567 ps.want_blank = opt.space_after_cast; 568 } else 569 ps.want_blank = true; 570 571 if (code.len == 0) /* if the paren starts the line */ 572 ps.line_start_nparen = ps.nparen; /* then indent it */ 573 574 unbalanced: 575 buf_add_char(&code, token.st[0]); 576 577 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 578 if (ps.extra_expr_indent == eei_yes) 579 ps.extra_expr_indent = eei_last; 580 ps.force_nl = true; 581 ps.next_unary = true; 582 ps.in_stmt_or_decl = false; 583 parse(ps.spaced_expr_psym); 584 ps.spaced_expr_psym = psym_0; 585 ps.want_blank = true; 586 out.line_kind = lk_stmt_head; 587 } 588 } 589 590 static bool 591 want_blank_before_unary_op(void) 592 { 593 if (ps.want_blank) 594 return true; 595 if (token.st[0] == '+' || token.st[0] == '-') 596 return code.len > 0 && code.mem[code.len - 1] == token.st[0]; 597 return false; 598 } 599 600 static void 601 process_unary_op(void) 602 { 603 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init && 604 !ps.is_function_definition && ps.line_start_nparen == 0) { 605 /* pointer declarations */ 606 code_add_decl_indent(ps.decl_ind - (int)token.len, 607 ps.tabs_to_var); 608 ps.decl_indent_done = true; 609 } else if (want_blank_before_unary_op()) 610 buf_add_char(&code, ' '); 611 612 buf_add_buf(&code, &token); 613 ps.want_blank = false; 614 } 615 616 static void 617 process_binary_op(void) 618 { 619 if (code.len > 0 && ps.want_blank) 620 buf_add_char(&code, ' '); 621 buf_add_buf(&code, &token); 622 ps.want_blank = true; 623 } 624 625 static void 626 process_postfix_op(void) 627 { 628 buf_add_buf(&code, &token); 629 ps.want_blank = true; 630 } 631 632 static void 633 process_question(void) 634 { 635 ps.quest_level++; 636 if (code.len == 0) { 637 ps.in_stmt_cont = true; 638 ps.in_stmt_or_decl = true; 639 ps.in_decl = false; 640 } 641 if (ps.want_blank) 642 buf_add_char(&code, ' '); 643 buf_add_char(&code, '?'); 644 ps.want_blank = true; 645 } 646 647 static void 648 process_colon(void) 649 { 650 if (ps.quest_level > 0) { /* part of a '?:' operator */ 651 ps.quest_level--; 652 if (code.len == 0) { 653 ps.in_stmt_cont = true; 654 ps.in_stmt_or_decl = true; 655 ps.in_decl = false; 656 } 657 if (ps.want_blank) 658 buf_add_char(&code, ' '); 659 buf_add_char(&code, ':'); 660 ps.want_blank = true; 661 return; 662 } 663 664 if (ps.init_or_struct) { /* bit-field */ 665 buf_add_char(&code, ':'); 666 ps.want_blank = false; 667 return; 668 } 669 670 buf_add_buf(&lab, &code); /* 'case' or 'default' or named label 671 */ 672 buf_add_char(&lab, ':'); 673 code.len = 0; 674 675 ps.in_stmt_or_decl = false; 676 ps.is_case_label = ps.seen_case; 677 ps.force_nl = ps.seen_case; 678 ps.seen_case = false; 679 ps.want_blank = false; 680 } 681 682 static void 683 process_semicolon(void) 684 { 685 if (ps.decl_level == 0) 686 ps.init_or_struct = false; 687 ps.seen_case = false; /* only needs to be reset on error */ 688 ps.quest_level = 0; /* only needs to be reset on error */ 689 if (ps.prev_token == lsym_rparen_or_rbracket) 690 ps.in_func_def_params = false; 691 ps.block_init = false; 692 ps.block_init_level = 0; 693 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; 694 695 if (ps.in_decl && code.len == 0 && !ps.block_init && 696 !ps.decl_indent_done && ps.line_start_nparen == 0) { 697 /* indent stray semicolons in declarations */ 698 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var); 699 ps.decl_indent_done = true; 700 } 701 702 ps.in_decl = ps.decl_level > 0; /* if we were in a first level 703 * structure declaration before, we 704 * aren't anymore */ 705 706 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) { 707 /* There were unbalanced parentheses in the statement. It is a 708 * bit complicated, because the semicolon might be in a for 709 * statement. */ 710 diag(1, "Unbalanced parentheses"); 711 ps.nparen = 0; 712 if (ps.spaced_expr_psym != psym_0) { 713 parse(ps.spaced_expr_psym); 714 ps.spaced_expr_psym = psym_0; 715 } 716 } 717 buf_add_char(&code, ';'); 718 ps.want_blank = true; 719 ps.in_stmt_or_decl = ps.nparen > 0; 720 ps.decl_ind = 0; 721 722 if (ps.spaced_expr_psym == psym_0) { 723 parse(psym_0); /* let parser know about end of stmt */ 724 ps.force_nl = true; 725 } 726 } 727 728 static void 729 process_lbrace(void) 730 { 731 ps.in_stmt_or_decl = false; /* don't indent the {} */ 732 733 if (!ps.block_init) 734 ps.force_nl = true; 735 else if (ps.block_init_level <= 0) 736 ps.block_init_level = 1; 737 else 738 ps.block_init_level++; 739 740 if (code.len > 0 && !ps.block_init) { 741 if (!opt.brace_same_line || 742 (code.len > 0 && code.mem[code.len - 1] == '}')) 743 output_line(); 744 else if (ps.in_func_def_params && !ps.init_or_struct) { 745 ps.ind_level_follow = 0; 746 if (opt.function_brace_split) 747 output_line(); 748 else 749 ps.want_blank = true; 750 } 751 } 752 753 if (ps.nparen > 0) { 754 diag(1, "Unbalanced parentheses"); 755 ps.nparen = 0; 756 if (ps.spaced_expr_psym != psym_0) { 757 parse(ps.spaced_expr_psym); 758 ps.spaced_expr_psym = psym_0; 759 ps.ind_level = ps.ind_level_follow; 760 } 761 } 762 763 if (code.len == 0) 764 ps.in_stmt_cont = false; /* don't indent the '{' itself 765 */ 766 if (ps.in_decl && ps.init_or_struct) { 767 ps.di_stack[ps.decl_level] = ps.decl_ind; 768 if (++ps.decl_level == (int)array_length(ps.di_stack)) { 769 diag(0, "Reached internal limit of %d struct levels", 770 (int)array_length(ps.di_stack)); 771 ps.decl_level--; 772 } 773 } else { 774 ps.decl_on_line = false; /* we can't be in the middle of 775 * a declaration, so don't do 776 * special indentation of 777 * comments */ 778 ps.in_func_def_params = false; 779 ps.in_decl = false; 780 } 781 782 ps.decl_ind = 0; 783 parse(psym_lbrace); 784 if (ps.want_blank) 785 buf_add_char(&code, ' '); 786 ps.want_blank = false; 787 buf_add_char(&code, '{'); 788 ps.declaration = decl_no; 789 } 790 791 static void 792 process_rbrace(void) 793 { 794 if (ps.nparen > 0) { /* check for unclosed if, for, else. */ 795 diag(1, "Unbalanced parentheses"); 796 ps.nparen = 0; 797 ps.spaced_expr_psym = psym_0; 798 } 799 800 ps.declaration = decl_no; 801 ps.block_init_level--; 802 803 if (code.len > 0 && !ps.block_init) { 804 if (opt.verbose) 805 diag(0, "Line broken"); 806 output_line(); 807 } 808 809 buf_add_char(&code, '}'); 810 ps.want_blank = true; 811 ps.in_stmt_or_decl = false; 812 ps.in_stmt_cont = false; 813 814 if (ps.decl_level > 0) { /* multi-level structure declaration */ 815 ps.decl_ind = ps.di_stack[--ps.decl_level]; 816 if (ps.decl_level == 0 && !ps.in_func_def_params) { 817 ps.declaration = decl_begin; 818 ps.decl_ind = ps.ind_level == 0 819 ? opt.decl_indent : opt.local_decl_indent; 820 } 821 ps.in_decl = true; 822 } 823 824 if (ps.tos == 2) 825 out.line_kind = lk_func_end; 826 827 parse(psym_rbrace); 828 } 829 830 static void 831 process_do(void) 832 { 833 ps.in_stmt_or_decl = false; 834 835 if (code.len > 0) { /* make sure this starts a line */ 836 if (opt.verbose) 837 diag(0, "Line broken"); 838 output_line(); 839 } 840 841 ps.force_nl = true; 842 parse(psym_do); 843 } 844 845 static void 846 process_else(void) 847 { 848 ps.in_stmt_or_decl = false; 849 850 if (code.len > 0 851 && !(opt.cuddle_else && code.mem[code.len - 1] == '}')) { 852 if (opt.verbose) 853 diag(0, "Line broken"); 854 output_line(); 855 } 856 857 ps.force_nl = true; 858 parse(psym_else); 859 } 860 861 static void 862 process_type(void) 863 { 864 parse(psym_decl); /* let the parser worry about indentation */ 865 866 if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) { 867 if (code.len > 0) 868 output_line(); 869 } 870 871 if (ps.in_func_def_params && opt.indent_parameters && 872 ps.decl_level == 0) { 873 ps.ind_level = ps.ind_level_follow = 1; 874 ps.in_stmt_cont = false; 875 } 876 877 ps.init_or_struct = /* maybe */ true; 878 ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef; 879 if (ps.decl_level <= 0) 880 ps.declaration = decl_begin; 881 882 int len = (int)token.len + 1; 883 int ind = ps.ind_level == 0 || ps.decl_level > 0 884 ? opt.decl_indent /* global variable or local member */ 885 : opt.local_decl_indent; /* local variable */ 886 ps.decl_ind = ind > 0 ? ind : len; 887 ps.tabs_to_var = opt.use_tabs && ind > 0; 888 } 889 890 static void 891 process_ident(lexer_symbol lsym) 892 { 893 if (ps.in_decl) { 894 if (lsym == lsym_funcname) { 895 ps.in_decl = false; 896 if (opt.procnames_start_line && code.len > 0) 897 output_line(); 898 else if (ps.want_blank) 899 buf_add_char(&code, ' '); 900 ps.want_blank = false; 901 902 } else if (!ps.block_init && !ps.decl_indent_done && 903 ps.line_start_nparen == 0) { 904 if (opt.decl_indent == 0 905 && code.len > 0 && code.mem[code.len - 1] == '}') 906 ps.decl_ind = 907 ind_add(0, code.st, code.len) + 1; 908 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var); 909 ps.decl_indent_done = true; 910 ps.want_blank = false; 911 } 912 913 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { 914 ps.force_nl = true; 915 ps.next_unary = true; 916 ps.in_stmt_or_decl = false; 917 parse(ps.spaced_expr_psym); 918 ps.spaced_expr_psym = psym_0; 919 } 920 } 921 922 static void 923 process_period(void) 924 { 925 if (code.len > 0 && code.mem[code.len - 1] == ',') 926 buf_add_char(&code, ' '); 927 buf_add_char(&code, '.'); 928 ps.want_blank = false; 929 } 930 931 static void 932 process_comma(void) 933 { 934 ps.want_blank = code.len > 0; /* only put blank after comma if comma 935 * does not start the line */ 936 937 if (ps.in_decl && !ps.is_function_definition && !ps.block_init && 938 !ps.decl_indent_done && ps.line_start_nparen == 0) { 939 /* indent leading commas and not the actual identifiers */ 940 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var); 941 ps.decl_indent_done = true; 942 } 943 944 buf_add_char(&code, ','); 945 946 if (ps.nparen == 0) { 947 if (ps.block_init_level <= 0) 948 ps.block_init = false; 949 int typical_varname_length = 8; 950 if (break_comma && (opt.break_after_comma || 951 ind_add(compute_code_indent(), code.st, code.len) 952 >= opt.max_line_length - typical_varname_length)) 953 ps.force_nl = true; 954 } 955 } 956 957 /* move the whole line to the 'label' buffer */ 958 static void 959 read_preprocessing_line(void) 960 { 961 enum { 962 PLAIN, STR, CHR, COMM 963 } state = PLAIN; 964 965 buf_add_char(&lab, '#'); 966 967 while (ch_isblank(inp.st[0])) 968 buf_add_char(&lab, *inp.st++); 969 970 while (inp.st[0] != '\n' || (state == COMM && !had_eof)) { 971 buf_add_char(&lab, inp_next()); 972 switch (lab.mem[lab.len - 1]) { 973 case '\\': 974 if (state != COMM) 975 buf_add_char(&lab, inp_next()); 976 break; 977 case '/': 978 if (inp.st[0] == '*' && state == PLAIN) { 979 state = COMM; 980 buf_add_char(&lab, *inp.st++); 981 } 982 break; 983 case '"': 984 if (state == STR) 985 state = PLAIN; 986 else if (state == PLAIN) 987 state = STR; 988 break; 989 case '\'': 990 if (state == CHR) 991 state = PLAIN; 992 else if (state == PLAIN) 993 state = CHR; 994 break; 995 case '*': 996 if (inp.st[0] == '/' && state == COMM) { 997 state = PLAIN; 998 buf_add_char(&lab, *inp.st++); 999 } 1000 break; 1001 } 1002 } 1003 1004 while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1])) 1005 lab.len--; 1006 } 1007 1008 static void 1009 process_preprocessing(void) 1010 { 1011 if (lab.len > 0 || code.len > 0 || com.len > 0) 1012 output_line(); 1013 1014 read_preprocessing_line(); 1015 1016 ps.is_case_label = false; 1017 1018 const char *end = lab.mem + lab.len; 1019 const char *dir = lab.st + 1; 1020 while (dir < end && ch_isblank(*dir)) 1021 dir++; 1022 size_t dir_len = 0; 1023 while (dir + dir_len < end && ch_isalpha(dir[dir_len])) 1024 dir_len++; 1025 1026 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) { 1027 if ((size_t)ifdef_level < array_length(state_stack)) 1028 state_stack[ifdef_level++] = ps; 1029 else 1030 diag(1, "#if stack overflow"); 1031 out.line_kind = lk_if; 1032 1033 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) { 1034 if (ifdef_level <= 0) 1035 diag(1, dir[2] == 'i' 1036 ? "Unmatched #elif" : "Unmatched #else"); 1037 else 1038 ps = state_stack[ifdef_level - 1]; 1039 1040 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) { 1041 if (ifdef_level <= 0) 1042 diag(1, "Unmatched #endif"); 1043 else 1044 ifdef_level--; 1045 out.line_kind = lk_endif; 1046 } 1047 1048 /* subsequent processing of the newline character will cause the line 1049 * to be printed */ 1050 } 1051 1052 static void 1053 process_lsym(lexer_symbol lsym) 1054 { 1055 switch (lsym) { 1056 1057 case lsym_newline: 1058 process_newline(); 1059 break; 1060 1061 case lsym_lparen_or_lbracket: 1062 process_lparen_or_lbracket(); 1063 break; 1064 1065 case lsym_rparen_or_rbracket: 1066 process_rparen_or_rbracket(); 1067 break; 1068 1069 case lsym_unary_op: 1070 process_unary_op(); 1071 break; 1072 1073 case lsym_binary_op: 1074 process_binary_op(); 1075 break; 1076 1077 case lsym_postfix_op: 1078 process_postfix_op(); 1079 break; 1080 1081 case lsym_question: 1082 process_question(); 1083 break; 1084 1085 case lsym_case_label: 1086 ps.seen_case = true; 1087 goto copy_token; 1088 1089 case lsym_colon: 1090 process_colon(); 1091 break; 1092 1093 case lsym_semicolon: 1094 process_semicolon(); 1095 break; 1096 1097 case lsym_lbrace: 1098 process_lbrace(); 1099 break; 1100 1101 case lsym_rbrace: 1102 process_rbrace(); 1103 break; 1104 1105 case lsym_switch: 1106 ps.spaced_expr_psym = psym_switch_expr; 1107 goto copy_token; 1108 1109 case lsym_for: 1110 ps.spaced_expr_psym = psym_for_exprs; 1111 goto copy_token; 1112 1113 case lsym_if: 1114 ps.spaced_expr_psym = psym_if_expr; 1115 goto copy_token; 1116 1117 case lsym_while: 1118 ps.spaced_expr_psym = psym_while_expr; 1119 goto copy_token; 1120 1121 case lsym_do: 1122 process_do(); 1123 goto copy_token; 1124 1125 case lsym_else: 1126 process_else(); 1127 goto copy_token; 1128 1129 case lsym_typedef: 1130 case lsym_storage_class: 1131 goto copy_token; 1132 1133 case lsym_tag: 1134 if (ps.nparen > 0) 1135 goto copy_token; 1136 /* FALLTHROUGH */ 1137 case lsym_type_outside_parentheses: 1138 process_type(); 1139 goto copy_token; 1140 1141 case lsym_type_in_parentheses: 1142 case lsym_offsetof: 1143 case lsym_sizeof: 1144 case lsym_word: 1145 case lsym_funcname: 1146 case lsym_return: 1147 process_ident(lsym); 1148 copy_token: 1149 if (ps.want_blank) 1150 buf_add_char(&code, ' '); 1151 buf_add_buf(&code, &token); 1152 if (lsym != lsym_funcname) 1153 ps.want_blank = true; 1154 break; 1155 1156 case lsym_period: 1157 process_period(); 1158 break; 1159 1160 case lsym_comma: 1161 process_comma(); 1162 break; 1163 1164 case lsym_preprocessing: 1165 process_preprocessing(); 1166 break; 1167 1168 case lsym_comment: 1169 process_comment(); 1170 break; 1171 1172 default: 1173 break; 1174 } 1175 } 1176 1177 static int 1178 indent(void) 1179 { 1180 debug_parser_state(); 1181 1182 for (;;) { /* loop until we reach eof */ 1183 lexer_symbol lsym = lexi(); 1184 1185 debug_blank_line(); 1186 debug_printf("line %d: %s", line_no, lsym_name[lsym]); 1187 debug_buffers(); 1188 debug_blank_line(); 1189 1190 if (lsym == lsym_eof) 1191 return process_eof(); 1192 1193 if (lsym == lsym_if && ps.prev_token == lsym_else 1194 && opt.else_if) 1195 ps.force_nl = false; 1196 1197 if (lsym == lsym_newline || lsym == lsym_preprocessing) 1198 ps.force_nl = false; 1199 else if (lsym == lsym_comment) { 1200 /* no special processing */ 1201 } else { 1202 maybe_break_line(lsym); 1203 /* 1204 * Add an extra level of indentation; turned off again 1205 * by a ';' or '}'. 1206 */ 1207 ps.in_stmt_or_decl = true; 1208 if (com.len > 0) 1209 move_com_to_code(lsym); 1210 update_ps_decl_ptr(lsym); 1211 update_ps_in_enum(lsym); 1212 } 1213 1214 process_lsym(lsym); 1215 1216 debug_parser_state(); 1217 1218 if (lsym != lsym_comment && lsym != lsym_newline && 1219 lsym != lsym_preprocessing) 1220 ps.prev_token = lsym; 1221 } 1222 } 1223 1224 int 1225 main(int argc, char **argv) 1226 { 1227 init_globals(); 1228 load_profiles(argc, argv); 1229 parse_command_line(argc, argv); 1230 set_initial_indentation(); 1231 return indent(); 1232 } 1233