1 /* $NetBSD: indent.c,v 1.245 2022/05/09 21:41:49 rillig Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 8 * Copyright (c) 1980, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #if 0 41 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 42 #endif 43 44 #include <sys/cdefs.h> 45 #if defined(__NetBSD__) 46 __RCSID("$NetBSD: indent.c,v 1.245 2022/05/09 21:41:49 rillig Exp $"); 47 #elif defined(__FreeBSD__) 48 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $"); 49 #endif 50 51 #include <sys/param.h> 52 #include <assert.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <fcntl.h> 56 #include <stdarg.h> 57 #include <stdio.h> 58 #include <stdlib.h> 59 #include <string.h> 60 #include <unistd.h> 61 62 #include "indent.h" 63 64 struct options opt = { 65 .brace_same_line = true, 66 .comment_delimiter_on_blankline = true, 67 .cuddle_else = true, 68 .comment_column = 33, 69 .decl_indent = 16, 70 .else_if = true, 71 .function_brace_split = true, 72 .format_col1_comments = true, 73 .format_block_comments = true, 74 .indent_parameters = true, 75 .indent_size = 8, 76 .local_decl_indent = -1, 77 .lineup_to_parens = true, 78 .procnames_start_line = true, 79 .star_comment_cont = true, 80 .tabsize = 8, 81 .max_line_length = 78, 82 .use_tabs = true, 83 }; 84 85 struct parser_state ps; 86 87 struct buffer token; 88 89 struct buffer lab; 90 struct buffer code; 91 struct buffer com; 92 93 bool found_err; 94 bool break_comma; 95 float case_ind; 96 bool had_eof; 97 int line_no = 1; 98 bool inhibit_formatting; 99 100 static int ifdef_level; 101 static struct parser_state state_stack[5]; 102 103 FILE *input; 104 FILE *output; 105 struct output_control out; 106 107 static const char *in_name = "Standard Input"; 108 static const char *out_name = "Standard Output"; 109 static const char *backup_suffix = ".BAK"; 110 static char bakfile[MAXPATHLEN] = ""; 111 112 113 static void 114 buf_init(struct buffer *buf) 115 { 116 size_t size = 200; 117 buf->buf = xmalloc(size); 118 buf->l = buf->buf + size - 5 /* safety margin */; 119 buf->s = buf->buf + 1; /* allow accessing buf->e[-1] */ 120 buf->e = buf->s; 121 buf->buf[0] = ' '; 122 buf->buf[1] = '\0'; 123 } 124 125 static size_t 126 buf_len(const struct buffer *buf) 127 { 128 return (size_t)(buf->e - buf->s); 129 } 130 131 void 132 buf_expand(struct buffer *buf, size_t add_size) 133 { 134 size_t new_size = (size_t)(buf->l - buf->s) + 400 + add_size; 135 size_t len = buf_len(buf); 136 buf->buf = xrealloc(buf->buf, new_size); 137 buf->l = buf->buf + new_size - 5; 138 buf->s = buf->buf + 1; 139 buf->e = buf->s + len; 140 /* At this point, the buffer may not be null-terminated anymore. */ 141 } 142 143 static void 144 buf_reserve(struct buffer *buf, size_t n) 145 { 146 if (n >= (size_t)(buf->l - buf->e)) 147 buf_expand(buf, n); 148 } 149 150 void 151 buf_add_char(struct buffer *buf, char ch) 152 { 153 buf_reserve(buf, 1); 154 *buf->e++ = ch; 155 } 156 157 void 158 buf_add_range(struct buffer *buf, const char *s, const char *e) 159 { 160 size_t len = (size_t)(e - s); 161 buf_reserve(buf, len); 162 memcpy(buf->e, s, len); 163 buf->e += len; 164 } 165 166 static void 167 buf_add_buf(struct buffer *buf, const struct buffer *add) 168 { 169 buf_add_range(buf, add->s, add->e); 170 } 171 172 static void 173 buf_terminate(struct buffer *buf) 174 { 175 buf_reserve(buf, 1); 176 *buf->e = '\0'; 177 } 178 179 static void 180 buf_reset(struct buffer *buf) 181 { 182 buf->e = buf->s; 183 } 184 185 void 186 diag(int level, const char *msg, ...) 187 { 188 va_list ap; 189 190 if (level != 0) 191 found_err = true; 192 193 va_start(ap, msg); 194 fprintf(stderr, "%s: %s:%d: ", 195 level == 0 ? "warning" : "error", in_name, line_no); 196 vfprintf(stderr, msg, ap); 197 fprintf(stderr, "\n"); 198 va_end(ap); 199 } 200 201 /* 202 * Compute the indentation from starting at 'ind' and adding the text from 203 * 'start' to 'end'. 204 */ 205 int 206 ind_add(int ind, const char *start, const char *end) 207 { 208 for (const char *p = start; p != end; ++p) { 209 if (*p == '\n' || *p == '\f') 210 ind = 0; 211 else if (*p == '\t') 212 ind = next_tab(ind); 213 else if (*p == '\b') 214 --ind; 215 else 216 ++ind; 217 } 218 return ind; 219 } 220 221 static void 222 search_stmt_newline(bool *force_nl) 223 { 224 inp_comment_init_newline(); 225 inp_comment_add_char('\n'); 226 debug_inp(__func__); 227 228 line_no++; 229 230 /* 231 * We may have inherited a force_nl == true from the previous token (like 232 * a semicolon). But once we know that a newline has been scanned in this 233 * loop, force_nl should be false. 234 * 235 * However, the force_nl == true must be preserved if newline is never 236 * scanned in this loop, so this assignment cannot be done earlier. 237 */ 238 *force_nl = false; 239 } 240 241 static void 242 search_stmt_comment(void) 243 { 244 inp_comment_init_comment(); 245 inp_comment_add_range(token.s, token.e); 246 if (token.e[-1] == '/') { 247 while (inp_peek() != '\n') 248 inp_comment_add_char(inp_next()); 249 debug_inp("search_stmt_comment: end of C99 comment"); 250 } else { 251 while (!inp_comment_complete_block()) 252 inp_comment_add_char(inp_next()); 253 debug_inp("search_stmt_comment: end of block comment"); 254 } 255 } 256 257 static bool 258 search_stmt_lbrace(void) 259 { 260 /* 261 * Put KNF-style lbraces before the buffered up tokens and jump out of 262 * this loop in order to avoid copying the token again. 263 */ 264 if (inp_comment_seen() && opt.brace_same_line) { 265 inp_comment_insert_lbrace(); 266 /* 267 * Originally the lbrace may have been alone on its own line, but it 268 * will be moved into "the else's line", so if there was a newline 269 * resulting from the "{" before, it must be scanned now and ignored. 270 */ 271 while (ch_isspace(inp_peek())) { 272 inp_skip(); 273 if (inp_peek() == '\n') 274 break; 275 } 276 debug_inp(__func__); 277 return true; 278 } 279 return false; 280 } 281 282 static bool 283 search_stmt_other(lexer_symbol lsym, bool *force_nl, 284 bool comment_buffered, bool last_else) 285 { 286 bool remove_newlines; 287 288 remove_newlines = 289 /* "} else" */ 290 (lsym == lsym_else && code.e != code.s && code.e[-1] == '}') 291 /* "else if" */ 292 || (lsym == lsym_if && last_else && opt.else_if); 293 if (remove_newlines) 294 *force_nl = false; 295 296 if (!inp_comment_seen()) { 297 ps.search_stmt = false; 298 return false; 299 } 300 301 debug_inp(__func__); 302 inp_comment_rtrim_blank(); 303 304 if (opt.swallow_optional_blanklines || 305 (!comment_buffered && remove_newlines)) { 306 *force_nl = !remove_newlines; 307 inp_comment_rtrim_newline(); 308 } 309 310 if (*force_nl) { /* if we should insert a newline here, put it 311 * into the buffer */ 312 *force_nl = false; 313 --line_no; /* this will be re-increased when the newline 314 * is read from the buffer */ 315 inp_comment_add_char('\n'); 316 inp_comment_add_char(' '); 317 if (opt.verbose) /* warn if the line was not already broken */ 318 diag(0, "Line broken"); 319 } 320 321 inp_comment_add_range(token.s, token.e); 322 323 debug_inp("search_stmt_other end"); 324 return true; 325 } 326 327 static void 328 search_stmt_lookahead(lexer_symbol *lsym) 329 { 330 if (*lsym == lsym_eof) 331 return; 332 333 /* 334 * The only intended purpose of calling lexi() below is to categorize the 335 * next token in order to decide whether to continue buffering forthcoming 336 * tokens. Once the buffering is over, lexi() will be called again 337 * elsewhere on all of the tokens - this time for normal processing. 338 * 339 * Calling it for this purpose is a bug, because lexi() also changes the 340 * parser state and discards leading whitespace, which is needed mostly 341 * for comment-related considerations. 342 * 343 * Work around the former problem by giving lexi() a copy of the current 344 * parser state and discard it if the call turned out to be just a 345 * lookahead. 346 * 347 * Work around the latter problem by copying all whitespace characters 348 * into the buffer so that the later lexi() call will read them. 349 */ 350 if (inp_comment_seen()) { 351 while (ch_isblank(inp_peek())) 352 inp_comment_add_char(inp_next()); 353 debug_inp(__func__); 354 } 355 356 struct parser_state backup_ps = ps; 357 debug_println("backed up parser state"); 358 *lsym = lexi(); 359 if (*lsym == lsym_newline || *lsym == lsym_form_feed || 360 *lsym == lsym_comment || ps.search_stmt) { 361 ps = backup_ps; 362 debug_println("restored parser state"); 363 } 364 } 365 366 /* 367 * Move newlines and comments following an 'if (expr)', 'while (expr)', 368 * 'else', etc. up to the start of the following statement to a buffer. This 369 * allows proper handling of both kinds of brace placement (-br, -bl) and 370 * "cuddling else" (-ce). 371 */ 372 static void 373 search_stmt(lexer_symbol *lsym, bool *force_nl, bool *last_else) 374 { 375 bool comment_buffered = false; 376 377 while (ps.search_stmt) { 378 switch (*lsym) { 379 case lsym_newline: 380 search_stmt_newline(force_nl); 381 break; 382 case lsym_form_feed: 383 /* XXX: Is simply removed from the source code. */ 384 break; 385 case lsym_comment: 386 search_stmt_comment(); 387 comment_buffered = true; 388 break; 389 case lsym_lbrace: 390 if (search_stmt_lbrace()) 391 goto switch_buffer; 392 /* FALLTHROUGH */ 393 default: 394 if (!search_stmt_other(*lsym, force_nl, comment_buffered, 395 *last_else)) 396 return; 397 switch_buffer: 398 ps.search_stmt = false; 399 inp_comment_add_char(' '); /* add trailing blank, just in case */ 400 inp_from_comment(); 401 } 402 search_stmt_lookahead(lsym); 403 } 404 405 *last_else = false; 406 } 407 408 static void 409 main_init_globals(void) 410 { 411 inp_init(); 412 413 buf_init(&token); 414 415 buf_init(&lab); 416 buf_init(&code); 417 buf_init(&com); 418 419 ps.s_sym[0] = psym_stmt_list; 420 ps.prev_token = lsym_semicolon; 421 ps.next_col_1 = true; 422 423 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX"); 424 if (suffix != NULL) 425 backup_suffix = suffix; 426 } 427 428 /* 429 * Copy the input file to the backup file, then make the backup file the input 430 * and the original input file the output. 431 */ 432 static void 433 bakcopy(void) 434 { 435 ssize_t n; 436 int bak_fd; 437 char buff[8 * 1024]; 438 439 const char *last_slash = strrchr(in_name, '/'); 440 snprintf(bakfile, sizeof(bakfile), "%s%s", 441 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix); 442 443 /* copy in_name to backup file */ 444 bak_fd = creat(bakfile, 0600); 445 if (bak_fd < 0) 446 err(1, "%s", bakfile); 447 448 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 449 if (write(bak_fd, buff, (size_t)n) != n) 450 err(1, "%s", bakfile); 451 if (n < 0) 452 err(1, "%s", in_name); 453 454 close(bak_fd); 455 (void)fclose(input); 456 457 /* re-open backup file as the input file */ 458 input = fopen(bakfile, "r"); 459 if (input == NULL) 460 err(1, "%s", bakfile); 461 /* now the original input file will be the output */ 462 output = fopen(in_name, "w"); 463 if (output == NULL) { 464 unlink(bakfile); 465 err(1, "%s", in_name); 466 } 467 } 468 469 static void 470 main_load_profiles(int argc, char **argv) 471 { 472 const char *profile_name = NULL; 473 474 for (int i = 1; i < argc; ++i) { 475 const char *arg = argv[i]; 476 477 if (strcmp(arg, "-npro") == 0) 478 return; 479 if (arg[0] == '-' && arg[1] == 'P' && arg[2] != '\0') 480 profile_name = arg + 2; 481 } 482 load_profiles(profile_name); 483 } 484 485 static void 486 main_parse_command_line(int argc, char **argv) 487 { 488 for (int i = 1; i < argc; ++i) { 489 const char *arg = argv[i]; 490 491 if (arg[0] == '-') { 492 set_option(arg, "Command line"); 493 494 } else if (input == NULL) { 495 in_name = arg; 496 if ((input = fopen(in_name, "r")) == NULL) 497 err(1, "%s", in_name); 498 499 } else if (output == NULL) { 500 out_name = arg; 501 if (strcmp(in_name, out_name) == 0) 502 errx(1, "input and output files must be different"); 503 if ((output = fopen(out_name, "w")) == NULL) 504 err(1, "%s", out_name); 505 506 } else 507 errx(1, "too many arguments: %s", arg); 508 } 509 510 if (input == NULL) { 511 input = stdin; 512 output = stdout; 513 } else if (output == NULL) { 514 out_name = in_name; 515 bakcopy(); 516 } 517 518 if (opt.comment_column <= 1) 519 opt.comment_column = 2; /* don't put normal comments before column 2 */ 520 if (opt.block_comment_max_line_length <= 0) 521 opt.block_comment_max_line_length = opt.max_line_length; 522 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 523 opt.local_decl_indent = opt.decl_indent; 524 if (opt.decl_comment_column <= 0) /* if not specified by user, set this */ 525 opt.decl_comment_column = opt.ljust_decl 526 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8) 527 : opt.comment_column; 528 if (opt.continuation_indent == 0) 529 opt.continuation_indent = opt.indent_size; 530 } 531 532 static void 533 main_prepare_parsing(void) 534 { 535 inp_read_line(); 536 537 int ind = 0; 538 for (const char *p = inp_p();; p++) { 539 if (*p == ' ') 540 ind++; 541 else if (*p == '\t') 542 ind = next_tab(ind); 543 else 544 break; 545 } 546 547 if (ind >= opt.indent_size) 548 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size; 549 } 550 551 static void 552 code_add_decl_indent(int decl_ind, bool tabs_to_var) 553 { 554 int base_ind = ps.ind_level * opt.indent_size; 555 int ind = base_ind + (int)buf_len(&code); 556 int target_ind = base_ind + decl_ind; 557 char *orig_code_e = code.e; 558 559 if (tabs_to_var) 560 for (int next; (next = next_tab(ind)) <= target_ind; ind = next) 561 buf_add_char(&code, '\t'); 562 563 for (; ind < target_ind; ind++) 564 buf_add_char(&code, ' '); 565 566 if (code.e == orig_code_e && ps.want_blank) { 567 buf_add_char(&code, ' '); 568 ps.want_blank = false; 569 } 570 } 571 572 static void __attribute__((__noreturn__)) 573 process_eof(void) 574 { 575 if (lab.s != lab.e || code.s != code.e || com.s != com.e) 576 output_line(); 577 578 if (ps.tos > 1) /* check for balanced braces */ 579 diag(1, "Stuff missing from end of file"); 580 581 if (opt.verbose) { 582 printf("There were %d output lines and %d comments\n", 583 ps.stats.lines, ps.stats.comments); 584 printf("(Lines with comments)/(Lines with code): %6.3f\n", 585 (1.0 * ps.stats.comment_lines) / ps.stats.code_lines); 586 } 587 588 fflush(output); 589 exit(found_err ? EXIT_FAILURE : EXIT_SUCCESS); 590 } 591 592 static void 593 maybe_break_line(lexer_symbol lsym, bool *force_nl) 594 { 595 if (!*force_nl) 596 return; 597 if (lsym == lsym_semicolon) 598 return; 599 if (lsym == lsym_lbrace && opt.brace_same_line) 600 return; 601 602 if (opt.verbose) 603 diag(0, "Line broken"); 604 output_line(); 605 ps.want_blank = false; 606 *force_nl = false; 607 } 608 609 static void 610 move_com_to_code(void) 611 { 612 buf_add_char(&code, ' '); 613 buf_add_buf(&code, &com); 614 buf_add_char(&code, ' '); 615 buf_terminate(&code); 616 buf_reset(&com); 617 ps.want_blank = false; 618 } 619 620 static void 621 process_form_feed(void) 622 { 623 output_line_ff(); 624 ps.want_blank = false; 625 } 626 627 static void 628 process_newline(void) 629 { 630 if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init && 631 !opt.break_after_comma && break_comma && 632 com.s == com.e) 633 goto stay_in_line; 634 635 output_line(); 636 ps.want_blank = false; 637 638 stay_in_line: 639 ++line_no; 640 } 641 642 static bool 643 want_blank_before_lparen(void) 644 { 645 if (!ps.want_blank) 646 return false; 647 if (opt.proc_calls_space) 648 return true; 649 if (ps.prev_token == lsym_rparen_or_rbracket) 650 return false; 651 if (ps.prev_token == lsym_offsetof) 652 return false; 653 if (ps.prev_token == lsym_sizeof) 654 return opt.blank_after_sizeof; 655 if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname) 656 return false; 657 return true; 658 } 659 660 static void 661 process_lparen_or_lbracket(int decl_ind, bool tabs_to_var, bool spaced_expr) 662 { 663 if (++ps.nparen == array_length(ps.paren)) { 664 diag(0, "Reached internal limit of %zu unclosed parentheses", 665 array_length(ps.paren)); 666 ps.nparen--; 667 } 668 669 if (token.s[0] == '(' && ps.in_decl 670 && !ps.block_init && !ps.decl_indent_done && 671 !ps.is_function_definition && ps.line_start_nparen == 0) { 672 /* function pointer declarations */ 673 code_add_decl_indent(decl_ind, tabs_to_var); 674 ps.decl_indent_done = true; 675 } else if (want_blank_before_lparen()) 676 *code.e++ = ' '; 677 ps.want_blank = false; 678 *code.e++ = token.s[0]; 679 680 ps.paren[ps.nparen - 1].indent = (short)ind_add(0, code.s, code.e); 681 debug_println("paren_indents[%d] is now %d", 682 ps.nparen - 1, ps.paren[ps.nparen - 1].indent); 683 684 if (spaced_expr && ps.nparen == 1 && opt.extra_expr_indent 685 && ps.paren[0].indent < 2 * opt.indent_size) { 686 ps.paren[0].indent = (short)(2 * opt.indent_size); 687 debug_println("paren_indents[0] is now %d", ps.paren[0].indent); 688 } 689 690 if (ps.init_or_struct && *token.s == '(' && ps.tos <= 2) { 691 /* 692 * this is a kluge to make sure that declarations will be aligned 693 * right if proc decl has an explicit type on it, i.e. "int a(x) {..." 694 */ 695 parse(psym_semicolon); /* I said this was a kluge... */ 696 ps.init_or_struct = false; 697 } 698 699 /* parenthesized type following sizeof or offsetof is not a cast */ 700 if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof) 701 ps.paren[ps.nparen - 1].no_cast = true; 702 } 703 704 static void 705 process_rparen_or_rbracket(bool *spaced_expr, bool *force_nl, stmt_head hd) 706 { 707 if (ps.paren[ps.nparen - 1].maybe_cast && 708 !ps.paren[ps.nparen - 1].no_cast) { 709 ps.next_unary = true; 710 ps.paren[ps.nparen - 1].maybe_cast = false; 711 ps.want_blank = opt.space_after_cast; 712 } else 713 ps.want_blank = true; 714 ps.paren[ps.nparen - 1].no_cast = false; 715 716 if (ps.nparen > 0) 717 ps.nparen--; 718 else 719 diag(0, "Extra '%c'", *token.s); 720 721 if (code.e == code.s) /* if the paren starts the line */ 722 ps.line_start_nparen = ps.nparen; /* then indent it */ 723 724 *code.e++ = token.s[0]; 725 726 if (*spaced_expr && ps.nparen == 0) { /* check for end of 'if 727 * (...)', or some such */ 728 *spaced_expr = false; 729 *force_nl = true; /* must force newline after if */ 730 ps.next_unary = true; 731 ps.in_stmt_or_decl = false; /* don't use stmt continuation 732 * indentation */ 733 734 parse_stmt_head(hd); 735 } 736 737 /* 738 * This should ensure that constructs such as main(){...} and int[]{...} 739 * have their braces put in the right place. 740 */ 741 ps.search_stmt = opt.brace_same_line; 742 } 743 744 static bool 745 want_blank_before_unary_op(void) 746 { 747 if (ps.want_blank) 748 return true; 749 if (token.s[0] == '+' || token.s[0] == '-') 750 return code.e > code.s && code.e[-1] == token.s[0]; 751 return false; 752 } 753 754 static void 755 process_unary_op(int decl_ind, bool tabs_to_var) 756 { 757 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init && 758 !ps.is_function_definition && ps.line_start_nparen == 0) { 759 /* pointer declarations */ 760 code_add_decl_indent(decl_ind - (int)buf_len(&token), tabs_to_var); 761 ps.decl_indent_done = true; 762 } else if (want_blank_before_unary_op()) 763 *code.e++ = ' '; 764 765 buf_add_buf(&code, &token); 766 ps.want_blank = false; 767 } 768 769 static void 770 process_binary_op(void) 771 { 772 if (buf_len(&code) > 0) 773 buf_add_char(&code, ' '); 774 buf_add_buf(&code, &token); 775 ps.want_blank = true; 776 } 777 778 static void 779 process_postfix_op(void) 780 { 781 *code.e++ = token.s[0]; 782 *code.e++ = token.s[1]; 783 ps.want_blank = true; 784 } 785 786 static void 787 process_question(int *quest_level) 788 { 789 (*quest_level)++; 790 if (ps.want_blank) 791 *code.e++ = ' '; 792 *code.e++ = '?'; 793 ps.want_blank = true; 794 } 795 796 static void 797 process_colon(int *quest_level, bool *force_nl, bool *seen_case) 798 { 799 if (*quest_level > 0) { /* part of a '?:' operator */ 800 --*quest_level; 801 if (ps.want_blank) 802 *code.e++ = ' '; 803 *code.e++ = ':'; 804 ps.want_blank = true; 805 return; 806 } 807 808 if (ps.init_or_struct) { /* bit-field */ 809 *code.e++ = ':'; 810 ps.want_blank = false; 811 return; 812 } 813 814 buf_add_buf(&lab, &code); /* 'case' or 'default' or named label */ 815 buf_add_char(&lab, ':'); 816 buf_terminate(&lab); 817 buf_reset(&code); 818 819 ps.in_stmt_or_decl = false; 820 ps.is_case_label = *seen_case; 821 *force_nl = *seen_case; 822 *seen_case = false; 823 ps.want_blank = false; 824 } 825 826 static void 827 process_semicolon(bool *seen_case, int *quest_level, int decl_ind, 828 bool tabs_to_var, bool *spaced_expr, stmt_head hd, bool *force_nl) 829 { 830 if (ps.decl_level == 0) 831 ps.init_or_struct = false; 832 *seen_case = false; /* these will only need resetting in an error */ 833 *quest_level = 0; 834 if (ps.prev_token == lsym_rparen_or_rbracket) 835 ps.in_func_def_params = false; 836 ps.block_init = false; 837 ps.block_init_level = 0; 838 ps.just_saw_decl--; 839 840 if (ps.in_decl && code.s == code.e && !ps.block_init && 841 !ps.decl_indent_done && ps.line_start_nparen == 0) { 842 /* indent stray semicolons in declarations */ 843 code_add_decl_indent(decl_ind - 1, tabs_to_var); 844 ps.decl_indent_done = true; 845 } 846 847 ps.in_decl = ps.decl_level > 0; /* if we were in a first level 848 * structure declaration before, we 849 * aren't anymore */ 850 851 if ((!*spaced_expr || hd != hd_for) && ps.nparen > 0) { 852 853 /* 854 * There were unbalanced parentheses in the statement. It is a bit 855 * complicated, because the semicolon might be in a for statement. 856 */ 857 diag(1, "Unbalanced parentheses"); 858 ps.nparen = 0; 859 if (*spaced_expr) { /* 'if', 'while', etc. */ 860 *spaced_expr = false; 861 parse_stmt_head(hd); 862 } 863 } 864 *code.e++ = ';'; 865 ps.want_blank = true; 866 ps.in_stmt_or_decl = ps.nparen > 0; 867 868 if (!*spaced_expr) { /* if not if for (;;) */ 869 parse(psym_semicolon); /* let parser know about end of stmt */ 870 *force_nl = true; /* force newline after an end of stmt */ 871 } 872 } 873 874 static void 875 process_lbrace(bool *force_nl, bool *spaced_expr, stmt_head hd, 876 int *di_stack, int di_stack_cap, int *decl_ind) 877 { 878 ps.in_stmt_or_decl = false; /* don't indent the {} */ 879 880 if (!ps.block_init) 881 *force_nl = true; /* force other stuff on same line as '{' onto 882 * new line */ 883 else if (ps.block_init_level <= 0) 884 ps.block_init_level = 1; 885 else 886 ps.block_init_level++; 887 888 if (code.s != code.e && !ps.block_init) { 889 if (!opt.brace_same_line) { 890 output_line(); 891 ps.want_blank = false; 892 } else if (ps.in_func_def_params && !ps.init_or_struct) { 893 ps.ind_level_follow = 0; 894 if (opt.function_brace_split) { /* dump the line prior to the 895 * brace ... */ 896 output_line(); 897 ps.want_blank = false; 898 } else /* add a space between the decl and brace */ 899 ps.want_blank = true; 900 } 901 } 902 903 if (ps.in_func_def_params) 904 out.blank_line_before = false; 905 906 if (ps.nparen > 0) { 907 diag(1, "Unbalanced parentheses"); 908 ps.nparen = 0; 909 if (*spaced_expr) { /* check for unclosed 'if', 'for', etc. */ 910 *spaced_expr = false; 911 parse_stmt_head(hd); 912 ps.ind_level = ps.ind_level_follow; 913 } 914 } 915 916 if (code.s == code.e) 917 ps.in_stmt_cont = false; /* don't indent the '{' itself */ 918 if (ps.in_decl && ps.init_or_struct) { 919 di_stack[ps.decl_level] = *decl_ind; 920 if (++ps.decl_level == di_stack_cap) { 921 diag(0, "Reached internal limit of %d struct levels", 922 di_stack_cap); 923 ps.decl_level--; 924 } 925 } else { 926 ps.decl_on_line = false; /* we can't be in the middle of a 927 * declaration, so don't do special 928 * indentation of comments */ 929 if (opt.blanklines_after_decl_at_top && ps.in_func_def_params) 930 out.blank_line_after = true; 931 ps.in_func_def_params = false; 932 ps.in_decl = false; 933 } 934 935 *decl_ind = 0; 936 parse(psym_lbrace); 937 if (ps.want_blank) 938 *code.e++ = ' '; 939 ps.want_blank = false; 940 *code.e++ = '{'; 941 ps.just_saw_decl = 0; 942 } 943 944 static void 945 process_rbrace(bool *spaced_expr, int *decl_ind, const int *di_stack) 946 { 947 if (ps.s_sym[ps.tos] == psym_decl && !ps.block_init) { 948 /* semicolons can be omitted in declarations */ 949 parse(psym_semicolon); 950 } 951 952 if (ps.nparen > 0) { /* check for unclosed if, for, else. */ 953 diag(1, "Unbalanced parentheses"); 954 ps.nparen = 0; 955 *spaced_expr = false; 956 } 957 958 ps.just_saw_decl = 0; 959 ps.block_init_level--; 960 961 if (code.s != code.e && !ps.block_init) { /* '}' must be first on line */ 962 if (opt.verbose) 963 diag(0, "Line broken"); 964 output_line(); 965 } 966 967 *code.e++ = '}'; 968 ps.want_blank = true; 969 ps.in_stmt_or_decl = false; 970 ps.in_stmt_cont = false; 971 972 if (ps.decl_level > 0) { /* multi-level structure declaration */ 973 *decl_ind = di_stack[--ps.decl_level]; 974 if (ps.decl_level == 0 && !ps.in_func_def_params) { 975 ps.just_saw_decl = 2; 976 *decl_ind = ps.ind_level == 0 977 ? opt.decl_indent : opt.local_decl_indent; 978 } 979 ps.in_decl = true; 980 } 981 982 out.blank_line_before = false; 983 parse(psym_rbrace); 984 ps.search_stmt = opt.cuddle_else 985 && ps.s_sym[ps.tos] == psym_if_expr_stmt 986 && ps.s_ind_level[ps.tos] >= ps.ind_level; 987 988 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.decl_level <= 0) 989 out.blank_line_after = true; 990 } 991 992 static void 993 process_do(bool *force_nl, bool *last_else) 994 { 995 ps.in_stmt_or_decl = false; 996 997 if (code.e != code.s) { /* make sure this starts a line */ 998 if (opt.verbose) 999 diag(0, "Line broken"); 1000 output_line(); 1001 ps.want_blank = false; 1002 } 1003 1004 *force_nl = true; /* following stuff must go onto new line */ 1005 *last_else = false; 1006 parse(psym_do); 1007 } 1008 1009 static void 1010 process_else(bool *force_nl, bool *last_else) 1011 { 1012 ps.in_stmt_or_decl = false; 1013 1014 if (code.e > code.s && !(opt.cuddle_else && code.e[-1] == '}')) { 1015 if (opt.verbose) 1016 diag(0, "Line broken"); 1017 output_line(); /* make sure this starts a line */ 1018 ps.want_blank = false; 1019 } 1020 1021 *force_nl = true; /* following stuff must go onto new line */ 1022 *last_else = true; 1023 parse(psym_else); 1024 } 1025 1026 static void 1027 process_type(int *decl_ind, bool *tabs_to_var) 1028 { 1029 parse(psym_decl); /* let the parser worry about indentation */ 1030 1031 if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) { 1032 if (code.s != code.e) { 1033 output_line(); 1034 ps.want_blank = false; 1035 } 1036 } 1037 1038 if (ps.in_func_def_params && opt.indent_parameters && 1039 ps.decl_level == 0) { 1040 ps.ind_level = ps.ind_level_follow = 1; 1041 ps.in_stmt_cont = false; 1042 } 1043 1044 ps.init_or_struct = /* maybe */ true; 1045 ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef; 1046 if (ps.decl_level <= 0) 1047 ps.just_saw_decl = 2; 1048 1049 out.blank_line_before = false; 1050 1051 int len = (int)buf_len(&token) + 1; 1052 int ind = ps.ind_level == 0 || ps.decl_level > 0 1053 ? opt.decl_indent /* global variable or local member */ 1054 : opt.local_decl_indent; /* local variable */ 1055 *decl_ind = ind > 0 ? ind : len; 1056 *tabs_to_var = opt.use_tabs && ind > 0; 1057 } 1058 1059 static void 1060 process_ident(lexer_symbol lsym, int decl_ind, bool tabs_to_var, 1061 bool *spaced_expr, bool *force_nl, stmt_head hd) 1062 { 1063 if (ps.in_decl) { 1064 if (lsym == lsym_funcname) { 1065 ps.in_decl = false; 1066 if (opt.procnames_start_line && code.s != code.e) { 1067 *code.e = '\0'; 1068 output_line(); 1069 } else if (ps.want_blank) { 1070 *code.e++ = ' '; 1071 } 1072 ps.want_blank = false; 1073 1074 } else if (!ps.block_init && !ps.decl_indent_done && 1075 ps.line_start_nparen == 0) { 1076 code_add_decl_indent(decl_ind, tabs_to_var); 1077 ps.decl_indent_done = true; 1078 ps.want_blank = false; 1079 } 1080 1081 } else if (*spaced_expr && ps.nparen == 0) { 1082 *spaced_expr = false; 1083 *force_nl = true; 1084 ps.next_unary = true; 1085 ps.in_stmt_or_decl = false; 1086 parse_stmt_head(hd); 1087 } 1088 } 1089 1090 static void 1091 copy_token(void) 1092 { 1093 if (ps.want_blank) 1094 buf_add_char(&code, ' '); 1095 buf_add_buf(&code, &token); 1096 } 1097 1098 static void 1099 process_period(void) 1100 { 1101 if (code.e > code.s && code.e[-1] == ',') 1102 *code.e++ = ' '; 1103 *code.e++ = '.'; 1104 ps.want_blank = false; 1105 } 1106 1107 static void 1108 process_comma(int decl_ind, bool tabs_to_var, bool *force_nl) 1109 { 1110 ps.want_blank = code.s != code.e; /* only put blank after comma if comma 1111 * does not start the line */ 1112 1113 if (ps.in_decl && !ps.is_function_definition && !ps.block_init && 1114 !ps.decl_indent_done && ps.line_start_nparen == 0) { 1115 /* indent leading commas and not the actual identifiers */ 1116 code_add_decl_indent(decl_ind - 1, tabs_to_var); 1117 ps.decl_indent_done = true; 1118 } 1119 1120 *code.e++ = ','; 1121 1122 if (ps.nparen == 0) { 1123 if (ps.block_init_level <= 0) 1124 ps.block_init = false; 1125 int varname_len = 8; /* rough estimate for the length of a typical 1126 * variable name */ 1127 if (break_comma && (opt.break_after_comma || 1128 ind_add(compute_code_indent(), code.s, code.e) 1129 >= opt.max_line_length - varname_len)) 1130 *force_nl = true; 1131 } 1132 } 1133 1134 /* move the whole line to the 'label' buffer */ 1135 static void 1136 read_preprocessing_line(void) 1137 { 1138 enum { 1139 PLAIN, STR, CHR, COMM 1140 } state; 1141 1142 buf_add_char(&lab, '#'); 1143 1144 state = PLAIN; 1145 int com_start = 0, com_end = 0; 1146 1147 while (ch_isblank(inp_peek())) 1148 inp_skip(); 1149 1150 while (inp_peek() != '\n' || (state == COMM && !had_eof)) { 1151 buf_reserve(&lab, 2); 1152 *lab.e++ = inp_next(); 1153 switch (lab.e[-1]) { 1154 case '\\': 1155 if (state != COMM) 1156 *lab.e++ = inp_next(); 1157 break; 1158 case '/': 1159 if (inp_peek() == '*' && state == PLAIN) { 1160 state = COMM; 1161 *lab.e++ = inp_next(); 1162 com_start = (int)buf_len(&lab) - 2; 1163 } 1164 break; 1165 case '"': 1166 if (state == STR) 1167 state = PLAIN; 1168 else if (state == PLAIN) 1169 state = STR; 1170 break; 1171 case '\'': 1172 if (state == CHR) 1173 state = PLAIN; 1174 else if (state == PLAIN) 1175 state = CHR; 1176 break; 1177 case '*': 1178 if (inp_peek() == '/' && state == COMM) { 1179 state = PLAIN; 1180 *lab.e++ = inp_next(); 1181 com_end = (int)buf_len(&lab); 1182 } 1183 break; 1184 } 1185 } 1186 1187 while (lab.e > lab.s && ch_isblank(lab.e[-1])) 1188 lab.e--; 1189 if (lab.e - lab.s == com_end && !inp_comment_seen()) { 1190 /* comment on preprocessor line */ 1191 inp_comment_init_preproc(); 1192 inp_comment_add_range(lab.s + com_start, lab.s + com_end); 1193 lab.e = lab.s + com_start; 1194 while (lab.e > lab.s && ch_isblank(lab.e[-1])) 1195 lab.e--; 1196 inp_comment_add_char(' '); /* add trailing blank, just in case */ 1197 inp_from_comment(); 1198 } 1199 buf_terminate(&lab); 1200 } 1201 1202 static void 1203 process_preprocessing(void) 1204 { 1205 if (com.s != com.e || lab.s != lab.e || code.s != code.e) 1206 output_line(); 1207 1208 read_preprocessing_line(); 1209 1210 ps.is_case_label = false; 1211 1212 if (strncmp(lab.s, "#if", 3) == 0) { /* also ifdef, ifndef */ 1213 if ((size_t)ifdef_level < array_length(state_stack)) 1214 state_stack[ifdef_level++] = ps; 1215 else 1216 diag(1, "#if stack overflow"); 1217 1218 } else if (strncmp(lab.s, "#el", 3) == 0) { /* else, elif */ 1219 if (ifdef_level <= 0) 1220 diag(1, lab.s[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1221 else 1222 ps = state_stack[ifdef_level - 1]; 1223 1224 } else if (strncmp(lab.s, "#endif", 6) == 0) { 1225 if (ifdef_level <= 0) 1226 diag(1, "Unmatched #endif"); 1227 else 1228 ifdef_level--; 1229 1230 } else { 1231 if (strncmp(lab.s + 1, "pragma", 6) != 0 && 1232 strncmp(lab.s + 1, "error", 5) != 0 && 1233 strncmp(lab.s + 1, "line", 4) != 0 && 1234 strncmp(lab.s + 1, "undef", 5) != 0 && 1235 strncmp(lab.s + 1, "define", 6) != 0 && 1236 strncmp(lab.s + 1, "include", 7) != 0) { 1237 diag(1, "Unrecognized cpp directive"); 1238 return; 1239 } 1240 } 1241 1242 if (opt.blanklines_around_conditional_compilation) { 1243 out.blank_line_after = true; 1244 out.blank_lines_to_output = 0; 1245 } else { 1246 out.blank_line_after = false; 1247 out.blank_line_before = false; 1248 } 1249 1250 /* 1251 * subsequent processing of the newline character will cause the line to 1252 * be printed 1253 */ 1254 } 1255 1256 static void __attribute__((__noreturn__)) 1257 main_loop(void) 1258 { 1259 bool force_nl = false; /* when true, code must be broken */ 1260 bool last_else = false; /* true iff last keyword was an else */ 1261 int decl_ind = 0; /* current indentation for declarations */ 1262 int di_stack[20]; /* a stack of structure indentation levels */ 1263 bool tabs_to_var = false; /* true if using tabs to indent to var name */ 1264 bool spaced_expr = false; /* whether we are in the expression of 1265 * if(...), while(...), etc. */ 1266 stmt_head hd = hd_0; /* the type of statement for 'if (...)', 'for 1267 * (...)', etc */ 1268 int quest_level = 0; /* when this is positive, we have seen a '?' 1269 * without the matching ':' in a '?:' 1270 * expression */ 1271 bool seen_case = false; /* set to true when we see a 'case', so we 1272 * know what to do with the following colon */ 1273 1274 di_stack[ps.decl_level = 0] = 0; 1275 1276 for (;;) { /* loop until we reach eof */ 1277 lexer_symbol lsym = lexi(); 1278 1279 search_stmt(&lsym, &force_nl, &last_else); 1280 1281 if (lsym == lsym_eof) { 1282 process_eof(); 1283 /* NOTREACHED */ 1284 } 1285 1286 if (lsym == lsym_newline || lsym == lsym_form_feed || 1287 lsym == lsym_preprocessing) 1288 force_nl = false; 1289 else if (lsym != lsym_comment) { 1290 maybe_break_line(lsym, &force_nl); 1291 ps.in_stmt_or_decl = true; /* add an extra level of indentation; 1292 * turned off again by a ';' or '}' */ 1293 if (com.s != com.e) 1294 move_com_to_code(); 1295 } 1296 1297 buf_reserve(&code, 3); /* space for 2 characters plus '\0' */ 1298 1299 switch (lsym) { 1300 1301 case lsym_form_feed: 1302 process_form_feed(); 1303 break; 1304 1305 case lsym_newline: 1306 process_newline(); 1307 break; 1308 1309 case lsym_lparen_or_lbracket: 1310 process_lparen_or_lbracket(decl_ind, tabs_to_var, spaced_expr); 1311 break; 1312 1313 case lsym_rparen_or_rbracket: 1314 process_rparen_or_rbracket(&spaced_expr, &force_nl, hd); 1315 break; 1316 1317 case lsym_unary_op: 1318 process_unary_op(decl_ind, tabs_to_var); 1319 break; 1320 1321 case lsym_binary_op: 1322 process_binary_op(); 1323 break; 1324 1325 case lsym_postfix_op: 1326 process_postfix_op(); 1327 break; 1328 1329 case lsym_question: 1330 process_question(&quest_level); 1331 break; 1332 1333 case lsym_case_label: 1334 seen_case = true; 1335 goto copy_token; 1336 1337 case lsym_colon: 1338 process_colon(&quest_level, &force_nl, &seen_case); 1339 break; 1340 1341 case lsym_semicolon: 1342 process_semicolon(&seen_case, &quest_level, decl_ind, tabs_to_var, 1343 &spaced_expr, hd, &force_nl); 1344 break; 1345 1346 case lsym_lbrace: 1347 process_lbrace(&force_nl, &spaced_expr, hd, di_stack, 1348 (int)array_length(di_stack), &decl_ind); 1349 break; 1350 1351 case lsym_rbrace: 1352 process_rbrace(&spaced_expr, &decl_ind, di_stack); 1353 break; 1354 1355 case lsym_switch: 1356 spaced_expr = true; /* the interesting stuff is done after the 1357 * expressions are scanned */ 1358 hd = hd_switch; /* remember the type of header for later use 1359 * by the parser */ 1360 goto copy_token; 1361 1362 case lsym_for: 1363 spaced_expr = true; 1364 hd = hd_for; 1365 goto copy_token; 1366 1367 case lsym_if: 1368 spaced_expr = true; 1369 hd = hd_if; 1370 goto copy_token; 1371 1372 case lsym_while: 1373 spaced_expr = true; 1374 hd = hd_while; 1375 goto copy_token; 1376 1377 case lsym_do: 1378 process_do(&force_nl, &last_else); 1379 goto copy_token; 1380 1381 case lsym_else: 1382 process_else(&force_nl, &last_else); 1383 goto copy_token; 1384 1385 case lsym_typedef: 1386 case lsym_storage_class: 1387 out.blank_line_before = false; 1388 goto copy_token; 1389 1390 case lsym_tag: 1391 if (ps.nparen > 0) 1392 goto copy_token; 1393 /* FALLTHROUGH */ 1394 case lsym_type_outside_parentheses: 1395 process_type(&decl_ind, &tabs_to_var); 1396 goto copy_token; 1397 1398 case lsym_type_in_parentheses: 1399 case lsym_offsetof: 1400 case lsym_sizeof: 1401 case lsym_word: 1402 case lsym_funcname: 1403 case lsym_return: 1404 process_ident(lsym, decl_ind, tabs_to_var, &spaced_expr, 1405 &force_nl, hd); 1406 copy_token: 1407 copy_token(); 1408 if (lsym != lsym_funcname) 1409 ps.want_blank = true; 1410 break; 1411 1412 case lsym_period: 1413 process_period(); 1414 break; 1415 1416 case lsym_comma: 1417 process_comma(decl_ind, tabs_to_var, &force_nl); 1418 break; 1419 1420 case lsym_preprocessing: 1421 process_preprocessing(); 1422 break; 1423 1424 case lsym_comment: 1425 process_comment(); 1426 break; 1427 1428 default: 1429 break; 1430 } 1431 1432 *code.e = '\0'; 1433 if (lsym != lsym_comment && lsym != lsym_newline && 1434 lsym != lsym_preprocessing) 1435 ps.prev_token = lsym; 1436 } 1437 } 1438 1439 int 1440 main(int argc, char **argv) 1441 { 1442 main_init_globals(); 1443 main_load_profiles(argc, argv); 1444 main_parse_command_line(argc, argv); 1445 main_prepare_parsing(); 1446 main_loop(); 1447 } 1448 1449 #ifdef debug 1450 void 1451 debug_printf(const char *fmt, ...) 1452 { 1453 FILE *f = output == stdout ? stderr : stdout; 1454 va_list ap; 1455 1456 va_start(ap, fmt); 1457 vfprintf(f, fmt, ap); 1458 va_end(ap); 1459 } 1460 1461 void 1462 debug_println(const char *fmt, ...) 1463 { 1464 FILE *f = output == stdout ? stderr : stdout; 1465 va_list ap; 1466 1467 va_start(ap, fmt); 1468 vfprintf(f, fmt, ap); 1469 va_end(ap); 1470 fprintf(f, "\n"); 1471 } 1472 1473 void 1474 debug_vis_range(const char *prefix, const char *s, const char *e, 1475 const char *suffix) 1476 { 1477 debug_printf("%s", prefix); 1478 for (const char *p = s; p < e; p++) { 1479 if (*p == '\\' || *p == '"') 1480 debug_printf("\\%c", *p); 1481 else if (isprint((unsigned char)*p)) 1482 debug_printf("%c", *p); 1483 else if (*p == '\n') 1484 debug_printf("\\n"); 1485 else if (*p == '\t') 1486 debug_printf("\\t"); 1487 else 1488 debug_printf("\\x%02x", (unsigned char)*p); 1489 } 1490 debug_printf("%s", suffix); 1491 } 1492 #endif 1493 1494 static void * 1495 nonnull(void *p) 1496 { 1497 if (p == NULL) 1498 err(EXIT_FAILURE, NULL); 1499 return p; 1500 } 1501 1502 void * 1503 xmalloc(size_t size) 1504 { 1505 return nonnull(malloc(size)); 1506 } 1507 1508 void * 1509 xrealloc(void *p, size_t new_size) 1510 { 1511 return nonnull(realloc(p, new_size)); 1512 } 1513 1514 char * 1515 xstrdup(const char *s) 1516 { 1517 return nonnull(strdup(s)); 1518 } 1519