1 /* $NetBSD: indent.c,v 1.25 2019/04/04 15:22:13 kamil Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 8 * Copyright (c) 1980, 1993 9 * The Regents of the University of California. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #if 0 41 #ifndef lint 42 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 43 #endif /* not lint */ 44 #endif 45 46 #include <sys/cdefs.h> 47 #ifndef lint 48 #if defined(__NetBSD__) 49 __RCSID("$NetBSD: indent.c,v 1.25 2019/04/04 15:22:13 kamil Exp $"); 50 #elif defined(__FreeBSD__) 51 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $"); 52 #endif 53 #endif 54 55 #include <sys/param.h> 56 #if HAVE_CAPSICUM 57 #include <sys/capsicum.h> 58 #include <capsicum_helpers.h> 59 #endif 60 #include <err.h> 61 #include <errno.h> 62 #include <fcntl.h> 63 #include <unistd.h> 64 #include <stdio.h> 65 #include <stdlib.h> 66 #include <string.h> 67 #include <ctype.h> 68 #include "indent_globs.h" 69 #include "indent_codes.h" 70 #include "indent.h" 71 72 static void bakcopy(void); 73 static void indent_declaration(int, int); 74 75 const char *in_name = "Standard Input"; /* will always point to name of input 76 * file */ 77 const char *out_name = "Standard Output"; /* will always point to name 78 * of output file */ 79 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 80 * files */ 81 char bakfile[MAXPATHLEN] = ""; 82 83 int 84 main(int argc, char **argv) 85 { 86 #if HAVE_CAPSICUM 87 cap_rights_t rights; 88 #endif 89 90 int dec_ind; /* current indentation for declarations */ 91 int di_stack[20]; /* a stack of structure indentation levels */ 92 int force_nl; /* when true, code must be broken */ 93 int hd_type = 0; /* used to store type of stmt for if (...), 94 * for (...), etc */ 95 int i; /* local loop counter */ 96 int scase; /* set to true when we see a case, so we will 97 * know what to do with the following colon */ 98 int sp_sw; /* when true, we are in the expression of 99 * if(...), while(...), etc. */ 100 int squest; /* when this is positive, we have seen a ? 101 * without the matching : in a <c>?<s>:<s> 102 * construct */ 103 const char *t_ptr; /* used for copying tokens */ 104 int tabs_to_var; /* true if using tabs to indent to var name */ 105 int type_code; /* the type of token, returned by lexi */ 106 107 int last_else = 0; /* true iff last keyword was an else */ 108 const char *profile_name = NULL; 109 const char *envval = NULL; 110 struct parser_state transient_state; /* a copy for lookup */ 111 112 /*-----------------------------------------------*\ 113 | INITIALIZATION | 114 \*-----------------------------------------------*/ 115 116 found_err = 0; 117 118 ps.p_stack[0] = stmt; /* this is the parser's stack */ 119 ps.last_nl = true; /* this is true if the last thing scanned was 120 * a newline */ 121 ps.last_token = semicolon; 122 combuf = (char *) malloc(bufsize); 123 if (combuf == NULL) 124 err(1, NULL); 125 labbuf = (char *) malloc(bufsize); 126 if (labbuf == NULL) 127 err(1, NULL); 128 codebuf = (char *) malloc(bufsize); 129 if (codebuf == NULL) 130 err(1, NULL); 131 tokenbuf = (char *) malloc(bufsize); 132 if (tokenbuf == NULL) 133 err(1, NULL); 134 alloc_typenames(); 135 init_constant_tt(); 136 l_com = combuf + bufsize - 5; 137 l_lab = labbuf + bufsize - 5; 138 l_code = codebuf + bufsize - 5; 139 l_token = tokenbuf + bufsize - 5; 140 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 141 * comment buffers */ 142 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 143 opt.else_if = 1; /* Default else-if special processing to on */ 144 s_lab = e_lab = labbuf + 1; 145 s_code = e_code = codebuf + 1; 146 s_com = e_com = combuf + 1; 147 s_token = e_token = tokenbuf + 1; 148 149 in_buffer = (char *) malloc(10); 150 if (in_buffer == NULL) 151 err(1, NULL); 152 in_buffer_limit = in_buffer + 8; 153 buf_ptr = buf_end = in_buffer; 154 line_no = 1; 155 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 156 sp_sw = force_nl = false; 157 ps.in_or_st = false; 158 ps.bl_line = true; 159 dec_ind = 0; 160 di_stack[ps.dec_nest = 0] = 0; 161 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 162 163 scase = ps.pcase = false; 164 squest = 0; 165 sc_end = NULL; 166 bp_save = NULL; 167 be_save = NULL; 168 169 output = NULL; 170 tabs_to_var = 0; 171 172 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 173 if (envval) 174 simple_backup_suffix = envval; 175 176 /*--------------------------------------------------*\ 177 | COMMAND LINE SCAN | 178 \*--------------------------------------------------*/ 179 180 #ifdef undef 181 max_col = 78; /* -l78 */ 182 lineup_to_parens = 1; /* -lp */ 183 lineup_to_parens_always = 0; /* -nlpl */ 184 ps.ljust_decl = 0; /* -ndj */ 185 ps.com_ind = 33; /* -c33 */ 186 star_comment_cont = 1; /* -sc */ 187 ps.ind_size = 8; /* -i8 */ 188 verbose = 0; 189 ps.decl_indent = 16; /* -di16 */ 190 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 191 * by an arg, we will set this equal to 192 * ps.decl_ind */ 193 ps.indent_parameters = 1; /* -ip */ 194 ps.decl_com_ind = 0; /* if this is not set to some positive value 195 * by an arg, we will set this equal to 196 * ps.com_ind */ 197 btype_2 = 1; /* -br */ 198 cuddle_else = 1; /* -ce */ 199 ps.unindent_displace = 0; /* -d0 */ 200 ps.case_indent = 0; /* -cli0 */ 201 format_block_comments = 1; /* -fcb */ 202 format_col1_comments = 1; /* -fc1 */ 203 procnames_start_line = 1; /* -psl */ 204 proc_calls_space = 0; /* -npcs */ 205 comment_delimiter_on_blankline = 1; /* -cdb */ 206 ps.leave_comma = 1; /* -nbc */ 207 #endif 208 209 for (i = 1; i < argc; ++i) 210 if (strcmp(argv[i], "-npro") == 0) 211 break; 212 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 213 profile_name = argv[i]; /* non-empty -P (set profile) */ 214 set_defaults(); 215 if (i >= argc) 216 set_profile(profile_name); 217 218 for (i = 1; i < argc; ++i) { 219 220 /* 221 * look thru args (if any) for changes to defaults 222 */ 223 if (argv[i][0] != '-') {/* no flag on parameter */ 224 if (input == NULL) { /* we must have the input file */ 225 in_name = argv[i]; /* remember name of input file */ 226 input = fopen(in_name, "r"); 227 if (input == NULL) /* check for open error */ 228 err(1, "%s", in_name); 229 continue; 230 } 231 else if (output == NULL) { /* we have the output file */ 232 out_name = argv[i]; /* remember name of output file */ 233 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 234 * the file */ 235 errx(1, "input and output files must be different"); 236 } 237 output = fopen(out_name, "w"); 238 if (output == NULL) /* check for create error */ 239 err(1, "%s", out_name); 240 continue; 241 } 242 errx(1, "unknown parameter: %s", argv[i]); 243 } 244 else 245 set_option(argv[i]); 246 } /* end of for */ 247 if (input == NULL) 248 input = stdin; 249 if (output == NULL) { 250 if (input == stdin) 251 output = stdout; 252 else { 253 out_name = in_name; 254 bakcopy(); 255 } 256 } 257 258 #if HAVE_CAPSICUM 259 /* Restrict input/output descriptors and enter Capsicum sandbox. */ 260 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); 261 if (caph_rights_limit(fileno(output), &rights) < 0) 262 err(EXIT_FAILURE, "unable to limit rights for %s", out_name); 263 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 264 if (caph_rights_limit(fileno(input), &rights) < 0) 265 err(EXIT_FAILURE, "unable to limit rights for %s", in_name); 266 if (caph_enter() < 0) 267 err(EXIT_FAILURE, "unable to enter capability mode"); 268 #endif 269 270 if (opt.com_ind <= 1) 271 opt.com_ind = 2; /* don't put normal comments before column 2 */ 272 if (opt.block_comment_max_col <= 0) 273 opt.block_comment_max_col = opt.max_col; 274 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 275 opt.local_decl_indent = opt.decl_indent; 276 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ 277 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; 278 if (opt.continuation_indent == 0) 279 opt.continuation_indent = opt.ind_size; 280 fill_buffer(); /* get first batch of stuff into input buffer */ 281 282 parse(semicolon); 283 { 284 char *p = buf_ptr; 285 int col = 1; 286 287 while (1) { 288 if (*p == ' ') 289 col++; 290 else if (*p == '\t') 291 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; 292 else 293 break; 294 p++; 295 } 296 if (col > opt.ind_size) 297 ps.ind_level = ps.i_l_follow = col / opt.ind_size; 298 } 299 300 /* 301 * START OF MAIN LOOP 302 */ 303 304 while (1) { /* this is the main loop. it will go until we 305 * reach eof */ 306 int comment_buffered = false; 307 308 type_code = lexi(&ps); /* lexi reads one token. The actual 309 * characters read are stored in "token". lexi 310 * returns a code indicating the type of token */ 311 312 /* 313 * The following code moves newlines and comments following an if (), 314 * while (), else, etc. up to the start of the following stmt to 315 * a buffer. This allows proper handling of both kinds of brace 316 * placement (-br, -bl) and cuddling "else" (-ce). 317 */ 318 319 while (ps.search_brace) { 320 switch (type_code) { 321 case newline: 322 if (sc_end == NULL) { 323 save_com = sc_buf; 324 save_com[0] = save_com[1] = ' '; 325 sc_end = &save_com[2]; 326 } 327 *sc_end++ = '\n'; 328 /* 329 * We may have inherited a force_nl == true from the previous 330 * token (like a semicolon). But once we know that a newline 331 * has been scanned in this loop, force_nl should be false. 332 * 333 * However, the force_nl == true must be preserved if newline 334 * is never scanned in this loop, so this assignment cannot be 335 * done earlier. 336 */ 337 force_nl = false; 338 case form_feed: 339 break; 340 case comment: 341 if (sc_end == NULL) { 342 /* 343 * Copy everything from the start of the line, because 344 * pr_comment() will use that to calculate original 345 * indentation of a boxed comment. 346 */ 347 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 348 save_com = sc_buf + (buf_ptr - in_buffer - 4); 349 save_com[0] = save_com[1] = ' '; 350 sc_end = &save_com[2]; 351 } 352 comment_buffered = true; 353 *sc_end++ = '/'; /* copy in start of comment */ 354 *sc_end++ = '*'; 355 for (;;) { /* loop until we get to the end of the comment */ 356 *sc_end = *buf_ptr++; 357 if (buf_ptr >= buf_end) 358 fill_buffer(); 359 if (*sc_end++ == '*' && *buf_ptr == '/') 360 break; /* we are at end of comment */ 361 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 362 * overflow */ 363 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 364 fflush(output); 365 exit(1); 366 } 367 } 368 *sc_end++ = '/'; /* add ending slash */ 369 if (++buf_ptr >= buf_end) /* get past / in buffer */ 370 fill_buffer(); 371 break; 372 case lbrace: 373 /* 374 * Put KNF-style lbraces before the buffered up tokens and 375 * jump out of this loop in order to avoid copying the token 376 * again under the default case of the switch below. 377 */ 378 if (sc_end != NULL && opt.btype_2) { 379 save_com[0] = '{'; 380 /* 381 * Originally the lbrace may have been alone on its own 382 * line, but it will be moved into "the else's line", so 383 * if there was a newline resulting from the "{" before, 384 * it must be scanned now and ignored. 385 */ 386 while (isspace((unsigned char)*buf_ptr)) { 387 if (++buf_ptr >= buf_end) 388 fill_buffer(); 389 if (*buf_ptr == '\n') 390 break; 391 } 392 goto sw_buffer; 393 } 394 /* FALLTHROUGH */ 395 default: /* it is the start of a normal statement */ 396 { 397 int remove_newlines; 398 399 remove_newlines = 400 /* "} else" */ 401 (type_code == sp_nparen && *token == 'e' && 402 e_code != s_code && e_code[-1] == '}') 403 /* "else if" */ 404 || (type_code == sp_paren && *token == 'i' && 405 last_else && opt.else_if); 406 if (remove_newlines) 407 force_nl = false; 408 if (sc_end == NULL) { /* ignore buffering if 409 * comment wasn't saved up */ 410 ps.search_brace = false; 411 goto check_type; 412 } 413 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 414 sc_end--; 415 } 416 if (opt.swallow_optional_blanklines || 417 (!comment_buffered && remove_newlines)) { 418 force_nl = !remove_newlines; 419 while (sc_end > save_com && sc_end[-1] == '\n') { 420 sc_end--; 421 } 422 } 423 if (force_nl) { /* if we should insert a nl here, put 424 * it into the buffer */ 425 force_nl = false; 426 --line_no; /* this will be re-increased when the 427 * newline is read from the buffer */ 428 *sc_end++ = '\n'; 429 *sc_end++ = ' '; 430 if (opt.verbose) /* print error msg if the line was 431 * not already broken */ 432 diag2(0, "Line broken"); 433 } 434 for (t_ptr = token; *t_ptr; ++t_ptr) 435 *sc_end++ = *t_ptr; 436 437 sw_buffer: 438 ps.search_brace = false; /* stop looking for start of 439 * stmt */ 440 bp_save = buf_ptr; /* save current input buffer */ 441 be_save = buf_end; 442 buf_ptr = save_com; /* fix so that subsequent calls to 443 * lexi will take tokens out of 444 * save_com */ 445 *sc_end++ = ' ';/* add trailing blank, just in case */ 446 buf_end = sc_end; 447 sc_end = NULL; 448 break; 449 } 450 } /* end of switch */ 451 /* 452 * We must make this check, just in case there was an unexpected 453 * EOF. 454 */ 455 if (type_code != 0) { 456 /* 457 * The only intended purpose of calling lexi() below is to 458 * categorize the next token in order to decide whether to 459 * continue buffering forthcoming tokens. Once the buffering 460 * is over, lexi() will be called again elsewhere on all of 461 * the tokens - this time for normal processing. 462 * 463 * Calling it for this purpose is a bug, because lexi() also 464 * changes the parser state and discards leading whitespace, 465 * which is needed mostly for comment-related considerations. 466 * 467 * Work around the former problem by giving lexi() a copy of 468 * the current parser state and discard it if the call turned 469 * out to be just a look ahead. 470 * 471 * Work around the latter problem by copying all whitespace 472 * characters into the buffer so that the later lexi() call 473 * will read them. 474 */ 475 if (sc_end != NULL) { 476 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 477 *sc_end++ = *buf_ptr++; 478 if (sc_end >= &save_com[sc_size]) { 479 errx(1, "input too long"); 480 } 481 } 482 if (buf_ptr >= buf_end) { 483 fill_buffer(); 484 } 485 } 486 transient_state = ps; 487 type_code = lexi(&transient_state); /* read another token */ 488 if (type_code != newline && type_code != form_feed && 489 type_code != comment && !transient_state.search_brace) { 490 ps = transient_state; 491 } 492 } 493 } /* end of while (search_brace) */ 494 last_else = 0; 495 check_type: 496 if (type_code == 0) { /* we got eof */ 497 if (s_lab != e_lab || s_code != e_code 498 || s_com != e_com) /* must dump end of line */ 499 dump_line(); 500 if (ps.tos > 1) /* check for balanced braces */ 501 diag2(1, "Stuff missing from end of file"); 502 503 if (opt.verbose) { 504 printf("There were %d output lines and %d comments\n", 505 ps.out_lines, ps.out_coms); 506 printf("(Lines with comments)/(Lines with code): %6.3f\n", 507 (1.0 * ps.com_lines) / code_lines); 508 } 509 fflush(output); 510 exit(found_err); 511 } 512 if ( 513 (type_code != comment) && 514 (type_code != newline) && 515 (type_code != preesc) && 516 (type_code != form_feed)) { 517 if (force_nl && 518 (type_code != semicolon) && 519 (type_code != lbrace || !opt.btype_2)) { 520 /* we should force a broken line here */ 521 if (opt.verbose) 522 diag2(0, "Line broken"); 523 dump_line(); 524 ps.want_blank = false; /* dont insert blank at line start */ 525 force_nl = false; 526 } 527 ps.in_stmt = true; /* turn on flag which causes an extra level of 528 * indentation. this is turned off by a ; or 529 * '}' */ 530 if (s_com != e_com) { /* the turkey has embedded a comment 531 * in a line. fix it */ 532 int len = e_com - s_com; 533 534 CHECK_SIZE_CODE(len + 3); 535 *e_code++ = ' '; 536 memcpy(e_code, s_com, len); 537 e_code += len; 538 *e_code++ = ' '; 539 *e_code = '\0'; /* null terminate code sect */ 540 ps.want_blank = false; 541 e_com = s_com; 542 } 543 } 544 else if (type_code != comment) /* preserve force_nl thru a comment */ 545 force_nl = false; /* cancel forced newline after newline, form 546 * feed, etc */ 547 548 549 550 /*-----------------------------------------------------*\ 551 | do switch on type of token scanned | 552 \*-----------------------------------------------------*/ 553 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 554 * before the next CHECK_SIZE_CODE or 555 * dump_line() is 2. After that there's the 556 * final increment for the null character. */ 557 switch (type_code) { /* now, decide what to do with the token */ 558 559 case form_feed: /* found a form feed in line */ 560 ps.use_ff = true; /* a form feed is treated much like a newline */ 561 dump_line(); 562 ps.want_blank = false; 563 break; 564 565 case newline: 566 if (ps.last_token != comma || ps.p_l_follow > 0 567 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 568 dump_line(); 569 ps.want_blank = false; 570 } 571 ++line_no; /* keep track of input line number */ 572 break; 573 574 case lparen: /* got a '(' or '[' */ 575 /* count parens to make Healy happy */ 576 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 577 diag3(0, "Reached internal limit of %d unclosed parens", 578 nitems(ps.paren_indents)); 579 ps.p_l_follow--; 580 } 581 if (*token == '[') 582 /* not a function pointer declaration or a function call */; 583 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 584 ps.procname[0] == '\0' && ps.paren_level == 0) { 585 /* function pointer declarations */ 586 indent_declaration(dec_ind, tabs_to_var); 587 ps.dumped_decl_indent = true; 588 } 589 else if (ps.want_blank && 590 ((ps.last_token != ident && ps.last_token != funcname) || 591 opt.proc_calls_space || 592 /* offsetof (1) is never allowed a space; sizeof (2) gets 593 * one iff -bs; all other keywords (>2) always get a space 594 * before lparen */ 595 ps.keyword + opt.Bill_Shannon > 2)) 596 *e_code++ = ' '; 597 ps.want_blank = false; 598 *e_code++ = token[0]; 599 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 600 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent 601 && ps.paren_indents[0] < 2 * opt.ind_size) 602 ps.paren_indents[0] = 2 * opt.ind_size; 603 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 604 /* 605 * this is a kluge to make sure that declarations will be 606 * aligned right if proc decl has an explicit type on it, i.e. 607 * "int a(x) {..." 608 */ 609 parse(semicolon); /* I said this was a kluge... */ 610 ps.in_or_st = false; /* turn off flag for structure decl or 611 * initialization */ 612 } 613 /* parenthesized type following sizeof or offsetof is not a cast */ 614 if (ps.keyword == 1 || ps.keyword == 2) 615 ps.not_cast_mask |= 1 << ps.p_l_follow; 616 break; 617 618 case rparen: /* got a ')' or ']' */ 619 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 620 ps.last_u_d = true; 621 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 622 ps.want_blank = opt.space_after_cast; 623 } else 624 ps.want_blank = true; 625 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 626 if (--ps.p_l_follow < 0) { 627 ps.p_l_follow = 0; 628 diag3(0, "Extra %c", *token); 629 } 630 if (e_code == s_code) /* if the paren starts the line */ 631 ps.paren_level = ps.p_l_follow; /* then indent it */ 632 633 *e_code++ = token[0]; 634 635 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 636 * (...), or some such */ 637 sp_sw = false; 638 force_nl = true;/* must force newline after if */ 639 ps.last_u_d = true; /* inform lexi that a following 640 * operator is unary */ 641 ps.in_stmt = false; /* dont use stmt continuation 642 * indentation */ 643 644 parse(hd_type); /* let parser worry about if, or whatever */ 645 } 646 ps.search_brace = opt.btype_2; /* this should ensure that 647 * constructs such as main(){...} 648 * and int[]{...} have their braces 649 * put in the right place */ 650 break; 651 652 case unary_op: /* this could be any unary operation */ 653 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 654 ps.procname[0] == '\0' && ps.paren_level == 0) { 655 /* pointer declarations */ 656 657 /* 658 * if this is a unary op in a declaration, we should indent 659 * this token 660 */ 661 for (i = 0; token[i]; ++i) 662 /* find length of token */; 663 indent_declaration(dec_ind - i, tabs_to_var); 664 ps.dumped_decl_indent = true; 665 } 666 else if (ps.want_blank) 667 *e_code++ = ' '; 668 669 { 670 int len = e_token - s_token; 671 672 CHECK_SIZE_CODE(len); 673 memcpy(e_code, token, len); 674 e_code += len; 675 } 676 ps.want_blank = false; 677 break; 678 679 case binary_op: /* any binary operation */ 680 { 681 int len = e_token - s_token; 682 683 CHECK_SIZE_CODE(len + 1); 684 if (ps.want_blank) 685 *e_code++ = ' '; 686 memcpy(e_code, token, len); 687 e_code += len; 688 } 689 ps.want_blank = true; 690 break; 691 692 case postop: /* got a trailing ++ or -- */ 693 *e_code++ = token[0]; 694 *e_code++ = token[1]; 695 ps.want_blank = true; 696 break; 697 698 case question: /* got a ? */ 699 squest++; /* this will be used when a later colon 700 * appears so we can distinguish the 701 * <c>?<n>:<n> construct */ 702 if (ps.want_blank) 703 *e_code++ = ' '; 704 *e_code++ = '?'; 705 ps.want_blank = true; 706 break; 707 708 case casestmt: /* got word 'case' or 'default' */ 709 scase = true; /* so we can process the later colon properly */ 710 goto copy_id; 711 712 case colon: /* got a ':' */ 713 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 714 --squest; 715 if (ps.want_blank) 716 *e_code++ = ' '; 717 *e_code++ = ':'; 718 ps.want_blank = true; 719 break; 720 } 721 if (ps.in_or_st) { 722 *e_code++ = ':'; 723 ps.want_blank = false; 724 break; 725 } 726 ps.in_stmt = false; /* seeing a label does not imply we are in a 727 * stmt */ 728 /* 729 * turn everything so far into a label 730 */ 731 { 732 int len = e_code - s_code; 733 734 CHECK_SIZE_LAB(len + 3); 735 memcpy(e_lab, s_code, len); 736 e_lab += len; 737 *e_lab++ = ':'; 738 *e_lab = '\0'; 739 e_code = s_code; 740 } 741 force_nl = ps.pcase = scase; /* ps.pcase will be used by 742 * dump_line to decide how to 743 * indent the label. force_nl 744 * will force a case n: to be 745 * on a line by itself */ 746 scase = false; 747 ps.want_blank = false; 748 break; 749 750 case semicolon: /* got a ';' */ 751 if (ps.dec_nest == 0) 752 ps.in_or_st = false;/* we are not in an initialization or 753 * structure declaration */ 754 scase = false; /* these will only need resetting in an error */ 755 squest = 0; 756 if (ps.last_token == rparen) 757 ps.in_parameter_declaration = 0; 758 ps.cast_mask = 0; 759 ps.not_cast_mask = 0; 760 ps.block_init = 0; 761 ps.block_init_level = 0; 762 ps.just_saw_decl--; 763 764 if (ps.in_decl && s_code == e_code && !ps.block_init && 765 !ps.dumped_decl_indent && ps.paren_level == 0) { 766 /* indent stray semicolons in declarations */ 767 indent_declaration(dec_ind - 1, tabs_to_var); 768 ps.dumped_decl_indent = true; 769 } 770 771 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 772 * structure declaration, we 773 * arent any more */ 774 775 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 776 777 /* 778 * This should be true iff there were unbalanced parens in the 779 * stmt. It is a bit complicated, because the semicolon might 780 * be in a for stmt 781 */ 782 diag2(1, "Unbalanced parens"); 783 ps.p_l_follow = 0; 784 if (sp_sw) { /* this is a check for an if, while, etc. with 785 * unbalanced parens */ 786 sp_sw = false; 787 parse(hd_type); /* dont lose the if, or whatever */ 788 } 789 } 790 *e_code++ = ';'; 791 ps.want_blank = true; 792 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 793 * middle of a stmt */ 794 795 if (!sp_sw) { /* if not if for (;;) */ 796 parse(semicolon); /* let parser know about end of stmt */ 797 force_nl = true;/* force newline after an end of stmt */ 798 } 799 break; 800 801 case lbrace: /* got a '{' */ 802 ps.in_stmt = false; /* dont indent the {} */ 803 if (!ps.block_init) 804 force_nl = true;/* force other stuff on same line as '{' onto 805 * new line */ 806 else if (ps.block_init_level <= 0) 807 ps.block_init_level = 1; 808 else 809 ps.block_init_level++; 810 811 if (s_code != e_code && !ps.block_init) { 812 if (!opt.btype_2) { 813 dump_line(); 814 ps.want_blank = false; 815 } 816 else if (ps.in_parameter_declaration && !ps.in_or_st) { 817 ps.i_l_follow = 0; 818 if (opt.function_brace_split) { /* dump the line prior 819 * to the brace ... */ 820 dump_line(); 821 ps.want_blank = false; 822 } else /* add a space between the decl and brace */ 823 ps.want_blank = true; 824 } 825 } 826 if (ps.in_parameter_declaration) 827 prefix_blankline_requested = 0; 828 829 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 830 * parens */ 831 diag2(1, "Unbalanced parens"); 832 ps.p_l_follow = 0; 833 if (sp_sw) { /* check for unclosed if, for, etc. */ 834 sp_sw = false; 835 parse(hd_type); 836 ps.ind_level = ps.i_l_follow; 837 } 838 } 839 if (s_code == e_code) 840 ps.ind_stmt = false; /* dont put extra indentation on line 841 * with '{' */ 842 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 843 * declaration or an init */ 844 di_stack[ps.dec_nest] = dec_ind; 845 if (++ps.dec_nest == nitems(di_stack)) { 846 diag3(0, "Reached internal limit of %d struct levels", 847 nitems(di_stack)); 848 ps.dec_nest--; 849 } 850 /* ? dec_ind = 0; */ 851 } 852 else { 853 ps.decl_on_line = false; /* we can't be in the middle of 854 * a declaration, so don't do 855 * special indentation of 856 * comments */ 857 if (opt.blanklines_after_declarations_at_proctop 858 && ps.in_parameter_declaration) 859 postfix_blankline_requested = 1; 860 ps.in_parameter_declaration = 0; 861 ps.in_decl = false; 862 } 863 dec_ind = 0; 864 parse(lbrace); /* let parser know about this */ 865 if (ps.want_blank) /* put a blank before '{' if '{' is not at 866 * start of line */ 867 *e_code++ = ' '; 868 ps.want_blank = false; 869 *e_code++ = '{'; 870 ps.just_saw_decl = 0; 871 break; 872 873 case rbrace: /* got a '}' */ 874 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 875 * omitted in 876 * declarations */ 877 parse(semicolon); 878 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 879 diag2(1, "Unbalanced parens"); 880 ps.p_l_follow = 0; 881 sp_sw = false; 882 } 883 ps.just_saw_decl = 0; 884 ps.block_init_level--; 885 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 886 * line */ 887 if (opt.verbose) 888 diag2(0, "Line broken"); 889 dump_line(); 890 } 891 *e_code++ = '}'; 892 ps.want_blank = true; 893 ps.in_stmt = ps.ind_stmt = false; 894 if (ps.dec_nest > 0) { /* we are in multi-level structure 895 * declaration */ 896 dec_ind = di_stack[--ps.dec_nest]; 897 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 898 ps.just_saw_decl = 2; 899 ps.in_decl = true; 900 } 901 prefix_blankline_requested = 0; 902 parse(rbrace); /* let parser know about this */ 903 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead 904 && ps.il[ps.tos] >= ps.ind_level; 905 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) 906 postfix_blankline_requested = 1; 907 break; 908 909 case swstmt: /* got keyword "switch" */ 910 sp_sw = true; 911 hd_type = swstmt; /* keep this for when we have seen the 912 * expression */ 913 goto copy_id; /* go move the token into buffer */ 914 915 case sp_paren: /* token is if, while, for */ 916 sp_sw = true; /* the interesting stuff is done after the 917 * expression is scanned */ 918 hd_type = (*token == 'i' ? ifstmt : 919 (*token == 'w' ? whilestmt : forstmt)); 920 921 /* 922 * remember the type of header for later use by parser 923 */ 924 goto copy_id; /* copy the token into line */ 925 926 case sp_nparen: /* got else, do */ 927 ps.in_stmt = false; 928 if (*token == 'e') { 929 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { 930 if (opt.verbose) 931 diag2(0, "Line broken"); 932 dump_line();/* make sure this starts a line */ 933 ps.want_blank = false; 934 } 935 force_nl = true;/* also, following stuff must go onto new line */ 936 last_else = 1; 937 parse(elselit); 938 } 939 else { 940 if (e_code != s_code) { /* make sure this starts a line */ 941 if (opt.verbose) 942 diag2(0, "Line broken"); 943 dump_line(); 944 ps.want_blank = false; 945 } 946 force_nl = true;/* also, following stuff must go onto new line */ 947 last_else = 0; 948 parse(dolit); 949 } 950 goto copy_id; /* move the token into line */ 951 952 case type_def: 953 case storage: 954 prefix_blankline_requested = 0; 955 goto copy_id; 956 957 case structure: 958 if (ps.p_l_follow > 0) 959 goto copy_id; 960 /* FALLTHROUGH */ 961 case decl: /* we have a declaration type (int, etc.) */ 962 parse(decl); /* let parser worry about indentation */ 963 if (ps.last_token == rparen && ps.tos <= 1) { 964 if (s_code != e_code) { 965 dump_line(); 966 ps.want_blank = 0; 967 } 968 } 969 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { 970 ps.ind_level = ps.i_l_follow = 1; 971 ps.ind_stmt = 0; 972 } 973 ps.in_or_st = true; /* this might be a structure or initialization 974 * declaration */ 975 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 976 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 977 ps.just_saw_decl = 2; 978 prefix_blankline_requested = 0; 979 for (i = 0; token[i++];); /* get length of token */ 980 981 if (ps.ind_level == 0 || ps.dec_nest > 0) { 982 /* global variable or struct member in local variable */ 983 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; 984 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); 985 } else { 986 /* local variable */ 987 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; 988 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0); 989 } 990 goto copy_id; 991 992 case funcname: 993 case ident: /* got an identifier or constant */ 994 if (ps.in_decl) { 995 if (type_code == funcname) { 996 ps.in_decl = false; 997 if (opt.procnames_start_line && s_code != e_code) { 998 *e_code = '\0'; 999 dump_line(); 1000 } 1001 else if (ps.want_blank) { 1002 *e_code++ = ' '; 1003 } 1004 ps.want_blank = false; 1005 } 1006 else if (!ps.block_init && !ps.dumped_decl_indent && 1007 ps.paren_level == 0) { /* if we are in a declaration, we 1008 * must indent identifier */ 1009 indent_declaration(dec_ind, tabs_to_var); 1010 ps.dumped_decl_indent = true; 1011 ps.want_blank = false; 1012 } 1013 } 1014 else if (sp_sw && ps.p_l_follow == 0) { 1015 sp_sw = false; 1016 force_nl = true; 1017 ps.last_u_d = true; 1018 ps.in_stmt = false; 1019 parse(hd_type); 1020 } 1021 copy_id: 1022 { 1023 int len = e_token - s_token; 1024 1025 CHECK_SIZE_CODE(len + 1); 1026 if (ps.want_blank) 1027 *e_code++ = ' '; 1028 memcpy(e_code, s_token, len); 1029 e_code += len; 1030 } 1031 if (type_code != funcname) 1032 ps.want_blank = true; 1033 break; 1034 1035 case strpfx: 1036 { 1037 int len = e_token - s_token; 1038 1039 CHECK_SIZE_CODE(len + 1); 1040 if (ps.want_blank) 1041 *e_code++ = ' '; 1042 memcpy(e_code, token, len); 1043 e_code += len; 1044 } 1045 ps.want_blank = false; 1046 break; 1047 1048 case period: /* treat a period kind of like a binary 1049 * operation */ 1050 *e_code++ = '.'; /* move the period into line */ 1051 ps.want_blank = false; /* dont put a blank after a period */ 1052 break; 1053 1054 case comma: 1055 ps.want_blank = (s_code != e_code); /* only put blank after comma 1056 * if comma does not start the 1057 * line */ 1058 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1059 !ps.dumped_decl_indent && ps.paren_level == 0) { 1060 /* indent leading commas and not the actual identifiers */ 1061 indent_declaration(dec_ind - 1, tabs_to_var); 1062 ps.dumped_decl_indent = true; 1063 } 1064 *e_code++ = ','; 1065 if (ps.p_l_follow == 0) { 1066 if (ps.block_init_level <= 0) 1067 ps.block_init = 0; 1068 if (break_comma && (!opt.leave_comma || 1069 count_spaces_until(compute_code_target(), s_code, e_code) > 1070 opt.max_col - opt.tabsize)) 1071 force_nl = true; 1072 } 1073 break; 1074 1075 case preesc: /* got the character '#' */ 1076 if ((s_com != e_com) || 1077 (s_lab != e_lab) || 1078 (s_code != e_code)) 1079 dump_line(); 1080 CHECK_SIZE_LAB(1); 1081 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1082 { 1083 int in_comment = 0; 1084 int com_start = 0; 1085 char quote = 0; 1086 int com_end = 0; 1087 1088 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1089 buf_ptr++; 1090 if (buf_ptr >= buf_end) 1091 fill_buffer(); 1092 } 1093 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1094 CHECK_SIZE_LAB(2); 1095 *e_lab = *buf_ptr++; 1096 if (buf_ptr >= buf_end) 1097 fill_buffer(); 1098 switch (*e_lab++) { 1099 case BACKSLASH: 1100 if (!in_comment) { 1101 *e_lab++ = *buf_ptr++; 1102 if (buf_ptr >= buf_end) 1103 fill_buffer(); 1104 } 1105 break; 1106 case '/': 1107 if (*buf_ptr == '*' && !in_comment && !quote) { 1108 in_comment = 1; 1109 *e_lab++ = *buf_ptr++; 1110 com_start = e_lab - s_lab - 2; 1111 } 1112 break; 1113 case '"': 1114 if (quote == '"') 1115 quote = 0; 1116 break; 1117 case '\'': 1118 if (quote == '\'') 1119 quote = 0; 1120 break; 1121 case '*': 1122 if (*buf_ptr == '/' && in_comment) { 1123 in_comment = 0; 1124 *e_lab++ = *buf_ptr++; 1125 com_end = e_lab - s_lab; 1126 } 1127 break; 1128 } 1129 } 1130 1131 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1132 e_lab--; 1133 if (e_lab - s_lab == com_end && bp_save == NULL) { 1134 /* comment on preprocessor line */ 1135 if (sc_end == NULL) { /* if this is the first comment, 1136 * we must set up the buffer */ 1137 save_com = sc_buf; 1138 sc_end = &save_com[0]; 1139 } 1140 else { 1141 *sc_end++ = '\n'; /* add newline between 1142 * comments */ 1143 *sc_end++ = ' '; 1144 --line_no; 1145 } 1146 if (sc_end - save_com + com_end - com_start > sc_size) 1147 errx(1, "input too long"); 1148 memmove(sc_end, s_lab + com_start, com_end - com_start); 1149 sc_end += com_end - com_start; 1150 e_lab = s_lab + com_start; 1151 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1152 e_lab--; 1153 bp_save = buf_ptr; /* save current input buffer */ 1154 be_save = buf_end; 1155 buf_ptr = save_com; /* fix so that subsequent calls to 1156 * lexi will take tokens out of 1157 * save_com */ 1158 *sc_end++ = ' '; /* add trailing blank, just in case */ 1159 buf_end = sc_end; 1160 sc_end = NULL; 1161 } 1162 CHECK_SIZE_LAB(1); 1163 *e_lab = '\0'; /* null terminate line */ 1164 ps.pcase = false; 1165 } 1166 1167 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1168 if ((size_t)ifdef_level < nitems(state_stack)) { 1169 match_state[ifdef_level].tos = -1; 1170 state_stack[ifdef_level++] = ps; 1171 } 1172 else 1173 diag2(1, "#if stack overflow"); 1174 } 1175 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1176 if (ifdef_level <= 0) 1177 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1178 else { 1179 match_state[ifdef_level - 1] = ps; 1180 ps = state_stack[ifdef_level - 1]; 1181 } 1182 } 1183 else if (strncmp(s_lab, "#endif", 6) == 0) { 1184 if (ifdef_level <= 0) 1185 diag2(1, "Unmatched #endif"); 1186 else 1187 ifdef_level--; 1188 } else { 1189 struct directives { 1190 int size; 1191 const char *string; 1192 } 1193 recognized[] = { 1194 {7, "include"}, 1195 {6, "define"}, 1196 {5, "undef"}, 1197 {4, "line"}, 1198 {5, "error"}, 1199 {6, "pragma"} 1200 }; 1201 int d = nitems(recognized); 1202 while (--d >= 0) 1203 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1204 break; 1205 if (d < 0) { 1206 diag2(1, "Unrecognized cpp directive"); 1207 break; 1208 } 1209 } 1210 if (opt.blanklines_around_conditional_compilation) { 1211 postfix_blankline_requested++; 1212 n_real_blanklines = 0; 1213 } 1214 else { 1215 postfix_blankline_requested = 0; 1216 prefix_blankline_requested = 0; 1217 } 1218 break; /* subsequent processing of the newline 1219 * character will cause the line to be printed */ 1220 1221 case comment: /* we have gotten a / followed by * this is a biggie */ 1222 pr_comment(); 1223 break; 1224 } /* end of big switch stmt */ 1225 1226 *e_code = '\0'; /* make sure code section is null terminated */ 1227 if (type_code != comment && type_code != newline && type_code != preesc) 1228 ps.last_token = type_code; 1229 } /* end of main while (1) loop */ 1230 } 1231 1232 /* 1233 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1234 * backup file will be ".Bfile" then make the backup file the input and 1235 * original input file the output 1236 */ 1237 static void 1238 bakcopy(void) 1239 { 1240 int n, 1241 bakchn; 1242 char buff[8 * 1024]; 1243 const char *p; 1244 1245 /* construct file name .Bfile */ 1246 for (p = in_name; *p; p++); /* skip to end of string */ 1247 while (p > in_name && *p != '/') /* find last '/' */ 1248 p--; 1249 if (*p == '/') 1250 p++; 1251 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1252 1253 /* copy in_name to backup file */ 1254 bakchn = creat(bakfile, 0600); 1255 if (bakchn < 0) 1256 err(1, "%s", bakfile); 1257 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1258 if (write(bakchn, buff, n) != n) 1259 err(1, "%s", bakfile); 1260 if (n < 0) 1261 err(1, "%s", in_name); 1262 close(bakchn); 1263 fclose(input); 1264 1265 /* re-open backup file as the input file */ 1266 input = fopen(bakfile, "r"); 1267 if (input == NULL) 1268 err(1, "%s", bakfile); 1269 /* now the original input file will be the output */ 1270 output = fopen(in_name, "w"); 1271 if (output == NULL) { 1272 unlink(bakfile); 1273 err(1, "%s", in_name); 1274 } 1275 } 1276 1277 static void 1278 indent_declaration(int cur_dec_ind, int tabs_to_var) 1279 { 1280 int pos = e_code - s_code; 1281 char *startpos = e_code; 1282 1283 /* 1284 * get the tab math right for indentations that are not multiples of tabsize 1285 */ 1286 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { 1287 pos += (ps.ind_level * opt.ind_size) % opt.tabsize; 1288 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; 1289 } 1290 if (tabs_to_var) { 1291 int tpos; 1292 1293 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); 1294 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { 1295 *e_code++ = '\t'; 1296 pos = tpos; 1297 } 1298 } 1299 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1300 while (pos < cur_dec_ind) { 1301 *e_code++ = ' '; 1302 pos++; 1303 } 1304 if (e_code == startpos && ps.want_blank) { 1305 *e_code++ = ' '; 1306 ps.want_blank = false; 1307 } 1308 } 1309