1 /* $OpenBSD: indent.c,v 1.32 2021/01/26 18:21:25 deraadt Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. 6 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 7 * Copyright (c) 1985 Sun Microsystems, Inc. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */
34
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include "indent_globs.h"
42 #include "indent_codes.h"
43 #include <ctype.h>
44 #include <errno.h>
45 #include <err.h>
46
47 char *in_name = "Standard Input";	/* will always point to name of input
48					 * file */
49 char *out_name = "Standard Output";	/* will always point to name
50					 * of output file */
51 char bakfile[PATH_MAX] = "";	/* NOTE(review): presumably the backup file name used by bakcopy(); it is not referenced in main() -- confirm */
52
53 FILE *input;	/* the stream for the input file; main() falls back to stdin when no input file is named */
54 FILE *output;	/* the output file; main() uses stdout when troff is set or the input is stdin */
55
56 char *labbuf;	/* buffer for label; main() allocates bufsize bytes for it */
57 char *s_lab;	/* start ... */
58 char *e_lab;	/* ... and end of stored label */
59 char *l_lab;	/* limit of label buffer; main() sets it to labbuf + bufsize - 5 */
60
61 char *codebuf;	/* buffer for code section; main() allocates bufsize bytes for it */
62 char *s_code;	/* start ... */
63 char *e_code;	/* ... and end of stored code */
64 char *l_code;	/* limit of code section; main() sets it to codebuf + bufsize - 5 */
65
66 char *combuf;	/* buffer for comments; main() allocates bufsize bytes for it */
67 char *s_com;	/* start ... */
68 char *e_com;	/* ... and end of stored comments */
69 char *l_com;	/* limit of comment buffer; main() sets it to combuf + bufsize - 5 */
70
71 char *tokenbuf;	/* the last token scanned; main() allocates bufsize bytes for it */
72 char *s_token;	/* main() initializes s_token and e_token to tokenbuf + 1, like the s_ and e_ pointers of the buffers above */
73 char *e_token;	/* NOTE(review): presumably the end of the stored token -- confirm against lexi() */
74 char *l_token;	/* limit of token buffer; main() sets it to tokenbuf + bufsize - 5 */
75
76 char *in_buffer;	/* input buffer; main() starts it out as a 10 byte allocation */
77 char *in_buffer_limit;	/* the end of the input buffer; main() sets it to in_buffer + 8 */
78 char *buf_ptr;	/* ptr to next character to be taken from
79			 * in_buffer */
80 char *buf_end;	/* ptr to first after last char in in_buffer */
81
82 char save_com[sc_size];	/* input text is saved here when looking for
83				 * the brace after an if, while, etc */
84 char *sc_end;	/* pointer into save_com buffer; main() keeps it 0 while nothing is saved there */
85
86 char *bp_save;	/* saved value of buf_ptr when taking input
87			 * from save_com */
88 char *be_save;	/* similarly saved value of buf_end */
89
90 int pointer_as_binop;	/* NOTE(review): not referenced in main(); meaning not established here -- confirm */
91 int blanklines_after_declarations;	/* NOTE(review): presumably requests a blank line after a group of declarations -- confirm */
92 int blanklines_before_blockcomments;	/* NOTE(review): presumably requests a blank line before block comments -- confirm */
93 int blanklines_after_procs;	/* when set, main() sets postfix_blankline_requested after a '}' seen with ps.tos <= 1 and ps.dec_nest <= 0 */
94 int blanklines_around_conditional_compilation;	/* NOTE(review): presumably blank lines around #if/#endif groups -- confirm */
95 int swallow_optional_blanklines;	/* NOTE(review): presumably drops optional blank lines from the input -- confirm */
96 int n_real_blanklines;	/* NOTE(review): presumably a count of blank lines actually read -- confirm */
97 int prefix_blankline_requested;	/* main() only ever clears this; NOTE(review): presumably asks for a blank line before the next output line -- confirm */
98 int postfix_blankline_requested;	/* main() sets this to 1; NOTE(review): presumably asks for a blank line after the current output line -- confirm */
99 int break_comma;	/* when true and not in parens, break after a
100			 * comma */
101 int btype_2;	/* when true, brace should be on same line as
102			 * if, while, etc */
103 float case_ind;	/* indentation level to be used for a "case
104			 * n:" */
105 int code_lines;	/* count of lines with code; divisor of the comment/code ratio printed by main() when verbose */
106 int had_eof;	/* set to true when input is exhausted */
107 int line_no;	/* the current input line number; main() starts it at 1 */
108 int max_col;	/* the maximum allowable line length */
109 int verbose;	/* when true, non-essential error messages are
110			 * printed */
111 int cuddle_else;	/* true if else should cuddle up to '}' */
112 int star_comment_cont;	/* true iff comment continuation lines should
113				 * have stars at the beginning of each line. */
114 int comment_delimiter_on_blankline;	/* tagged -cdb in the disabled defaults in main(); NOTE(review): presumably puts comment delimiters on lines of their own -- confirm */
115 int troff;	/* true iff we are generating troff input */
116 int procnames_start_line;	/* if true, the names of procedures
117				 * being defined get placed in column
118				 * 1 (ie. a newline is placed between
119				 * the type of the procedure and its
120				 * name) */
121 int proc_calls_space;	/* If true, a blank is put before the '(' of a procedure call:
122				 * foo (bar) rather than foo(bar) -- see the lparen case in main() */
123 int format_col1_comments;	/* If comments which start in column 1
124				 * are to be magically reformatted
125				 * (just like comments that begin in
126				 * later columns) */
127 int inhibit_formatting;	/* true if INDENT OFF is in effect */
128 int suppress_blanklines;/* set iff following blanklines should be
129			 * suppressed */
130 int continuation_indent;/* set to the indentation between the edge of
131			 * code and continuation lines; main() defaults it to ps.ind_size when it is 0 */
132 int lineup_to_parens;	/* if true, continued code within parens will
133			 * be lined up to the open paren */
134 int Bill_Shannon;	/* true iff a blank should always be inserted
135			 * after sizeof */
136 int blanklines_after_declarations_at_proctop;	/* This is vaguely
137						 * similar to
138						 * blanklines_after_declarations
139						 * except that
140						 * it only applies to
141						 * the first set of
142						 * declarations in a
143						 * procedure (just after
144						 * the first '{') and it
145						 * causes a blank line
146						 * to be generated even
147						 * if there are no
148						 * declarations */
149 int block_comment_max_col;	/* main() replaces a value <= 0 with max_col; NOTE(review): presumably the line length limit for block comments -- confirm */
150 int extra_expression_indent;	/* True if continuation lines from the
151				 * expression part of "if(e)",
152				 * "while(e)", "for(e;e;e)" should be
153				 * indented an extra tab stop so that
154				 * they don't conflict with the code
155				 * that follows */
156 int use_tabs;	/* set true to use tabs for spacing,
157		 * false uses all spaces */
158
159 /* -troff font state information; main() fills in any font left unset when troff is on */
160
161 struct fstate
162 keywordf,	/* keyword font */
163 stringf,	/* string font */
164 boxcomf,	/* Box comment font */
165 blkcomf,	/* Block comment font */
166 scomf,	/*
Same line comment font */ 167 bodyf; /* major body font */ 168 169 struct parser_state ps; 170 171 int ifdef_level; 172 int rparen_count; 173 struct parser_state state_stack[5]; 174 struct parser_state match_state[5]; 175 176 177 void bakcopy(void); 178 179 int 180 main(int argc, char **argv) 181 { 182 183 extern int found_err; /* flag set in diag() on error */ 184 int dec_ind; /* current indentation for declarations */ 185 int di_stack[20]; /* a stack of structure indentation levels */ 186 int flushed_nl; /* used when buffering up comments to remember 187 * that a newline was passed over */ 188 int force_nl; /* when true, code must be broken */ 189 int hd_type; /* used to store type of stmt for if (...), 190 * for (...), etc */ 191 int i; /* local loop counter */ 192 int scase; /* set to true when we see a case, so we will 193 * know what to do with the following colon */ 194 int sp_sw; /* when true, we are in the expressin of 195 * if(...), while(...), etc. */ 196 int squest; /* when this is positive, we have seen a ? 
197 * without the matching : in a <c>?<s>:<s> 198 * construct */ 199 char *t_ptr; /* used for copying tokens */ 200 int tabs_to_var; /* true if using tabs to indent to var name */ 201 int type_code; /* the type of token, returned by lexi */ 202 203 int last_else = 0; /* true iff last keyword was an else */ 204 205 if (pledge("stdio rpath wpath cpath", NULL) == -1) 206 err(1, "pledge"); 207 208 /*-----------------------------------------------*\ 209 | INITIALIZATION | 210 \*-----------------------------------------------*/ 211 212 213 hd_type = 0; 214 ps.p_stack[0] = stmt; /* this is the parser's stack */ 215 ps.last_nl = true; /* this is true if the last thing scanned was 216 * a newline */ 217 ps.last_token = semicolon; 218 combuf = malloc(bufsize); 219 labbuf = malloc(bufsize); 220 codebuf = malloc(bufsize); 221 tokenbuf = malloc(bufsize); 222 if (combuf == NULL || labbuf == NULL || codebuf == NULL || 223 tokenbuf == NULL) 224 err(1, NULL); 225 l_com = combuf + bufsize - 5; 226 l_lab = labbuf + bufsize - 5; 227 l_code = codebuf + bufsize - 5; 228 l_token = tokenbuf + bufsize - 5; 229 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 230 * comment buffers */ 231 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 232 ps.else_if = 1; /* Default else-if special processing to on */ 233 s_lab = e_lab = labbuf + 1; 234 s_code = e_code = codebuf + 1; 235 s_com = e_com = combuf + 1; 236 s_token = e_token = tokenbuf + 1; 237 238 in_buffer = malloc(10); 239 if (in_buffer == NULL) 240 err(1, NULL); 241 in_buffer_limit = in_buffer + 8; 242 buf_ptr = buf_end = in_buffer; 243 line_no = 1; 244 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 245 sp_sw = force_nl = false; 246 ps.in_or_st = false; 247 ps.bl_line = true; 248 dec_ind = 0; 249 di_stack[ps.dec_nest = 0] = 0; 250 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 251 252 253 scase = ps.pcase = false; 254 squest = 0; 255 sc_end = 0; 256 bp_save = 0; 257 be_save = 0; 258 259 output = 0; 260 261 262 
263 /*--------------------------------------------------*\ 264 | COMMAND LINE SCAN | 265 \*--------------------------------------------------*/ 266 267 #ifdef undef 268 max_col = 78; /* -l78 */ 269 lineup_to_parens = 1; /* -lp */ 270 ps.ljust_decl = 0; /* -ndj */ 271 ps.com_ind = 33; /* -c33 */ 272 star_comment_cont = 1; /* -sc */ 273 ps.ind_size = 8; /* -i8 */ 274 verbose = 0; 275 ps.decl_indent = 16; /* -di16 */ 276 ps.indent_parameters = 1; /* -ip */ 277 ps.decl_com_ind = 0; /* if this is not set to some positive value 278 * by an arg, we will set this equal to 279 * ps.com_ind */ 280 btype_2 = 1; /* -br */ 281 cuddle_else = 1; /* -ce */ 282 ps.unindent_displace = 0; /* -d0 */ 283 ps.case_indent = 0; /* -cli0 */ 284 format_col1_comments = 1; /* -fc1 */ 285 procnames_start_line = 1; /* -psl */ 286 proc_calls_space = 0; /* -npcs */ 287 comment_delimiter_on_blankline = 1; /* -cdb */ 288 ps.leave_comma = 1; /* -nbc */ 289 #endif 290 291 for (i = 1; i < argc; ++i) 292 if (strcmp(argv[i], "-npro") == 0) 293 break; 294 set_defaults(); 295 if (i >= argc) 296 set_profile(); 297 298 for (i = 1; i < argc; ++i) { 299 300 /* 301 * look thru args (if any) for changes to defaults 302 */ 303 if (argv[i][0] != '-') {/* no flag on parameter */ 304 if (input == 0) { /* we must have the input file */ 305 in_name = argv[i]; /* remember name of input file */ 306 input = fopen(in_name, "r"); 307 if (input == NULL) /* check for open error */ 308 err(1, "%s", in_name); 309 continue; 310 } 311 else if (output == 0) { /* we have the output file */ 312 out_name = argv[i]; /* remember name of output file */ 313 if (strcmp(in_name, out_name) == 0) /* attempt to overwrite 314 * the file */ 315 errx(1, "input and output files must be different"); 316 output = fopen(out_name, "w"); 317 if (output == NULL) /* check for create error */ 318 err(1, "%s", out_name); 319 continue; 320 } 321 errx(1, "unknown parameter: %s", argv[i]); 322 } 323 else 324 set_option(argv[i]); 325 } /* end of for */ 326 
if (input == NULL) { 327 input = stdin; 328 } 329 if (output == NULL) { 330 if (troff || input == stdin) 331 output = stdout; 332 else { 333 out_name = in_name; 334 bakcopy(); 335 } 336 } 337 if (ps.com_ind <= 1) 338 ps.com_ind = 2; /* dont put normal comments before column 2 */ 339 if (troff) { 340 if (bodyf.font[0] == 0) 341 parsefont(&bodyf, "R"); 342 if (scomf.font[0] == 0) 343 parsefont(&scomf, "I"); 344 if (blkcomf.font[0] == 0) 345 blkcomf = scomf, blkcomf.size += 2; 346 if (boxcomf.font[0] == 0) 347 boxcomf = blkcomf; 348 if (stringf.font[0] == 0) 349 parsefont(&stringf, "L"); 350 if (keywordf.font[0] == 0) 351 parsefont(&keywordf, "B"); 352 writefdef(&bodyf, 'B'); 353 writefdef(&scomf, 'C'); 354 writefdef(&blkcomf, 'L'); 355 writefdef(&boxcomf, 'X'); 356 writefdef(&stringf, 'S'); 357 writefdef(&keywordf, 'K'); 358 } 359 if (block_comment_max_col <= 0) 360 block_comment_max_col = max_col; 361 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 362 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 363 if (continuation_indent == 0) 364 continuation_indent = ps.ind_size; 365 fill_buffer(); /* get first batch of stuff into input buffer */ 366 367 parse(semicolon); 368 { 369 char *p = buf_ptr; 370 int col = 1; 371 372 while (1) { 373 if (*p == ' ') 374 col++; 375 else if (*p == '\t') 376 col = ((col - 1) & ~7) + 9; 377 else 378 break; 379 p++; 380 } 381 if (col > ps.ind_size) 382 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 383 } 384 if (troff) { 385 char *p = in_name, 386 *beg = in_name; 387 388 while (*p) 389 if (*p++ == '/') 390 beg = p; 391 fprintf(output, ".Fn \"%s\"\n", beg); 392 } 393 /* 394 * START OF MAIN LOOP 395 */ 396 397 while (1) { /* this is the main loop. it will go until we 398 * reach eof */ 399 int is_procname; 400 401 type_code = lexi(); /* lexi reads one token. The actual 402 * characters read are stored in "token". 
lexi 403 * returns a code indicating the type of token */ 404 is_procname = ps.procname[0]; 405 406 /* 407 * The following code moves everything following an if (), while (), 408 * else, etc. up to the start of the following stmt to a buffer. This 409 * allows proper handling of both kinds of brace placement. 410 */ 411 412 flushed_nl = false; 413 while (ps.search_brace) { /* if we scanned an if(), while(), 414 * etc., we might need to copy stuff 415 * into a buffer we must loop, copying 416 * stuff into save_com, until we find 417 * the start of the stmt which follows 418 * the if, or whatever */ 419 switch (type_code) { 420 case newline: 421 ++line_no; 422 flushed_nl = true; 423 case form_feed: 424 break; /* form feeds and newlines found here will be 425 * ignored */ 426 427 case lbrace: /* this is a brace that starts the compound 428 * stmt */ 429 if (sc_end == 0) { /* ignore buffering if a comment wasnt 430 * stored up */ 431 ps.search_brace = false; 432 goto check_type; 433 } 434 if (btype_2) { 435 save_com[0] = '{'; /* we either want to put the brace 436 * right after the if */ 437 goto sw_buffer; /* go to common code to get out of 438 * this loop */ 439 } 440 case comment: /* we have a comment, so we must copy it into 441 * the buffer */ 442 if (!flushed_nl || sc_end != 0) { 443 if (sc_end == 0) { /* if this is the first comment, we 444 * must set up the buffer */ 445 save_com[0] = save_com[1] = ' '; 446 sc_end = &(save_com[2]); 447 } 448 else { 449 *sc_end++ = '\n'; /* add newline between 450 * comments */ 451 *sc_end++ = ' '; 452 --line_no; 453 } 454 *sc_end++ = '/'; /* copy in start of comment */ 455 *sc_end++ = '*'; 456 457 for (;;) { /* loop until we get to the end of the comment */ 458 *sc_end = *buf_ptr++; 459 if (buf_ptr >= buf_end) 460 fill_buffer(); 461 462 if (*sc_end++ == '*' && *buf_ptr == '/') 463 break; /* we are at end of comment */ 464 465 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 466 * overflow */ 467 diag(1, "Internal 
buffer overflow - Move big comment from right after if, while, or whatever."); 468 fflush(output); 469 exit(1); 470 } 471 } 472 *sc_end++ = '/'; /* add ending slash */ 473 if (++buf_ptr >= buf_end) /* get past / in buffer */ 474 fill_buffer(); 475 break; 476 } 477 default: /* it is the start of a normal statment */ 478 if (flushed_nl) /* if we flushed a newline, make sure it is 479 * put back */ 480 force_nl = true; 481 if ((type_code == sp_paren && *token == 'i' 482 && last_else && ps.else_if) || 483 (type_code == sp_nparen && *token == 'e' 484 && e_code != s_code && e_code[-1] == '}')) 485 force_nl = false; 486 487 if (sc_end == 0) { /* ignore buffering if comment wasnt 488 * saved up */ 489 ps.search_brace = false; 490 goto check_type; 491 } 492 if (force_nl) { /* if we should insert a nl here, put it into 493 * the buffer */ 494 force_nl = false; 495 --line_no; /* this will be re-increased when the nl is 496 * read from the buffer */ 497 *sc_end++ = '\n'; 498 *sc_end++ = ' '; 499 if (verbose && !flushed_nl) /* print error msg if the line 500 * was not already broken */ 501 diag(0, "Line broken"); 502 flushed_nl = false; 503 } 504 for (t_ptr = token; *t_ptr; ++t_ptr) 505 *sc_end++ = *t_ptr; /* copy token into temp buffer */ 506 ps.procname[0] = 0; 507 508 sw_buffer: 509 ps.search_brace = false; /* stop looking for start of 510 * stmt */ 511 bp_save = buf_ptr; /* save current input buffer */ 512 be_save = buf_end; 513 buf_ptr = save_com; /* fix so that subsequent calls to 514 * lexi will take tokens out of 515 * save_com */ 516 *sc_end++ = ' ';/* add trailing blank, just in case */ 517 buf_end = sc_end; 518 sc_end = 0; 519 break; 520 } /* end of switch */ 521 if (type_code != 0) /* we must make this check, just in case there 522 * was an unexpected EOF */ 523 type_code = lexi(); /* read another token */ 524 /* if (ps.search_brace) ps.procname[0] = 0; */ 525 if ((is_procname = ps.procname[0]) && flushed_nl 526 && !procnames_start_line && ps.in_decl 527 && 
type_code == ident) 528 flushed_nl = 0; 529 } /* end of while (search_brace) */ 530 last_else = 0; 531 check_type: 532 if (type_code == 0) { /* we got eof */ 533 if (s_lab != e_lab || s_code != e_code 534 || s_com != e_com) /* must dump end of line */ 535 dump_line(); 536 if (ps.tos > 1) /* check for balanced braces */ 537 diag(1, "Missing braces at end of file."); 538 539 if (verbose) { 540 printf("There were %d output lines and %d comments\n", 541 ps.out_lines, ps.out_coms); 542 printf("(Lines with comments)/(Lines with code): %6.3f\n", 543 (1.0 * ps.com_lines) / code_lines); 544 } 545 fflush(output); 546 exit(found_err); 547 } 548 if ( 549 (type_code != comment) && 550 (type_code != newline) && 551 (type_code != preesc) && 552 (type_code != form_feed)) { 553 if (force_nl && 554 (type_code != semicolon) && 555 (type_code != lbrace || !btype_2)) { 556 /* we should force a broken line here */ 557 if (verbose && !flushed_nl) 558 diag(0, "Line broken"); 559 flushed_nl = false; 560 dump_line(); 561 ps.want_blank = false; /* dont insert blank at line start */ 562 force_nl = false; 563 } 564 ps.in_stmt = true; /* turn on flag which causes an extra level of 565 * indentation. this is turned off by a ; or 566 * '}' */ 567 if (s_com != e_com) { /* the turkey has embedded a comment 568 * in a line. 
fix it */ 569 *e_code++ = ' '; 570 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 571 CHECK_SIZE_CODE; 572 *e_code++ = *t_ptr; 573 } 574 *e_code++ = ' '; 575 *e_code = '\0'; /* null terminate code sect */ 576 ps.want_blank = false; 577 e_com = s_com; 578 } 579 } 580 else if (type_code != comment) /* preserve force_nl thru a comment */ 581 force_nl = false; /* cancel forced newline after newline, form 582 * feed, etc */ 583 584 585 586 /*-----------------------------------------------------*\ 587 | do switch on type of token scanned | 588 \*-----------------------------------------------------*/ 589 CHECK_SIZE_CODE; 590 switch (type_code) { /* now, decide what to do with the token */ 591 592 case form_feed: /* found a form feed in line */ 593 ps.use_ff = true; /* a form feed is treated much like a newline */ 594 dump_line(); 595 ps.want_blank = false; 596 break; 597 598 case newline: 599 if (ps.last_token != comma || ps.p_l_follow > 0 600 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 601 dump_line(); 602 ps.want_blank = false; 603 } 604 ++line_no; /* keep track of input line number */ 605 break; 606 607 case lparen: /* got a '(' or '[' */ 608 ++ps.p_l_follow; /* count parens to make Healy happy */ 609 if (ps.want_blank && *token != '[' && 610 (ps.last_token != ident || proc_calls_space 611 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) 612 *e_code++ = ' '; 613 if (ps.in_decl && !ps.block_init) 614 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { 615 ps.dumped_decl_indent = 1; 616 snprintf(e_code, (l_code - e_code) + 5, 617 "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 618 e_code += strlen(e_code); 619 CHECK_SIZE_CODE; 620 } 621 else { 622 while ((e_code - s_code) < dec_ind) { 623 CHECK_SIZE_CODE; 624 *e_code++ = ' '; 625 } 626 *e_code++ = token[0]; 627 } 628 else 629 *e_code++ = token[0]; 630 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 631 if (sp_sw && ps.p_l_follow == 1 && 
extra_expression_indent 632 && ps.paren_indents[0] < 2 * ps.ind_size) 633 ps.paren_indents[0] = 2 * ps.ind_size; 634 ps.want_blank = false; 635 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 636 /* 637 * this is a kluge to make sure that declarations will be 638 * aligned right if proc decl has an explicit type on it, i.e. 639 * "int a(x) {..." 640 */ 641 parse(semicolon); /* I said this was a kluge... */ 642 ps.in_or_st = false; /* turn off flag for structure decl or 643 * initialization */ 644 } 645 if (ps.sizeof_keyword) 646 ps.sizeof_mask |= 1 << ps.p_l_follow; 647 break; 648 649 case rparen: /* got a ')' or ']' */ 650 rparen_count--; 651 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { 652 ps.last_u_d = true; 653 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 654 } 655 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; 656 if (--ps.p_l_follow < 0) { 657 ps.p_l_follow = 0; 658 diag(0, "Extra %c", *token); 659 } 660 if (e_code == s_code) /* if the paren starts the line */ 661 ps.paren_level = ps.p_l_follow; /* then indent it */ 662 663 *e_code++ = token[0]; 664 ps.want_blank = true; 665 666 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 667 * (...), or some such */ 668 sp_sw = false; 669 force_nl = true;/* must force newline after if */ 670 ps.last_u_d = true; /* inform lexi that a following 671 * operator is unary */ 672 ps.in_stmt = false; /* dont use stmt continuation 673 * indentation */ 674 675 parse(hd_type); /* let parser worry about if, or whatever */ 676 } 677 ps.search_brace = btype_2; /* this should insure that constructs 678 * such as main(){...} and int[]{...} 679 * have their braces put in the right 680 * place */ 681 break; 682 683 case unary_op: /* this could be any unary operation */ 684 if (ps.want_blank) 685 *e_code++ = ' '; 686 687 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { 688 snprintf(e_code, (l_code - e_code) + 5, 689 "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 690 ps.dumped_decl_indent = 1; 
691 e_code += strlen(e_code); 692 CHECK_SIZE_CODE; 693 } 694 else { 695 char *res = token; 696 697 if (ps.in_decl && !ps.block_init) { /* if this is a unary op 698 * in a declaration, we 699 * should indent this 700 * token */ 701 for (i = 0; token[i]; ++i); /* find length of token */ 702 while ((e_code - s_code) < (dec_ind - i)) { 703 CHECK_SIZE_CODE; 704 *e_code++ = ' '; /* pad it */ 705 } 706 } 707 if (troff && token[0] == '-' && token[1] == '>') 708 res = "\\(->"; 709 for (t_ptr = res; *t_ptr; ++t_ptr) { 710 CHECK_SIZE_CODE; 711 *e_code++ = *t_ptr; 712 } 713 } 714 ps.want_blank = false; 715 break; 716 717 case binary_op: /* any binary operation */ 718 if (ps.want_blank) 719 *e_code++ = ' '; 720 { 721 char *res = token; 722 723 if (troff) 724 switch (token[0]) { 725 case '<': 726 if (token[1] == '=') 727 res = "\\(<="; 728 break; 729 case '>': 730 if (token[1] == '=') 731 res = "\\(>="; 732 break; 733 case '!': 734 if (token[1] == '=') 735 res = "\\(!="; 736 break; 737 case '|': 738 if (token[1] == '|') 739 res = "\\(br\\(br"; 740 else if (token[1] == 0) 741 res = "\\(br"; 742 break; 743 } 744 for (t_ptr = res; *t_ptr; ++t_ptr) { 745 CHECK_SIZE_CODE; 746 *e_code++ = *t_ptr; /* move the operator */ 747 } 748 } 749 ps.want_blank = true; 750 break; 751 752 case postop: /* got a trailing ++ or -- */ 753 *e_code++ = token[0]; 754 *e_code++ = token[1]; 755 ps.want_blank = true; 756 break; 757 758 case question: /* got a ? 
*/ 759 squest++; /* this will be used when a later colon 760 * appears so we can distinguish the 761 * <c>?<n>:<n> construct */ 762 if (ps.want_blank) 763 *e_code++ = ' '; 764 *e_code++ = '?'; 765 ps.want_blank = true; 766 break; 767 768 case casestmt: /* got word 'case' or 'default' */ 769 scase = true; /* so we can process the later colon properly */ 770 goto copy_id; 771 772 case colon: /* got a ':' */ 773 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 774 --squest; 775 if (ps.want_blank) 776 *e_code++ = ' '; 777 *e_code++ = ':'; 778 ps.want_blank = true; 779 break; 780 } 781 if (ps.in_decl) { 782 *e_code++ = ':'; 783 ps.want_blank = false; 784 break; 785 } 786 ps.in_stmt = false; /* seeing a label does not imply we are in a 787 * stmt */ 788 for (t_ptr = s_code; *t_ptr; ++t_ptr) 789 *e_lab++ = *t_ptr; /* turn everything so far into a label */ 790 e_code = s_code; 791 *e_lab++ = ':'; 792 *e_lab++ = ' '; 793 *e_lab = '\0'; 794 795 force_nl = ps.pcase = scase; /* ps.pcase will be used by 796 * dump_line to decide how to 797 * indent the label. 
force_nl 798 * will force a case n: to be 799 * on a line by itself */ 800 scase = false; 801 ps.want_blank = false; 802 break; 803 804 case semicolon: /* got a ';' */ 805 ps.in_or_st = false;/* we are not in an initialization or 806 * structure declaration */ 807 scase = false; /* these will only need resetting in a error */ 808 squest = 0; 809 if (ps.last_token == rparen && rparen_count == 0) 810 ps.in_parameter_declaration = 0; 811 ps.cast_mask = 0; 812 ps.sizeof_mask = 0; 813 ps.block_init = 0; 814 ps.block_init_level = 0; 815 ps.just_saw_decl--; 816 817 if (ps.in_decl && s_code == e_code && !ps.block_init) 818 while ((e_code - s_code) < (dec_ind - 1)) { 819 CHECK_SIZE_CODE; 820 *e_code++ = ' '; 821 } 822 823 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 824 * structure declaration, we 825 * arent any more */ 826 827 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 828 829 /* 830 * This should be true iff there were unbalanced parens in the 831 * stmt. It is a bit complicated, because the semicolon might 832 * be in a for stmt 833 */ 834 diag(1, "Unbalanced parens"); 835 ps.p_l_follow = 0; 836 if (sp_sw) { /* this is a check for a if, while, etc. 
with 837 * unbalanced parens */ 838 sp_sw = false; 839 parse(hd_type); /* dont lose the if, or whatever */ 840 } 841 } 842 *e_code++ = ';'; 843 ps.want_blank = true; 844 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 845 * middle of a stmt */ 846 847 if (!sp_sw) { /* if not if for (;;) */ 848 parse(semicolon); /* let parser know about end of stmt */ 849 force_nl = true;/* force newline after a end of stmt */ 850 } 851 break; 852 853 case lbrace: /* got a '{' */ 854 ps.in_stmt = false; /* dont indent the {} */ 855 if (!ps.block_init) 856 force_nl = true;/* force other stuff on same line as '{' onto 857 * new line */ 858 else if (ps.block_init_level <= 0) 859 ps.block_init_level = 1; 860 else 861 ps.block_init_level++; 862 863 if (s_code != e_code && !ps.block_init) { 864 if (!btype_2) { 865 dump_line(); 866 ps.want_blank = false; 867 } 868 else if (ps.in_parameter_declaration && !ps.in_or_st) { 869 ps.i_l_follow = 0; 870 dump_line(); 871 ps.want_blank = false; 872 } 873 } 874 if (ps.in_parameter_declaration) 875 prefix_blankline_requested = 0; 876 877 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 878 * parens */ 879 diag(1, "Unbalanced parens"); 880 ps.p_l_follow = 0; 881 if (sp_sw) { /* check for unclosed if, for, etc. */ 882 sp_sw = false; 883 parse(hd_type); 884 ps.ind_level = ps.i_l_follow; 885 } 886 } 887 if (s_code == e_code) 888 ps.ind_stmt = false; /* dont put extra indentation on line 889 * with '{' */ 890 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 891 * declaration or an init */ 892 di_stack[ps.dec_nest++] = dec_ind; 893 /* ? 
dec_ind = 0; */ 894 } 895 else { 896 ps.decl_on_line = false; 897 /* we can't be in the middle of a declaration, so don't do 898 * special indentation of comments */ 899 if (blanklines_after_declarations_at_proctop 900 && ps.in_parameter_declaration) 901 postfix_blankline_requested = 1; 902 ps.in_parameter_declaration = 0; 903 } 904 dec_ind = 0; 905 parse(lbrace); /* let parser know about this */ 906 if (ps.want_blank) /* put a blank before '{' if '{' is not at 907 * start of line */ 908 *e_code++ = ' '; 909 ps.want_blank = false; 910 *e_code++ = '{'; 911 ps.just_saw_decl = 0; 912 break; 913 914 case rbrace: /* got a '}' */ 915 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 916 * omitted in 917 * declarations */ 918 parse(semicolon); 919 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 920 diag(1, "Unbalanced parens"); 921 ps.p_l_follow = 0; 922 sp_sw = false; 923 } 924 ps.just_saw_decl = 0; 925 ps.block_init_level--; 926 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 927 * line */ 928 if (verbose) 929 diag(0, "Line broken"); 930 dump_line(); 931 } 932 *e_code++ = '}'; 933 ps.want_blank = true; 934 ps.in_stmt = ps.ind_stmt = false; 935 if (ps.dec_nest > 0) { /* we are in multi-level structure 936 * declaration */ 937 dec_ind = di_stack[--ps.dec_nest]; 938 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 939 ps.just_saw_decl = 2; 940 ps.in_decl = true; 941 } 942 prefix_blankline_requested = 0; 943 parse(rbrace); /* let parser know about this */ 944 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 945 && ps.il[ps.tos] >= ps.ind_level; 946 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 947 postfix_blankline_requested = 1; 948 break; 949 950 case swstmt: /* got keyword "switch" */ 951 sp_sw = true; 952 hd_type = swstmt; /* keep this for when we have seen the 953 * expression */ 954 goto copy_id; /* go move the token into buffer */ 955 956 case sp_paren: /* token is if, while, for */ 
957 sp_sw = true; /* the interesting stuff is done after the 958 * expression is scanned */ 959 hd_type = (*token == 'i' ? ifstmt : 960 (*token == 'w' ? whilestmt : forstmt)); 961 962 /* 963 * remember the type of header for later use by parser 964 */ 965 goto copy_id; /* copy the token into line */ 966 967 case sp_nparen: /* got else, do */ 968 ps.in_stmt = false; 969 if (*token == 'e') { 970 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 971 if (verbose) 972 diag(0, "Line broken"); 973 dump_line();/* make sure this starts a line */ 974 ps.want_blank = false; 975 } 976 force_nl = true;/* also, following stuff must go onto new line */ 977 last_else = 1; 978 parse(elselit); 979 } 980 else { 981 if (e_code != s_code) { /* make sure this starts a line */ 982 if (verbose) 983 diag(0, "Line broken"); 984 dump_line(); 985 ps.want_blank = false; 986 } 987 force_nl = true;/* also, following stuff must go onto new line */ 988 last_else = 0; 989 parse(dolit); 990 } 991 goto copy_id; /* move the token into line */ 992 993 case decl: /* we have a declaration type (int, register, 994 * etc.) */ 995 parse(decl); /* let parser worry about indentation */ 996 if (ps.last_token == rparen && ps.tos <= 1) { 997 ps.in_parameter_declaration = 1; 998 if (s_code != e_code) { 999 dump_line(); 1000 ps.want_blank = 0; 1001 } 1002 } 1003 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 1004 ps.ind_level = ps.i_l_follow = 1; 1005 ps.ind_stmt = 0; 1006 } 1007 ps.in_or_st = true; /* this might be a structure or initialization 1008 * declaration */ 1009 ps.in_decl = ps.decl_on_line = true; 1010 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 1011 ps.just_saw_decl = 2; 1012 prefix_blankline_requested = 0; 1013 for (i = 0; token[i++];); /* get length of token */ 1014 1015 /* 1016 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent 1017 * : i); 1018 */ 1019 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 1020 tabs_to_var = (use_tabs ? 
ps.decl_indent > 0 : 0); 1021 goto copy_id; 1022 1023 case ident: /* got an identifier or constant */ 1024 if (ps.in_decl) { /* if we are in a declaration, we must indent 1025 * identifier */ 1026 if (ps.want_blank) 1027 *e_code++ = ' '; 1028 ps.want_blank = false; 1029 if (is_procname == 0 || !procnames_start_line) { 1030 if (!ps.block_init) { 1031 if (troff && !ps.dumped_decl_indent) { 1032 snprintf(e_code, (l_code - e_code) + 5, 1033 "\n.De %dp+\200p\n", dec_ind * 7); 1034 ps.dumped_decl_indent = 1; 1035 e_code += strlen(e_code); 1036 CHECK_SIZE_CODE; 1037 } else { 1038 int cur_dec_ind; 1039 int pos, startpos; 1040 1041 /* 1042 * in order to get the tab math right for 1043 * indentations that are not multiples of 8 we 1044 * need to modify both startpos and dec_ind 1045 * (cur_dec_ind) here by eight minus the 1046 * remainder of the current starting column 1047 * divided by eight. This seems to be a 1048 * properly working fix 1049 */ 1050 startpos = e_code - s_code; 1051 cur_dec_ind = dec_ind; 1052 pos = startpos; 1053 if ((ps.ind_level * ps.ind_size) % 8 != 0) { 1054 pos += (ps.ind_level * ps.ind_size) % 8; 1055 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; 1056 } 1057 1058 if (tabs_to_var) { 1059 while ((pos & ~7) + 8 <= cur_dec_ind) { 1060 CHECK_SIZE_CODE; 1061 *e_code++ = '\t'; 1062 pos = (pos & ~7) + 8; 1063 } 1064 } 1065 while (pos < cur_dec_ind) { 1066 CHECK_SIZE_CODE; 1067 *e_code++ = ' '; 1068 pos++; 1069 } 1070 if (ps.want_blank && e_code - s_code == startpos) 1071 *e_code++ = ' '; 1072 ps.want_blank = false; 1073 } 1074 } 1075 } 1076 else { 1077 if (dec_ind && s_code != e_code) 1078 dump_line(); 1079 dec_ind = 0; 1080 ps.want_blank = false; 1081 } 1082 } 1083 else if (sp_sw && ps.p_l_follow == 0) { 1084 sp_sw = false; 1085 force_nl = true; 1086 ps.last_u_d = true; 1087 ps.in_stmt = false; 1088 parse(hd_type); 1089 } 1090 copy_id: 1091 if (ps.want_blank) 1092 *e_code++ = ' '; 1093 if (troff && ps.its_a_keyword) { 1094 e_code = chfont(&bodyf, 
&keywordf, e_code); 1095 for (t_ptr = token; *t_ptr; ++t_ptr) { 1096 CHECK_SIZE_CODE; 1097 *e_code++ = keywordf.allcaps && 1098 islower((unsigned char)*t_ptr) ? 1099 toupper((unsigned char)*t_ptr) : *t_ptr; 1100 } 1101 e_code = chfont(&keywordf, &bodyf, e_code); 1102 } 1103 else 1104 for (t_ptr = token; *t_ptr; ++t_ptr) { 1105 CHECK_SIZE_CODE; 1106 *e_code++ = *t_ptr; 1107 } 1108 ps.want_blank = true; 1109 break; 1110 1111 case period: /* treat a period kind of like a binary 1112 * operation */ 1113 *e_code++ = '.'; /* move the period into line */ 1114 ps.want_blank = false; /* dont put a blank after a period */ 1115 break; 1116 1117 case comma: 1118 ps.want_blank = (s_code != e_code); /* only put blank after comma 1119 * if comma does not start the 1120 * line */ 1121 if (ps.in_decl && is_procname == 0 && !ps.block_init) 1122 while ((e_code - s_code) < (dec_ind - 1)) { 1123 CHECK_SIZE_CODE; 1124 *e_code++ = ' '; 1125 } 1126 1127 *e_code++ = ','; 1128 if (ps.p_l_follow == 0) { 1129 if (ps.block_init_level <= 0) 1130 ps.block_init = 0; 1131 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1132 force_nl = true; 1133 } 1134 break; 1135 1136 case preesc: /* got the character '#' */ 1137 if ((s_com != e_com) || 1138 (s_lab != e_lab) || 1139 (s_code != e_code)) 1140 dump_line(); 1141 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1142 { 1143 int in_comment = 0; 1144 int com_start = 0; 1145 char quote = 0; 1146 int com_end = 0; 1147 1148 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1149 buf_ptr++; 1150 if (buf_ptr >= buf_end) 1151 fill_buffer(); 1152 } 1153 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1154 CHECK_SIZE_LAB; 1155 *e_lab = *buf_ptr++; 1156 if (buf_ptr >= buf_end) 1157 fill_buffer(); 1158 switch (*e_lab++) { 1159 case BACKSLASH: 1160 if (troff) 1161 *e_lab++ = BACKSLASH; 1162 if (!in_comment) { 1163 *e_lab++ = *buf_ptr++; 1164 if (buf_ptr >= buf_end) 1165 fill_buffer(); 1166 } 1167 break; 1168 
case '/': 1169 if (*buf_ptr == '*' && !in_comment && !quote) { 1170 in_comment = 1; 1171 *e_lab++ = *buf_ptr++; 1172 com_start = e_lab - s_lab - 2; 1173 } 1174 break; 1175 case '"': 1176 if (quote == '"') 1177 quote = 0; 1178 break; 1179 case '\'': 1180 if (quote == '\'') 1181 quote = 0; 1182 break; 1183 case '*': 1184 if (*buf_ptr == '/' && in_comment) { 1185 in_comment = 0; 1186 *e_lab++ = *buf_ptr++; 1187 com_end = e_lab - s_lab; 1188 } 1189 break; 1190 } 1191 } 1192 1193 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1194 e_lab--; 1195 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on 1196 * preprocessor line */ 1197 if (sc_end == 0) /* if this is the first comment, we 1198 * must set up the buffer */ 1199 sc_end = &(save_com[0]); 1200 else { 1201 *sc_end++ = '\n'; /* add newline between 1202 * comments */ 1203 *sc_end++ = ' '; 1204 --line_no; 1205 } 1206 bcopy(s_lab + com_start, sc_end, com_end - com_start); 1207 sc_end += com_end - com_start; 1208 if (sc_end >= &save_com[sc_size]) 1209 abort(); 1210 e_lab = s_lab + com_start; 1211 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1212 e_lab--; 1213 bp_save = buf_ptr; /* save current input buffer */ 1214 be_save = buf_end; 1215 buf_ptr = save_com; /* fix so that subsequent calls to 1216 * lexi will take tokens out of 1217 * save_com */ 1218 *sc_end++ = ' '; /* add trailing blank, just in case */ 1219 buf_end = sc_end; 1220 sc_end = 0; 1221 } 1222 *e_lab = '\0'; /* null terminate line */ 1223 ps.pcase = false; 1224 } 1225 1226 if (strncmp(s_lab, "#if", 3) == 0) { 1227 if (blanklines_around_conditional_compilation) { 1228 int c; 1229 prefix_blankline_requested++; 1230 while ((c = getc(input)) == '\n'); 1231 ungetc(c, input); 1232 } 1233 if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) { 1234 match_state[ifdef_level].tos = -1; 1235 state_stack[ifdef_level++] = ps; 1236 } 1237 else 1238 diag(1, "#if stack overflow"); 1239 } 1240 else if (strncmp(s_lab, 
"#else", 5) == 0) 1241 if (ifdef_level <= 0) 1242 diag(1, "Unmatched #else"); 1243 else { 1244 match_state[ifdef_level - 1] = ps; 1245 ps = state_stack[ifdef_level - 1]; 1246 } 1247 else if (strncmp(s_lab, "#endif", 6) == 0) { 1248 if (ifdef_level <= 0) 1249 diag(1, "Unmatched #endif"); 1250 else { 1251 ifdef_level--; 1252 1253 #ifdef undef 1254 /* 1255 * This match needs to be more intelligent before the 1256 * message is useful 1257 */ 1258 if (match_state[ifdef_level].tos >= 0 1259 && bcmp(&ps, &match_state[ifdef_level], sizeof ps)) 1260 diag(0, "Syntactically inconsistent #ifdef alternatives."); 1261 #endif 1262 } 1263 if (blanklines_around_conditional_compilation) { 1264 postfix_blankline_requested++; 1265 n_real_blanklines = 0; 1266 } 1267 } 1268 break; /* subsequent processing of the newline 1269 * character will cause the line to be printed */ 1270 1271 case comment: /* we have gotten a comment this is a biggie */ 1272 if (flushed_nl) { /* we should force a broken line here */ 1273 flushed_nl = false; 1274 dump_line(); 1275 ps.want_blank = false; /* dont insert blank at line start */ 1276 force_nl = false; 1277 } 1278 pr_comment(); 1279 break; 1280 } /* end of big switch stmt */ 1281 1282 *e_code = '\0'; /* make sure code section is null terminated */ 1283 if (type_code != comment && type_code != newline && type_code != preesc) 1284 ps.last_token = type_code; 1285 } /* end of main while (1) loop */ 1286 }

/*
 * Back up the input file and arrange for it to be rewritten in place.
 *
 * The backup name is the last path component of in_name with ".BAK"
 * appended: for /blah/blah/blah/file it is "file.BAK".  The name carries no
 * directory part, so open() resolves it relative to the current working
 * directory.  Everything readable from the already-open input stream is
 * copied into the backup; the backup is then reopened as the new input and
 * the original file is reopened for writing ("w") as the output.
 *
 * Every failure is fatal and terminates the program through err()/errc().
 * If the original file cannot be reopened for writing, the backup is
 * unlinked before exiting.
 */
void
bakcopy(void)
{
    ssize_t n;
    int bakchn;
    char buff[8 * 1024];
    char *p;

    /* construct the backup name: basename of in_name followed by ".BAK" */
    p = strrchr(in_name, '/');
    p = (p != NULL) ? p + 1 : in_name;
    if (snprintf(bakfile, PATH_MAX, "%s.BAK", p) >= PATH_MAX)
        errc(1, ENAMETOOLONG, "%s.BAK", p);

    /* copy in_name to backup file */
    bakchn = open(bakfile, O_CREAT | O_TRUNC | O_WRONLY, 0600);
    if (bakchn == -1)
        err(1, "%s", bakfile);
    while ((n = read(fileno(input), buff, sizeof buff)) > 0) {
        ssize_t done = 0;

        /*
         * write() may accept fewer bytes than requested without setting
         * errno, so keep going until the whole chunk is stored instead of
         * reporting a short write with a stale errno.
         */
        while (done < n) {
            ssize_t w = write(bakchn, buff + done, (size_t)(n - done));

            if (w == -1)
                err(1, "%s", bakfile);
            if (w == 0)         /* no progress: bail out, don't spin */
                errc(1, EIO, "%s", bakfile);
            done += w;
        }
    }
    if (n == -1)
        err(1, "%s", in_name);
    /*
     * A deferred write error can surface at close(); catch it now, because
     * the original file is about to be truncated and the backup would be
     * the only remaining copy.
     */
    if (close(bakchn) == -1)
        err(1, "%s", bakfile);
    fclose(input);

    /* re-open backup file as the input file */
    input = fopen(bakfile, "r");
    if (input == NULL)
        err(1, "%s", bakfile);
    /* now the original input file will be the output */
    output = fopen(in_name, "w");
    if (output == NULL) {
        int saved_errno = errno;    /* unlink() may overwrite errno */

        unlink(bakfile);
        errc(1, saved_errno, "%s", in_name);
    }
}