1 /* $NetBSD: indent.c,v 1.14 2003/06/19 15:45:22 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 7 * Copyright (c) 1985 Sun Microsystems, Inc. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 #ifndef lint 41 __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ 42 @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ 43 @(#) Copyright (c) 1980, 1993\n\ 44 The Regents of the University of California. All rights reserved.\n"); 45 #endif /* not lint */ 46 47 #ifndef lint 48 #if 0 49 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 50 #else 51 __RCSID("$NetBSD: indent.c,v 1.14 2003/06/19 15:45:22 christos Exp $"); 52 #endif 53 #endif /* not lint */ 54 55 #include <sys/param.h> 56 #include <ctype.h> 57 #include <err.h> 58 #include <errno.h> 59 #include <fcntl.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <unistd.h> 64 #include <locale.h> 65 #define EXTERN 66 #include "indent_globs.h" 67 #undef EXTERN 68 #include "indent_codes.h" 69 70 char *in_name = "Standard Input"; /* will always point to name of input 71 * file */ 72 char *out_name = "Standard Output"; /* will always point to name of output 73 * file */ 74 char bakfile[MAXPATHLEN] = ""; 75 76 int main(int, char **); 77 78 int 79 main(int argc, char **argv) 80 { 81 82 extern int found_err; /* flag set in diag() on error */ 83 int dec_ind; /* current indentation for declarations */ 84 int di_stack[20]; /* a stack of structure indentation levels */ 85 int flushed_nl; /* used when buffering up comments to remember 86 * that a newline was passed over */ 87 int force_nl; /* when true, code must be broken */ 88 int hd_type; /* used to store type of stmt for if (...), 89 * for (...), etc */ 90 int i; /* local loop counter */ 91 int scase; /* set to true when we see a case, so we will 92 * know what to do with the following colon */ 93 int sp_sw; /* when true, we are in the expressin of 94 * if(...), while(...), etc. */ 95 int squest; /* when this is positive, we have seen a ? 96 * without the matching : in a <c>?<s>:<s> 97 * construct */ 98 char *t_ptr; /* used for copying tokens */ 99 int type_code; /* the type of token, returned by lexi */ 100 101 int last_else = 0; /* true iff last keyword was an else */ 102 103 104 /*-----------------------------------------------*\ 105 | INITIALIZATION | 106 \*-----------------------------------------------*/ 107 108 if (!setlocale(LC_ALL, "")) 109 fprintf(stderr, "indent: can't set locale.\n"); 110 111 hd_type = 0; 112 ps.p_stack[0] = stmt; /* this is the parser's stack */ 113 ps.last_nl = true; /* this is true if the last thing scanned was 114 * a newline */ 115 ps.last_token = semicolon; 116 combuf = (char *) malloc(bufsize); 117 labbuf = (char *) malloc(bufsize); 118 codebuf = (char *) malloc(bufsize); 119 tokenbuf = (char *) malloc(bufsize); 120 l_com = combuf + bufsize - 5; 121 l_lab = labbuf + bufsize - 5; 122 l_code = codebuf + bufsize - 5; 123 l_token = tokenbuf + bufsize - 5; 124 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, 125 * and comment buffers */ 126 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 127 ps.else_if = 1; /* Default else-if special processing to on */ 128 s_lab = e_lab = labbuf + 1; 129 s_code = e_code = codebuf + 1; 130 s_com = e_com = combuf + 1; 131 s_token = e_token = tokenbuf + 1; 132 133 in_buffer = (char *) malloc(10); 134 in_buffer_limit = in_buffer + 8; 135 buf_ptr = buf_end = in_buffer; 136 line_no = 1; 137 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 138 sp_sw = force_nl = false; 139 ps.in_or_st = false; 140 ps.bl_line = true; 141 dec_ind = 0; 142 di_stack[ps.dec_nest = 0] = 0; 143 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 144 145 146 scase = ps.pcase = false; 147 squest = 0; 148 sc_end = 0; 149 bp_save = 0; 150 be_save = 0; 151 152 output = 0; 153 154 155 156 /*--------------------------------------------------*\ 157 | COMMAND LINE SCAN | 158 \*--------------------------------------------------*/ 159 160 #ifdef undef 161 max_col = 78; /* -l78 */ 162 lineup_to_parens = 1; /* -lp */ 163 ps.ljust_decl = 0; /* -ndj */ 164 ps.com_ind = 33; /* -c33 */ 165 star_comment_cont = 1; /* -sc */ 166 ps.ind_size = 8; /* -i8 */ 167 verbose = 0; 168 ps.decl_indent = 16; /* -di16 */ 169 ps.indent_parameters = 1; /* -ip */ 170 ps.decl_com_ind = 0; /* if this is not set to some positive value 171 * by an arg, we will set this equal to 172 * ps.com_ind */ 173 btype_2 = 1; /* -br */ 174 cuddle_else = 1; /* -ce */ 175 ps.unindent_displace = 0; /* -d0 */ 176 ps.case_indent = 0; /* -cli0 */ 177 format_col1_comments = 1; /* -fc1 */ 178 procnames_start_line = 1; /* -psl */ 179 proc_calls_space = 0; /* -npcs */ 180 comment_delimiter_on_blankline = 1; /* -cdb */ 181 ps.leave_comma = 1; /* -nbc */ 182 #endif 183 184 for (i = 1; i < argc; ++i) 185 if (strcmp(argv[i], "-npro") == 0) 186 break; 187 set_defaults(); 188 if (i >= argc) 189 set_profile(); 190 191 for (i = 1; i < argc; ++i) { 192 193 /* 194 * look thru args (if any) for changes to defaults 195 */ 196 if (argv[i][0] != '-') { /* no flag on parameter */ 197 if (input == 0) { /* we must have the input file */ 198 in_name = argv[i]; /* remember name of 199 * input file */ 200 input = fopen(in_name, "r"); 201 if (input == 0) /* check for open error */ 202 err(1, "%s", in_name); 203 continue; 204 } else 205 if (output == 0) { /* we have the output 206 * file */ 207 out_name = argv[i]; /* remember name of 208 * output file */ 209 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 210 * the file */ 211 fprintf(stderr, "indent: input and output files must be different\n"); 212 exit(1); 213 } 214 output = fopen(out_name, "w"); 215 if (output == 0) /* check for create 216 * error */ 217 err(1, "%s", out_name); 218 continue; 219 } 220 fprintf(stderr, "indent: unknown parameter: %s\n", argv[i]); 221 exit(1); 222 } else 223 set_option(argv[i]); 224 } /* end of for */ 225 if (input == 0) { 226 input = stdin; 227 } 228 if (output == 0) { 229 if (troff || input == stdin) 230 output = stdout; 231 else { 232 out_name = in_name; 233 bakcopy(); 234 } 235 } 236 if (ps.com_ind <= 1) 237 ps.com_ind = 2; /* dont put normal comments before column 2 */ 238 if (troff) { 239 if (bodyf.font[0] == 0) 240 parsefont(&bodyf, "R"); 241 if (scomf.font[0] == 0) 242 parsefont(&scomf, "I"); 243 if (blkcomf.font[0] == 0) 244 blkcomf = scomf, blkcomf.size += 2; 245 if (boxcomf.font[0] == 0) 246 boxcomf = blkcomf; 247 if (stringf.font[0] == 0) 248 parsefont(&stringf, "L"); 249 if (keywordf.font[0] == 0) 250 parsefont(&keywordf, "B"); 251 writefdef(&bodyf, 'B'); 252 writefdef(&scomf, 'C'); 253 writefdef(&blkcomf, 'L'); 254 writefdef(&boxcomf, 'X'); 255 writefdef(&stringf, 'S'); 256 writefdef(&keywordf, 'K'); 257 } 258 if (block_comment_max_col <= 0) 259 block_comment_max_col = max_col; 260 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 261 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 262 if (continuation_indent == 0) 263 continuation_indent = ps.ind_size; 264 fill_buffer(); /* get first batch of stuff into input buffer */ 265 266 parse(semicolon); 267 { 268 char *p = buf_ptr; 269 int col = 1; 270 271 while (1) { 272 if (*p == ' ') 273 col++; 274 else 275 if (*p == '\t') 276 col = ((col - 1) & ~7) + 9; 277 else 278 break; 279 p++; 280 } 281 if (col > ps.ind_size) 282 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 283 } 284 if (troff) { 285 char *p = in_name, *beg = in_name; 286 287 while (*p) 288 if (*p++ == '/') 289 beg = p; 290 fprintf(output, ".Fn \"%s\"\n", beg); 291 } 292 /* 293 * START OF MAIN LOOP 294 */ 295 296 while (1) { /* this is the main loop. it will go until we 297 * reach eof */ 298 int is_procname; 299 300 type_code = lexi(); /* lexi reads one token. The actual 301 * characters read are stored in 302 * "token". lexi returns a code 303 * indicating the type of token */ 304 is_procname = ps.procname[0]; 305 306 /* 307 * The following code moves everything following an if (), while (), 308 * else, etc. up to the start of the following stmt to a buffer. This 309 * allows proper handling of both kinds of brace placement. 310 */ 311 312 flushed_nl = false; 313 while (ps.search_brace) { /* if we scanned an if(), 314 * while(), etc., we might 315 * need to copy stuff into a 316 * buffer we must loop, 317 * copying stuff into 318 * save_com, until we find the 319 * start of the stmt which 320 * follows the if, or whatever */ 321 switch (type_code) { 322 case newline: 323 ++line_no; 324 flushed_nl = true; 325 case form_feed: 326 break; /* form feeds and newlines found here 327 * will be ignored */ 328 329 case lbrace: /* this is a brace that starts the 330 * compound stmt */ 331 if (sc_end == 0) { /* ignore buffering if a 332 * comment wasnt stored 333 * up */ 334 ps.search_brace = false; 335 goto check_type; 336 } 337 if (btype_2) { 338 save_com[0] = '{'; /* we either want to put 339 * the brace right after 340 * the if */ 341 goto sw_buffer; /* go to common code to 342 * get out of this loop */ 343 } 344 case comment: /* we have a comment, so we must copy 345 * it into the buffer */ 346 if (!flushed_nl || sc_end != 0) { 347 if (sc_end == 0) { /* if this is the first 348 * comment, we must set 349 * up the buffer */ 350 save_com[0] = save_com[1] = ' '; 351 sc_end = &(save_com[2]); 352 } else { 353 *sc_end++ = '\n'; /* add newline between 354 * comments */ 355 *sc_end++ = ' '; 356 --line_no; 357 } 358 *sc_end++ = '/'; /* copy in start of 359 * comment */ 360 *sc_end++ = '*'; 361 362 for (;;) { /* loop until we get to 363 * the end of the 364 * comment */ 365 *sc_end = *buf_ptr++; 366 if (buf_ptr >= buf_end) 367 fill_buffer(); 368 369 if (*sc_end++ == '*' && *buf_ptr == '/') 370 break; /* we are at end of 371 * comment */ 372 373 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 374 * overflow */ 375 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever."); 376 fflush(output); 377 exit(1); 378 } 379 } 380 *sc_end++ = '/'; /* add ending slash */ 381 if (++buf_ptr >= buf_end) /* get past / in buffer */ 382 fill_buffer(); 383 break; 384 } 385 default: /* it is the start of a normal 386 * statment */ 387 if (flushed_nl) /* if we flushed a newline, 388 * make sure it is put back */ 389 force_nl = true; 390 if ((type_code == sp_paren && *token == 'i' 391 && last_else && ps.else_if) || 392 (type_code == sp_nparen && *token == 'e' 393 && e_code != s_code && e_code[-1] == '}')) 394 force_nl = false; 395 396 if (sc_end == 0) { /* ignore buffering if 397 * comment wasnt saved 398 * up */ 399 ps.search_brace = false; 400 goto check_type; 401 } 402 if (force_nl) { /* if we should insert a nl 403 * here, put it into the 404 * buffer */ 405 force_nl = false; 406 --line_no; /* this will be 407 * re-increased when the 408 * nl is read from the 409 * buffer */ 410 *sc_end++ = '\n'; 411 *sc_end++ = ' '; 412 if (verbose && !flushed_nl) /* print error msg if 413 * the line was not 414 * already broken */ 415 diag(0, "Line broken"); 416 flushed_nl = false; 417 } 418 for (t_ptr = token; *t_ptr; ++t_ptr) 419 *sc_end++ = *t_ptr; /* copy token into temp 420 * buffer */ 421 ps.procname[0] = 0; 422 423 sw_buffer: 424 ps.search_brace = false; /* stop looking for 425 * start of stmt */ 426 bp_save = buf_ptr; /* save current input 427 * buffer */ 428 be_save = buf_end; 429 buf_ptr = save_com; /* fix so that 430 * subsequent calls to 431 * lexi will take tokens 432 * out of save_com */ 433 *sc_end++ = ' '; /* add trailing blank, 434 * just in case */ 435 buf_end = sc_end; 436 sc_end = 0; 437 break; 438 } /* end of switch */ 439 if (type_code != 0) /* we must make this check, 440 * just in case there was an 441 * unexpected EOF */ 442 type_code = lexi(); /* read another token */ 443 /* if (ps.search_brace) ps.procname[0] = 0; */ 444 if ((is_procname = ps.procname[0]) && flushed_nl 445 && !procnames_start_line && ps.in_decl 446 && type_code == ident) 447 flushed_nl = 0; 448 } /* end of while (search_brace) */ 449 last_else = 0; 450 check_type: 451 if (type_code == 0) { /* we got eof */ 452 if (s_lab != e_lab || s_code != e_code 453 || s_com != e_com) /* must dump end of line */ 454 dump_line(); 455 if (ps.tos > 1) /* check for balanced braces */ 456 diag(1, "Stuff missing from end of file."); 457 458 if (verbose) { 459 printf("There were %d output lines and %d comments\n", 460 ps.out_lines, ps.out_coms); 461 printf("(Lines with comments)/(Lines with code): %6.3f\n", 462 (1.0 * ps.com_lines) / code_lines); 463 } 464 fflush(output); 465 exit(found_err); 466 } 467 if ( 468 (type_code != comment) && 469 (type_code != newline) && 470 (type_code != preesc) && 471 (type_code != form_feed)) { 472 if (force_nl && 473 (type_code != semicolon) && 474 (type_code != lbrace || !btype_2)) { 475 /* we should force a broken line here */ 476 if (verbose && !flushed_nl) 477 diag(0, "Line broken"); 478 flushed_nl = false; 479 dump_line(); 480 ps.want_blank = false; /* dont insert blank at 481 * line start */ 482 force_nl = false; 483 } 484 ps.in_stmt = true; /* turn on flag which causes 485 * an extra level of 486 * indentation. this is turned 487 * off by a ; or '}' */ 488 if (s_com != e_com) { /* the turkey has embedded a 489 * comment in a line. fix it */ 490 *e_code++ = ' '; 491 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 492 CHECK_SIZE_CODE; 493 *e_code++ = *t_ptr; 494 } 495 *e_code++ = ' '; 496 *e_code = '\0'; /* null terminate code sect */ 497 ps.want_blank = false; 498 e_com = s_com; 499 } 500 } else 501 if (type_code != comment) /* preserve force_nl 502 * thru a comment */ 503 force_nl = false; /* cancel forced newline 504 * after newline, form 505 * feed, etc */ 506 507 508 509 /*-----------------------------------------------------*\ 510 | do switch on type of token scanned | 511 \*-----------------------------------------------------*/ 512 CHECK_SIZE_CODE; 513 switch (type_code) { /* now, decide what to do with the 514 * token */ 515 516 case form_feed:/* found a form feed in line */ 517 ps.use_ff = true; /* a form feed is treated much 518 * like a newline */ 519 dump_line(); 520 ps.want_blank = false; 521 break; 522 523 case newline: 524 if (ps.last_token != comma || ps.p_l_follow > 0 525 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 526 dump_line(); 527 ps.want_blank = false; 528 } 529 ++line_no; /* keep track of input line number */ 530 break; 531 532 case lparen: /* got a '(' or '[' */ 533 ++ps.p_l_follow; /* count parens to make Healy 534 * happy */ 535 if (ps.want_blank && *token != '[' && 536 (ps.last_token != ident || proc_calls_space 537 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) 538 *e_code++ = ' '; 539 if (ps.in_decl && !ps.block_init) { 540 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { 541 ps.dumped_decl_indent = 1; 542 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 543 e_code += strlen(e_code); 544 } else { 545 while ((e_code - s_code) < dec_ind) { 546 CHECK_SIZE_CODE; 547 *e_code++ = ' '; 548 } 549 *e_code++ = token[0]; 550 } 551 } else 552 *e_code++ = token[0]; 553 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 554 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 555 && ps.paren_indents[0] < 2 * ps.ind_size) 556 ps.paren_indents[0] = 2 * ps.ind_size; 557 ps.want_blank = false; 558 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 559 /* 560 * this is a kluge to make sure that declarations will be 561 * aligned right if proc decl has an explicit type on it, i.e. 562 * "int a(x) {..." 563 */ 564 parse(semicolon); /* I said this was a 565 * kluge... */ 566 ps.in_or_st = false; /* turn off flag for 567 * structure decl or 568 * initialization */ 569 } 570 if (ps.sizeof_keyword) 571 ps.sizeof_mask |= 1 << ps.p_l_follow; 572 break; 573 574 case rparen: /* got a ')' or ']' */ 575 rparen_count--; 576 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { 577 ps.last_u_d = true; 578 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 579 } 580 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; 581 if (--ps.p_l_follow < 0) { 582 ps.p_l_follow = 0; 583 diag(0, "Extra %c", *token); 584 } 585 if (e_code == s_code) /* if the paren starts the 586 * line */ 587 ps.paren_level = ps.p_l_follow; /* then indent it */ 588 589 *e_code++ = token[0]; 590 ps.want_blank = true; 591 592 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 593 * (...), or some such */ 594 sp_sw = false; 595 force_nl = true; /* must force newline 596 * after if */ 597 ps.last_u_d = true; /* inform lexi that a 598 * following operator is 599 * unary */ 600 ps.in_stmt = false; /* dont use stmt 601 * continuation 602 * indentation */ 603 604 parse(hd_type); /* let parser worry about if, 605 * or whatever */ 606 } 607 ps.search_brace = btype_2; /* this should insure 608 * that constructs such 609 * as main(){...} and 610 * int[]{...} have their 611 * braces put in the 612 * right place */ 613 break; 614 615 case unary_op: /* this could be any unary operation */ 616 if (ps.want_blank) 617 *e_code++ = ' '; 618 619 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { 620 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 621 ps.dumped_decl_indent = 1; 622 e_code += strlen(e_code); 623 } else { 624 char *res = token; 625 626 if (ps.in_decl && !ps.block_init) { /* if this is a unary op 627 * in a declaration, we 628 * should indent this 629 * token */ 630 for (i = 0; token[i]; ++i); /* find length of token */ 631 while ((e_code - s_code) < (dec_ind - i)) { 632 CHECK_SIZE_CODE; 633 *e_code++ = ' '; /* pad it */ 634 } 635 } 636 if (troff && token[0] == '-' && token[1] == '>') 637 res = "\\(->"; 638 for (t_ptr = res; *t_ptr; ++t_ptr) { 639 CHECK_SIZE_CODE; 640 *e_code++ = *t_ptr; 641 } 642 } 643 ps.want_blank = false; 644 break; 645 646 case binary_op:/* any binary operation */ 647 if (ps.want_blank) 648 *e_code++ = ' '; 649 { 650 char *res = token; 651 652 if (troff) 653 switch (token[0]) { 654 case '<': 655 if (token[1] == '=') 656 res = "\\(<="; 657 break; 658 case '>': 659 if (token[1] == '=') 660 res = "\\(>="; 661 break; 662 case '!': 663 if (token[1] == '=') 664 res = "\\(!="; 665 break; 666 case '|': 667 if (token[1] == '|') 668 res = "\\(br\\(br"; 669 else 670 if (token[1] == 0) 671 res = "\\(br"; 672 break; 673 } 674 for (t_ptr = res; *t_ptr; ++t_ptr) { 675 CHECK_SIZE_CODE; 676 *e_code++ = *t_ptr; /* move the operator */ 677 } 678 } 679 ps.want_blank = true; 680 break; 681 682 case postop: /* got a trailing ++ or -- */ 683 *e_code++ = token[0]; 684 *e_code++ = token[1]; 685 ps.want_blank = true; 686 break; 687 688 case question: /* got a ? */ 689 squest++; /* this will be used when a later 690 * colon appears so we can distinguish 691 * the <c>?<n>:<n> construct */ 692 if (ps.want_blank) 693 *e_code++ = ' '; 694 *e_code++ = '?'; 695 ps.want_blank = true; 696 break; 697 698 case casestmt: /* got word 'case' or 'default' */ 699 scase = true; /* so we can process the later colon 700 * properly */ 701 goto copy_id; 702 703 case colon: /* got a ':' */ 704 if (squest > 0) { /* it is part of the <c>?<n>: 705 * <n> construct */ 706 --squest; 707 if (ps.want_blank) 708 *e_code++ = ' '; 709 *e_code++ = ':'; 710 ps.want_blank = true; 711 break; 712 } 713 if (ps.in_or_st) { 714 *e_code++ = ':'; 715 ps.want_blank = false; 716 break; 717 } 718 ps.in_stmt = false; /* seeing a label does not 719 * imply we are in a stmt */ 720 for (t_ptr = s_code; *t_ptr; ++t_ptr) 721 *e_lab++ = *t_ptr; /* turn everything so 722 * far into a label */ 723 e_code = s_code; 724 *e_lab++ = ':'; 725 *e_lab++ = ' '; 726 *e_lab = '\0'; 727 728 force_nl = ps.pcase = scase; /* ps.pcase will be used 729 * by dump_line to 730 * decide how to indent 731 * the label. force_nl 732 * will force a case n: 733 * to be on a line by 734 * itself */ 735 scase = false; 736 ps.want_blank = false; 737 break; 738 739 case semicolon:/* got a ';' */ 740 ps.in_or_st = false; /* we are not in an 741 * initialization or structure 742 * declaration */ 743 scase = false; /* these will only need resetting in a 744 * error */ 745 squest = 0; 746 if (ps.last_token == rparen && rparen_count == 0) 747 ps.in_parameter_declaration = 0; 748 ps.cast_mask = 0; 749 ps.sizeof_mask = 0; 750 ps.block_init = 0; 751 ps.block_init_level = 0; 752 ps.just_saw_decl--; 753 754 if (ps.in_decl && s_code == e_code && !ps.block_init) 755 while ((e_code - s_code) < (dec_ind - 1)) { 756 CHECK_SIZE_CODE; 757 *e_code++ = ' '; 758 } 759 760 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first 761 * level structure 762 * declaration, we arent 763 * any more */ 764 765 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 766 767 /* 768 * This should be true iff there were unbalanced parens in the 769 * stmt. It is a bit complicated, because the semicolon might 770 * be in a for stmt 771 */ 772 diag(1, "Unbalanced parens"); 773 ps.p_l_follow = 0; 774 if (sp_sw) { /* this is a check for a if, 775 * while, etc. with unbalanced 776 * parens */ 777 sp_sw = false; 778 parse(hd_type); /* dont lose the if, or 779 * whatever */ 780 } 781 } 782 *e_code++ = ';'; 783 ps.want_blank = true; 784 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in 785 * the middle of a stmt */ 786 787 if (!sp_sw) { /* if not if for (;;) */ 788 parse(semicolon); /* let parser know about 789 * end of stmt */ 790 force_nl = true; /* force newline after a 791 * end of stmt */ 792 } 793 break; 794 795 case lbrace: /* got a '{' */ 796 ps.in_stmt = false; /* dont indent the {} */ 797 if (!ps.block_init) 798 force_nl = true; /* force other stuff on 799 * same line as '{' onto 800 * new line */ 801 else 802 if (ps.block_init_level <= 0) 803 ps.block_init_level = 1; 804 else 805 ps.block_init_level++; 806 807 if (s_code != e_code && !ps.block_init) { 808 if (!btype_2) { 809 dump_line(); 810 ps.want_blank = false; 811 } else 812 if (ps.in_parameter_declaration && !ps.in_or_st) { 813 ps.i_l_follow = 0; 814 dump_line(); 815 ps.want_blank = false; 816 } 817 } 818 if (ps.in_parameter_declaration) 819 prefix_blankline_requested = 0; 820 821 if (ps.p_l_follow > 0) { /* check for preceding 822 * unbalanced parens */ 823 diag(1, "Unbalanced parens"); 824 ps.p_l_follow = 0; 825 if (sp_sw) { /* check for unclosed if, for, 826 * etc. */ 827 sp_sw = false; 828 parse(hd_type); 829 ps.ind_level = ps.i_l_follow; 830 } 831 } 832 if (s_code == e_code) 833 ps.ind_stmt = false; /* dont put extra 834 * indentation on line 835 * with '{' */ 836 if (ps.in_decl && ps.in_or_st) { /* this is either a 837 * structure declaration 838 * or an init */ 839 di_stack[ps.dec_nest++] = dec_ind; 840 /* ? dec_ind = 0; */ 841 } else { 842 ps.decl_on_line = false; /* we cant be in the 843 * middle of a 844 * declaration, so dont 845 * do special 846 * indentation of 847 * comments */ 848 if (blanklines_after_declarations_at_proctop 849 && ps.in_parameter_declaration) 850 postfix_blankline_requested = 1; 851 ps.in_parameter_declaration = 0; 852 } 853 dec_ind = 0; 854 parse(lbrace); /* let parser know about this */ 855 if (ps.want_blank) /* put a blank before '{' if 856 * '{' is not at start of line */ 857 *e_code++ = ' '; 858 ps.want_blank = false; 859 *e_code++ = '{'; 860 ps.just_saw_decl = 0; 861 break; 862 863 case rbrace: /* got a '}' */ 864 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 865 * omitted in 866 * declarations */ 867 parse(semicolon); 868 if (ps.p_l_follow) { /* check for unclosed if, for, 869 * else. */ 870 diag(1, "Unbalanced parens"); 871 ps.p_l_follow = 0; 872 sp_sw = false; 873 } 874 ps.just_saw_decl = 0; 875 ps.block_init_level--; 876 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 877 * line */ 878 if (verbose) 879 diag(0, "Line broken"); 880 dump_line(); 881 } 882 *e_code++ = '}'; 883 ps.want_blank = true; 884 ps.in_stmt = ps.ind_stmt = false; 885 if (ps.dec_nest > 0) { /* we are in multi-level 886 * structure declaration */ 887 dec_ind = di_stack[--ps.dec_nest]; 888 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 889 ps.just_saw_decl = 2; 890 ps.in_decl = true; 891 } 892 prefix_blankline_requested = 0; 893 parse(rbrace); /* let parser know about this */ 894 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 895 && ps.il[ps.tos] >= ps.ind_level; 896 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 897 postfix_blankline_requested = 1; 898 break; 899 900 case swstmt: /* got keyword "switch" */ 901 sp_sw = true; 902 hd_type = swstmt; /* keep this for when we have 903 * seen the expression */ 904 goto copy_id; /* go move the token into buffer */ 905 906 case sp_paren: /* token is if, while, for */ 907 sp_sw = true; /* the interesting stuff is done after 908 * the expression is scanned */ 909 hd_type = (*token == 'i' ? ifstmt : 910 (*token == 'w' ? whilestmt : forstmt)); 911 912 /* 913 * remember the type of header for later use by parser 914 */ 915 goto copy_id; /* copy the token into line */ 916 917 case sp_nparen:/* got else, do */ 918 ps.in_stmt = false; 919 if (*token == 'e') { 920 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 921 if (verbose) 922 diag(0, "Line broken"); 923 dump_line(); /* make sure this starts 924 * a line */ 925 ps.want_blank = false; 926 } 927 force_nl = true; /* also, following stuff 928 * must go onto new line */ 929 last_else = 1; 930 parse(elselit); 931 } else { 932 if (e_code != s_code) { /* make sure this starts 933 * a line */ 934 if (verbose) 935 diag(0, "Line broken"); 936 dump_line(); 937 ps.want_blank = false; 938 } 939 force_nl = true; /* also, following stuff 940 * must go onto new line */ 941 last_else = 0; 942 parse(dolit); 943 } 944 goto copy_id; /* move the token into line */ 945 946 case decl: /* we have a declaration type (int, register, 947 * etc.) */ 948 parse(decl); /* let parser worry about indentation */ 949 if (ps.last_token == rparen && ps.tos <= 1) { 950 ps.in_parameter_declaration = 1; 951 if (s_code != e_code) { 952 dump_line(); 953 ps.want_blank = 0; 954 } 955 } 956 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 957 ps.ind_level = ps.i_l_follow = 1; 958 ps.ind_stmt = 0; 959 } 960 ps.in_or_st = true; /* this might be a structure 961 * or initialization 962 * declaration */ 963 ps.in_decl = ps.decl_on_line = true; 964 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 965 ps.just_saw_decl = 2; 966 prefix_blankline_requested = 0; 967 for (i = 0; token[i++];); /* get length of token */ 968 969 /* 970 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent 971 * : i); 972 */ 973 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 974 goto copy_id; 975 976 case ident: /* got an identifier or constant */ 977 if (ps.in_decl) { /* if we are in a declaration, 978 * we must indent identifier */ 979 if (ps.want_blank) 980 *e_code++ = ' '; 981 ps.want_blank = false; 982 if (is_procname == 0 || !procnames_start_line) { 983 if (!ps.block_init) { 984 if (troff && !ps.dumped_decl_indent) { 985 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); 986 ps.dumped_decl_indent = 1; 987 e_code += strlen(e_code); 988 } else 989 while ((e_code - s_code) < dec_ind) { 990 CHECK_SIZE_CODE; 991 *e_code++ = ' '; 992 } 993 } 994 } else { 995 if (dec_ind && s_code != e_code) 996 dump_line(); 997 dec_ind = 0; 998 ps.want_blank = false; 999 } 1000 } else 1001 if (sp_sw && ps.p_l_follow == 0) { 1002 sp_sw = false; 1003 force_nl = true; 1004 ps.last_u_d = true; 1005 ps.in_stmt = false; 1006 parse(hd_type); 1007 } 1008 copy_id: 1009 if (ps.want_blank) 1010 *e_code++ = ' '; 1011 if (troff && ps.its_a_keyword) { 1012 e_code = chfont(&bodyf, &keywordf, e_code); 1013 for (t_ptr = token; *t_ptr; ++t_ptr) { 1014 CHECK_SIZE_CODE; 1015 *e_code++ = keywordf.allcaps && islower((unsigned char)*t_ptr) 1016 ? toupper(*t_ptr) : *t_ptr; 1017 } 1018 e_code = chfont(&keywordf, &bodyf, e_code); 1019 } else 1020 for (t_ptr = token; *t_ptr; ++t_ptr) { 1021 CHECK_SIZE_CODE; 1022 *e_code++ = *t_ptr; 1023 } 1024 ps.want_blank = true; 1025 break; 1026 1027 case period: /* treat a period kind of like a binary 1028 * operation */ 1029 *e_code++ = '.'; /* move the period into line */ 1030 ps.want_blank = false; /* dont put a blank after a 1031 * period */ 1032 break; 1033 1034 case comma: 1035 ps.want_blank = (s_code != e_code); /* only put blank after 1036 * comma if comma does 1037 * not start the line */ 1038 if (ps.in_decl && is_procname == 0 && !ps.block_init) 1039 while ((e_code - s_code) < (dec_ind - 1)) { 1040 CHECK_SIZE_CODE; 1041 *e_code++ = ' '; 1042 } 1043 1044 *e_code++ = ','; 1045 if (ps.p_l_follow == 0) { 1046 if (ps.block_init_level <= 0) 1047 ps.block_init = 0; 1048 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1049 force_nl = true; 1050 } 1051 break; 1052 1053 case preesc: /* got the character '#' */ 1054 if ((s_com != e_com) || 1055 (s_lab != e_lab) || 1056 (s_code != e_code)) 1057 dump_line(); 1058 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1059 { 1060 int in_comment = 0; 1061 int com_start = 0; 1062 char quote = 0; 1063 int com_end = 0; 1064 1065 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1066 buf_ptr++; 1067 if (buf_ptr >= buf_end) 1068 fill_buffer(); 1069 } 1070 while (*buf_ptr != '\n' || in_comment) { 1071 CHECK_SIZE_LAB; 1072 *e_lab = *buf_ptr++; 1073 if (buf_ptr >= buf_end) 1074 fill_buffer(); 1075 switch (*e_lab++) { 1076 case BACKSLASH: 1077 if (troff) 1078 *e_lab++ = BACKSLASH; 1079 if (!in_comment) { 1080 *e_lab++ = *buf_ptr++; 1081 if (buf_ptr >= buf_end) 1082 fill_buffer(); 1083 } 1084 break; 1085 case '/': 1086 if (*buf_ptr == '*' && !in_comment && !quote) { 1087 in_comment = 1; 1088 *e_lab++ = *buf_ptr++; 1089 com_start = e_lab - s_lab - 2; 1090 } 1091 break; 1092 case '"': 1093 if (quote == '"') 1094 quote = 0; 1095 break; 1096 case '\'': 1097 if (quote == '\'') 1098 quote = 0; 1099 break; 1100 case '*': 1101 if (*buf_ptr == '/' && in_comment) { 1102 in_comment = 0; 1103 *e_lab++ = *buf_ptr++; 1104 com_end = e_lab - s_lab; 1105 } 1106 break; 1107 } 1108 } 1109 1110 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1111 e_lab--; 1112 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on 1113 * preprocessor line */ 1114 if (sc_end == 0) /* if this is the first 1115 * comment, we must set 1116 * up the buffer */ 1117 sc_end = &(save_com[0]); 1118 else { 1119 *sc_end++ = '\n'; /* add newline between 1120 * comments */ 1121 *sc_end++ = ' '; 1122 --line_no; 1123 } 1124 memmove(sc_end, s_lab + com_start, com_end - com_start); 1125 sc_end += com_end - com_start; 1126 if (sc_end >= &save_com[sc_size]) 1127 abort(); 1128 e_lab = s_lab + com_start; 1129 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1130 e_lab--; 1131 bp_save = buf_ptr; /* save current input 1132 * buffer */ 1133 be_save = buf_end; 1134 buf_ptr = save_com; /* fix so that 1135 * subsequent calls to 1136 * lexi will take tokens 1137 * out of save_com */ 1138 *sc_end++ = ' '; /* add trailing blank, 1139 * just in case */ 1140 buf_end = sc_end; 1141 sc_end = 0; 1142 } 1143 *e_lab = '\0'; /* null terminate line */ 1144 ps.pcase = false; 1145 } 1146 1147 if (strncmp(s_lab, "#if", 3) == 0) { 1148 if (blanklines_around_conditional_compilation) { 1149 int c; 1150 prefix_blankline_requested++; 1151 while ((c = getc(input)) == '\n'); 1152 ungetc(c, input); 1153 } 1154 if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) { 1155 match_state[ifdef_level].tos = -1; 1156 state_stack[ifdef_level++] = ps; 1157 } else 1158 diag(1, "#if stack overflow"); 1159 } else 1160 if (strncmp(s_lab, "#else", 5) == 0) { 1161 if (ifdef_level <= 0) 1162 diag(1, "Unmatched #else"); 1163 else { 1164 match_state[ifdef_level - 1] = ps; 1165 ps = state_stack[ifdef_level - 1]; 1166 } 1167 } else 1168 if (strncmp(s_lab, "#endif", 6) == 0) { 1169 if (ifdef_level <= 0) 1170 diag(1, "Unmatched #endif"); 1171 else { 1172 ifdef_level--; 1173 1174 #ifdef undef 1175 /* 1176 * This match needs to be more intelligent before the 1177 * message is useful 1178 */ 1179 if (match_state[ifdef_level].tos >= 0 1180 && memcmp(&ps, &match_state[ifdef_level], sizeof ps)) 1181 diag(0, "Syntactically inconsistant #ifdef alternatives."); 1182 #endif 1183 } 1184 if (blanklines_around_conditional_compilation) { 1185 postfix_blankline_requested++; 1186 n_real_blanklines = 0; 1187 } 1188 } 1189 break; /* subsequent processing of the newline 1190 * character will cause the line to be printed */ 1191 1192 case comment: /* we have gotten a start comment */ 1193 /* this is a biggie */ 1194 if (flushed_nl) { /* we should force a broken 1195 * line here */ 1196 flushed_nl = false; 1197 dump_line(); 1198 ps.want_blank = false; /* dont insert blank at 1199 * line start */ 1200 force_nl = false; 1201 } 1202 pr_comment(); 1203 break; 1204 } /* end of big switch stmt */ 1205 1206 *e_code = '\0'; /* make sure code section is null terminated */ 1207 if (type_code != comment && type_code != newline && type_code != preesc) 1208 ps.last_token = type_code; 1209 } /* end of main while (1) loop */ 1210 } 1211 /* 1212 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1213 * backup file will be ".Bfile" then make the backup file the input and 1214 * original input file the output 1215 */ 1216 void 1217 bakcopy(void) 1218 { 1219 int n, bakchn; 1220 char buff[8 * 1024]; 1221 char *p; 1222 1223 /* construct file name .Bfile */ 1224 for (p = in_name; *p; p++); /* skip to end of string */ 1225 while (p > in_name && *p != '/') /* find last '/' */ 1226 p--; 1227 if (*p == '/') 1228 p++; 1229 sprintf(bakfile, "%s.BAK", p); 1230 1231 /* copy in_name to backup file */ 1232 bakchn = creat(bakfile, 0600); 1233 if (bakchn < 0) 1234 err(1, "%s", bakfile); 1235 while ((n = read(fileno(input), buff, sizeof buff)) > 0) 1236 if (write(bakchn, buff, n) != n) 1237 err(1, "%s", bakfile); 1238 if (n < 0) 1239 err(1, "%s", in_name); 1240 close(bakchn); 1241 fclose(input); 1242 1243 /* re-open backup file as the input file */ 1244 input = fopen(bakfile, "r"); 1245 if (input == 0) 1246 err(1, "%s", bakfile); 1247 /* now the original input file will be the output */ 1248 output = fopen(in_name, "w"); 1249 if (output == 0) { 1250 unlink(bakfile); 1251 err(1, "%s", in_name); 1252 } 1253 } 1254