1 /* $NetBSD: indent.c,v 1.23 2016/09/05 00:40:29 sevan Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 34 * Copyright (c) 1985 Sun Microsystems, Inc. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 */ 65 66 #include <sys/cdefs.h> 67 #ifndef lint 68 __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\ 69 Copyright (c) 1976 Board of Trustees of the University of Illinois.\ 70 Copyright (c) 1980, 1993\ 71 The Regents of the University of California. All rights reserved."); 72 #endif /* not lint */ 73 74 #ifndef lint 75 #if 0 76 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 77 #else 78 __RCSID("$NetBSD: indent.c,v 1.23 2016/09/05 00:40:29 sevan Exp $"); 79 #endif 80 #endif /* not lint */ 81 82 #include <sys/param.h> 83 #include <ctype.h> 84 #include <err.h> 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <stdio.h> 88 #include <stdlib.h> 89 #include <string.h> 90 #include <unistd.h> 91 #include <locale.h> 92 #define EXTERN 93 #include "indent_globs.h" 94 #undef EXTERN 95 #include "indent_codes.h" 96 97 const char *in_name = "Standard Input"; /* will always point to name of 98 * input file */ 99 const char *out_name = "Standard Output"; /* will always point to name of 100 * output file */ 101 char bakfile[MAXPATHLEN] = ""; 102 103 int 104 main(int argc, char **argv) 105 { 106 107 extern int found_err; /* flag set in diag() on error */ 108 int dec_ind; /* current indentation for declarations */ 109 int di_stack[20]; /* a stack of structure indentation levels */ 110 int flushed_nl; /* used when buffering up comments to remember 111 * that a newline was passed over */ 112 int force_nl; /* when true, code must be broken */ 113 int hd_type; /* used to store type of stmt for if (...), 114 * for (...), etc */ 115 int i; /* local loop counter */ 116 int scase; /* set to true when we see a case, so we will 117 * know what to do with the following colon */ 118 int sp_sw; /* when true, we are in the expressin of 119 * if(...), while(...), etc. */ 120 int squest; /* when this is positive, we have seen a ? 121 * without the matching : in a <c>?<s>:<s> 122 * construct */ 123 const char *t_ptr; /* used for copying tokens */ 124 int tabs_to_var = 0; /* true if using tabs to indent to var name */ 125 int type_code; /* the type of token, returned by lexi */ 126 127 int last_else = 0; /* true iff last keyword was an else */ 128 129 130 /*-----------------------------------------------*\ 131 | INITIALIZATION | 132 \*-----------------------------------------------*/ 133 134 if (!setlocale(LC_ALL, "")) 135 warnx("can't set locale."); 136 137 hd_type = 0; 138 ps.p_stack[0] = stmt; /* this is the parser's stack */ 139 ps.last_nl = true; /* this is true if the last thing scanned was 140 * a newline */ 141 ps.last_token = semicolon; 142 combuf = (char *) malloc(bufsize); 143 labbuf = (char *) malloc(bufsize); 144 codebuf = (char *) malloc(bufsize); 145 tokenbuf = (char *) malloc(bufsize); 146 l_com = combuf + bufsize - 5; 147 l_lab = labbuf + bufsize - 5; 148 l_code = codebuf + bufsize - 5; 149 l_token = tokenbuf + bufsize - 5; 150 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, 151 * and comment buffers */ 152 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 153 ps.else_if = 1; /* Default else-if special processing to on */ 154 s_lab = e_lab = labbuf + 1; 155 s_code = e_code = codebuf + 1; 156 s_com = e_com = combuf + 1; 157 s_token = e_token = tokenbuf + 1; 158 159 in_buffer = (char *) malloc(10); 160 in_buffer_limit = in_buffer + 8; 161 buf_ptr = buf_end = in_buffer; 162 line_no = 1; 163 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 164 sp_sw = force_nl = false; 165 ps.in_or_st = false; 166 ps.bl_line = true; 167 dec_ind = 0; 168 di_stack[ps.dec_nest = 0] = 0; 169 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 170 171 172 scase = ps.pcase = false; 173 squest = 0; 174 sc_end = 0; 175 bp_save = 0; 176 be_save = 0; 177 178 output = 0; 179 180 181 182 /*--------------------------------------------------*\ 183 | COMMAND LINE SCAN | 184 \*--------------------------------------------------*/ 185 186 #ifdef undef 187 max_col = 78; /* -l78 */ 188 lineup_to_parens = 1; /* -lp */ 189 ps.ljust_decl = 0; /* -ndj */ 190 ps.com_ind = 33; /* -c33 */ 191 star_comment_cont = 1; /* -sc */ 192 ps.ind_size = 8; /* -i8 */ 193 verbose = 0; 194 ps.decl_indent = 16; /* -di16 */ 195 ps.indent_parameters = 1; /* -ip */ 196 ps.decl_com_ind = 0; /* if this is not set to some positive value 197 * by an arg, we will set this equal to 198 * ps.com_ind */ 199 btype_2 = 1; /* -br */ 200 cuddle_else = 1; /* -ce */ 201 ps.unindent_displace = 0; /* -d0 */ 202 ps.case_indent = 0; /* -cli0 */ 203 format_col1_comments = 1; /* -fc1 */ 204 procnames_start_line = 1; /* -psl */ 205 proc_calls_space = 0; /* -npcs */ 206 comment_delimiter_on_blankline = 1; /* -cdb */ 207 ps.leave_comma = 1; /* -nbc */ 208 #endif 209 210 for (i = 1; i < argc; ++i) 211 if (strcmp(argv[i], "-npro") == 0) 212 break; 213 set_defaults(); 214 if (i >= argc) 215 set_profile(); 216 217 for (i = 1; i < argc; ++i) { 218 219 /* 220 * look thru args (if any) for changes to defaults 221 */ 222 if (argv[i][0] != '-') { /* no flag on parameter */ 223 if (input == 0) { /* we must have the input file */ 224 in_name = argv[i]; /* remember name of 225 * input file */ 226 input = fopen(in_name, "r"); 227 if (input == 0) /* check for open error */ 228 err(1, "%s", in_name); 229 continue; 230 } else 231 if (output == 0) { /* we have the output 232 * file */ 233 out_name = argv[i]; /* remember name of 234 * output file */ 235 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 236 * the file */ 237 errx(1, "input and output files must be different"); 238 } 239 output = fopen(out_name, "w"); 240 if (output == 0) /* check for create 241 * error */ 242 err(1, "%s", out_name); 243 continue; 244 } 245 errx(1, "unknown parameter: %s", argv[i]); 246 } else 247 set_option(argv[i]); 248 } /* end of for */ 249 if (input == 0) { 250 input = stdin; 251 } 252 if (output == 0) { 253 if (troff || input == stdin) 254 output = stdout; 255 else { 256 out_name = in_name; 257 bakcopy(); 258 } 259 } 260 if (ps.com_ind <= 1) 261 ps.com_ind = 2; /* don't put normal comments before column 2 */ 262 if (troff) { 263 if (bodyf.font[0] == 0) 264 parsefont(&bodyf, "R"); 265 if (scomf.font[0] == 0) 266 parsefont(&scomf, "I"); 267 if (blkcomf.font[0] == 0) 268 blkcomf = scomf, blkcomf.size += 2; 269 if (boxcomf.font[0] == 0) 270 boxcomf = blkcomf; 271 if (stringf.font[0] == 0) 272 parsefont(&stringf, "L"); 273 if (keywordf.font[0] == 0) 274 parsefont(&keywordf, "B"); 275 writefdef(&bodyf, 'B'); 276 writefdef(&scomf, 'C'); 277 writefdef(&blkcomf, 'L'); 278 writefdef(&boxcomf, 'X'); 279 writefdef(&stringf, 'S'); 280 writefdef(&keywordf, 'K'); 281 } 282 if (block_comment_max_col <= 0) 283 block_comment_max_col = max_col; 284 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 285 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 286 if (continuation_indent == 0) 287 continuation_indent = ps.ind_size; 288 fill_buffer(); /* get first batch of stuff into input buffer */ 289 290 parse(semicolon); 291 { 292 char *p = buf_ptr; 293 int col = 1; 294 295 while (1) { 296 if (*p == ' ') 297 col++; 298 else 299 if (*p == '\t') 300 col = ((col - 1) & ~7) + 9; 301 else 302 break; 303 p++; 304 } 305 if (col > ps.ind_size) 306 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 307 } 308 if (troff) { 309 const char *p = in_name, *beg = in_name; 310 311 while (*p) 312 if (*p++ == '/') 313 beg = p; 314 fprintf(output, ".Fn \"%s\"\n", beg); 315 } 316 /* 317 * START OF MAIN LOOP 318 */ 319 320 while (1) { /* this is the main loop. it will go until we 321 * reach eof */ 322 int is_procname; 323 324 type_code = lexi(); /* lexi reads one token. The actual 325 * characters read are stored in 326 * "token". lexi returns a code 327 * indicating the type of token */ 328 is_procname = ps.procname[0]; 329 330 /* 331 * The following code moves everything following an if (), while (), 332 * else, etc. up to the start of the following stmt to a buffer. This 333 * allows proper handling of both kinds of brace placement. 334 */ 335 336 flushed_nl = false; 337 while (ps.search_brace) { /* if we scanned an if(), 338 * while(), etc., we might 339 * need to copy stuff into a 340 * buffer we must loop, 341 * copying stuff into 342 * save_com, until we find the 343 * start of the stmt which 344 * follows the if, or whatever */ 345 switch (type_code) { 346 case newline: 347 ++line_no; 348 flushed_nl = true; 349 case form_feed: 350 break; /* form feeds and newlines found here 351 * will be ignored */ 352 353 case lbrace: /* this is a brace that starts the 354 * compound stmt */ 355 if (sc_end == 0) { /* ignore buffering if a 356 * comment wasn't stored 357 * up */ 358 ps.search_brace = false; 359 goto check_type; 360 } 361 if (btype_2) { 362 save_com[0] = '{'; /* we either want to put 363 * the brace right after 364 * the if */ 365 goto sw_buffer; /* go to common code to 366 * get out of this loop */ 367 } 368 case comment: /* we have a comment, so we must copy 369 * it into the buffer */ 370 if (!flushed_nl || sc_end != 0) { 371 if (sc_end == 0) { /* if this is the first 372 * comment, we must set 373 * up the buffer */ 374 save_com[0] = save_com[1] = ' '; 375 sc_end = &(save_com[2]); 376 } else { 377 *sc_end++ = '\n'; /* add newline between 378 * comments */ 379 *sc_end++ = ' '; 380 --line_no; 381 } 382 *sc_end++ = '/'; /* copy in start of 383 * comment */ 384 *sc_end++ = '*'; 385 386 for (;;) { /* loop until we get to 387 * the end of the 388 * comment */ 389 *sc_end = *buf_ptr++; 390 if (buf_ptr >= buf_end) 391 fill_buffer(); 392 393 if (*sc_end++ == '*' && *buf_ptr == '/') 394 break; /* we are at end of 395 * comment */ 396 397 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 398 * overflow */ 399 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever."); 400 fflush(output); 401 exit(1); 402 } 403 } 404 *sc_end++ = '/'; /* add ending slash */ 405 if (++buf_ptr >= buf_end) /* get past / in buffer */ 406 fill_buffer(); 407 break; 408 } 409 default: /* it is the start of a normal 410 * statment */ 411 if (flushed_nl) /* if we flushed a newline, 412 * make sure it is put back */ 413 force_nl = true; 414 if ((type_code == sp_paren && *token == 'i' 415 && last_else && ps.else_if) || 416 (type_code == sp_nparen && *token == 'e' 417 && e_code != s_code && e_code[-1] == '}')) 418 force_nl = false; 419 420 if (sc_end == 0) { /* ignore buffering if 421 * comment wasn't saved 422 * up */ 423 ps.search_brace = false; 424 goto check_type; 425 } 426 if (force_nl) { /* if we should insert a nl 427 * here, put it into the 428 * buffer */ 429 force_nl = false; 430 --line_no; /* this will be 431 * re-increased when the 432 * nl is read from the 433 * buffer */ 434 *sc_end++ = '\n'; 435 *sc_end++ = ' '; 436 if (verbose && !flushed_nl) /* print error msg if 437 * the line was not 438 * already broken */ 439 diag(0, "Line broken"); 440 flushed_nl = false; 441 } 442 for (t_ptr = token; *t_ptr; ++t_ptr) 443 *sc_end++ = *t_ptr; /* copy token into temp 444 * buffer */ 445 ps.procname[0] = 0; 446 447 sw_buffer: 448 ps.search_brace = false; /* stop looking for 449 * start of stmt */ 450 bp_save = buf_ptr; /* save current input 451 * buffer */ 452 be_save = buf_end; 453 buf_ptr = save_com; /* fix so that 454 * subsequent calls to 455 * lexi will take tokens 456 * out of save_com */ 457 *sc_end++ = ' '; /* add trailing blank, 458 * just in case */ 459 buf_end = sc_end; 460 sc_end = 0; 461 break; 462 } /* end of switch */ 463 if (type_code != 0) /* we must make this check, 464 * just in case there was an 465 * unexpected EOF */ 466 type_code = lexi(); /* read another token */ 467 /* if (ps.search_brace) ps.procname[0] = 0; */ 468 if ((is_procname = ps.procname[0]) && flushed_nl 469 && !procnames_start_line && ps.in_decl 470 && type_code == ident) 471 flushed_nl = 0; 472 } /* end of while (search_brace) */ 473 last_else = 0; 474 check_type: 475 if (type_code == 0) { /* we got eof */ 476 if (s_lab != e_lab || s_code != e_code 477 || s_com != e_com) /* must dump end of line */ 478 dump_line(); 479 if (ps.tos > 1) /* check for balanced braces */ 480 diag(1, "Stuff missing from end of file."); 481 482 if (verbose) { 483 printf("There were %d output lines and %d comments\n", 484 ps.out_lines, ps.out_coms); 485 printf("(Lines with comments)/(Lines with code): %6.3f\n", 486 (1.0 * ps.com_lines) / code_lines); 487 } 488 fflush(output); 489 exit(found_err); 490 } 491 if ( 492 (type_code != comment) && 493 (type_code != newline) && 494 (type_code != preesc) && 495 (type_code != form_feed)) { 496 if (force_nl && 497 (type_code != semicolon) && 498 (type_code != lbrace || !btype_2)) { 499 /* we should force a broken line here */ 500 if (verbose && !flushed_nl) 501 diag(0, "Line broken"); 502 flushed_nl = false; 503 dump_line(); 504 ps.want_blank = false; /* don't insert blank at 505 * line start */ 506 force_nl = false; 507 } 508 ps.in_stmt = true; /* turn on flag which causes 509 * an extra level of 510 * indentation. this is turned 511 * off by a ; or '}' */ 512 if (s_com != e_com) { /* the turkey has embedded a 513 * comment in a line. fix it */ 514 *e_code++ = ' '; 515 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 516 CHECK_SIZE_CODE; 517 *e_code++ = *t_ptr; 518 } 519 *e_code++ = ' '; 520 *e_code = '\0'; /* null terminate code sect */ 521 ps.want_blank = false; 522 e_com = s_com; 523 } 524 } else 525 if (type_code != comment) /* preserve force_nl 526 * thru a comment */ 527 force_nl = false; /* cancel forced newline 528 * after newline, form 529 * feed, etc */ 530 531 532 533 /*-----------------------------------------------------*\ 534 | do switch on type of token scanned | 535 \*-----------------------------------------------------*/ 536 CHECK_SIZE_CODE; 537 switch (type_code) { /* now, decide what to do with the 538 * token */ 539 540 case form_feed:/* found a form feed in line */ 541 ps.use_ff = true; /* a form feed is treated much 542 * like a newline */ 543 dump_line(); 544 ps.want_blank = false; 545 break; 546 547 case newline: 548 if (ps.last_token != comma || ps.p_l_follow > 0 549 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 550 dump_line(); 551 ps.want_blank = false; 552 } 553 ++line_no; /* keep track of input line number */ 554 break; 555 556 case lparen: /* got a '(' or '[' */ 557 ++ps.p_l_follow; /* count parens to make Healy 558 * happy */ 559 if (ps.want_blank && *token != '[' && 560 (ps.last_token != ident || proc_calls_space 561 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) 562 *e_code++ = ' '; 563 if (ps.in_decl && !ps.block_init) { 564 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { 565 ps.dumped_decl_indent = 1; 566 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 567 e_code += strlen(e_code); 568 } else { 569 while ((e_code - s_code) < dec_ind) { 570 CHECK_SIZE_CODE; 571 *e_code++ = ' '; 572 } 573 *e_code++ = token[0]; 574 } 575 } else 576 *e_code++ = token[0]; 577 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 578 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 579 && ps.paren_indents[0] < 2 * ps.ind_size) 580 ps.paren_indents[0] = 2 * ps.ind_size; 581 ps.want_blank = false; 582 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 583 /* 584 * this is a kluge to make sure that declarations will be 585 * aligned right if proc decl has an explicit type on it, i.e. 586 * "int a(x) {..." 587 */ 588 parse(semicolon); /* I said this was a 589 * kluge... */ 590 ps.in_or_st = false; /* turn off flag for 591 * structure decl or 592 * initialization */ 593 } 594 if (ps.sizeof_keyword) 595 ps.sizeof_mask |= 1 << ps.p_l_follow; 596 break; 597 598 case rparen: /* got a ')' or ']' */ 599 rparen_count--; 600 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { 601 ps.last_u_d = true; 602 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 603 } 604 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; 605 if (--ps.p_l_follow < 0) { 606 ps.p_l_follow = 0; 607 diag(0, "Extra %c", *token); 608 } 609 if (e_code == s_code) /* if the paren starts the 610 * line */ 611 ps.paren_level = ps.p_l_follow; /* then indent it */ 612 613 *e_code++ = token[0]; 614 ps.want_blank = true; 615 616 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 617 * (...), or some such */ 618 sp_sw = false; 619 force_nl = true; /* must force newline 620 * after if */ 621 ps.last_u_d = true; /* inform lexi that a 622 * following operator is 623 * unary */ 624 ps.in_stmt = false; /* don't use stmt 625 * continuation 626 * indentation */ 627 628 parse(hd_type); /* let parser worry about if, 629 * or whatever */ 630 } 631 ps.search_brace = btype_2; /* this should insure 632 * that constructs such 633 * as main(){...} and 634 * int[]{...} have their 635 * braces put in the 636 * right place */ 637 break; 638 639 case unary_op: /* this could be any unary operation */ 640 if (ps.want_blank) 641 *e_code++ = ' '; 642 643 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { 644 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 645 ps.dumped_decl_indent = 1; 646 e_code += strlen(e_code); 647 } else { 648 const char *res = token; 649 650 if (ps.in_decl && !ps.block_init) { /* if this is a unary op 651 * in a declaration, we 652 * should indent this 653 * token */ 654 for (i = 0; token[i]; ++i); /* find length of token */ 655 while ((e_code - s_code) < (dec_ind - i)) { 656 CHECK_SIZE_CODE; 657 *e_code++ = ' '; /* pad it */ 658 } 659 } 660 if (troff && token[0] == '-' && token[1] == '>') 661 res = "\\(->"; 662 for (t_ptr = res; *t_ptr; ++t_ptr) { 663 CHECK_SIZE_CODE; 664 *e_code++ = *t_ptr; 665 } 666 } 667 ps.want_blank = false; 668 break; 669 670 case binary_op:/* any binary operation */ 671 if (ps.want_blank) 672 *e_code++ = ' '; 673 { 674 const char *res = token; 675 676 if (troff) 677 switch (token[0]) { 678 case '<': 679 if (token[1] == '=') 680 res = "\\(<="; 681 break; 682 case '>': 683 if (token[1] == '=') 684 res = "\\(>="; 685 break; 686 case '!': 687 if (token[1] == '=') 688 res = "\\(!="; 689 break; 690 case '|': 691 if (token[1] == '|') 692 res = "\\(br\\(br"; 693 else 694 if (token[1] == 0) 695 res = "\\(br"; 696 break; 697 } 698 for (t_ptr = res; *t_ptr; ++t_ptr) { 699 CHECK_SIZE_CODE; 700 *e_code++ = *t_ptr; /* move the operator */ 701 } 702 } 703 ps.want_blank = true; 704 break; 705 706 case postop: /* got a trailing ++ or -- */ 707 *e_code++ = token[0]; 708 *e_code++ = token[1]; 709 ps.want_blank = true; 710 break; 711 712 case question: /* got a ? */ 713 squest++; /* this will be used when a later 714 * colon appears so we can distinguish 715 * the <c>?<n>:<n> construct */ 716 if (ps.want_blank) 717 *e_code++ = ' '; 718 *e_code++ = '?'; 719 ps.want_blank = true; 720 break; 721 722 case casestmt: /* got word 'case' or 'default' */ 723 scase = true; /* so we can process the later colon 724 * properly */ 725 goto copy_id; 726 727 case colon: /* got a ':' */ 728 if (squest > 0) { /* it is part of the <c>?<n>: 729 * <n> construct */ 730 --squest; 731 if (ps.want_blank) 732 *e_code++ = ' '; 733 *e_code++ = ':'; 734 ps.want_blank = true; 735 break; 736 } 737 if (ps.in_or_st) { 738 *e_code++ = ':'; 739 ps.want_blank = false; 740 break; 741 } 742 ps.in_stmt = false; /* seeing a label does not 743 * imply we are in a stmt */ 744 for (t_ptr = s_code; *t_ptr; ++t_ptr) 745 *e_lab++ = *t_ptr; /* turn everything so 746 * far into a label */ 747 e_code = s_code; 748 *e_lab++ = ':'; 749 *e_lab++ = ' '; 750 *e_lab = '\0'; 751 752 force_nl = ps.pcase = scase; /* ps.pcase will be used 753 * by dump_line to 754 * decide how to indent 755 * the label. force_nl 756 * will force a case n: 757 * to be on a line by 758 * itself */ 759 scase = false; 760 ps.want_blank = false; 761 break; 762 763 case semicolon:/* got a ';' */ 764 ps.in_or_st = false; /* we are not in an 765 * initialization or structure 766 * declaration */ 767 scase = false; /* these will only need resetting in a 768 * error */ 769 squest = 0; 770 if (ps.last_token == rparen && rparen_count == 0) 771 ps.in_parameter_declaration = 0; 772 ps.cast_mask = 0; 773 ps.sizeof_mask = 0; 774 ps.block_init = 0; 775 ps.block_init_level = 0; 776 ps.just_saw_decl--; 777 778 if (ps.in_decl && s_code == e_code && !ps.block_init) 779 while ((e_code - s_code) < (dec_ind - 1)) { 780 CHECK_SIZE_CODE; 781 *e_code++ = ' '; 782 } 783 784 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first 785 * level structure 786 * declaration, we 787 * aren't any more */ 788 789 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 790 791 /* 792 * This should be true iff there were unbalanced parens in the 793 * stmt. It is a bit complicated, because the semicolon might 794 * be in a for stmt 795 */ 796 diag(1, "Unbalanced parens"); 797 ps.p_l_follow = 0; 798 if (sp_sw) { /* this is a check for a if, 799 * while, etc. with unbalanced 800 * parens */ 801 sp_sw = false; 802 parse(hd_type); /* don't lose the if, 803 * or whatever */ 804 } 805 } 806 *e_code++ = ';'; 807 ps.want_blank = true; 808 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in 809 * the middle of a stmt */ 810 811 if (!sp_sw) { /* if not if for (;;) */ 812 parse(semicolon); /* let parser know about 813 * end of stmt */ 814 force_nl = true; /* force newline after a 815 * end of stmt */ 816 } 817 break; 818 819 case lbrace: /* got a '{' */ 820 ps.in_stmt = false; /* don't indent the {} */ 821 if (!ps.block_init) 822 force_nl = true; /* force other stuff on 823 * same line as '{' onto 824 * new line */ 825 else 826 if (ps.block_init_level <= 0) 827 ps.block_init_level = 1; 828 else 829 ps.block_init_level++; 830 831 if (s_code != e_code && !ps.block_init) { 832 if (!btype_2) { 833 dump_line(); 834 ps.want_blank = false; 835 } else 836 if (ps.in_parameter_declaration && !ps.in_or_st) { 837 ps.i_l_follow = 0; 838 dump_line(); 839 ps.want_blank = false; 840 } 841 } 842 if (ps.in_parameter_declaration) 843 prefix_blankline_requested = 0; 844 845 if (ps.p_l_follow > 0) { /* check for preceding 846 * unbalanced parens */ 847 diag(1, "Unbalanced parens"); 848 ps.p_l_follow = 0; 849 if (sp_sw) { /* check for unclosed if, for, 850 * etc. */ 851 sp_sw = false; 852 parse(hd_type); 853 ps.ind_level = ps.i_l_follow; 854 } 855 } 856 if (s_code == e_code) 857 ps.ind_stmt = false; /* don't put extra 858 * indentation on line 859 * with '{' */ 860 if (ps.in_decl && ps.in_or_st) { /* this is either a 861 * structure declaration 862 * or an init */ 863 di_stack[ps.dec_nest++] = dec_ind; 864 /* ? dec_ind = 0; */ 865 } else { 866 ps.decl_on_line = false; /* we can't be in the 867 * middle of a 868 * declaration, so don't 869 * do special 870 * indentation of 871 * comments */ 872 if (blanklines_after_declarations_at_proctop 873 && ps.in_parameter_declaration) 874 postfix_blankline_requested = 1; 875 ps.in_parameter_declaration = 0; 876 } 877 dec_ind = 0; 878 parse(lbrace); /* let parser know about this */ 879 if (ps.want_blank) /* put a blank before '{' if 880 * '{' is not at start of line */ 881 *e_code++ = ' '; 882 ps.want_blank = false; 883 *e_code++ = '{'; 884 ps.just_saw_decl = 0; 885 break; 886 887 case rbrace: /* got a '}' */ 888 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 889 * omitted in 890 * declarations */ 891 parse(semicolon); 892 if (ps.p_l_follow) { /* check for unclosed if, for, 893 * else. */ 894 diag(1, "Unbalanced parens"); 895 ps.p_l_follow = 0; 896 sp_sw = false; 897 } 898 ps.just_saw_decl = 0; 899 ps.block_init_level--; 900 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 901 * line */ 902 if (verbose) 903 diag(0, "Line broken"); 904 dump_line(); 905 } 906 *e_code++ = '}'; 907 ps.want_blank = true; 908 ps.in_stmt = ps.ind_stmt = false; 909 if (ps.dec_nest > 0) { /* we are in multi-level 910 * structure declaration */ 911 dec_ind = di_stack[--ps.dec_nest]; 912 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 913 ps.just_saw_decl = 2; 914 ps.in_decl = true; 915 } 916 prefix_blankline_requested = 0; 917 parse(rbrace); /* let parser know about this */ 918 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 919 && ps.il[ps.tos] >= ps.ind_level; 920 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 921 postfix_blankline_requested = 1; 922 break; 923 924 case swstmt: /* got keyword "switch" */ 925 sp_sw = true; 926 hd_type = swstmt; /* keep this for when we have 927 * seen the expression */ 928 goto copy_id; /* go move the token into buffer */ 929 930 case sp_paren: /* token is if, while, for */ 931 sp_sw = true; /* the interesting stuff is done after 932 * the expression is scanned */ 933 hd_type = (*token == 'i' ? ifstmt : 934 (*token == 'w' ? whilestmt : forstmt)); 935 936 /* 937 * remember the type of header for later use by parser 938 */ 939 goto copy_id; /* copy the token into line */ 940 941 case sp_nparen:/* got else, do */ 942 ps.in_stmt = false; 943 if (*token == 'e') { 944 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 945 if (verbose) 946 diag(0, "Line broken"); 947 dump_line(); /* make sure this starts 948 * a line */ 949 ps.want_blank = false; 950 } 951 force_nl = true; /* also, following stuff 952 * must go onto new line */ 953 last_else = 1; 954 parse(elselit); 955 } else { 956 if (e_code != s_code) { /* make sure this starts 957 * a line */ 958 if (verbose) 959 diag(0, "Line broken"); 960 dump_line(); 961 ps.want_blank = false; 962 } 963 force_nl = true; /* also, following stuff 964 * must go onto new line */ 965 last_else = 0; 966 parse(dolit); 967 } 968 goto copy_id; /* move the token into line */ 969 970 case decl: /* we have a declaration type (int, register, 971 * etc.) */ 972 parse(decl); /* let parser worry about indentation */ 973 if (ps.last_token == rparen && ps.tos <= 1) { 974 ps.in_parameter_declaration = 1; 975 if (s_code != e_code) { 976 dump_line(); 977 ps.want_blank = 0; 978 } 979 } 980 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 981 ps.ind_level = ps.i_l_follow = 1; 982 ps.ind_stmt = 0; 983 } 984 ps.in_or_st = true; /* this might be a structure 985 * or initialization 986 * declaration */ 987 ps.in_decl = ps.decl_on_line = true; 988 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 989 ps.just_saw_decl = 2; 990 prefix_blankline_requested = 0; 991 for (i = 0; token[i++];); /* get length of token */ 992 993 /* 994 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent 995 * : i); 996 */ 997 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 998 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 999 goto copy_id; 1000 1001 case ident: /* got an identifier or constant */ 1002 if (ps.in_decl) { /* if we are in a declaration, 1003 * we must indent identifier */ 1004 if (ps.want_blank) 1005 *e_code++ = ' '; 1006 ps.want_blank = false; 1007 if (is_procname == 0 || !procnames_start_line) { 1008 if (!ps.block_init) { 1009 if (troff && !ps.dumped_decl_indent) { 1010 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); 1011 ps.dumped_decl_indent = 1; 1012 e_code += strlen(e_code); 1013 CHECK_SIZE_CODE; 1014 } else { 1015 int cur_dec_ind; 1016 int pos, startpos; 1017 1018 /* 1019 * in order to get the tab math right for 1020 * indentations that are not multiples of 8 we 1021 * need to modify both startpos and dec_ind 1022 * (cur_dec_ind) here by eight minus the 1023 * remainder of the current starting column 1024 * divided by eight. This seems to be a 1025 * properly working fix 1026 */ 1027 startpos = e_code - s_code; 1028 cur_dec_ind = dec_ind; 1029 pos = startpos; 1030 if ((ps.ind_level * ps.ind_size) % 8 != 0) { 1031 pos += (ps.ind_level * ps.ind_size) % 8; 1032 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; 1033 } 1034 1035 if (tabs_to_var) { 1036 while ((pos & ~7) + 8 <= cur_dec_ind) { 1037 CHECK_SIZE_CODE; 1038 *e_code++ = '\t'; 1039 pos = (pos & ~7) + 8; 1040 } 1041 } 1042 while (pos < cur_dec_ind) { 1043 CHECK_SIZE_CODE; 1044 *e_code++ = ' '; 1045 pos++; 1046 } 1047 if (ps.want_blank && e_code - s_code == startpos) 1048 *e_code++ = ' '; 1049 ps.want_blank = false; 1050 } 1051 } 1052 } else { 1053 if (dec_ind && s_code != e_code) 1054 dump_line(); 1055 dec_ind = 0; 1056 ps.want_blank = false; 1057 } 1058 } else 1059 if (sp_sw && ps.p_l_follow == 0) { 1060 sp_sw = false; 1061 force_nl = true; 1062 ps.last_u_d = true; 1063 ps.in_stmt = false; 1064 parse(hd_type); 1065 } 1066 copy_id: 1067 if (ps.want_blank) 1068 *e_code++ = ' '; 1069 if (troff && ps.its_a_keyword) { 1070 e_code = chfont(&bodyf, &keywordf, e_code); 1071 for (t_ptr = token; *t_ptr; ++t_ptr) { 1072 CHECK_SIZE_CODE; 1073 *e_code++ = keywordf.allcaps 1074 ? toupper((unsigned char)*t_ptr) 1075 : *t_ptr; 1076 } 1077 e_code = chfont(&keywordf, &bodyf, e_code); 1078 } else 1079 for (t_ptr = token; *t_ptr; ++t_ptr) { 1080 CHECK_SIZE_CODE; 1081 *e_code++ = *t_ptr; 1082 } 1083 ps.want_blank = true; 1084 break; 1085 1086 case period: /* treat a period kind of like a binary 1087 * operation */ 1088 *e_code++ = '.'; /* move the period into line */ 1089 ps.want_blank = false; /* don't put a blank after a 1090 * period */ 1091 break; 1092 1093 case comma: 1094 ps.want_blank = (s_code != e_code); /* only put blank after 1095 * comma if comma does 1096 * not start the line */ 1097 if (ps.in_decl && is_procname == 0 && !ps.block_init) 1098 while ((e_code - s_code) < (dec_ind - 1)) { 1099 CHECK_SIZE_CODE; 1100 *e_code++ = ' '; 1101 } 1102 1103 *e_code++ = ','; 1104 if (ps.p_l_follow == 0) { 1105 if (ps.block_init_level <= 0) 1106 ps.block_init = 0; 1107 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1108 force_nl = true; 1109 } 1110 break; 1111 1112 case preesc: /* got the character '#' */ 1113 if ((s_com != e_com) || 1114 (s_lab != e_lab) || 1115 (s_code != e_code)) 1116 dump_line(); 1117 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1118 { 1119 int in_comment = 0; 1120 int com_start = 0; 1121 char quote = 0; 1122 int com_end = 0; 1123 1124 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1125 buf_ptr++; 1126 if (buf_ptr >= buf_end) 1127 fill_buffer(); 1128 } 1129 while (*buf_ptr != '\n' || in_comment) { 1130 CHECK_SIZE_LAB; 1131 *e_lab = *buf_ptr++; 1132 if (buf_ptr >= buf_end) 1133 fill_buffer(); 1134 switch (*e_lab++) { 1135 case BACKSLASH: 1136 if (troff) 1137 *e_lab++ = BACKSLASH; 1138 if (!in_comment) { 1139 *e_lab++ = *buf_ptr++; 1140 if (buf_ptr >= buf_end) 1141 fill_buffer(); 1142 } 1143 break; 1144 case '/': 1145 if (*buf_ptr == '*' && !in_comment && !quote) { 1146 in_comment = 1; 1147 *e_lab++ = *buf_ptr++; 1148 com_start = e_lab - s_lab - 2; 1149 } 1150 break; 1151 case '"': 1152 if (quote == '"') 1153 quote = 0; 1154 break; 1155 case '\'': 1156 if (quote == '\'') 1157 quote = 0; 1158 break; 1159 case '*': 1160 if (*buf_ptr == '/' && in_comment) { 1161 in_comment = 0; 1162 *e_lab++ = *buf_ptr++; 1163 com_end = e_lab - s_lab; 1164 } 1165 break; 1166 } 1167 } 1168 1169 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1170 e_lab--; 1171 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on 1172 * preprocessor line */ 1173 if (sc_end == 0) /* if this is the first 1174 * comment, we must set 1175 * up the buffer */ 1176 sc_end = &(save_com[0]); 1177 else { 1178 *sc_end++ = '\n'; /* add newline between 1179 * comments */ 1180 *sc_end++ = ' '; 1181 --line_no; 1182 } 1183 memmove(sc_end, s_lab + com_start, com_end - com_start); 1184 sc_end += com_end - com_start; 1185 if (sc_end >= &save_com[sc_size]) 1186 abort(); 1187 e_lab = s_lab + com_start; 1188 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1189 e_lab--; 1190 bp_save = buf_ptr; /* save current input 1191 * buffer */ 1192 be_save = buf_end; 1193 buf_ptr = save_com; /* fix so that 1194 * subsequent calls to 1195 * lexi will take tokens 1196 * out of save_com */ 1197 *sc_end++ = ' '; /* add trailing blank, 1198 * just in case */ 1199 buf_end = sc_end; 1200 sc_end = 0; 1201 } 1202 *e_lab = '\0'; /* null terminate line */ 1203 ps.pcase = false; 1204 } 1205 1206 if (strncmp(s_lab, "#if", 3) == 0) { 1207 if (blanklines_around_conditional_compilation) { 1208 int c; 1209 prefix_blankline_requested++; 1210 while ((c = getc(input)) == '\n'); 1211 ungetc(c, input); 1212 } 1213 if (ifdef_level < (int)(sizeof state_stack / sizeof state_stack[0])) { 1214 match_state[ifdef_level].tos = -1; 1215 state_stack[ifdef_level++] = ps; 1216 } else 1217 diag(1, "#if stack overflow"); 1218 } else 1219 if (strncmp(s_lab, "#else", 5) == 0) { 1220 if (ifdef_level <= 0) 1221 diag(1, "Unmatched #else"); 1222 else { 1223 match_state[ifdef_level - 1] = ps; 1224 ps = state_stack[ifdef_level - 1]; 1225 } 1226 } else 1227 if (strncmp(s_lab, "#endif", 6) == 0) { 1228 if (ifdef_level <= 0) 1229 diag(1, "Unmatched #endif"); 1230 else { 1231 ifdef_level--; 1232 1233 #ifdef undef 1234 /* 1235 * This match needs to be more intelligent before the 1236 * message is useful 1237 */ 1238 if (match_state[ifdef_level].tos >= 0 1239 && memcmp(&ps, &match_state[ifdef_level], sizeof ps)) 1240 diag(0, "Syntactically inconsistant #ifdef alternatives."); 1241 #endif 1242 } 1243 if (blanklines_around_conditional_compilation) { 1244 postfix_blankline_requested++; 1245 n_real_blanklines = 0; 1246 } 1247 } 1248 break; /* subsequent processing of the newline 1249 * character will cause the line to be printed */ 1250 1251 case comment: /* we have gotten a start comment */ 1252 /* this is a biggie */ 1253 if (flushed_nl) { /* we should force a broken 1254 * line here */ 1255 flushed_nl = false; 1256 dump_line(); 1257 ps.want_blank = false; /* don't insert blank at 1258 * line start */ 1259 force_nl = false; 1260 } 1261 pr_comment(); 1262 break; 1263 } /* end of big switch stmt */ 1264 1265 *e_code = '\0'; /* make sure code section is null terminated */ 1266 if (type_code != comment && type_code != newline && type_code != preesc) 1267 ps.last_token = type_code; 1268 } /* end of main while (1) loop */ 1269 } 1270 /* 1271 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1272 * backup file will be ".Bfile" then make the backup file the input and 1273 * original input file the output 1274 */ 1275 void 1276 bakcopy(void) 1277 { 1278 int n, bakchn; 1279 char buff[8 * 1024]; 1280 const char *p; 1281 1282 /* construct file name .Bfile */ 1283 for (p = in_name; *p; p++); /* skip to end of string */ 1284 while (p > in_name && *p != '/') /* find last '/' */ 1285 p--; 1286 if (*p == '/') 1287 p++; 1288 sprintf(bakfile, "%s.BAK", p); 1289 1290 /* copy in_name to backup file */ 1291 bakchn = creat(bakfile, 0600); 1292 if (bakchn < 0) 1293 err(1, "%s", bakfile); 1294 while ((n = read(fileno(input), buff, sizeof buff)) > 0) 1295 if (write(bakchn, buff, n) != n) 1296 err(1, "%s", bakfile); 1297 if (n < 0) 1298 err(1, "%s", in_name); 1299 close(bakchn); 1300 fclose(input); 1301 1302 /* re-open backup file as the input file */ 1303 input = fopen(bakfile, "r"); 1304 if (input == 0) 1305 err(1, "%s", bakfile); 1306 /* now the original input file will be the output */ 1307 output = fopen(in_name, "w"); 1308 if (output == 0) { 1309 unlink(bakfile); 1310 err(1, "%s", in_name); 1311 } 1312 } 1313