1 /* $NetBSD: indent.c,v 1.22 2016/02/25 13:23:27 ginsbach Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 34 * Copyright (c) 1985 Sun Microsystems, Inc. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 */ 65 66 #include <sys/cdefs.h> 67 #ifndef lint 68 __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\ 69 Copyright (c) 1976 Board of Trustees of the University of Illinois.\ 70 Copyright (c) 1980, 1993\ 71 The Regents of the University of California. All rights reserved."); 72 #endif /* not lint */ 73 74 #ifndef lint 75 #if 0 76 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 77 #else 78 __RCSID("$NetBSD: indent.c,v 1.22 2016/02/25 13:23:27 ginsbach Exp $"); 79 #endif 80 #endif /* not lint */ 81 82 #include <sys/param.h> 83 #include <ctype.h> 84 #include <err.h> 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <stdio.h> 88 #include <stdlib.h> 89 #include <string.h> 90 #include <unistd.h> 91 #include <locale.h> 92 #define EXTERN 93 #include "indent_globs.h" 94 #undef EXTERN 95 #include "indent_codes.h" 96 97 const char *in_name = "Standard Input"; /* will always point to name of 98 * input file */ 99 const char *out_name = "Standard Output"; /* will always point to name of 100 * output file */ 101 char bakfile[MAXPATHLEN] = ""; 102 103 int main(int, char **); 104 105 int 106 main(int argc, char **argv) 107 { 108 109 extern int found_err; /* flag set in diag() on error */ 110 int dec_ind; /* current indentation for declarations */ 111 int di_stack[20]; /* a stack of structure indentation levels */ 112 int flushed_nl; /* used when buffering up comments to remember 113 * that a newline was passed over */ 114 int force_nl; /* when true, code must be broken */ 115 int hd_type; /* used to store type of stmt for if (...), 116 * for (...), etc */ 117 int i; /* local loop counter */ 118 int scase; /* set to true when we see a case, so we will 119 * know what to do with the following colon */ 120 int sp_sw; /* when true, we are in the expressin of 121 * if(...), while(...), etc. */ 122 int squest; /* when this is positive, we have seen a ? 123 * without the matching : in a <c>?<s>:<s> 124 * construct */ 125 const char *t_ptr; /* used for copying tokens */ 126 int tabs_to_var = 0; /* true if using tabs to indent to var name */ 127 int type_code; /* the type of token, returned by lexi */ 128 129 int last_else = 0; /* true iff last keyword was an else */ 130 131 132 /*-----------------------------------------------*\ 133 | INITIALIZATION | 134 \*-----------------------------------------------*/ 135 136 if (!setlocale(LC_ALL, "")) 137 warnx("can't set locale."); 138 139 hd_type = 0; 140 ps.p_stack[0] = stmt; /* this is the parser's stack */ 141 ps.last_nl = true; /* this is true if the last thing scanned was 142 * a newline */ 143 ps.last_token = semicolon; 144 combuf = (char *) malloc(bufsize); 145 labbuf = (char *) malloc(bufsize); 146 codebuf = (char *) malloc(bufsize); 147 tokenbuf = (char *) malloc(bufsize); 148 l_com = combuf + bufsize - 5; 149 l_lab = labbuf + bufsize - 5; 150 l_code = codebuf + bufsize - 5; 151 l_token = tokenbuf + bufsize - 5; 152 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, 153 * and comment buffers */ 154 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 155 ps.else_if = 1; /* Default else-if special processing to on */ 156 s_lab = e_lab = labbuf + 1; 157 s_code = e_code = codebuf + 1; 158 s_com = e_com = combuf + 1; 159 s_token = e_token = tokenbuf + 1; 160 161 in_buffer = (char *) malloc(10); 162 in_buffer_limit = in_buffer + 8; 163 buf_ptr = buf_end = in_buffer; 164 line_no = 1; 165 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 166 sp_sw = force_nl = false; 167 ps.in_or_st = false; 168 ps.bl_line = true; 169 dec_ind = 0; 170 di_stack[ps.dec_nest = 0] = 0; 171 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 172 173 174 scase = ps.pcase = false; 175 squest = 0; 176 sc_end = 0; 177 bp_save = 0; 178 be_save = 0; 179 180 output = 0; 181 182 183 184 /*--------------------------------------------------*\ 185 | COMMAND LINE SCAN | 186 \*--------------------------------------------------*/ 187 188 #ifdef undef 189 max_col = 78; /* -l78 */ 190 lineup_to_parens = 1; /* -lp */ 191 ps.ljust_decl = 0; /* -ndj */ 192 ps.com_ind = 33; /* -c33 */ 193 star_comment_cont = 1; /* -sc */ 194 ps.ind_size = 8; /* -i8 */ 195 verbose = 0; 196 ps.decl_indent = 16; /* -di16 */ 197 ps.indent_parameters = 1; /* -ip */ 198 ps.decl_com_ind = 0; /* if this is not set to some positive value 199 * by an arg, we will set this equal to 200 * ps.com_ind */ 201 btype_2 = 1; /* -br */ 202 cuddle_else = 1; /* -ce */ 203 ps.unindent_displace = 0; /* -d0 */ 204 ps.case_indent = 0; /* -cli0 */ 205 format_col1_comments = 1; /* -fc1 */ 206 procnames_start_line = 1; /* -psl */ 207 proc_calls_space = 0; /* -npcs */ 208 comment_delimiter_on_blankline = 1; /* -cdb */ 209 ps.leave_comma = 1; /* -nbc */ 210 #endif 211 212 for (i = 1; i < argc; ++i) 213 if (strcmp(argv[i], "-npro") == 0) 214 break; 215 set_defaults(); 216 if (i >= argc) 217 set_profile(); 218 219 for (i = 1; i < argc; ++i) { 220 221 /* 222 * look thru args (if any) for changes to defaults 223 */ 224 if (argv[i][0] != '-') { /* no flag on parameter */ 225 if (input == 0) { /* we must have the input file */ 226 in_name = argv[i]; /* remember name of 227 * input file */ 228 input = fopen(in_name, "r"); 229 if (input == 0) /* check for open error */ 230 err(1, "%s", in_name); 231 continue; 232 } else 233 if (output == 0) { /* we have the output 234 * file */ 235 out_name = argv[i]; /* remember name of 236 * output file */ 237 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 238 * the file */ 239 errx(1, "input and output files must be different"); 240 } 241 output = fopen(out_name, "w"); 242 if (output == 0) /* check for create 243 * error */ 244 err(1, "%s", out_name); 245 continue; 246 } 247 errx(1, "unknown parameter: %s", argv[i]); 248 } else 249 set_option(argv[i]); 250 } /* end of for */ 251 if (input == 0) { 252 input = stdin; 253 } 254 if (output == 0) { 255 if (troff || input == stdin) 256 output = stdout; 257 else { 258 out_name = in_name; 259 bakcopy(); 260 } 261 } 262 if (ps.com_ind <= 1) 263 ps.com_ind = 2; /* don't put normal comments before column 2 */ 264 if (troff) { 265 if (bodyf.font[0] == 0) 266 parsefont(&bodyf, "R"); 267 if (scomf.font[0] == 0) 268 parsefont(&scomf, "I"); 269 if (blkcomf.font[0] == 0) 270 blkcomf = scomf, blkcomf.size += 2; 271 if (boxcomf.font[0] == 0) 272 boxcomf = blkcomf; 273 if (stringf.font[0] == 0) 274 parsefont(&stringf, "L"); 275 if (keywordf.font[0] == 0) 276 parsefont(&keywordf, "B"); 277 writefdef(&bodyf, 'B'); 278 writefdef(&scomf, 'C'); 279 writefdef(&blkcomf, 'L'); 280 writefdef(&boxcomf, 'X'); 281 writefdef(&stringf, 'S'); 282 writefdef(&keywordf, 'K'); 283 } 284 if (block_comment_max_col <= 0) 285 block_comment_max_col = max_col; 286 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 287 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 288 if (continuation_indent == 0) 289 continuation_indent = ps.ind_size; 290 fill_buffer(); /* get first batch of stuff into input buffer */ 291 292 parse(semicolon); 293 { 294 char *p = buf_ptr; 295 int col = 1; 296 297 while (1) { 298 if (*p == ' ') 299 col++; 300 else 301 if (*p == '\t') 302 col = ((col - 1) & ~7) + 9; 303 else 304 break; 305 p++; 306 } 307 if (col > ps.ind_size) 308 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 309 } 310 if (troff) { 311 const char *p = in_name, *beg = in_name; 312 313 while (*p) 314 if (*p++ == '/') 315 beg = p; 316 fprintf(output, ".Fn \"%s\"\n", beg); 317 } 318 /* 319 * START OF MAIN LOOP 320 */ 321 322 while (1) { /* this is the main loop. it will go until we 323 * reach eof */ 324 int is_procname; 325 326 type_code = lexi(); /* lexi reads one token. The actual 327 * characters read are stored in 328 * "token". lexi returns a code 329 * indicating the type of token */ 330 is_procname = ps.procname[0]; 331 332 /* 333 * The following code moves everything following an if (), while (), 334 * else, etc. up to the start of the following stmt to a buffer. This 335 * allows proper handling of both kinds of brace placement. 336 */ 337 338 flushed_nl = false; 339 while (ps.search_brace) { /* if we scanned an if(), 340 * while(), etc., we might 341 * need to copy stuff into a 342 * buffer we must loop, 343 * copying stuff into 344 * save_com, until we find the 345 * start of the stmt which 346 * follows the if, or whatever */ 347 switch (type_code) { 348 case newline: 349 ++line_no; 350 flushed_nl = true; 351 case form_feed: 352 break; /* form feeds and newlines found here 353 * will be ignored */ 354 355 case lbrace: /* this is a brace that starts the 356 * compound stmt */ 357 if (sc_end == 0) { /* ignore buffering if a 358 * comment wasn't stored 359 * up */ 360 ps.search_brace = false; 361 goto check_type; 362 } 363 if (btype_2) { 364 save_com[0] = '{'; /* we either want to put 365 * the brace right after 366 * the if */ 367 goto sw_buffer; /* go to common code to 368 * get out of this loop */ 369 } 370 case comment: /* we have a comment, so we must copy 371 * it into the buffer */ 372 if (!flushed_nl || sc_end != 0) { 373 if (sc_end == 0) { /* if this is the first 374 * comment, we must set 375 * up the buffer */ 376 save_com[0] = save_com[1] = ' '; 377 sc_end = &(save_com[2]); 378 } else { 379 *sc_end++ = '\n'; /* add newline between 380 * comments */ 381 *sc_end++ = ' '; 382 --line_no; 383 } 384 *sc_end++ = '/'; /* copy in start of 385 * comment */ 386 *sc_end++ = '*'; 387 388 for (;;) { /* loop until we get to 389 * the end of the 390 * comment */ 391 *sc_end = *buf_ptr++; 392 if (buf_ptr >= buf_end) 393 fill_buffer(); 394 395 if (*sc_end++ == '*' && *buf_ptr == '/') 396 break; /* we are at end of 397 * comment */ 398 399 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 400 * overflow */ 401 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever."); 402 fflush(output); 403 exit(1); 404 } 405 } 406 *sc_end++ = '/'; /* add ending slash */ 407 if (++buf_ptr >= buf_end) /* get past / in buffer */ 408 fill_buffer(); 409 break; 410 } 411 default: /* it is the start of a normal 412 * statment */ 413 if (flushed_nl) /* if we flushed a newline, 414 * make sure it is put back */ 415 force_nl = true; 416 if ((type_code == sp_paren && *token == 'i' 417 && last_else && ps.else_if) || 418 (type_code == sp_nparen && *token == 'e' 419 && e_code != s_code && e_code[-1] == '}')) 420 force_nl = false; 421 422 if (sc_end == 0) { /* ignore buffering if 423 * comment wasn't saved 424 * up */ 425 ps.search_brace = false; 426 goto check_type; 427 } 428 if (force_nl) { /* if we should insert a nl 429 * here, put it into the 430 * buffer */ 431 force_nl = false; 432 --line_no; /* this will be 433 * re-increased when the 434 * nl is read from the 435 * buffer */ 436 *sc_end++ = '\n'; 437 *sc_end++ = ' '; 438 if (verbose && !flushed_nl) /* print error msg if 439 * the line was not 440 * already broken */ 441 diag(0, "Line broken"); 442 flushed_nl = false; 443 } 444 for (t_ptr = token; *t_ptr; ++t_ptr) 445 *sc_end++ = *t_ptr; /* copy token into temp 446 * buffer */ 447 ps.procname[0] = 0; 448 449 sw_buffer: 450 ps.search_brace = false; /* stop looking for 451 * start of stmt */ 452 bp_save = buf_ptr; /* save current input 453 * buffer */ 454 be_save = buf_end; 455 buf_ptr = save_com; /* fix so that 456 * subsequent calls to 457 * lexi will take tokens 458 * out of save_com */ 459 *sc_end++ = ' '; /* add trailing blank, 460 * just in case */ 461 buf_end = sc_end; 462 sc_end = 0; 463 break; 464 } /* end of switch */ 465 if (type_code != 0) /* we must make this check, 466 * just in case there was an 467 * unexpected EOF */ 468 type_code = lexi(); /* read another token */ 469 /* if (ps.search_brace) ps.procname[0] = 0; */ 470 if ((is_procname = ps.procname[0]) && flushed_nl 471 && !procnames_start_line && ps.in_decl 472 && type_code == ident) 473 flushed_nl = 0; 474 } /* end of while (search_brace) */ 475 last_else = 0; 476 check_type: 477 if (type_code == 0) { /* we got eof */ 478 if (s_lab != e_lab || s_code != e_code 479 || s_com != e_com) /* must dump end of line */ 480 dump_line(); 481 if (ps.tos > 1) /* check for balanced braces */ 482 diag(1, "Stuff missing from end of file."); 483 484 if (verbose) { 485 printf("There were %d output lines and %d comments\n", 486 ps.out_lines, ps.out_coms); 487 printf("(Lines with comments)/(Lines with code): %6.3f\n", 488 (1.0 * ps.com_lines) / code_lines); 489 } 490 fflush(output); 491 exit(found_err); 492 } 493 if ( 494 (type_code != comment) && 495 (type_code != newline) && 496 (type_code != preesc) && 497 (type_code != form_feed)) { 498 if (force_nl && 499 (type_code != semicolon) && 500 (type_code != lbrace || !btype_2)) { 501 /* we should force a broken line here */ 502 if (verbose && !flushed_nl) 503 diag(0, "Line broken"); 504 flushed_nl = false; 505 dump_line(); 506 ps.want_blank = false; /* don't insert blank at 507 * line start */ 508 force_nl = false; 509 } 510 ps.in_stmt = true; /* turn on flag which causes 511 * an extra level of 512 * indentation. this is turned 513 * off by a ; or '}' */ 514 if (s_com != e_com) { /* the turkey has embedded a 515 * comment in a line. fix it */ 516 *e_code++ = ' '; 517 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 518 CHECK_SIZE_CODE; 519 *e_code++ = *t_ptr; 520 } 521 *e_code++ = ' '; 522 *e_code = '\0'; /* null terminate code sect */ 523 ps.want_blank = false; 524 e_com = s_com; 525 } 526 } else 527 if (type_code != comment) /* preserve force_nl 528 * thru a comment */ 529 force_nl = false; /* cancel forced newline 530 * after newline, form 531 * feed, etc */ 532 533 534 535 /*-----------------------------------------------------*\ 536 | do switch on type of token scanned | 537 \*-----------------------------------------------------*/ 538 CHECK_SIZE_CODE; 539 switch (type_code) { /* now, decide what to do with the 540 * token */ 541 542 case form_feed:/* found a form feed in line */ 543 ps.use_ff = true; /* a form feed is treated much 544 * like a newline */ 545 dump_line(); 546 ps.want_blank = false; 547 break; 548 549 case newline: 550 if (ps.last_token != comma || ps.p_l_follow > 0 551 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 552 dump_line(); 553 ps.want_blank = false; 554 } 555 ++line_no; /* keep track of input line number */ 556 break; 557 558 case lparen: /* got a '(' or '[' */ 559 ++ps.p_l_follow; /* count parens to make Healy 560 * happy */ 561 if (ps.want_blank && *token != '[' && 562 (ps.last_token != ident || proc_calls_space 563 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) 564 *e_code++ = ' '; 565 if (ps.in_decl && !ps.block_init) { 566 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { 567 ps.dumped_decl_indent = 1; 568 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 569 e_code += strlen(e_code); 570 } else { 571 while ((e_code - s_code) < dec_ind) { 572 CHECK_SIZE_CODE; 573 *e_code++ = ' '; 574 } 575 *e_code++ = token[0]; 576 } 577 } else 578 *e_code++ = token[0]; 579 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 580 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 581 && ps.paren_indents[0] < 2 * ps.ind_size) 582 ps.paren_indents[0] = 2 * ps.ind_size; 583 ps.want_blank = false; 584 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 585 /* 586 * this is a kluge to make sure that declarations will be 587 * aligned right if proc decl has an explicit type on it, i.e. 588 * "int a(x) {..." 589 */ 590 parse(semicolon); /* I said this was a 591 * kluge... */ 592 ps.in_or_st = false; /* turn off flag for 593 * structure decl or 594 * initialization */ 595 } 596 if (ps.sizeof_keyword) 597 ps.sizeof_mask |= 1 << ps.p_l_follow; 598 break; 599 600 case rparen: /* got a ')' or ']' */ 601 rparen_count--; 602 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { 603 ps.last_u_d = true; 604 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 605 } 606 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; 607 if (--ps.p_l_follow < 0) { 608 ps.p_l_follow = 0; 609 diag(0, "Extra %c", *token); 610 } 611 if (e_code == s_code) /* if the paren starts the 612 * line */ 613 ps.paren_level = ps.p_l_follow; /* then indent it */ 614 615 *e_code++ = token[0]; 616 ps.want_blank = true; 617 618 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 619 * (...), or some such */ 620 sp_sw = false; 621 force_nl = true; /* must force newline 622 * after if */ 623 ps.last_u_d = true; /* inform lexi that a 624 * following operator is 625 * unary */ 626 ps.in_stmt = false; /* don't use stmt 627 * continuation 628 * indentation */ 629 630 parse(hd_type); /* let parser worry about if, 631 * or whatever */ 632 } 633 ps.search_brace = btype_2; /* this should insure 634 * that constructs such 635 * as main(){...} and 636 * int[]{...} have their 637 * braces put in the 638 * right place */ 639 break; 640 641 case unary_op: /* this could be any unary operation */ 642 if (ps.want_blank) 643 *e_code++ = ' '; 644 645 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { 646 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 647 ps.dumped_decl_indent = 1; 648 e_code += strlen(e_code); 649 } else { 650 const char *res = token; 651 652 if (ps.in_decl && !ps.block_init) { /* if this is a unary op 653 * in a declaration, we 654 * should indent this 655 * token */ 656 for (i = 0; token[i]; ++i); /* find length of token */ 657 while ((e_code - s_code) < (dec_ind - i)) { 658 CHECK_SIZE_CODE; 659 *e_code++ = ' '; /* pad it */ 660 } 661 } 662 if (troff && token[0] == '-' && token[1] == '>') 663 res = "\\(->"; 664 for (t_ptr = res; *t_ptr; ++t_ptr) { 665 CHECK_SIZE_CODE; 666 *e_code++ = *t_ptr; 667 } 668 } 669 ps.want_blank = false; 670 break; 671 672 case binary_op:/* any binary operation */ 673 if (ps.want_blank) 674 *e_code++ = ' '; 675 { 676 const char *res = token; 677 678 if (troff) 679 switch (token[0]) { 680 case '<': 681 if (token[1] == '=') 682 res = "\\(<="; 683 break; 684 case '>': 685 if (token[1] == '=') 686 res = "\\(>="; 687 break; 688 case '!': 689 if (token[1] == '=') 690 res = "\\(!="; 691 break; 692 case '|': 693 if (token[1] == '|') 694 res = "\\(br\\(br"; 695 else 696 if (token[1] == 0) 697 res = "\\(br"; 698 break; 699 } 700 for (t_ptr = res; *t_ptr; ++t_ptr) { 701 CHECK_SIZE_CODE; 702 *e_code++ = *t_ptr; /* move the operator */ 703 } 704 } 705 ps.want_blank = true; 706 break; 707 708 case postop: /* got a trailing ++ or -- */ 709 *e_code++ = token[0]; 710 *e_code++ = token[1]; 711 ps.want_blank = true; 712 break; 713 714 case question: /* got a ? */ 715 squest++; /* this will be used when a later 716 * colon appears so we can distinguish 717 * the <c>?<n>:<n> construct */ 718 if (ps.want_blank) 719 *e_code++ = ' '; 720 *e_code++ = '?'; 721 ps.want_blank = true; 722 break; 723 724 case casestmt: /* got word 'case' or 'default' */ 725 scase = true; /* so we can process the later colon 726 * properly */ 727 goto copy_id; 728 729 case colon: /* got a ':' */ 730 if (squest > 0) { /* it is part of the <c>?<n>: 731 * <n> construct */ 732 --squest; 733 if (ps.want_blank) 734 *e_code++ = ' '; 735 *e_code++ = ':'; 736 ps.want_blank = true; 737 break; 738 } 739 if (ps.in_or_st) { 740 *e_code++ = ':'; 741 ps.want_blank = false; 742 break; 743 } 744 ps.in_stmt = false; /* seeing a label does not 745 * imply we are in a stmt */ 746 for (t_ptr = s_code; *t_ptr; ++t_ptr) 747 *e_lab++ = *t_ptr; /* turn everything so 748 * far into a label */ 749 e_code = s_code; 750 *e_lab++ = ':'; 751 *e_lab++ = ' '; 752 *e_lab = '\0'; 753 754 force_nl = ps.pcase = scase; /* ps.pcase will be used 755 * by dump_line to 756 * decide how to indent 757 * the label. force_nl 758 * will force a case n: 759 * to be on a line by 760 * itself */ 761 scase = false; 762 ps.want_blank = false; 763 break; 764 765 case semicolon:/* got a ';' */ 766 ps.in_or_st = false; /* we are not in an 767 * initialization or structure 768 * declaration */ 769 scase = false; /* these will only need resetting in a 770 * error */ 771 squest = 0; 772 if (ps.last_token == rparen && rparen_count == 0) 773 ps.in_parameter_declaration = 0; 774 ps.cast_mask = 0; 775 ps.sizeof_mask = 0; 776 ps.block_init = 0; 777 ps.block_init_level = 0; 778 ps.just_saw_decl--; 779 780 if (ps.in_decl && s_code == e_code && !ps.block_init) 781 while ((e_code - s_code) < (dec_ind - 1)) { 782 CHECK_SIZE_CODE; 783 *e_code++ = ' '; 784 } 785 786 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first 787 * level structure 788 * declaration, we 789 * aren't any more */ 790 791 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 792 793 /* 794 * This should be true iff there were unbalanced parens in the 795 * stmt. It is a bit complicated, because the semicolon might 796 * be in a for stmt 797 */ 798 diag(1, "Unbalanced parens"); 799 ps.p_l_follow = 0; 800 if (sp_sw) { /* this is a check for a if, 801 * while, etc. with unbalanced 802 * parens */ 803 sp_sw = false; 804 parse(hd_type); /* don't lose the if, 805 * or whatever */ 806 } 807 } 808 *e_code++ = ';'; 809 ps.want_blank = true; 810 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in 811 * the middle of a stmt */ 812 813 if (!sp_sw) { /* if not if for (;;) */ 814 parse(semicolon); /* let parser know about 815 * end of stmt */ 816 force_nl = true; /* force newline after a 817 * end of stmt */ 818 } 819 break; 820 821 case lbrace: /* got a '{' */ 822 ps.in_stmt = false; /* don't indent the {} */ 823 if (!ps.block_init) 824 force_nl = true; /* force other stuff on 825 * same line as '{' onto 826 * new line */ 827 else 828 if (ps.block_init_level <= 0) 829 ps.block_init_level = 1; 830 else 831 ps.block_init_level++; 832 833 if (s_code != e_code && !ps.block_init) { 834 if (!btype_2) { 835 dump_line(); 836 ps.want_blank = false; 837 } else 838 if (ps.in_parameter_declaration && !ps.in_or_st) { 839 ps.i_l_follow = 0; 840 dump_line(); 841 ps.want_blank = false; 842 } 843 } 844 if (ps.in_parameter_declaration) 845 prefix_blankline_requested = 0; 846 847 if (ps.p_l_follow > 0) { /* check for preceding 848 * unbalanced parens */ 849 diag(1, "Unbalanced parens"); 850 ps.p_l_follow = 0; 851 if (sp_sw) { /* check for unclosed if, for, 852 * etc. */ 853 sp_sw = false; 854 parse(hd_type); 855 ps.ind_level = ps.i_l_follow; 856 } 857 } 858 if (s_code == e_code) 859 ps.ind_stmt = false; /* don't put extra 860 * indentation on line 861 * with '{' */ 862 if (ps.in_decl && ps.in_or_st) { /* this is either a 863 * structure declaration 864 * or an init */ 865 di_stack[ps.dec_nest++] = dec_ind; 866 /* ? dec_ind = 0; */ 867 } else { 868 ps.decl_on_line = false; /* we can't be in the 869 * middle of a 870 * declaration, so don't 871 * do special 872 * indentation of 873 * comments */ 874 if (blanklines_after_declarations_at_proctop 875 && ps.in_parameter_declaration) 876 postfix_blankline_requested = 1; 877 ps.in_parameter_declaration = 0; 878 } 879 dec_ind = 0; 880 parse(lbrace); /* let parser know about this */ 881 if (ps.want_blank) /* put a blank before '{' if 882 * '{' is not at start of line */ 883 *e_code++ = ' '; 884 ps.want_blank = false; 885 *e_code++ = '{'; 886 ps.just_saw_decl = 0; 887 break; 888 889 case rbrace: /* got a '}' */ 890 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 891 * omitted in 892 * declarations */ 893 parse(semicolon); 894 if (ps.p_l_follow) { /* check for unclosed if, for, 895 * else. */ 896 diag(1, "Unbalanced parens"); 897 ps.p_l_follow = 0; 898 sp_sw = false; 899 } 900 ps.just_saw_decl = 0; 901 ps.block_init_level--; 902 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 903 * line */ 904 if (verbose) 905 diag(0, "Line broken"); 906 dump_line(); 907 } 908 *e_code++ = '}'; 909 ps.want_blank = true; 910 ps.in_stmt = ps.ind_stmt = false; 911 if (ps.dec_nest > 0) { /* we are in multi-level 912 * structure declaration */ 913 dec_ind = di_stack[--ps.dec_nest]; 914 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 915 ps.just_saw_decl = 2; 916 ps.in_decl = true; 917 } 918 prefix_blankline_requested = 0; 919 parse(rbrace); /* let parser know about this */ 920 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 921 && ps.il[ps.tos] >= ps.ind_level; 922 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 923 postfix_blankline_requested = 1; 924 break; 925 926 case swstmt: /* got keyword "switch" */ 927 sp_sw = true; 928 hd_type = swstmt; /* keep this for when we have 929 * seen the expression */ 930 goto copy_id; /* go move the token into buffer */ 931 932 case sp_paren: /* token is if, while, for */ 933 sp_sw = true; /* the interesting stuff is done after 934 * the expression is scanned */ 935 hd_type = (*token == 'i' ? ifstmt : 936 (*token == 'w' ? whilestmt : forstmt)); 937 938 /* 939 * remember the type of header for later use by parser 940 */ 941 goto copy_id; /* copy the token into line */ 942 943 case sp_nparen:/* got else, do */ 944 ps.in_stmt = false; 945 if (*token == 'e') { 946 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 947 if (verbose) 948 diag(0, "Line broken"); 949 dump_line(); /* make sure this starts 950 * a line */ 951 ps.want_blank = false; 952 } 953 force_nl = true; /* also, following stuff 954 * must go onto new line */ 955 last_else = 1; 956 parse(elselit); 957 } else { 958 if (e_code != s_code) { /* make sure this starts 959 * a line */ 960 if (verbose) 961 diag(0, "Line broken"); 962 dump_line(); 963 ps.want_blank = false; 964 } 965 force_nl = true; /* also, following stuff 966 * must go onto new line */ 967 last_else = 0; 968 parse(dolit); 969 } 970 goto copy_id; /* move the token into line */ 971 972 case decl: /* we have a declaration type (int, register, 973 * etc.) */ 974 parse(decl); /* let parser worry about indentation */ 975 if (ps.last_token == rparen && ps.tos <= 1) { 976 ps.in_parameter_declaration = 1; 977 if (s_code != e_code) { 978 dump_line(); 979 ps.want_blank = 0; 980 } 981 } 982 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 983 ps.ind_level = ps.i_l_follow = 1; 984 ps.ind_stmt = 0; 985 } 986 ps.in_or_st = true; /* this might be a structure 987 * or initialization 988 * declaration */ 989 ps.in_decl = ps.decl_on_line = true; 990 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 991 ps.just_saw_decl = 2; 992 prefix_blankline_requested = 0; 993 for (i = 0; token[i++];); /* get length of token */ 994 995 /* 996 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent 997 * : i); 998 */ 999 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 1000 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 1001 goto copy_id; 1002 1003 case ident: /* got an identifier or constant */ 1004 if (ps.in_decl) { /* if we are in a declaration, 1005 * we must indent identifier */ 1006 if (ps.want_blank) 1007 *e_code++ = ' '; 1008 ps.want_blank = false; 1009 if (is_procname == 0 || !procnames_start_line) { 1010 if (!ps.block_init) { 1011 if (troff && !ps.dumped_decl_indent) { 1012 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); 1013 ps.dumped_decl_indent = 1; 1014 e_code += strlen(e_code); 1015 CHECK_SIZE_CODE; 1016 } else { 1017 int cur_dec_ind; 1018 int pos, startpos; 1019 1020 /* 1021 * in order to get the tab math right for 1022 * indentations that are not multiples of 8 we 1023 * need to modify both startpos and dec_ind 1024 * (cur_dec_ind) here by eight minus the 1025 * remainder of the current starting column 1026 * divided by eight. This seems to be a 1027 * properly working fix 1028 */ 1029 startpos = e_code - s_code; 1030 cur_dec_ind = dec_ind; 1031 pos = startpos; 1032 if ((ps.ind_level * ps.ind_size) % 8 != 0) { 1033 pos += (ps.ind_level * ps.ind_size) % 8; 1034 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; 1035 } 1036 1037 if (tabs_to_var) { 1038 while ((pos & ~7) + 8 <= cur_dec_ind) { 1039 CHECK_SIZE_CODE; 1040 *e_code++ = '\t'; 1041 pos = (pos & ~7) + 8; 1042 } 1043 } 1044 while (pos < cur_dec_ind) { 1045 CHECK_SIZE_CODE; 1046 *e_code++ = ' '; 1047 pos++; 1048 } 1049 if (ps.want_blank && e_code - s_code == startpos) 1050 *e_code++ = ' '; 1051 ps.want_blank = false; 1052 } 1053 } 1054 } else { 1055 if (dec_ind && s_code != e_code) 1056 dump_line(); 1057 dec_ind = 0; 1058 ps.want_blank = false; 1059 } 1060 } else 1061 if (sp_sw && ps.p_l_follow == 0) { 1062 sp_sw = false; 1063 force_nl = true; 1064 ps.last_u_d = true; 1065 ps.in_stmt = false; 1066 parse(hd_type); 1067 } 1068 copy_id: 1069 if (ps.want_blank) 1070 *e_code++ = ' '; 1071 if (troff && ps.its_a_keyword) { 1072 e_code = chfont(&bodyf, &keywordf, e_code); 1073 for (t_ptr = token; *t_ptr; ++t_ptr) { 1074 CHECK_SIZE_CODE; 1075 *e_code++ = keywordf.allcaps 1076 ? toupper((unsigned char)*t_ptr) 1077 : *t_ptr; 1078 } 1079 e_code = chfont(&keywordf, &bodyf, e_code); 1080 } else 1081 for (t_ptr = token; *t_ptr; ++t_ptr) { 1082 CHECK_SIZE_CODE; 1083 *e_code++ = *t_ptr; 1084 } 1085 ps.want_blank = true; 1086 break; 1087 1088 case period: /* treat a period kind of like a binary 1089 * operation */ 1090 *e_code++ = '.'; /* move the period into line */ 1091 ps.want_blank = false; /* don't put a blank after a 1092 * period */ 1093 break; 1094 1095 case comma: 1096 ps.want_blank = (s_code != e_code); /* only put blank after 1097 * comma if comma does 1098 * not start the line */ 1099 if (ps.in_decl && is_procname == 0 && !ps.block_init) 1100 while ((e_code - s_code) < (dec_ind - 1)) { 1101 CHECK_SIZE_CODE; 1102 *e_code++ = ' '; 1103 } 1104 1105 *e_code++ = ','; 1106 if (ps.p_l_follow == 0) { 1107 if (ps.block_init_level <= 0) 1108 ps.block_init = 0; 1109 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1110 force_nl = true; 1111 } 1112 break; 1113 1114 case preesc: /* got the character '#' */ 1115 if ((s_com != e_com) || 1116 (s_lab != e_lab) || 1117 (s_code != e_code)) 1118 dump_line(); 1119 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1120 { 1121 int in_comment = 0; 1122 int com_start = 0; 1123 char quote = 0; 1124 int com_end = 0; 1125 1126 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1127 buf_ptr++; 1128 if (buf_ptr >= buf_end) 1129 fill_buffer(); 1130 } 1131 while (*buf_ptr != '\n' || in_comment) { 1132 CHECK_SIZE_LAB; 1133 *e_lab = *buf_ptr++; 1134 if (buf_ptr >= buf_end) 1135 fill_buffer(); 1136 switch (*e_lab++) { 1137 case BACKSLASH: 1138 if (troff) 1139 *e_lab++ = BACKSLASH; 1140 if (!in_comment) { 1141 *e_lab++ = *buf_ptr++; 1142 if (buf_ptr >= buf_end) 1143 fill_buffer(); 1144 } 1145 break; 1146 case '/': 1147 if (*buf_ptr == '*' && !in_comment && !quote) { 1148 in_comment = 1; 1149 *e_lab++ = *buf_ptr++; 1150 com_start = e_lab - s_lab - 2; 1151 } 1152 break; 1153 case '"': 1154 if (quote == '"') 1155 quote = 0; 1156 break; 1157 case '\'': 1158 if (quote == '\'') 1159 quote = 0; 1160 break; 1161 case '*': 1162 if (*buf_ptr == '/' && in_comment) { 1163 in_comment = 0; 1164 *e_lab++ = *buf_ptr++; 1165 com_end = e_lab - s_lab; 1166 } 1167 break; 1168 } 1169 } 1170 1171 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1172 e_lab--; 1173 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on 1174 * preprocessor line */ 1175 if (sc_end == 0) /* if this is the first 1176 * comment, we must set 1177 * up the buffer */ 1178 sc_end = &(save_com[0]); 1179 else { 1180 *sc_end++ = '\n'; /* add newline between 1181 * comments */ 1182 *sc_end++ = ' '; 1183 --line_no; 1184 } 1185 memmove(sc_end, s_lab + com_start, com_end - com_start); 1186 sc_end += com_end - com_start; 1187 if (sc_end >= &save_com[sc_size]) 1188 abort(); 1189 e_lab = s_lab + com_start; 1190 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1191 e_lab--; 1192 bp_save = buf_ptr; /* save current input 1193 * buffer */ 1194 be_save = buf_end; 1195 buf_ptr = save_com; /* fix so that 1196 * subsequent calls to 1197 * lexi will take tokens 1198 * out of save_com */ 1199 *sc_end++ = ' '; /* add trailing blank, 1200 * just in case */ 1201 buf_end = sc_end; 1202 sc_end = 0; 1203 } 1204 *e_lab = '\0'; /* null terminate line */ 1205 ps.pcase = false; 1206 } 1207 1208 if (strncmp(s_lab, "#if", 3) == 0) { 1209 if (blanklines_around_conditional_compilation) { 1210 int c; 1211 prefix_blankline_requested++; 1212 while ((c = getc(input)) == '\n'); 1213 ungetc(c, input); 1214 } 1215 if (ifdef_level < (int)(sizeof state_stack / sizeof state_stack[0])) { 1216 match_state[ifdef_level].tos = -1; 1217 state_stack[ifdef_level++] = ps; 1218 } else 1219 diag(1, "#if stack overflow"); 1220 } else 1221 if (strncmp(s_lab, "#else", 5) == 0) { 1222 if (ifdef_level <= 0) 1223 diag(1, "Unmatched #else"); 1224 else { 1225 match_state[ifdef_level - 1] = ps; 1226 ps = state_stack[ifdef_level - 1]; 1227 } 1228 } else 1229 if (strncmp(s_lab, "#endif", 6) == 0) { 1230 if (ifdef_level <= 0) 1231 diag(1, "Unmatched #endif"); 1232 else { 1233 ifdef_level--; 1234 1235 #ifdef undef 1236 /* 1237 * This match needs to be more intelligent before the 1238 * message is useful 1239 */ 1240 if (match_state[ifdef_level].tos >= 0 1241 && memcmp(&ps, &match_state[ifdef_level], sizeof ps)) 1242 diag(0, "Syntactically inconsistant #ifdef alternatives."); 1243 #endif 1244 } 1245 if (blanklines_around_conditional_compilation) { 1246 postfix_blankline_requested++; 1247 n_real_blanklines = 0; 1248 } 1249 } 1250 break; /* subsequent processing of the newline 1251 * character will cause the line to be printed */ 1252 1253 case comment: /* we have gotten a start comment */ 1254 /* this is a biggie */ 1255 if (flushed_nl) { /* we should force a broken 1256 * line here */ 1257 flushed_nl = false; 1258 dump_line(); 1259 ps.want_blank = false; /* don't insert blank at 1260 * line start */ 1261 force_nl = false; 1262 } 1263 pr_comment(); 1264 break; 1265 } /* end of big switch stmt */ 1266 1267 *e_code = '\0'; /* make sure code section is null terminated */ 1268 if (type_code != comment && type_code != newline && type_code != preesc) 1269 ps.last_token = type_code; 1270 } /* end of main while (1) loop */ 1271 } 1272 /* 1273 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1274 * backup file will be ".Bfile" then make the backup file the input and 1275 * original input file the output 1276 */ 1277 void 1278 bakcopy(void) 1279 { 1280 int n, bakchn; 1281 char buff[8 * 1024]; 1282 const char *p; 1283 1284 /* construct file name .Bfile */ 1285 for (p = in_name; *p; p++); /* skip to end of string */ 1286 while (p > in_name && *p != '/') /* find last '/' */ 1287 p--; 1288 if (*p == '/') 1289 p++; 1290 sprintf(bakfile, "%s.BAK", p); 1291 1292 /* copy in_name to backup file */ 1293 bakchn = creat(bakfile, 0600); 1294 if (bakchn < 0) 1295 err(1, "%s", bakfile); 1296 while ((n = read(fileno(input), buff, sizeof buff)) > 0) 1297 if (write(bakchn, buff, n) != n) 1298 err(1, "%s", bakfile); 1299 if (n < 0) 1300 err(1, "%s", in_name); 1301 close(bakchn); 1302 fclose(input); 1303 1304 /* re-open backup file as the input file */ 1305 input = fopen(bakfile, "r"); 1306 if (input == 0) 1307 err(1, "%s", bakfile); 1308 /* now the original input file will be the output */ 1309 output = fopen(in_name, "w"); 1310 if (output == 0) { 1311 unlink(bakfile); 1312 err(1, "%s", in_name); 1313 } 1314 } 1315