1 /* $NetBSD: indent.c,v 1.19 2014/09/04 04:06:07 mrg Exp $ */ 2 3 /* 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 34 * Copyright (c) 1985 Sun Microsystems, Inc. 35 * All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 */ 65 66 #include <sys/cdefs.h> 67 #ifndef lint 68 __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\ 69 Copyright (c) 1976 Board of Trustees of the University of Illinois.\ 70 Copyright (c) 1980, 1993\ 71 The Regents of the University of California. All rights reserved."); 72 #endif /* not lint */ 73 74 #ifndef lint 75 #if 0 76 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; 77 #else 78 __RCSID("$NetBSD: indent.c,v 1.19 2014/09/04 04:06:07 mrg Exp $"); 79 #endif 80 #endif /* not lint */ 81 82 #include <sys/param.h> 83 #include <ctype.h> 84 #include <err.h> 85 #include <errno.h> 86 #include <fcntl.h> 87 #include <stdio.h> 88 #include <stdlib.h> 89 #include <string.h> 90 #include <unistd.h> 91 #include <locale.h> 92 #define EXTERN 93 #include "indent_globs.h" 94 #undef EXTERN 95 #include "indent_codes.h" 96 97 const char *in_name = "Standard Input"; /* will always point to name of 98 * input file */ 99 const char *out_name = "Standard Output"; /* will always point to name of 100 * output file */ 101 char bakfile[MAXPATHLEN] = ""; 102 103 int main(int, char **); 104 105 int 106 main(int argc, char **argv) 107 { 108 109 extern int found_err; /* flag set in diag() on error */ 110 int dec_ind; /* current indentation for declarations */ 111 int di_stack[20]; /* a stack of structure indentation levels */ 112 int flushed_nl; /* used when buffering up comments to remember 113 * that a newline was passed over */ 114 int force_nl; /* when true, code must be broken */ 115 int hd_type; /* used to store type of stmt for if (...), 116 * for (...), etc */ 117 int i; /* local loop counter */ 118 int scase; /* set to true when we see a case, so we will 119 * know what to do with the following colon */ 120 int sp_sw; /* when true, we are in the expressin of 121 * if(...), while(...), etc. */ 122 int squest; /* when this is positive, we have seen a ? 123 * without the matching : in a <c>?<s>:<s> 124 * construct */ 125 const char *t_ptr; /* used for copying tokens */ 126 int tabs_to_var = 0; /* true if using tabs to indent to var name */ 127 int type_code; /* the type of token, returned by lexi */ 128 129 int last_else = 0; /* true iff last keyword was an else */ 130 131 132 /*-----------------------------------------------*\ 133 | INITIALIZATION | 134 \*-----------------------------------------------*/ 135 136 if (!setlocale(LC_ALL, "")) 137 fprintf(stderr, "indent: can't set locale.\n"); 138 139 hd_type = 0; 140 ps.p_stack[0] = stmt; /* this is the parser's stack */ 141 ps.last_nl = true; /* this is true if the last thing scanned was 142 * a newline */ 143 ps.last_token = semicolon; 144 combuf = (char *) malloc(bufsize); 145 labbuf = (char *) malloc(bufsize); 146 codebuf = (char *) malloc(bufsize); 147 tokenbuf = (char *) malloc(bufsize); 148 l_com = combuf + bufsize - 5; 149 l_lab = labbuf + bufsize - 5; 150 l_code = codebuf + bufsize - 5; 151 l_token = tokenbuf + bufsize - 5; 152 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, 153 * and comment buffers */ 154 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 155 ps.else_if = 1; /* Default else-if special processing to on */ 156 s_lab = e_lab = labbuf + 1; 157 s_code = e_code = codebuf + 1; 158 s_com = e_com = combuf + 1; 159 s_token = e_token = tokenbuf + 1; 160 161 in_buffer = (char *) malloc(10); 162 in_buffer_limit = in_buffer + 8; 163 buf_ptr = buf_end = in_buffer; 164 line_no = 1; 165 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 166 sp_sw = force_nl = false; 167 ps.in_or_st = false; 168 ps.bl_line = true; 169 dec_ind = 0; 170 di_stack[ps.dec_nest = 0] = 0; 171 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 172 173 174 scase = ps.pcase = false; 175 squest = 0; 176 sc_end = 0; 177 bp_save = 0; 178 be_save = 0; 179 180 output = 0; 181 182 183 184 /*--------------------------------------------------*\ 185 | COMMAND LINE SCAN | 186 \*--------------------------------------------------*/ 187 188 #ifdef undef 189 max_col = 78; /* -l78 */ 190 lineup_to_parens = 1; /* -lp */ 191 ps.ljust_decl = 0; /* -ndj */ 192 ps.com_ind = 33; /* -c33 */ 193 star_comment_cont = 1; /* -sc */ 194 ps.ind_size = 8; /* -i8 */ 195 verbose = 0; 196 ps.decl_indent = 16; /* -di16 */ 197 ps.indent_parameters = 1; /* -ip */ 198 ps.decl_com_ind = 0; /* if this is not set to some positive value 199 * by an arg, we will set this equal to 200 * ps.com_ind */ 201 btype_2 = 1; /* -br */ 202 cuddle_else = 1; /* -ce */ 203 ps.unindent_displace = 0; /* -d0 */ 204 ps.case_indent = 0; /* -cli0 */ 205 format_col1_comments = 1; /* -fc1 */ 206 procnames_start_line = 1; /* -psl */ 207 proc_calls_space = 0; /* -npcs */ 208 comment_delimiter_on_blankline = 1; /* -cdb */ 209 ps.leave_comma = 1; /* -nbc */ 210 #endif 211 212 for (i = 1; i < argc; ++i) 213 if (strcmp(argv[i], "-npro") == 0) 214 break; 215 set_defaults(); 216 if (i >= argc) 217 set_profile(); 218 219 for (i = 1; i < argc; ++i) { 220 221 /* 222 * look thru args (if any) for changes to defaults 223 */ 224 if (argv[i][0] != '-') { /* no flag on parameter */ 225 if (input == 0) { /* we must have the input file */ 226 in_name = argv[i]; /* remember name of 227 * input file */ 228 input = fopen(in_name, "r"); 229 if (input == 0) /* check for open error */ 230 err(1, "%s", in_name); 231 continue; 232 } else 233 if (output == 0) { /* we have the output 234 * file */ 235 out_name = argv[i]; /* remember name of 236 * output file */ 237 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 238 * the file */ 239 fprintf(stderr, "indent: input and output files must be different\n"); 240 exit(1); 241 } 242 output = fopen(out_name, "w"); 243 if (output == 0) /* check for create 244 * error */ 245 err(1, "%s", out_name); 246 continue; 247 } 248 fprintf(stderr, "indent: unknown parameter: %s\n", argv[i]); 249 exit(1); 250 } else 251 set_option(argv[i]); 252 } /* end of for */ 253 if (input == 0) { 254 input = stdin; 255 } 256 if (output == 0) { 257 if (troff || input == stdin) 258 output = stdout; 259 else { 260 out_name = in_name; 261 bakcopy(); 262 } 263 } 264 if (ps.com_ind <= 1) 265 ps.com_ind = 2; /* dont put normal comments before column 2 */ 266 if (troff) { 267 if (bodyf.font[0] == 0) 268 parsefont(&bodyf, "R"); 269 if (scomf.font[0] == 0) 270 parsefont(&scomf, "I"); 271 if (blkcomf.font[0] == 0) 272 blkcomf = scomf, blkcomf.size += 2; 273 if (boxcomf.font[0] == 0) 274 boxcomf = blkcomf; 275 if (stringf.font[0] == 0) 276 parsefont(&stringf, "L"); 277 if (keywordf.font[0] == 0) 278 parsefont(&keywordf, "B"); 279 writefdef(&bodyf, 'B'); 280 writefdef(&scomf, 'C'); 281 writefdef(&blkcomf, 'L'); 282 writefdef(&boxcomf, 'X'); 283 writefdef(&stringf, 'S'); 284 writefdef(&keywordf, 'K'); 285 } 286 if (block_comment_max_col <= 0) 287 block_comment_max_col = max_col; 288 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ 289 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; 290 if (continuation_indent == 0) 291 continuation_indent = ps.ind_size; 292 fill_buffer(); /* get first batch of stuff into input buffer */ 293 294 parse(semicolon); 295 { 296 char *p = buf_ptr; 297 int col = 1; 298 299 while (1) { 300 if (*p == ' ') 301 col++; 302 else 303 if (*p == '\t') 304 col = ((col - 1) & ~7) + 9; 305 else 306 break; 307 p++; 308 } 309 if (col > ps.ind_size) 310 ps.ind_level = ps.i_l_follow = col / ps.ind_size; 311 } 312 if (troff) { 313 const char *p = in_name, *beg = in_name; 314 315 while (*p) 316 if (*p++ == '/') 317 beg = p; 318 fprintf(output, ".Fn \"%s\"\n", beg); 319 } 320 /* 321 * START OF MAIN LOOP 322 */ 323 324 while (1) { /* this is the main loop. it will go until we 325 * reach eof */ 326 int is_procname; 327 328 type_code = lexi(); /* lexi reads one token. The actual 329 * characters read are stored in 330 * "token". lexi returns a code 331 * indicating the type of token */ 332 is_procname = ps.procname[0]; 333 334 /* 335 * The following code moves everything following an if (), while (), 336 * else, etc. up to the start of the following stmt to a buffer. This 337 * allows proper handling of both kinds of brace placement. 338 */ 339 340 flushed_nl = false; 341 while (ps.search_brace) { /* if we scanned an if(), 342 * while(), etc., we might 343 * need to copy stuff into a 344 * buffer we must loop, 345 * copying stuff into 346 * save_com, until we find the 347 * start of the stmt which 348 * follows the if, or whatever */ 349 switch (type_code) { 350 case newline: 351 ++line_no; 352 flushed_nl = true; 353 case form_feed: 354 break; /* form feeds and newlines found here 355 * will be ignored */ 356 357 case lbrace: /* this is a brace that starts the 358 * compound stmt */ 359 if (sc_end == 0) { /* ignore buffering if a 360 * comment wasnt stored 361 * up */ 362 ps.search_brace = false; 363 goto check_type; 364 } 365 if (btype_2) { 366 save_com[0] = '{'; /* we either want to put 367 * the brace right after 368 * the if */ 369 goto sw_buffer; /* go to common code to 370 * get out of this loop */ 371 } 372 case comment: /* we have a comment, so we must copy 373 * it into the buffer */ 374 if (!flushed_nl || sc_end != 0) { 375 if (sc_end == 0) { /* if this is the first 376 * comment, we must set 377 * up the buffer */ 378 save_com[0] = save_com[1] = ' '; 379 sc_end = &(save_com[2]); 380 } else { 381 *sc_end++ = '\n'; /* add newline between 382 * comments */ 383 *sc_end++ = ' '; 384 --line_no; 385 } 386 *sc_end++ = '/'; /* copy in start of 387 * comment */ 388 *sc_end++ = '*'; 389 390 for (;;) { /* loop until we get to 391 * the end of the 392 * comment */ 393 *sc_end = *buf_ptr++; 394 if (buf_ptr >= buf_end) 395 fill_buffer(); 396 397 if (*sc_end++ == '*' && *buf_ptr == '/') 398 break; /* we are at end of 399 * comment */ 400 401 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer 402 * overflow */ 403 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever."); 404 fflush(output); 405 exit(1); 406 } 407 } 408 *sc_end++ = '/'; /* add ending slash */ 409 if (++buf_ptr >= buf_end) /* get past / in buffer */ 410 fill_buffer(); 411 break; 412 } 413 default: /* it is the start of a normal 414 * statment */ 415 if (flushed_nl) /* if we flushed a newline, 416 * make sure it is put back */ 417 force_nl = true; 418 if ((type_code == sp_paren && *token == 'i' 419 && last_else && ps.else_if) || 420 (type_code == sp_nparen && *token == 'e' 421 && e_code != s_code && e_code[-1] == '}')) 422 force_nl = false; 423 424 if (sc_end == 0) { /* ignore buffering if 425 * comment wasnt saved 426 * up */ 427 ps.search_brace = false; 428 goto check_type; 429 } 430 if (force_nl) { /* if we should insert a nl 431 * here, put it into the 432 * buffer */ 433 force_nl = false; 434 --line_no; /* this will be 435 * re-increased when the 436 * nl is read from the 437 * buffer */ 438 *sc_end++ = '\n'; 439 *sc_end++ = ' '; 440 if (verbose && !flushed_nl) /* print error msg if 441 * the line was not 442 * already broken */ 443 diag(0, "Line broken"); 444 flushed_nl = false; 445 } 446 for (t_ptr = token; *t_ptr; ++t_ptr) 447 *sc_end++ = *t_ptr; /* copy token into temp 448 * buffer */ 449 ps.procname[0] = 0; 450 451 sw_buffer: 452 ps.search_brace = false; /* stop looking for 453 * start of stmt */ 454 bp_save = buf_ptr; /* save current input 455 * buffer */ 456 be_save = buf_end; 457 buf_ptr = save_com; /* fix so that 458 * subsequent calls to 459 * lexi will take tokens 460 * out of save_com */ 461 *sc_end++ = ' '; /* add trailing blank, 462 * just in case */ 463 buf_end = sc_end; 464 sc_end = 0; 465 break; 466 } /* end of switch */ 467 if (type_code != 0) /* we must make this check, 468 * just in case there was an 469 * unexpected EOF */ 470 type_code = lexi(); /* read another token */ 471 /* if (ps.search_brace) ps.procname[0] = 0; */ 472 if ((is_procname = ps.procname[0]) && flushed_nl 473 && !procnames_start_line && ps.in_decl 474 && type_code == ident) 475 flushed_nl = 0; 476 } /* end of while (search_brace) */ 477 last_else = 0; 478 check_type: 479 if (type_code == 0) { /* we got eof */ 480 if (s_lab != e_lab || s_code != e_code 481 || s_com != e_com) /* must dump end of line */ 482 dump_line(); 483 if (ps.tos > 1) /* check for balanced braces */ 484 diag(1, "Stuff missing from end of file."); 485 486 if (verbose) { 487 printf("There were %d output lines and %d comments\n", 488 ps.out_lines, ps.out_coms); 489 printf("(Lines with comments)/(Lines with code): %6.3f\n", 490 (1.0 * ps.com_lines) / code_lines); 491 } 492 fflush(output); 493 exit(found_err); 494 } 495 if ( 496 (type_code != comment) && 497 (type_code != newline) && 498 (type_code != preesc) && 499 (type_code != form_feed)) { 500 if (force_nl && 501 (type_code != semicolon) && 502 (type_code != lbrace || !btype_2)) { 503 /* we should force a broken line here */ 504 if (verbose && !flushed_nl) 505 diag(0, "Line broken"); 506 flushed_nl = false; 507 dump_line(); 508 ps.want_blank = false; /* dont insert blank at 509 * line start */ 510 force_nl = false; 511 } 512 ps.in_stmt = true; /* turn on flag which causes 513 * an extra level of 514 * indentation. this is turned 515 * off by a ; or '}' */ 516 if (s_com != e_com) { /* the turkey has embedded a 517 * comment in a line. fix it */ 518 *e_code++ = ' '; 519 for (t_ptr = s_com; *t_ptr; ++t_ptr) { 520 CHECK_SIZE_CODE; 521 *e_code++ = *t_ptr; 522 } 523 *e_code++ = ' '; 524 *e_code = '\0'; /* null terminate code sect */ 525 ps.want_blank = false; 526 e_com = s_com; 527 } 528 } else 529 if (type_code != comment) /* preserve force_nl 530 * thru a comment */ 531 force_nl = false; /* cancel forced newline 532 * after newline, form 533 * feed, etc */ 534 535 536 537 /*-----------------------------------------------------*\ 538 | do switch on type of token scanned | 539 \*-----------------------------------------------------*/ 540 CHECK_SIZE_CODE; 541 switch (type_code) { /* now, decide what to do with the 542 * token */ 543 544 case form_feed:/* found a form feed in line */ 545 ps.use_ff = true; /* a form feed is treated much 546 * like a newline */ 547 dump_line(); 548 ps.want_blank = false; 549 break; 550 551 case newline: 552 if (ps.last_token != comma || ps.p_l_follow > 0 553 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 554 dump_line(); 555 ps.want_blank = false; 556 } 557 ++line_no; /* keep track of input line number */ 558 break; 559 560 case lparen: /* got a '(' or '[' */ 561 ++ps.p_l_follow; /* count parens to make Healy 562 * happy */ 563 if (ps.want_blank && *token != '[' && 564 (ps.last_token != ident || proc_calls_space 565 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) 566 *e_code++ = ' '; 567 if (ps.in_decl && !ps.block_init) { 568 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { 569 ps.dumped_decl_indent = 1; 570 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 571 e_code += strlen(e_code); 572 } else { 573 while ((e_code - s_code) < dec_ind) { 574 CHECK_SIZE_CODE; 575 *e_code++ = ' '; 576 } 577 *e_code++ = token[0]; 578 } 579 } else 580 *e_code++ = token[0]; 581 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; 582 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent 583 && ps.paren_indents[0] < 2 * ps.ind_size) 584 ps.paren_indents[0] = 2 * ps.ind_size; 585 ps.want_blank = false; 586 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 587 /* 588 * this is a kluge to make sure that declarations will be 589 * aligned right if proc decl has an explicit type on it, i.e. 590 * "int a(x) {..." 591 */ 592 parse(semicolon); /* I said this was a 593 * kluge... */ 594 ps.in_or_st = false; /* turn off flag for 595 * structure decl or 596 * initialization */ 597 } 598 if (ps.sizeof_keyword) 599 ps.sizeof_mask |= 1 << ps.p_l_follow; 600 break; 601 602 case rparen: /* got a ')' or ']' */ 603 rparen_count--; 604 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { 605 ps.last_u_d = true; 606 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 607 } 608 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; 609 if (--ps.p_l_follow < 0) { 610 ps.p_l_follow = 0; 611 diag(0, "Extra %c", *token); 612 } 613 if (e_code == s_code) /* if the paren starts the 614 * line */ 615 ps.paren_level = ps.p_l_follow; /* then indent it */ 616 617 *e_code++ = token[0]; 618 ps.want_blank = true; 619 620 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 621 * (...), or some such */ 622 sp_sw = false; 623 force_nl = true; /* must force newline 624 * after if */ 625 ps.last_u_d = true; /* inform lexi that a 626 * following operator is 627 * unary */ 628 ps.in_stmt = false; /* dont use stmt 629 * continuation 630 * indentation */ 631 632 parse(hd_type); /* let parser worry about if, 633 * or whatever */ 634 } 635 ps.search_brace = btype_2; /* this should insure 636 * that constructs such 637 * as main(){...} and 638 * int[]{...} have their 639 * braces put in the 640 * right place */ 641 break; 642 643 case unary_op: /* this could be any unary operation */ 644 if (ps.want_blank) 645 *e_code++ = ' '; 646 647 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { 648 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); 649 ps.dumped_decl_indent = 1; 650 e_code += strlen(e_code); 651 } else { 652 const char *res = token; 653 654 if (ps.in_decl && !ps.block_init) { /* if this is a unary op 655 * in a declaration, we 656 * should indent this 657 * token */ 658 for (i = 0; token[i]; ++i); /* find length of token */ 659 while ((e_code - s_code) < (dec_ind - i)) { 660 CHECK_SIZE_CODE; 661 *e_code++ = ' '; /* pad it */ 662 } 663 } 664 if (troff && token[0] == '-' && token[1] == '>') 665 res = "\\(->"; 666 for (t_ptr = res; *t_ptr; ++t_ptr) { 667 CHECK_SIZE_CODE; 668 *e_code++ = *t_ptr; 669 } 670 } 671 ps.want_blank = false; 672 break; 673 674 case binary_op:/* any binary operation */ 675 if (ps.want_blank) 676 *e_code++ = ' '; 677 { 678 const char *res = token; 679 680 if (troff) 681 switch (token[0]) { 682 case '<': 683 if (token[1] == '=') 684 res = "\\(<="; 685 break; 686 case '>': 687 if (token[1] == '=') 688 res = "\\(>="; 689 break; 690 case '!': 691 if (token[1] == '=') 692 res = "\\(!="; 693 break; 694 case '|': 695 if (token[1] == '|') 696 res = "\\(br\\(br"; 697 else 698 if (token[1] == 0) 699 res = "\\(br"; 700 break; 701 } 702 for (t_ptr = res; *t_ptr; ++t_ptr) { 703 CHECK_SIZE_CODE; 704 *e_code++ = *t_ptr; /* move the operator */ 705 } 706 } 707 ps.want_blank = true; 708 break; 709 710 case postop: /* got a trailing ++ or -- */ 711 *e_code++ = token[0]; 712 *e_code++ = token[1]; 713 ps.want_blank = true; 714 break; 715 716 case question: /* got a ? */ 717 squest++; /* this will be used when a later 718 * colon appears so we can distinguish 719 * the <c>?<n>:<n> construct */ 720 if (ps.want_blank) 721 *e_code++ = ' '; 722 *e_code++ = '?'; 723 ps.want_blank = true; 724 break; 725 726 case casestmt: /* got word 'case' or 'default' */ 727 scase = true; /* so we can process the later colon 728 * properly */ 729 goto copy_id; 730 731 case colon: /* got a ':' */ 732 if (squest > 0) { /* it is part of the <c>?<n>: 733 * <n> construct */ 734 --squest; 735 if (ps.want_blank) 736 *e_code++ = ' '; 737 *e_code++ = ':'; 738 ps.want_blank = true; 739 break; 740 } 741 if (ps.in_or_st) { 742 *e_code++ = ':'; 743 ps.want_blank = false; 744 break; 745 } 746 ps.in_stmt = false; /* seeing a label does not 747 * imply we are in a stmt */ 748 for (t_ptr = s_code; *t_ptr; ++t_ptr) 749 *e_lab++ = *t_ptr; /* turn everything so 750 * far into a label */ 751 e_code = s_code; 752 *e_lab++ = ':'; 753 *e_lab++ = ' '; 754 *e_lab = '\0'; 755 756 force_nl = ps.pcase = scase; /* ps.pcase will be used 757 * by dump_line to 758 * decide how to indent 759 * the label. force_nl 760 * will force a case n: 761 * to be on a line by 762 * itself */ 763 scase = false; 764 ps.want_blank = false; 765 break; 766 767 case semicolon:/* got a ';' */ 768 ps.in_or_st = false; /* we are not in an 769 * initialization or structure 770 * declaration */ 771 scase = false; /* these will only need resetting in a 772 * error */ 773 squest = 0; 774 if (ps.last_token == rparen && rparen_count == 0) 775 ps.in_parameter_declaration = 0; 776 ps.cast_mask = 0; 777 ps.sizeof_mask = 0; 778 ps.block_init = 0; 779 ps.block_init_level = 0; 780 ps.just_saw_decl--; 781 782 if (ps.in_decl && s_code == e_code && !ps.block_init) 783 while ((e_code - s_code) < (dec_ind - 1)) { 784 CHECK_SIZE_CODE; 785 *e_code++ = ' '; 786 } 787 788 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first 789 * level structure 790 * declaration, we arent 791 * any more */ 792 793 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 794 795 /* 796 * This should be true iff there were unbalanced parens in the 797 * stmt. It is a bit complicated, because the semicolon might 798 * be in a for stmt 799 */ 800 diag(1, "Unbalanced parens"); 801 ps.p_l_follow = 0; 802 if (sp_sw) { /* this is a check for a if, 803 * while, etc. with unbalanced 804 * parens */ 805 sp_sw = false; 806 parse(hd_type); /* dont lose the if, or 807 * whatever */ 808 } 809 } 810 *e_code++ = ';'; 811 ps.want_blank = true; 812 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in 813 * the middle of a stmt */ 814 815 if (!sp_sw) { /* if not if for (;;) */ 816 parse(semicolon); /* let parser know about 817 * end of stmt */ 818 force_nl = true; /* force newline after a 819 * end of stmt */ 820 } 821 break; 822 823 case lbrace: /* got a '{' */ 824 ps.in_stmt = false; /* dont indent the {} */ 825 if (!ps.block_init) 826 force_nl = true; /* force other stuff on 827 * same line as '{' onto 828 * new line */ 829 else 830 if (ps.block_init_level <= 0) 831 ps.block_init_level = 1; 832 else 833 ps.block_init_level++; 834 835 if (s_code != e_code && !ps.block_init) { 836 if (!btype_2) { 837 dump_line(); 838 ps.want_blank = false; 839 } else 840 if (ps.in_parameter_declaration && !ps.in_or_st) { 841 ps.i_l_follow = 0; 842 dump_line(); 843 ps.want_blank = false; 844 } 845 } 846 if (ps.in_parameter_declaration) 847 prefix_blankline_requested = 0; 848 849 if (ps.p_l_follow > 0) { /* check for preceding 850 * unbalanced parens */ 851 diag(1, "Unbalanced parens"); 852 ps.p_l_follow = 0; 853 if (sp_sw) { /* check for unclosed if, for, 854 * etc. */ 855 sp_sw = false; 856 parse(hd_type); 857 ps.ind_level = ps.i_l_follow; 858 } 859 } 860 if (s_code == e_code) 861 ps.ind_stmt = false; /* dont put extra 862 * indentation on line 863 * with '{' */ 864 if (ps.in_decl && ps.in_or_st) { /* this is either a 865 * structure declaration 866 * or an init */ 867 di_stack[ps.dec_nest++] = dec_ind; 868 /* ? dec_ind = 0; */ 869 } else { 870 ps.decl_on_line = false; /* we cant be in the 871 * middle of a 872 * declaration, so dont 873 * do special 874 * indentation of 875 * comments */ 876 if (blanklines_after_declarations_at_proctop 877 && ps.in_parameter_declaration) 878 postfix_blankline_requested = 1; 879 ps.in_parameter_declaration = 0; 880 } 881 dec_ind = 0; 882 parse(lbrace); /* let parser know about this */ 883 if (ps.want_blank) /* put a blank before '{' if 884 * '{' is not at start of line */ 885 *e_code++ = ' '; 886 ps.want_blank = false; 887 *e_code++ = '{'; 888 ps.just_saw_decl = 0; 889 break; 890 891 case rbrace: /* got a '}' */ 892 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 893 * omitted in 894 * declarations */ 895 parse(semicolon); 896 if (ps.p_l_follow) { /* check for unclosed if, for, 897 * else. */ 898 diag(1, "Unbalanced parens"); 899 ps.p_l_follow = 0; 900 sp_sw = false; 901 } 902 ps.just_saw_decl = 0; 903 ps.block_init_level--; 904 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 905 * line */ 906 if (verbose) 907 diag(0, "Line broken"); 908 dump_line(); 909 } 910 *e_code++ = '}'; 911 ps.want_blank = true; 912 ps.in_stmt = ps.ind_stmt = false; 913 if (ps.dec_nest > 0) { /* we are in multi-level 914 * structure declaration */ 915 dec_ind = di_stack[--ps.dec_nest]; 916 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 917 ps.just_saw_decl = 2; 918 ps.in_decl = true; 919 } 920 prefix_blankline_requested = 0; 921 parse(rbrace); /* let parser know about this */ 922 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead 923 && ps.il[ps.tos] >= ps.ind_level; 924 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) 925 postfix_blankline_requested = 1; 926 break; 927 928 case swstmt: /* got keyword "switch" */ 929 sp_sw = true; 930 hd_type = swstmt; /* keep this for when we have 931 * seen the expression */ 932 goto copy_id; /* go move the token into buffer */ 933 934 case sp_paren: /* token is if, while, for */ 935 sp_sw = true; /* the interesting stuff is done after 936 * the expression is scanned */ 937 hd_type = (*token == 'i' ? ifstmt : 938 (*token == 'w' ? whilestmt : forstmt)); 939 940 /* 941 * remember the type of header for later use by parser 942 */ 943 goto copy_id; /* copy the token into line */ 944 945 case sp_nparen:/* got else, do */ 946 ps.in_stmt = false; 947 if (*token == 'e') { 948 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { 949 if (verbose) 950 diag(0, "Line broken"); 951 dump_line(); /* make sure this starts 952 * a line */ 953 ps.want_blank = false; 954 } 955 force_nl = true; /* also, following stuff 956 * must go onto new line */ 957 last_else = 1; 958 parse(elselit); 959 } else { 960 if (e_code != s_code) { /* make sure this starts 961 * a line */ 962 if (verbose) 963 diag(0, "Line broken"); 964 dump_line(); 965 ps.want_blank = false; 966 } 967 force_nl = true; /* also, following stuff 968 * must go onto new line */ 969 last_else = 0; 970 parse(dolit); 971 } 972 goto copy_id; /* move the token into line */ 973 974 case decl: /* we have a declaration type (int, register, 975 * etc.) */ 976 parse(decl); /* let parser worry about indentation */ 977 if (ps.last_token == rparen && ps.tos <= 1) { 978 ps.in_parameter_declaration = 1; 979 if (s_code != e_code) { 980 dump_line(); 981 ps.want_blank = 0; 982 } 983 } 984 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { 985 ps.ind_level = ps.i_l_follow = 1; 986 ps.ind_stmt = 0; 987 } 988 ps.in_or_st = true; /* this might be a structure 989 * or initialization 990 * declaration */ 991 ps.in_decl = ps.decl_on_line = true; 992 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 993 ps.just_saw_decl = 2; 994 prefix_blankline_requested = 0; 995 for (i = 0; token[i++];); /* get length of token */ 996 997 /* 998 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent 999 * : i); 1000 */ 1001 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; 1002 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); 1003 goto copy_id; 1004 1005 case ident: /* got an identifier or constant */ 1006 if (ps.in_decl) { /* if we are in a declaration, 1007 * we must indent identifier */ 1008 if (ps.want_blank) 1009 *e_code++ = ' '; 1010 ps.want_blank = false; 1011 if (is_procname == 0 || !procnames_start_line) { 1012 if (!ps.block_init) { 1013 if (troff && !ps.dumped_decl_indent) { 1014 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); 1015 ps.dumped_decl_indent = 1; 1016 e_code += strlen(e_code); 1017 CHECK_SIZE_CODE; 1018 } else { 1019 int cur_dec_ind; 1020 int pos, startpos; 1021 1022 /* 1023 * in order to get the tab math right for 1024 * indentations that are not multiples of 8 we 1025 * need to modify both startpos and dec_ind 1026 * (cur_dec_ind) here by eight minus the 1027 * remainder of the current starting column 1028 * divided by eight. This seems to be a 1029 * properly working fix 1030 */ 1031 startpos = e_code - s_code; 1032 cur_dec_ind = dec_ind; 1033 pos = startpos; 1034 if ((ps.ind_level * ps.ind_size) % 8 != 0) { 1035 pos += (ps.ind_level * ps.ind_size) % 8; 1036 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; 1037 } 1038 1039 if (tabs_to_var) { 1040 while ((pos & ~7) + 8 <= cur_dec_ind) { 1041 CHECK_SIZE_CODE; 1042 *e_code++ = '\t'; 1043 pos = (pos & ~7) + 8; 1044 } 1045 } 1046 while (pos < cur_dec_ind) { 1047 CHECK_SIZE_CODE; 1048 *e_code++ = ' '; 1049 pos++; 1050 } 1051 if (ps.want_blank && e_code - s_code == startpos) 1052 *e_code++ = ' '; 1053 ps.want_blank = false; 1054 } 1055 } 1056 } else { 1057 if (dec_ind && s_code != e_code) 1058 dump_line(); 1059 dec_ind = 0; 1060 ps.want_blank = false; 1061 } 1062 } else 1063 if (sp_sw && ps.p_l_follow == 0) { 1064 sp_sw = false; 1065 force_nl = true; 1066 ps.last_u_d = true; 1067 ps.in_stmt = false; 1068 parse(hd_type); 1069 } 1070 copy_id: 1071 if (ps.want_blank) 1072 *e_code++ = ' '; 1073 if (troff && ps.its_a_keyword) { 1074 e_code = chfont(&bodyf, &keywordf, e_code); 1075 for (t_ptr = token; *t_ptr; ++t_ptr) { 1076 CHECK_SIZE_CODE; 1077 *e_code++ = keywordf.allcaps 1078 ? toupper((unsigned char)*t_ptr) 1079 : *t_ptr; 1080 } 1081 e_code = chfont(&keywordf, &bodyf, e_code); 1082 } else 1083 for (t_ptr = token; *t_ptr; ++t_ptr) { 1084 CHECK_SIZE_CODE; 1085 *e_code++ = *t_ptr; 1086 } 1087 ps.want_blank = true; 1088 break; 1089 1090 case period: /* treat a period kind of like a binary 1091 * operation */ 1092 *e_code++ = '.'; /* move the period into line */ 1093 ps.want_blank = false; /* dont put a blank after a 1094 * period */ 1095 break; 1096 1097 case comma: 1098 ps.want_blank = (s_code != e_code); /* only put blank after 1099 * comma if comma does 1100 * not start the line */ 1101 if (ps.in_decl && is_procname == 0 && !ps.block_init) 1102 while ((e_code - s_code) < (dec_ind - 1)) { 1103 CHECK_SIZE_CODE; 1104 *e_code++ = ' '; 1105 } 1106 1107 *e_code++ = ','; 1108 if (ps.p_l_follow == 0) { 1109 if (ps.block_init_level <= 0) 1110 ps.block_init = 0; 1111 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) 1112 force_nl = true; 1113 } 1114 break; 1115 1116 case preesc: /* got the character '#' */ 1117 if ((s_com != e_com) || 1118 (s_lab != e_lab) || 1119 (s_code != e_code)) 1120 dump_line(); 1121 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1122 { 1123 int in_comment = 0; 1124 int com_start = 0; 1125 char quote = 0; 1126 int com_end = 0; 1127 1128 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1129 buf_ptr++; 1130 if (buf_ptr >= buf_end) 1131 fill_buffer(); 1132 } 1133 while (*buf_ptr != '\n' || in_comment) { 1134 CHECK_SIZE_LAB; 1135 *e_lab = *buf_ptr++; 1136 if (buf_ptr >= buf_end) 1137 fill_buffer(); 1138 switch (*e_lab++) { 1139 case BACKSLASH: 1140 if (troff) 1141 *e_lab++ = BACKSLASH; 1142 if (!in_comment) { 1143 *e_lab++ = *buf_ptr++; 1144 if (buf_ptr >= buf_end) 1145 fill_buffer(); 1146 } 1147 break; 1148 case '/': 1149 if (*buf_ptr == '*' && !in_comment && !quote) { 1150 in_comment = 1; 1151 *e_lab++ = *buf_ptr++; 1152 com_start = e_lab - s_lab - 2; 1153 } 1154 break; 1155 case '"': 1156 if (quote == '"') 1157 quote = 0; 1158 break; 1159 case '\'': 1160 if (quote == '\'') 1161 quote = 0; 1162 break; 1163 case '*': 1164 if (*buf_ptr == '/' && in_comment) { 1165 in_comment = 0; 1166 *e_lab++ = *buf_ptr++; 1167 com_end = e_lab - s_lab; 1168 } 1169 break; 1170 } 1171 } 1172 1173 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1174 e_lab--; 1175 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on 1176 * preprocessor line */ 1177 if (sc_end == 0) /* if this is the first 1178 * comment, we must set 1179 * up the buffer */ 1180 sc_end = &(save_com[0]); 1181 else { 1182 *sc_end++ = '\n'; /* add newline between 1183 * comments */ 1184 *sc_end++ = ' '; 1185 --line_no; 1186 } 1187 memmove(sc_end, s_lab + com_start, com_end - com_start); 1188 sc_end += com_end - com_start; 1189 if (sc_end >= &save_com[sc_size]) 1190 abort(); 1191 e_lab = s_lab + com_start; 1192 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1193 e_lab--; 1194 bp_save = buf_ptr; /* save current input 1195 * buffer */ 1196 be_save = buf_end; 1197 buf_ptr = save_com; /* fix so that 1198 * subsequent calls to 1199 * lexi will take tokens 1200 * out of save_com */ 1201 *sc_end++ = ' '; /* add trailing blank, 1202 * just in case */ 1203 buf_end = sc_end; 1204 sc_end = 0; 1205 } 1206 *e_lab = '\0'; /* null terminate line */ 1207 ps.pcase = false; 1208 } 1209 1210 if (strncmp(s_lab, "#if", 3) == 0) { 1211 if (blanklines_around_conditional_compilation) { 1212 int c; 1213 prefix_blankline_requested++; 1214 while ((c = getc(input)) == '\n'); 1215 ungetc(c, input); 1216 } 1217 if (ifdef_level < (int)(sizeof state_stack / sizeof state_stack[0])) { 1218 match_state[ifdef_level].tos = -1; 1219 state_stack[ifdef_level++] = ps; 1220 } else 1221 diag(1, "#if stack overflow"); 1222 } else 1223 if (strncmp(s_lab, "#else", 5) == 0) { 1224 if (ifdef_level <= 0) 1225 diag(1, "Unmatched #else"); 1226 else { 1227 match_state[ifdef_level - 1] = ps; 1228 ps = state_stack[ifdef_level - 1]; 1229 } 1230 } else 1231 if (strncmp(s_lab, "#endif", 6) == 0) { 1232 if (ifdef_level <= 0) 1233 diag(1, "Unmatched #endif"); 1234 else { 1235 ifdef_level--; 1236 1237 #ifdef undef 1238 /* 1239 * This match needs to be more intelligent before the 1240 * message is useful 1241 */ 1242 if (match_state[ifdef_level].tos >= 0 1243 && memcmp(&ps, &match_state[ifdef_level], sizeof ps)) 1244 diag(0, "Syntactically inconsistant #ifdef alternatives."); 1245 #endif 1246 } 1247 if (blanklines_around_conditional_compilation) { 1248 postfix_blankline_requested++; 1249 n_real_blanklines = 0; 1250 } 1251 } 1252 break; /* subsequent processing of the newline 1253 * character will cause the line to be printed */ 1254 1255 case comment: /* we have gotten a start comment */ 1256 /* this is a biggie */ 1257 if (flushed_nl) { /* we should force a broken 1258 * line here */ 1259 flushed_nl = false; 1260 dump_line(); 1261 ps.want_blank = false; /* dont insert blank at 1262 * line start */ 1263 force_nl = false; 1264 } 1265 pr_comment(); 1266 break; 1267 } /* end of big switch stmt */ 1268 1269 *e_code = '\0'; /* make sure code section is null terminated */ 1270 if (type_code != comment && type_code != newline && type_code != preesc) 1271 ps.last_token = type_code; 1272 } /* end of main while (1) loop */ 1273 } 1274 /* 1275 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1276 * backup file will be ".Bfile" then make the backup file the input and 1277 * original input file the output 1278 */ 1279 void 1280 bakcopy(void) 1281 { 1282 int n, bakchn; 1283 char buff[8 * 1024]; 1284 const char *p; 1285 1286 /* construct file name .Bfile */ 1287 for (p = in_name; *p; p++); /* skip to end of string */ 1288 while (p > in_name && *p != '/') /* find last '/' */ 1289 p--; 1290 if (*p == '/') 1291 p++; 1292 sprintf(bakfile, "%s.BAK", p); 1293 1294 /* copy in_name to backup file */ 1295 bakchn = creat(bakfile, 0600); 1296 if (bakchn < 0) 1297 err(1, "%s", bakfile); 1298 while ((n = read(fileno(input), buff, sizeof buff)) > 0) 1299 if (write(bakchn, buff, n) != n) 1300 err(1, "%s", bakfile); 1301 if (n < 0) 1302 err(1, "%s", in_name); 1303 close(bakchn); 1304 fclose(input); 1305 1306 /* re-open backup file as the input file */ 1307 input = fopen(bakfile, "r"); 1308 if (input == 0) 1309 err(1, "%s", bakfile); 1310 /* now the original input file will be the output */ 1311 output = fopen(in_name, "w"); 1312 if (output == 0) { 1313 unlink(bakfile); 1314 err(1, "%s", in_name); 1315 } 1316 } 1317