1 /* $NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1980, 1993 8 * The Regents of the University of California. All rights reserved. 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #if 0 41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93"; 42 #endif 43 44 #include <sys/cdefs.h> 45 #if defined(__NetBSD__) 46 __RCSID("$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $"); 47 #elif defined(__FreeBSD__) 48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $"); 49 #endif 50 51 #include <assert.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 56 #include "indent.h" 57 58 /* 59 * There are 3 modes for reading the input. 60 * 61 * default: In this mode, the input comes from the input file. The buffer 62 * 'inp' contains the current line, terminated with '\n'. The current read 63 * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other 64 * pointers are null. 65 * 66 * copy-in: After reading 'if (expr)' or similar tokens, the input still comes 67 * from 'inp', but instead of processing it, it is copied to 'save_com'. The 68 * goal of this mode is to move the comments after the '{', that is to 69 * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next 70 * token cannot be part of this transformation, switch to copy-out. 71 * 72 * copy-out: In this mode, the input comes from 'save_com', which contains the 73 * tokens to be placed after the '{'. The input still comes from the range 74 * [inp.s, inp.e), but these two members have been overwritten with pointers 75 * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual. 76 * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading 77 * all tokens from save_com, switch to default mode again. 78 */ 79 static struct { 80 struct buffer inp; /* one line of input, ready to be split into 81 * tokens; occasionally 's' and 'e' switch 82 * to save_com_buf */ 83 char save_com_buf[5000]; /* input text is saved here when looking for 84 * the brace after an if, while, etc */ 85 char *save_com_s; /* start of the comment in save_com_buf, or 86 * null */ 87 char *save_com_e; /* end of the comment in save_com_buf, or 88 * null */ 89 90 char *saved_inp_s; /* saved value of inp.s when taking input from 91 * save_com, or null */ 92 char *saved_inp_e; /* saved value of inp.e, or null */ 93 } inbuf; 94 95 static int paren_indent; 96 static bool suppress_blanklines; 97 98 99 void 100 inp_init(void) 101 { 102 inbuf.inp.buf = xmalloc(10); 103 inbuf.inp.l = inbuf.inp.buf + 8; 104 inbuf.inp.s = inbuf.inp.buf; 105 inbuf.inp.e = inbuf.inp.buf; 106 } 107 108 const char * 109 inp_p(void) 110 { 111 assert(inbuf.inp.s < inbuf.inp.e); 112 return inbuf.inp.s; 113 } 114 115 const char * 116 inp_line_start(void) 117 { 118 return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf; 119 } 120 121 const char * 122 inp_line_end(void) 123 { 124 return inbuf.inp.e; 125 } 126 127 char 128 inp_peek(void) 129 { 130 assert(inbuf.inp.s < inbuf.inp.e); 131 return *inbuf.inp.s; 132 } 133 134 char 135 inp_lookahead(size_t i) 136 { 137 assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s)); 138 return inbuf.inp.s[i]; 139 } 140 141 void 142 inp_skip(void) 143 { 144 assert(inbuf.inp.s < inbuf.inp.e); 145 inbuf.inp.s++; 146 if (inbuf.inp.s >= inbuf.inp.e) 147 inp_read_line(); 148 } 149 150 char 151 inp_next(void) 152 { 153 char ch = inp_peek(); 154 inp_skip(); 155 return ch; 156 } 157 158 #ifdef debug 159 static void 160 debug_inp_buf(const char *name, const char *s, const char *e) 161 { 162 if (s != NULL && e != NULL) { 163 debug_printf(" %-12s ", name); 164 debug_vis_range("\"", s, e, "\"\n"); 165 } 166 } 167 168 void 169 debug_inp(const char *prefix) 170 { 171 assert(inp_line_start() <= inbuf.inp.s); 172 assert(inbuf.inp.s <= inbuf.inp.e); 173 174 debug_println("%s %s:", __func__, prefix); 175 if (inbuf.saved_inp_s == NULL) 176 debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s); 177 debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e); /* never null */ 178 debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s); 179 debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e); 180 debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e); 181 } 182 #endif 183 184 static void 185 inp_comment_check_size(size_t n) 186 { 187 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <= 188 array_length(inbuf.save_com_buf)) 189 return; 190 191 diag(1, "Internal buffer overflow - " 192 "Move big comment from right after if, while, or whatever"); 193 fflush(output); 194 exit(1); 195 } 196 197 void 198 inp_comment_init_newline(void) 199 { 200 if (inbuf.save_com_e != NULL) 201 return; 202 203 inbuf.save_com_s = inbuf.save_com_buf; 204 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */ 205 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */ 206 inbuf.save_com_e = &inbuf.save_com_s[2]; 207 debug_inp(__func__); 208 } 209 210 void 211 inp_comment_init_comment(void) 212 { 213 if (inbuf.save_com_e != NULL) 214 return; 215 216 /* 217 * Copy everything from the start of the line, because process_comment() 218 * will use that to calculate the original indentation of a boxed comment. 219 */ 220 /* 221 * TODO: Don't store anything in the memory range [input.inp.buf, 222 * input.inp.s), as that data can easily get lost. 223 */ 224 /* 225 * FIXME: The '4' below is completely wrong. For example, in the snippet 226 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there is 227 * an additional line break before the ')', memcpy tries to copy 228 * (size_t)-1 bytes. 229 * 230 * The original author of this magic number doesn't remember its purpose 231 * anymore, so there is no point in keeping it. The existing tests must 232 * still pass though. 233 */ 234 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4); 235 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4; 236 assert(line_len < array_length(inbuf.save_com_buf)); 237 238 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len); 239 inbuf.save_com_s = inbuf.save_com_buf + line_len; 240 241 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */ 242 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */ 243 inbuf.save_com_e = &inbuf.save_com_s[2]; 244 245 debug_vis_range("search_stmt_comment: before save_com is \"", 246 inbuf.save_com_buf, inbuf.save_com_s, "\"\n"); 247 debug_vis_range("search_stmt_comment: save_com is \"", 248 inbuf.save_com_s, inbuf.save_com_e, "\"\n"); 249 } 250 251 void 252 inp_comment_init_preproc(void) 253 { 254 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we 255 * must set up the buffer */ 256 /* 257 * XXX: No space is reserved for a potential '{' here, unlike in 258 * inp_comment_init_comment. 259 */ 260 inbuf.save_com_s = inbuf.save_com_buf; 261 inbuf.save_com_e = inbuf.save_com_s; 262 } else { 263 inp_comment_add_char('\n'); /* add newline between comments */ 264 inp_comment_add_char(' '); 265 --line_no; 266 } 267 } 268 269 void 270 inp_comment_add_char(char ch) 271 { 272 inp_comment_check_size(1); 273 *inbuf.save_com_e++ = ch; 274 } 275 276 void 277 inp_comment_add_range(const char *s, const char *e) 278 { 279 size_t len = (size_t)(e - s); 280 inp_comment_check_size(len); 281 memcpy(inbuf.save_com_e, s, len); 282 inbuf.save_com_e += len; 283 } 284 285 bool 286 inp_comment_complete_block(void) 287 { 288 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/'; 289 } 290 291 bool 292 inp_comment_seen(void) 293 { 294 return inbuf.save_com_e != NULL; 295 } 296 297 void 298 inp_comment_rtrim_blank(void) 299 { 300 while (inbuf.save_com_e > inbuf.save_com_s && 301 ch_isblank(inbuf.save_com_e[-1])) 302 inbuf.save_com_e--; 303 } 304 305 void 306 inp_comment_rtrim_newline(void) 307 { 308 while (inbuf.save_com_e > inbuf.save_com_s && 309 inbuf.save_com_e[-1] == '\n') 310 inbuf.save_com_e--; 311 } 312 313 /* 314 * Switch the input to come from save_com, replaying the copied tokens while 315 * looking for the next '{'. 316 */ 317 void 318 inp_from_comment(void) 319 { 320 debug_inp("before inp_from_comment"); 321 inbuf.saved_inp_s = inbuf.inp.s; 322 inbuf.saved_inp_e = inbuf.inp.e; 323 324 inbuf.inp.s = inbuf.save_com_s; 325 inbuf.inp.e = inbuf.save_com_e; 326 inbuf.save_com_s = NULL; 327 inbuf.save_com_e = NULL; 328 debug_inp("after inp_from_comment"); 329 } 330 331 /* 332 * After having read from save_com, continue with the rest of the input line 333 * before reading the next line from the input file. 334 */ 335 static bool 336 inp_from_file(void) 337 { 338 if (inbuf.saved_inp_s == NULL) 339 return false; 340 341 inbuf.inp.s = inbuf.saved_inp_s; 342 inbuf.inp.e = inbuf.saved_inp_e; 343 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL; 344 debug_println("switched inp.s back to saved_inp_s"); 345 return inbuf.inp.s < inbuf.inp.e; 346 } 347 348 void 349 inp_comment_insert_lbrace(void) 350 { 351 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */ 352 inbuf.save_com_s[0] = '{'; 353 } 354 355 static void 356 inp_add(char ch) 357 { 358 if (inbuf.inp.e >= inbuf.inp.l) { 359 size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10; 360 size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf); 361 inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size); 362 inbuf.inp.s = inbuf.inp.buf; 363 inbuf.inp.e = inbuf.inp.buf + offset; 364 inbuf.inp.l = inbuf.inp.buf + new_size - 2; 365 } 366 *inbuf.inp.e++ = ch; 367 } 368 369 static void 370 inp_read_next_line(FILE *f) 371 { 372 inbuf.inp.s = inbuf.inp.buf; 373 inbuf.inp.e = inbuf.inp.buf; 374 375 for (;;) { 376 int ch = getc(f); 377 if (ch == EOF) { 378 if (!inhibit_formatting) { 379 inp_add(' '); 380 inp_add('\n'); 381 } 382 had_eof = true; 383 break; 384 } 385 386 if (ch != '\0') 387 inp_add((char)ch); 388 if (ch == '\n') 389 break; 390 } 391 } 392 393 static void 394 output_char(char ch) 395 { 396 fputc(ch, output); 397 debug_vis_range("output_char '", &ch, &ch + 1, "'\n"); 398 } 399 400 static void 401 output_range(const char *s, const char *e) 402 { 403 fwrite(s, 1, (size_t)(e - s), output); 404 debug_vis_range("output_range \"", s, e, "\"\n"); 405 } 406 407 static int 408 output_indent(int old_ind, int new_ind) 409 { 410 int ind = old_ind; 411 412 if (opt.use_tabs) { 413 int tabsize = opt.tabsize; 414 int n = new_ind / tabsize - ind / tabsize; 415 if (n > 0) 416 ind -= ind % tabsize; 417 for (int i = 0; i < n; i++) { 418 fputc('\t', output); 419 ind += tabsize; 420 } 421 } 422 423 for (; ind < new_ind; ind++) 424 fputc(' ', output); 425 426 debug_println("output_indent %d", ind); 427 return ind; 428 } 429 430 static int 431 output_line_label(void) 432 { 433 int ind; 434 435 while (lab.e > lab.s && ch_isblank(lab.e[-1])) 436 lab.e--; 437 *lab.e = '\0'; 438 439 ind = output_indent(0, compute_label_indent()); 440 output_range(lab.s, lab.e); 441 ind = ind_add(ind, lab.s, lab.e); 442 443 ps.is_case_label = false; 444 return ind; 445 } 446 447 static int 448 output_line_code(int ind) 449 { 450 451 int target_ind = compute_code_indent(); 452 for (int i = 0; i < ps.p_l_follow; i++) { 453 if (ps.paren_indents[i] >= 0) { 454 int paren_ind = ps.paren_indents[i]; 455 ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind)); 456 debug_println( 457 "setting paren_indents[%d] from %d to %d for column %d", 458 i, paren_ind, ps.paren_indents[i], target_ind + 1); 459 } 460 } 461 462 ind = output_indent(ind, target_ind); 463 output_range(code.s, code.e); 464 return ind_add(ind, code.s, code.e); 465 } 466 467 static void 468 output_line_comment(int ind) 469 { 470 int target_ind = ps.com_ind; 471 const char *p = com.s; 472 473 target_ind += ps.comment_delta; 474 475 /* consider original indentation in case this is a box comment */ 476 for (; *p == '\t'; p++) 477 target_ind += opt.tabsize; 478 479 for (; target_ind < 0; p++) { 480 if (*p == ' ') 481 target_ind++; 482 else if (*p == '\t') 483 target_ind = next_tab(target_ind); 484 else { 485 target_ind = 0; 486 break; 487 } 488 } 489 490 /* if comment can't fit on this line, put it on the next line */ 491 if (ind > target_ind) { 492 output_char('\n'); 493 ind = 0; 494 ps.stats.lines++; 495 } 496 497 while (com.e > p && ch_isspace(com.e[-1])) 498 com.e--; 499 500 (void)output_indent(ind, target_ind); 501 output_range(p, com.e); 502 503 ps.comment_delta = ps.n_comment_delta; 504 ps.stats.comment_lines++; 505 } 506 507 /* 508 * Write a line of formatted source to the output file. The line consists of 509 * the label, the code and the comment. 510 */ 511 static void 512 output_complete_line(char line_terminator) 513 { 514 static bool first_line = true; 515 516 ps.is_function_definition = false; 517 518 if (code.s == code.e && lab.s == lab.e && com.s == com.e) { 519 if (suppress_blanklines) 520 suppress_blanklines = false; 521 else 522 blank_lines_to_output++; 523 524 } else if (!inhibit_formatting) { 525 suppress_blanklines = false; 526 if (blank_line_before && !first_line) { 527 if (opt.swallow_optional_blanklines) { 528 if (blank_lines_to_output == 1) 529 blank_lines_to_output = 0; 530 } else { 531 if (blank_lines_to_output == 0) 532 blank_lines_to_output = 1; 533 } 534 } 535 536 for (; blank_lines_to_output > 0; blank_lines_to_output--) 537 output_char('\n'); 538 539 if (ps.ind_level == 0) 540 ps.in_stmt_cont = false; /* this is a class A kludge */ 541 542 if (lab.e != lab.s || code.e != code.s) 543 ps.stats.code_lines++; 544 545 int ind = 0; 546 if (lab.e != lab.s) 547 ind = output_line_label(); 548 if (code.e != code.s) 549 ind = output_line_code(ind); 550 if (com.e != com.s) 551 output_line_comment(ind); 552 553 output_char(line_terminator); 554 ps.stats.lines++; 555 556 /* TODO: rename to blank_line_after_decl */ 557 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) { 558 blank_line_before = true; 559 ps.just_saw_decl = 0; 560 } else 561 blank_line_before = blank_line_after; 562 blank_line_after = false; 563 } 564 565 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */ 566 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl; 567 ps.decl_indent_done = false; 568 569 *(lab.e = lab.s) = '\0'; /* reset buffers */ 570 *(code.e = code.s) = '\0'; 571 *(com.e = com.s = com.buf + 1) = '\0'; 572 573 ps.ind_level = ps.ind_level_follow; 574 ps.paren_level = ps.p_l_follow; 575 576 if (ps.paren_level > 0) { 577 /* TODO: explain what negative indentation means */ 578 paren_indent = -1 - ps.paren_indents[ps.paren_level - 1]; 579 debug_println("paren_indent is now %d", paren_indent); 580 } 581 582 first_line = false; 583 } 584 585 void 586 output_line(void) 587 { 588 output_complete_line('\n'); 589 } 590 591 void 592 output_line_ff(void) 593 { 594 output_complete_line('\f'); 595 } 596 597 static int 598 compute_code_indent_lineup(int base_ind) 599 { 600 int ti = paren_indent; 601 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length; 602 if (overflow < 0) 603 return ti; 604 605 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) { 606 ti -= overflow + 2; 607 if (ti > base_ind) 608 return ti; 609 return base_ind; 610 } 611 612 return ti; 613 } 614 615 int 616 compute_code_indent(void) 617 { 618 int base_ind = ps.ind_level * opt.indent_size; 619 620 if (ps.paren_level == 0) { 621 if (ps.in_stmt_cont) 622 return base_ind + opt.continuation_indent; 623 return base_ind; 624 } 625 626 if (opt.lineup_to_parens) { 627 if (opt.lineup_to_parens_always) 628 return paren_indent; 629 return compute_code_indent_lineup(base_ind); 630 } 631 632 if (2 * opt.continuation_indent == opt.indent_size) 633 return base_ind + opt.continuation_indent; 634 else 635 return base_ind + opt.continuation_indent * ps.paren_level; 636 } 637 638 int 639 compute_label_indent(void) 640 { 641 if (ps.is_case_label) 642 return (int)(case_ind * (float)opt.indent_size); 643 if (lab.s[0] == '#') 644 return 0; 645 return opt.indent_size * (ps.ind_level - 2); 646 } 647 648 static void 649 skip_blank(const char **pp) 650 { 651 while (ch_isblank(**pp)) 652 (*pp)++; 653 } 654 655 static bool 656 skip_string(const char **pp, const char *s) 657 { 658 size_t len = strlen(s); 659 if (strncmp(*pp, s, len) == 0) { 660 *pp += len; 661 return true; 662 } 663 return false; 664 } 665 666 static void 667 parse_indent_comment(void) 668 { 669 bool on; 670 671 const char *p = inbuf.inp.buf; 672 673 skip_blank(&p); 674 if (!skip_string(&p, "/*")) 675 return; 676 skip_blank(&p); 677 if (!skip_string(&p, "INDENT")) 678 return; 679 680 skip_blank(&p); 681 if (*p == '*' || skip_string(&p, "ON")) 682 on = true; 683 else if (skip_string(&p, "OFF")) 684 on = false; 685 else 686 return; 687 688 skip_blank(&p); 689 if (!skip_string(&p, "*/\n")) 690 return; 691 692 if (com.s != com.e || lab.s != lab.e || code.s != code.e) 693 output_line(); 694 695 inhibit_formatting = !on; 696 if (on) { 697 /* 698 * XXX: Does this make sense? Is the handling of blank lines above 699 * INDENT OFF comments essentially the same? 700 */ 701 blank_lines_to_output = 0; 702 blank_line_after = false; 703 blank_line_before = false; 704 suppress_blanklines = true; 705 } 706 } 707 708 void 709 inp_read_line(void) 710 { 711 if (inp_from_file()) 712 return; 713 714 inp_read_next_line(input); 715 716 parse_indent_comment(); 717 718 if (inhibit_formatting) 719 output_range(inbuf.inp.s, inbuf.inp.e); 720 } 721