1 /* $NetBSD: io.c,v 1.148 2022/04/23 06:43:22 rillig Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-4-Clause 5 * 6 * Copyright (c) 1985 Sun Microsystems, Inc. 7 * Copyright (c) 1980, 1993 8 * The Regents of the University of California. All rights reserved. 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #if 0 41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93"; 42 #endif 43 44 #include <sys/cdefs.h> 45 #if defined(__NetBSD__) 46 __RCSID("$NetBSD: io.c,v 1.148 2022/04/23 06:43:22 rillig Exp $"); 47 #elif defined(__FreeBSD__) 48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $"); 49 #endif 50 51 #include <assert.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 56 #include "indent.h" 57 58 /* 59 * There are 3 modes for reading the input. 60 * 61 * default: In this mode, the input comes from the input file. The buffer 62 * 'inp' contains the current line, terminated with '\n'. The current read 63 * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other 64 * pointers are null. 65 * 66 * copy-in: After reading 'if (expr)' or similar tokens, the input still comes 67 * from 'inp', but instead of processing it, it is copied to 'save_com'. The 68 * goal of this mode is to move the comments after the '{', that is to 69 * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next 70 * token cannot be part of this transformation, switch to copy-out. 71 * 72 * copy-out: In this mode, the input comes from 'save_com', which contains the 73 * tokens to be placed after the '{'. The input still comes from the range 74 * [inp.s, inp.e), but these two members have been overwritten with pointers 75 * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual. 76 * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading 77 * all tokens from save_com, switch to default mode again. 78 */ 79 static struct { 80 struct buffer inp; /* one line of input, ready to be split into 81 * tokens; occasionally 's' and 'e' switch 82 * to save_com_buf */ 83 char save_com_buf[5000]; /* input text is saved here when looking for 84 * the brace after an if, while, etc */ 85 char *save_com_s; /* start of the comment in save_com_buf, or 86 * null */ 87 char *save_com_e; /* end of the comment in save_com_buf, or 88 * null */ 89 90 char *saved_inp_s; /* saved value of inp.s when taking input from 91 * save_com, or null */ 92 char *saved_inp_e; /* saved value of inp.e, or null */ 93 } inbuf; 94 95 static int paren_indent; 96 97 98 void 99 inp_init(void) 100 { 101 inbuf.inp.buf = xmalloc(10); 102 inbuf.inp.l = inbuf.inp.buf + 8; 103 inbuf.inp.s = inbuf.inp.buf; 104 inbuf.inp.e = inbuf.inp.buf; 105 } 106 107 const char * 108 inp_p(void) 109 { 110 assert(inbuf.inp.s < inbuf.inp.e); 111 return inbuf.inp.s; 112 } 113 114 const char * 115 inp_line_start(void) 116 { 117 return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf; 118 } 119 120 const char * 121 inp_line_end(void) 122 { 123 return inbuf.inp.e; 124 } 125 126 char 127 inp_peek(void) 128 { 129 assert(inbuf.inp.s < inbuf.inp.e); 130 return *inbuf.inp.s; 131 } 132 133 char 134 inp_lookahead(size_t i) 135 { 136 assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s)); 137 return inbuf.inp.s[i]; 138 } 139 140 void 141 inp_skip(void) 142 { 143 assert(inbuf.inp.s < inbuf.inp.e); 144 inbuf.inp.s++; 145 if (inbuf.inp.s >= inbuf.inp.e) 146 inp_read_line(); 147 } 148 149 char 150 inp_next(void) 151 { 152 char ch = inp_peek(); 153 inp_skip(); 154 return ch; 155 } 156 157 #ifdef debug 158 static void 159 debug_inp_buf(const char *name, const char *s, const char *e) 160 { 161 if (s != NULL && e != NULL) { 162 debug_printf(" %-12s ", name); 163 debug_vis_range("\"", s, e, "\"\n"); 164 } 165 } 166 167 void 168 debug_inp(const char *prefix) 169 { 170 assert(inp_line_start() <= inbuf.inp.s); 171 assert(inbuf.inp.s <= inbuf.inp.e); 172 173 debug_println("%s %s:", __func__, prefix); 174 if (inbuf.saved_inp_s == NULL) 175 debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s); 176 debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e); /* never null */ 177 debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s); 178 debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e); 179 debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e); 180 } 181 #endif 182 183 static void 184 inp_comment_check_size(size_t n) 185 { 186 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <= 187 array_length(inbuf.save_com_buf)) 188 return; 189 190 diag(1, "Internal buffer overflow - " 191 "Move big comment from right after if, while, or whatever"); 192 fflush(output); 193 exit(1); 194 } 195 196 void 197 inp_comment_init_newline(void) 198 { 199 if (inbuf.save_com_e != NULL) 200 return; 201 202 inbuf.save_com_s = inbuf.save_com_buf; 203 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */ 204 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */ 205 inbuf.save_com_e = &inbuf.save_com_s[2]; 206 debug_inp(__func__); 207 } 208 209 void 210 inp_comment_init_comment(void) 211 { 212 if (inbuf.save_com_e != NULL) 213 return; 214 215 /* 216 * Copy everything from the start of the line, because process_comment() 217 * will use that to calculate the original indentation of a boxed comment. 218 */ 219 /* 220 * TODO: Don't store anything in the memory range [input.inp.buf, 221 * input.inp.s), as that data can easily get lost. 222 */ 223 /* 224 * FIXME: The '4' below is completely wrong. For example, in the snippet 225 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there is 226 * an additional line break before the ')', memcpy tries to copy 227 * (size_t)-1 bytes. 228 * 229 * The original author of this magic number doesn't remember its purpose 230 * anymore, so there is no point in keeping it. The existing tests must 231 * still pass though. 232 */ 233 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4); 234 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4; 235 assert(line_len < array_length(inbuf.save_com_buf)); 236 237 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len); 238 inbuf.save_com_s = inbuf.save_com_buf + line_len; 239 240 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */ 241 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */ 242 inbuf.save_com_e = &inbuf.save_com_s[2]; 243 244 debug_vis_range("search_stmt_comment: before save_com is \"", 245 inbuf.save_com_buf, inbuf.save_com_s, "\"\n"); 246 debug_vis_range("search_stmt_comment: save_com is \"", 247 inbuf.save_com_s, inbuf.save_com_e, "\"\n"); 248 } 249 250 void 251 inp_comment_init_preproc(void) 252 { 253 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we 254 * must set up the buffer */ 255 /* 256 * XXX: No space is reserved for a potential '{' here, unlike in 257 * inp_comment_init_comment. 258 */ 259 inbuf.save_com_s = inbuf.save_com_buf; 260 inbuf.save_com_e = inbuf.save_com_s; 261 } else { 262 inp_comment_add_char('\n'); /* add newline between comments */ 263 inp_comment_add_char(' '); 264 --line_no; 265 } 266 } 267 268 void 269 inp_comment_add_char(char ch) 270 { 271 inp_comment_check_size(1); 272 *inbuf.save_com_e++ = ch; 273 } 274 275 void 276 inp_comment_add_range(const char *s, const char *e) 277 { 278 size_t len = (size_t)(e - s); 279 inp_comment_check_size(len); 280 memcpy(inbuf.save_com_e, s, len); 281 inbuf.save_com_e += len; 282 } 283 284 bool 285 inp_comment_complete_block(void) 286 { 287 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/'; 288 } 289 290 bool 291 inp_comment_seen(void) 292 { 293 return inbuf.save_com_e != NULL; 294 } 295 296 void 297 inp_comment_rtrim_blank(void) 298 { 299 while (inbuf.save_com_e > inbuf.save_com_s && 300 ch_isblank(inbuf.save_com_e[-1])) 301 inbuf.save_com_e--; 302 } 303 304 void 305 inp_comment_rtrim_newline(void) 306 { 307 while (inbuf.save_com_e > inbuf.save_com_s && 308 inbuf.save_com_e[-1] == '\n') 309 inbuf.save_com_e--; 310 } 311 312 /* 313 * Switch the input to come from save_com, replaying the copied tokens while 314 * looking for the next '{'. 315 */ 316 void 317 inp_from_comment(void) 318 { 319 debug_inp("before inp_from_comment"); 320 inbuf.saved_inp_s = inbuf.inp.s; 321 inbuf.saved_inp_e = inbuf.inp.e; 322 323 inbuf.inp.s = inbuf.save_com_s; 324 inbuf.inp.e = inbuf.save_com_e; 325 inbuf.save_com_s = NULL; 326 inbuf.save_com_e = NULL; 327 debug_inp("after inp_from_comment"); 328 } 329 330 /* 331 * After having read from save_com, continue with the rest of the input line 332 * before reading the next line from the input file. 333 */ 334 static bool 335 inp_from_file(void) 336 { 337 if (inbuf.saved_inp_s == NULL) 338 return false; 339 340 inbuf.inp.s = inbuf.saved_inp_s; 341 inbuf.inp.e = inbuf.saved_inp_e; 342 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL; 343 debug_println("switched inp.s back to saved_inp_s"); 344 return inbuf.inp.s < inbuf.inp.e; 345 } 346 347 void 348 inp_comment_insert_lbrace(void) 349 { 350 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */ 351 inbuf.save_com_s[0] = '{'; 352 } 353 354 static void 355 inp_add(char ch) 356 { 357 if (inbuf.inp.e >= inbuf.inp.l) { 358 size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10; 359 size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf); 360 inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size); 361 inbuf.inp.s = inbuf.inp.buf; 362 inbuf.inp.e = inbuf.inp.buf + offset; 363 inbuf.inp.l = inbuf.inp.buf + new_size - 2; 364 } 365 *inbuf.inp.e++ = ch; 366 } 367 368 static void 369 inp_read_next_line(FILE *f) 370 { 371 inbuf.inp.s = inbuf.inp.buf; 372 inbuf.inp.e = inbuf.inp.buf; 373 374 for (;;) { 375 int ch = getc(f); 376 if (ch == EOF) { 377 if (!inhibit_formatting) { 378 inp_add(' '); 379 inp_add('\n'); 380 } 381 had_eof = true; 382 break; 383 } 384 385 if (ch != '\0') 386 inp_add((char)ch); 387 if (ch == '\n') 388 break; 389 } 390 } 391 392 static void 393 output_char(char ch) 394 { 395 fputc(ch, output); 396 debug_vis_range("output_char '", &ch, &ch + 1, "'\n"); 397 } 398 399 static void 400 output_range(const char *s, const char *e) 401 { 402 fwrite(s, 1, (size_t)(e - s), output); 403 debug_vis_range("output_range \"", s, e, "\"\n"); 404 } 405 406 static int 407 output_indent(int old_ind, int new_ind) 408 { 409 int ind = old_ind; 410 411 if (opt.use_tabs) { 412 int tabsize = opt.tabsize; 413 int n = new_ind / tabsize - ind / tabsize; 414 if (n > 0) 415 ind -= ind % tabsize; 416 for (int i = 0; i < n; i++) { 417 fputc('\t', output); 418 ind += tabsize; 419 } 420 } 421 422 for (; ind < new_ind; ind++) 423 fputc(' ', output); 424 425 debug_println("output_indent %d", ind); 426 return ind; 427 } 428 429 static int 430 output_line_label(void) 431 { 432 int ind; 433 434 while (lab.e > lab.s && ch_isblank(lab.e[-1])) 435 lab.e--; 436 *lab.e = '\0'; 437 438 ind = output_indent(0, compute_label_indent()); 439 output_range(lab.s, lab.e); 440 ind = ind_add(ind, lab.s, lab.e); 441 442 ps.is_case_label = false; 443 return ind; 444 } 445 446 static int 447 output_line_code(int ind) 448 { 449 450 int target_ind = compute_code_indent(); 451 for (int i = 0; i < ps.nparen; i++) { 452 if (ps.paren[i].indent >= 0) { 453 int paren_ind = ps.paren[i].indent; 454 ps.paren[i].indent = (short)(-1 - (paren_ind + target_ind)); 455 debug_println( 456 "setting paren_indents[%d] from %d to %d for column %d", 457 i, paren_ind, ps.paren[i].indent, target_ind + 1); 458 } 459 } 460 461 ind = output_indent(ind, target_ind); 462 output_range(code.s, code.e); 463 return ind_add(ind, code.s, code.e); 464 } 465 466 static void 467 output_line_comment(int ind) 468 { 469 int target_ind = ps.com_ind; 470 const char *p = com.s; 471 472 target_ind += ps.comment_delta; 473 474 /* consider original indentation in case this is a box comment */ 475 for (; *p == '\t'; p++) 476 target_ind += opt.tabsize; 477 478 for (; target_ind < 0; p++) { 479 if (*p == ' ') 480 target_ind++; 481 else if (*p == '\t') 482 target_ind = next_tab(target_ind); 483 else { 484 target_ind = 0; 485 break; 486 } 487 } 488 489 /* if comment can't fit on this line, put it on the next line */ 490 if (ind > target_ind) { 491 output_char('\n'); 492 ind = 0; 493 ps.stats.lines++; 494 } 495 496 while (com.e > p && ch_isspace(com.e[-1])) 497 com.e--; 498 499 (void)output_indent(ind, target_ind); 500 output_range(p, com.e); 501 502 ps.comment_delta = ps.n_comment_delta; 503 ps.stats.comment_lines++; 504 } 505 506 /* 507 * Write a line of formatted source to the output file. The line consists of 508 * the label, the code and the comment. 509 */ 510 static void 511 output_complete_line(char line_terminator) 512 { 513 static bool first_line = true; 514 515 ps.is_function_definition = false; 516 517 if (code.s == code.e && lab.s == lab.e && com.s == com.e) { 518 if (out.suppress_blanklines) 519 out.suppress_blanklines = false; 520 else 521 out.blank_lines_to_output++; 522 523 } else if (!inhibit_formatting) { 524 out.suppress_blanklines = false; 525 if (out.blank_line_before && !first_line) { 526 if (opt.swallow_optional_blanklines) { 527 if (out.blank_lines_to_output == 1) 528 out.blank_lines_to_output = 0; 529 } else { 530 if (out.blank_lines_to_output == 0) 531 out.blank_lines_to_output = 1; 532 } 533 } 534 535 for (; out.blank_lines_to_output > 0; out.blank_lines_to_output--) 536 output_char('\n'); 537 538 if (ps.ind_level == 0) 539 ps.in_stmt_cont = false; /* this is a class A kludge */ 540 541 if (lab.e != lab.s || code.e != code.s) 542 ps.stats.code_lines++; 543 544 int ind = 0; 545 if (lab.e != lab.s) 546 ind = output_line_label(); 547 if (code.e != code.s) 548 ind = output_line_code(ind); 549 if (com.e != com.s) 550 output_line_comment(ind); 551 552 output_char(line_terminator); 553 ps.stats.lines++; 554 555 /* TODO: rename to blank_line_after_decl */ 556 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) { 557 out.blank_line_before = true; 558 ps.just_saw_decl = 0; 559 } else 560 out.blank_line_before = out.blank_line_after; 561 out.blank_line_after = false; 562 } 563 564 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */ 565 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl; 566 ps.decl_indent_done = false; 567 568 *(lab.e = lab.s) = '\0'; /* reset buffers */ 569 *(code.e = code.s) = '\0'; 570 *(com.e = com.s = com.buf + 1) = '\0'; 571 572 ps.ind_level = ps.ind_level_follow; 573 ps.line_start_nparen = ps.nparen; 574 575 if (ps.nparen > 0) { 576 /* TODO: explain what negative indentation means */ 577 paren_indent = -1 - ps.paren[ps.nparen - 1].indent; 578 debug_println("paren_indent is now %d", paren_indent); 579 } 580 581 first_line = false; 582 } 583 584 void 585 output_line(void) 586 { 587 output_complete_line('\n'); 588 } 589 590 void 591 output_line_ff(void) 592 { 593 output_complete_line('\f'); 594 } 595 596 static int 597 compute_code_indent_lineup(int base_ind) 598 { 599 int ti = paren_indent; 600 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length; 601 if (overflow < 0) 602 return ti; 603 604 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) { 605 ti -= overflow + 2; 606 if (ti > base_ind) 607 return ti; 608 return base_ind; 609 } 610 611 return ti; 612 } 613 614 int 615 compute_code_indent(void) 616 { 617 int base_ind = ps.ind_level * opt.indent_size; 618 619 if (ps.line_start_nparen == 0) { 620 if (ps.in_stmt_cont && ps.in_enum != in_enum_brace) 621 return base_ind + opt.continuation_indent; 622 return base_ind; 623 } 624 625 if (opt.lineup_to_parens) { 626 if (opt.lineup_to_parens_always) 627 return paren_indent; 628 return compute_code_indent_lineup(base_ind); 629 } 630 631 if (2 * opt.continuation_indent == opt.indent_size) 632 return base_ind + opt.continuation_indent; 633 else 634 return base_ind + opt.continuation_indent * ps.line_start_nparen; 635 } 636 637 int 638 compute_label_indent(void) 639 { 640 if (ps.is_case_label) 641 return (int)(case_ind * (float)opt.indent_size); 642 if (lab.s[0] == '#') 643 return 0; 644 return opt.indent_size * (ps.ind_level - 2); 645 } 646 647 static void 648 skip_blank(const char **pp) 649 { 650 while (ch_isblank(**pp)) 651 (*pp)++; 652 } 653 654 static bool 655 skip_string(const char **pp, const char *s) 656 { 657 size_t len = strlen(s); 658 if (strncmp(*pp, s, len) == 0) { 659 *pp += len; 660 return true; 661 } 662 return false; 663 } 664 665 static void 666 parse_indent_comment(void) 667 { 668 bool on; 669 670 const char *p = inbuf.inp.buf; 671 672 skip_blank(&p); 673 if (!skip_string(&p, "/*")) 674 return; 675 skip_blank(&p); 676 if (!skip_string(&p, "INDENT")) 677 return; 678 679 skip_blank(&p); 680 if (*p == '*' || skip_string(&p, "ON")) 681 on = true; 682 else if (skip_string(&p, "OFF")) 683 on = false; 684 else 685 return; 686 687 skip_blank(&p); 688 if (!skip_string(&p, "*/\n")) 689 return; 690 691 if (com.s != com.e || lab.s != lab.e || code.s != code.e) 692 output_line(); 693 694 inhibit_formatting = !on; 695 if (on) { 696 /* 697 * XXX: Does this make sense? Is the handling of blank lines above 698 * INDENT OFF comments essentially the same? 699 */ 700 out.blank_lines_to_output = 0; 701 out.blank_line_after = false; 702 out.blank_line_before = false; 703 out.suppress_blanklines = true; 704 } 705 } 706 707 void 708 inp_read_line(void) 709 { 710 if (inp_from_file()) 711 return; 712 713 inp_read_next_line(input); 714 715 parse_indent_comment(); 716 717 if (inhibit_formatting) 718 output_range(inbuf.inp.s, inbuf.inp.e); 719 } 720