xref: /netbsd-src/usr.bin/indent/io.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: io.c,v 1.148 2022/04/23 06:43:22 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if 0
41 static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
42 #endif
43 
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.148 2022/04/23 06:43:22 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50 
51 #include <assert.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 
56 #include "indent.h"
57 
58 /*
59  * There are 3 modes for reading the input.
60  *
61  * default: In this mode, the input comes from the input file. The buffer
62  * 'inp' contains the current line, terminated with '\n'. The current read
63  * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
64  * pointers are null.
65  *
66  * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
67  * from 'inp', but instead of processing it, it is copied to 'save_com'. The
68  * goal of this mode is to move the comments after the '{', that is to
69  * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
70  * token cannot be part of this transformation, switch to copy-out.
71  *
72  * copy-out: In this mode, the input comes from 'save_com', which contains the
73  * tokens to be placed after the '{'. The input still comes from the range
74  * [inp.s, inp.e), but these two members have been overwritten with pointers
75  * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
76  * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
77  * all tokens from save_com, switch to default mode again.
78  */
79 static struct {
80     struct buffer inp;		/* one line of input, ready to be split into
81 				 * tokens; occasionally 's' and 'e' switch
82 				 * to save_com_buf */
83     char save_com_buf[5000];	/* input text is saved here when looking for
84 				 * the brace after an if, while, etc */
85     char *save_com_s;		/* start of the comment in save_com_buf, or
86 				 * null */
87     char *save_com_e;		/* end of the comment in save_com_buf, or
88 				 * null */
89 
90     char *saved_inp_s;		/* saved value of inp.s when taking input from
91 				 * save_com, or null */
92     char *saved_inp_e;		/* saved value of inp.e, or null */
93 } inbuf;
94 
95 static int paren_indent;
96 
97 
98 void
99 inp_init(void)
100 {
101     inbuf.inp.buf = xmalloc(10);
102     inbuf.inp.l = inbuf.inp.buf + 8;
103     inbuf.inp.s = inbuf.inp.buf;
104     inbuf.inp.e = inbuf.inp.buf;
105 }
106 
107 const char *
108 inp_p(void)
109 {
110     assert(inbuf.inp.s < inbuf.inp.e);
111     return inbuf.inp.s;
112 }
113 
114 const char *
115 inp_line_start(void)
116 {
117     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
118 }
119 
120 const char *
121 inp_line_end(void)
122 {
123     return inbuf.inp.e;
124 }
125 
126 char
127 inp_peek(void)
128 {
129     assert(inbuf.inp.s < inbuf.inp.e);
130     return *inbuf.inp.s;
131 }
132 
133 char
134 inp_lookahead(size_t i)
135 {
136     assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s));
137     return inbuf.inp.s[i];
138 }
139 
140 void
141 inp_skip(void)
142 {
143     assert(inbuf.inp.s < inbuf.inp.e);
144     inbuf.inp.s++;
145     if (inbuf.inp.s >= inbuf.inp.e)
146 	inp_read_line();
147 }
148 
/* Consume the next character and return it. */
char
inp_next(void)
{
    const char consumed = inp_peek();

    inp_skip();			/* may fetch more input */
    return consumed;
}
156 
157 #ifdef debug
/*
 * Print the named range [s, e) in visible form. Ranges with a null boundary
 * are silently skipped.
 */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
    if (s == NULL || e == NULL)
	return;

    debug_printf("    %-12s ", name);
    debug_vis_range("\"", s, e, "\"\n");
}
166 
167 void
168 debug_inp(const char *prefix)
169 {
170     assert(inp_line_start() <= inbuf.inp.s);
171     assert(inbuf.inp.s <= inbuf.inp.e);
172 
173     debug_println("%s %s:", __func__, prefix);
174     if (inbuf.saved_inp_s == NULL)
175 	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
176     debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e);	/* never null */
177     debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
178     debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
179     debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
180 }
181 #endif
182 
183 static void
184 inp_comment_check_size(size_t n)
185 {
186     if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
187 	array_length(inbuf.save_com_buf))
188 	return;
189 
190     diag(1, "Internal buffer overflow - "
191 	"Move big comment from right after if, while, or whatever");
192     fflush(output);
193     exit(1);
194 }
195 
196 void
197 inp_comment_init_newline(void)
198 {
199     if (inbuf.save_com_e != NULL)
200 	return;
201 
202     inbuf.save_com_s = inbuf.save_com_buf;
203     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
204     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
205     inbuf.save_com_e = &inbuf.save_com_s[2];
206     debug_inp(__func__);
207 }
208 
209 void
210 inp_comment_init_comment(void)
211 {
212     if (inbuf.save_com_e != NULL)
213 	return;
214 
215     /*
216      * Copy everything from the start of the line, because process_comment()
217      * will use that to calculate the original indentation of a boxed comment.
218      */
219     /*
220      * TODO: Don't store anything in the memory range [input.inp.buf,
221      * input.inp.s), as that data can easily get lost.
222      */
223     /*
224      * FIXME: The '4' below is completely wrong. For example, in the snippet
225      * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there is
226      * an additional line break before the ')', memcpy tries to copy
227      * (size_t)-1 bytes.
228      *
229      * The original author of this magic number doesn't remember its purpose
230      * anymore, so there is no point in keeping it. The existing tests must
231      * still pass though.
232      */
233     assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
234     size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
235     assert(line_len < array_length(inbuf.save_com_buf));
236 
237     memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
238     inbuf.save_com_s = inbuf.save_com_buf + line_len;
239 
240     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
241     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
242     inbuf.save_com_e = &inbuf.save_com_s[2];
243 
244     debug_vis_range("search_stmt_comment: before save_com is \"",
245 	inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
246     debug_vis_range("search_stmt_comment: save_com is \"",
247 	inbuf.save_com_s, inbuf.save_com_e, "\"\n");
248 }
249 
250 void
251 inp_comment_init_preproc(void)
252 {
253     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
254 					 * must set up the buffer */
255 	/*
256 	 * XXX: No space is reserved for a potential '{' here, unlike in
257 	 * inp_comment_init_comment.
258 	 */
259 	inbuf.save_com_s = inbuf.save_com_buf;
260 	inbuf.save_com_e = inbuf.save_com_s;
261     } else {
262 	inp_comment_add_char('\n');	/* add newline between comments */
263 	inp_comment_add_char(' ');
264 	--line_no;
265     }
266 }
267 
268 void
269 inp_comment_add_char(char ch)
270 {
271     inp_comment_check_size(1);
272     *inbuf.save_com_e++ = ch;
273 }
274 
275 void
276 inp_comment_add_range(const char *s, const char *e)
277 {
278     size_t len = (size_t)(e - s);
279     inp_comment_check_size(len);
280     memcpy(inbuf.save_com_e, s, len);
281     inbuf.save_com_e += len;
282 }
283 
284 bool
285 inp_comment_complete_block(void)
286 {
287     return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
288 }
289 
290 bool
291 inp_comment_seen(void)
292 {
293     return inbuf.save_com_e != NULL;
294 }
295 
296 void
297 inp_comment_rtrim_blank(void)
298 {
299     while (inbuf.save_com_e > inbuf.save_com_s &&
300 	    ch_isblank(inbuf.save_com_e[-1]))
301 	inbuf.save_com_e--;
302 }
303 
304 void
305 inp_comment_rtrim_newline(void)
306 {
307     while (inbuf.save_com_e > inbuf.save_com_s &&
308 	    inbuf.save_com_e[-1] == '\n')
309 	inbuf.save_com_e--;
310 }
311 
312 /*
313  * Switch the input to come from save_com, replaying the copied tokens while
314  * looking for the next '{'.
315  */
316 void
317 inp_from_comment(void)
318 {
319     debug_inp("before inp_from_comment");
320     inbuf.saved_inp_s = inbuf.inp.s;
321     inbuf.saved_inp_e = inbuf.inp.e;
322 
323     inbuf.inp.s = inbuf.save_com_s;
324     inbuf.inp.e = inbuf.save_com_e;
325     inbuf.save_com_s = NULL;
326     inbuf.save_com_e = NULL;
327     debug_inp("after inp_from_comment");
328 }
329 
330 /*
331  * After having read from save_com, continue with the rest of the input line
332  * before reading the next line from the input file.
333  */
334 static bool
335 inp_from_file(void)
336 {
337     if (inbuf.saved_inp_s == NULL)
338 	return false;
339 
340     inbuf.inp.s = inbuf.saved_inp_s;
341     inbuf.inp.e = inbuf.saved_inp_e;
342     inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
343     debug_println("switched inp.s back to saved_inp_s");
344     return inbuf.inp.s < inbuf.inp.e;
345 }
346 
347 void
348 inp_comment_insert_lbrace(void)
349 {
350     assert(inbuf.save_com_s[0] == ' ');	/* see inp_comment_init_newline */
351     inbuf.save_com_s[0] = '{';
352 }
353 
354 static void
355 inp_add(char ch)
356 {
357     if (inbuf.inp.e >= inbuf.inp.l) {
358 	size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
359 	size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
360 	inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
361 	inbuf.inp.s = inbuf.inp.buf;
362 	inbuf.inp.e = inbuf.inp.buf + offset;
363 	inbuf.inp.l = inbuf.inp.buf + new_size - 2;
364     }
365     *inbuf.inp.e++ = ch;
366 }
367 
368 static void
369 inp_read_next_line(FILE *f)
370 {
371     inbuf.inp.s = inbuf.inp.buf;
372     inbuf.inp.e = inbuf.inp.buf;
373 
374     for (;;) {
375 	int ch = getc(f);
376 	if (ch == EOF) {
377 	    if (!inhibit_formatting) {
378 		inp_add(' ');
379 		inp_add('\n');
380 	    }
381 	    had_eof = true;
382 	    break;
383 	}
384 
385 	if (ch != '\0')
386 	    inp_add((char)ch);
387 	if (ch == '\n')
388 	    break;
389     }
390 }
391 
392 static void
393 output_char(char ch)
394 {
395     fputc(ch, output);
396     debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
397 }
398 
399 static void
400 output_range(const char *s, const char *e)
401 {
402     fwrite(s, 1, (size_t)(e - s), output);
403     debug_vis_range("output_range \"", s, e, "\"\n");
404 }
405 
406 static int
407 output_indent(int old_ind, int new_ind)
408 {
409     int ind = old_ind;
410 
411     if (opt.use_tabs) {
412 	int tabsize = opt.tabsize;
413 	int n = new_ind / tabsize - ind / tabsize;
414 	if (n > 0)
415 	    ind -= ind % tabsize;
416 	for (int i = 0; i < n; i++) {
417 	    fputc('\t', output);
418 	    ind += tabsize;
419 	}
420     }
421 
422     for (; ind < new_ind; ind++)
423 	fputc(' ', output);
424 
425     debug_println("output_indent %d", ind);
426     return ind;
427 }
428 
429 static int
430 output_line_label(void)
431 {
432     int ind;
433 
434     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
435 	lab.e--;
436     *lab.e = '\0';
437 
438     ind = output_indent(0, compute_label_indent());
439     output_range(lab.s, lab.e);
440     ind = ind_add(ind, lab.s, lab.e);
441 
442     ps.is_case_label = false;
443     return ind;
444 }
445 
446 static int
447 output_line_code(int ind)
448 {
449 
450     int target_ind = compute_code_indent();
451     for (int i = 0; i < ps.nparen; i++) {
452 	if (ps.paren[i].indent >= 0) {
453 	    int paren_ind = ps.paren[i].indent;
454 	    ps.paren[i].indent = (short)(-1 - (paren_ind + target_ind));
455 	    debug_println(
456 		"setting paren_indents[%d] from %d to %d for column %d",
457 		i, paren_ind, ps.paren[i].indent, target_ind + 1);
458 	}
459     }
460 
461     ind = output_indent(ind, target_ind);
462     output_range(code.s, code.e);
463     return ind_add(ind, code.s, code.e);
464 }
465 
466 static void
467 output_line_comment(int ind)
468 {
469     int target_ind = ps.com_ind;
470     const char *p = com.s;
471 
472     target_ind += ps.comment_delta;
473 
474     /* consider original indentation in case this is a box comment */
475     for (; *p == '\t'; p++)
476 	target_ind += opt.tabsize;
477 
478     for (; target_ind < 0; p++) {
479 	if (*p == ' ')
480 	    target_ind++;
481 	else if (*p == '\t')
482 	    target_ind = next_tab(target_ind);
483 	else {
484 	    target_ind = 0;
485 	    break;
486 	}
487     }
488 
489     /* if comment can't fit on this line, put it on the next line */
490     if (ind > target_ind) {
491 	output_char('\n');
492 	ind = 0;
493 	ps.stats.lines++;
494     }
495 
496     while (com.e > p && ch_isspace(com.e[-1]))
497 	com.e--;
498 
499     (void)output_indent(ind, target_ind);
500     output_range(p, com.e);
501 
502     ps.comment_delta = ps.n_comment_delta;
503     ps.stats.comment_lines++;
504 }
505 
506 /*
507  * Write a line of formatted source to the output file. The line consists of
508  * the label, the code and the comment.
509  */
510 static void
511 output_complete_line(char line_terminator)
512 {
513     static bool first_line = true;
514 
515     ps.is_function_definition = false;
516 
517     if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
518 	if (out.suppress_blanklines)
519 	    out.suppress_blanklines = false;
520 	else
521 	    out.blank_lines_to_output++;
522 
523     } else if (!inhibit_formatting) {
524 	out.suppress_blanklines = false;
525 	if (out.blank_line_before && !first_line) {
526 	    if (opt.swallow_optional_blanklines) {
527 		if (out.blank_lines_to_output == 1)
528 		    out.blank_lines_to_output = 0;
529 	    } else {
530 		if (out.blank_lines_to_output == 0)
531 		    out.blank_lines_to_output = 1;
532 	    }
533 	}
534 
535 	for (; out.blank_lines_to_output > 0; out.blank_lines_to_output--)
536 	    output_char('\n');
537 
538 	if (ps.ind_level == 0)
539 	    ps.in_stmt_cont = false;	/* this is a class A kludge */
540 
541 	if (lab.e != lab.s || code.e != code.s)
542 	    ps.stats.code_lines++;
543 
544 	int ind = 0;
545 	if (lab.e != lab.s)
546 	    ind = output_line_label();
547 	if (code.e != code.s)
548 	    ind = output_line_code(ind);
549 	if (com.e != com.s)
550 	    output_line_comment(ind);
551 
552 	output_char(line_terminator);
553 	ps.stats.lines++;
554 
555 	/* TODO: rename to blank_line_after_decl */
556 	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
557 	    out.blank_line_before = true;
558 	    ps.just_saw_decl = 0;
559 	} else
560 	    out.blank_line_before = out.blank_line_after;
561 	out.blank_line_after = false;
562     }
563 
564     ps.decl_on_line = ps.in_decl;	/* for proper comment indentation */
565     ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
566     ps.decl_indent_done = false;
567 
568     *(lab.e = lab.s) = '\0';	/* reset buffers */
569     *(code.e = code.s) = '\0';
570     *(com.e = com.s = com.buf + 1) = '\0';
571 
572     ps.ind_level = ps.ind_level_follow;
573     ps.line_start_nparen = ps.nparen;
574 
575     if (ps.nparen > 0) {
576 	/* TODO: explain what negative indentation means */
577 	paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
578 	debug_println("paren_indent is now %d", paren_indent);
579     }
580 
581     first_line = false;
582 }
583 
/* Write the current line to the output, terminated by a newline. */
void
output_line(void)
{
    const char terminator = '\n';

    output_complete_line(terminator);
}
589 
/* Write the current line to the output, terminated by a form feed. */
void
output_line_ff(void)
{
    const char terminator = '\f';

    output_complete_line(terminator);
}
595 
596 static int
597 compute_code_indent_lineup(int base_ind)
598 {
599     int ti = paren_indent;
600     int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
601     if (overflow < 0)
602 	return ti;
603 
604     if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
605 	ti -= overflow + 2;
606 	if (ti > base_ind)
607 	    return ti;
608 	return base_ind;
609     }
610 
611     return ti;
612 }
613 
614 int
615 compute_code_indent(void)
616 {
617     int base_ind = ps.ind_level * opt.indent_size;
618 
619     if (ps.line_start_nparen == 0) {
620 	if (ps.in_stmt_cont && ps.in_enum != in_enum_brace)
621 	    return base_ind + opt.continuation_indent;
622 	return base_ind;
623     }
624 
625     if (opt.lineup_to_parens) {
626 	if (opt.lineup_to_parens_always)
627 	    return paren_indent;
628 	return compute_code_indent_lineup(base_ind);
629     }
630 
631     if (2 * opt.continuation_indent == opt.indent_size)
632 	return base_ind + opt.continuation_indent;
633     else
634 	return base_ind + opt.continuation_indent * ps.line_start_nparen;
635 }
636 
637 int
638 compute_label_indent(void)
639 {
640     if (ps.is_case_label)
641 	return (int)(case_ind * (float)opt.indent_size);
642     if (lab.s[0] == '#')
643 	return 0;
644     return opt.indent_size * (ps.ind_level - 2);
645 }
646 
/*
 * Advance *pp beyond any blanks, as determined by ch_isblank. The text must
 * contain a non-blank character that stops the scan.
 */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
653 
/*
 * If the text at *pp starts with the string 's', advance *pp beyond it and
 * return true. Otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t len = strlen(s);

    if (strncmp(*pp, s, len) != 0)
	return false;

    *pp += len;
    return true;
}
664 
665 static void
666 parse_indent_comment(void)
667 {
668     bool on;
669 
670     const char *p = inbuf.inp.buf;
671 
672     skip_blank(&p);
673     if (!skip_string(&p, "/*"))
674 	return;
675     skip_blank(&p);
676     if (!skip_string(&p, "INDENT"))
677 	return;
678 
679     skip_blank(&p);
680     if (*p == '*' || skip_string(&p, "ON"))
681 	on = true;
682     else if (skip_string(&p, "OFF"))
683 	on = false;
684     else
685 	return;
686 
687     skip_blank(&p);
688     if (!skip_string(&p, "*/\n"))
689 	return;
690 
691     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
692 	output_line();
693 
694     inhibit_formatting = !on;
695     if (on) {
696 	/*
697 	 * XXX: Does this make sense? Is the handling of blank lines above
698 	 * INDENT OFF comments essentially the same?
699 	 */
700 	out.blank_lines_to_output = 0;
701 	out.blank_line_after = false;
702 	out.blank_line_before = false;
703 	out.suppress_blanklines = true;
704     }
705 }
706 
707 void
708 inp_read_line(void)
709 {
710     if (inp_from_file())
711 	return;
712 
713     inp_read_next_line(input);
714 
715     parse_indent_comment();
716 
717     if (inhibit_formatting)
718 	output_range(inbuf.inp.s, inbuf.inp.e);
719 }
720