xref: /netbsd-src/usr.bin/indent/io.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if 0
41 static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
42 #endif
43 
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.143 2021/11/28 11:49:10 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50 
51 #include <assert.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 
56 #include "indent.h"
57 
58 /*
59  * There are 3 modes for reading the input.
60  *
61  * default: In this mode, the input comes from the input file. The buffer
62  * 'inp' contains the current line, terminated with '\n'. The current read
63  * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
64  * pointers are null.
65  *
66  * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
67  * from 'inp', but instead of processing it, it is copied to 'save_com'. The
68  * goal of this mode is to move the comments after the '{', that is to
69  * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
70  * token cannot be part of this transformation, switch to copy-out.
71  *
72  * copy-out: In this mode, the input comes from 'save_com', which contains the
73  * tokens to be placed after the '{'. The input still comes from the range
74  * [inp.s, inp.e), but these two members have been overwritten with pointers
75  * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
76  * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
77  * all tokens from save_com, switch to default mode again.
78  */
/*
 * The complete state of the input reader. It is private to this file; all
 * other code goes through the inp_* functions below.
 */
static struct {
    struct buffer inp;		/* one line of input, ready to be split into
				 * tokens; 'buf' is the heap allocation that
				 * grows in inp_add, 's' is the read position
				 * and 'e' the end of the available text.
				 * Between inp_from_comment and inp_from_file,
				 * 's' and 'e' point into save_com_buf
				 * instead of 'buf'. */
    char save_com_buf[5000];	/* input text is saved here when looking for
				 * the brace after an if, while, etc; the
				 * fixed size is enforced by
				 * inp_comment_check_size */
    char *save_com_s;		/* start of the comment in save_com_buf, or
				 * null; set by the inp_comment_init_*
				 * functions, reset by inp_from_comment */
    char *save_com_e;		/* end of the comment in save_com_buf, or
				 * null; being non-null means that text is
				 * currently being collected */

    char *saved_inp_s;		/* saved value of inp.s when taking input from
				 * save_com, or null; being non-null means
				 * that the input currently comes from
				 * save_com_buf */
    char *saved_inp_e;		/* saved value of inp.e, or null */
} inbuf;
94 
/*
 * The indentation for continuation lines inside parentheses. It is updated
 * at the end of output_complete_line from the innermost entry of
 * ps.paren_indents and is read by compute_code_indent and
 * compute_code_indent_lineup.
 */
static int paren_indent;
/*
 * Set by parse_indent_comment when formatting is switched on again. While
 * set, output_complete_line does not count the next empty line as a blank
 * line to be output; it resets the flag instead.
 */
static bool suppress_blanklines;
97 
98 
99 void
100 inp_init(void)
101 {
102     inbuf.inp.buf = xmalloc(10);
103     inbuf.inp.l = inbuf.inp.buf + 8;
104     inbuf.inp.s = inbuf.inp.buf;
105     inbuf.inp.e = inbuf.inp.buf;
106 }
107 
108 const char *
109 inp_p(void)
110 {
111     assert(inbuf.inp.s < inbuf.inp.e);
112     return inbuf.inp.s;
113 }
114 
115 const char *
116 inp_line_start(void)
117 {
118     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
119 }
120 
121 const char *
122 inp_line_end(void)
123 {
124     return inbuf.inp.e;
125 }
126 
127 char
128 inp_peek(void)
129 {
130     assert(inbuf.inp.s < inbuf.inp.e);
131     return *inbuf.inp.s;
132 }
133 
134 char
135 inp_lookahead(size_t i)
136 {
137     assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s));
138     return inbuf.inp.s[i];
139 }
140 
141 void
142 inp_skip(void)
143 {
144     assert(inbuf.inp.s < inbuf.inp.e);
145     inbuf.inp.s++;
146     if (inbuf.inp.s >= inbuf.inp.e)
147 	inp_read_line();
148 }
149 
/* Consume the character at the current read position and return it. */
char
inp_next(void)
{
    const char current = inp_peek();

    inp_skip();
    return current;
}
157 
158 #ifdef debug
/*
 * Print the named range [s, e) in a visible form, as one line of debug
 * output. Print nothing if either end of the range is null.
 */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
    if (s == NULL || e == NULL)
	return;

    debug_printf("    %-12s ", name);
    debug_vis_range("\"", s, e, "\"\n");
}
167 
/*
 * Dump the state of the input reader to the debug log, introduced by a line
 * containing the given prefix. Ranges whose start or end is null are
 * skipped by debug_inp_buf.
 */
void
debug_inp(const char *prefix)
{
    /* the read position lies within the buffer that is currently in use */
    assert(inp_line_start() <= inbuf.inp.s);
    assert(inbuf.inp.s <= inbuf.inp.e);

    debug_println("%s %s:", __func__, prefix);
    /*
     * The already consumed part of the line is only shown in the default
     * mode; otherwise inp.s points into save_com_buf and is unrelated to
     * inp.buf.
     */
    if (inbuf.saved_inp_s == NULL)
	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
    debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e);	/* never null */
    debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
    debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
    debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
}
182 #endif
183 
184 static void
185 inp_comment_check_size(size_t n)
186 {
187     if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
188 	array_length(inbuf.save_com_buf))
189 	return;
190 
191     diag(1, "Internal buffer overflow - "
192 	"Move big comment from right after if, while, or whatever");
193     fflush(output);
194     exit(1);
195 }
196 
197 void
198 inp_comment_init_newline(void)
199 {
200     if (inbuf.save_com_e != NULL)
201 	return;
202 
203     inbuf.save_com_s = inbuf.save_com_buf;
204     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
205     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
206     inbuf.save_com_e = &inbuf.save_com_s[2];
207     debug_inp(__func__);
208 }
209 
210 void
211 inp_comment_init_comment(void)
212 {
213     if (inbuf.save_com_e != NULL)
214 	return;
215 
216     /*
217      * Copy everything from the start of the line, because process_comment()
218      * will use that to calculate the original indentation of a boxed comment.
219      */
220     /*
221      * TODO: Don't store anything in the memory range [input.inp.buf,
222      * input.inp.s), as that data can easily get lost.
223      */
224     /*
225      * FIXME: The '4' below is completely wrong. For example, in the snippet
226      * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there is
227      * an additional line break before the ')', memcpy tries to copy
228      * (size_t)-1 bytes.
229      *
230      * The original author of this magic number doesn't remember its purpose
231      * anymore, so there is no point in keeping it. The existing tests must
232      * still pass though.
233      */
234     assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
235     size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
236     assert(line_len < array_length(inbuf.save_com_buf));
237 
238     memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
239     inbuf.save_com_s = inbuf.save_com_buf + line_len;
240 
241     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
242     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
243     inbuf.save_com_e = &inbuf.save_com_s[2];
244 
245     debug_vis_range("search_stmt_comment: before save_com is \"",
246 	inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
247     debug_vis_range("search_stmt_comment: save_com is \"",
248 	inbuf.save_com_s, inbuf.save_com_e, "\"\n");
249 }
250 
251 void
252 inp_comment_init_preproc(void)
253 {
254     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
255 					 * must set up the buffer */
256 	/*
257 	 * XXX: No space is reserved for a potential '{' here, unlike in
258 	 * inp_comment_init_comment.
259 	 */
260 	inbuf.save_com_s = inbuf.save_com_buf;
261 	inbuf.save_com_e = inbuf.save_com_s;
262     } else {
263 	inp_comment_add_char('\n');	/* add newline between comments */
264 	inp_comment_add_char(' ');
265 	--line_no;
266     }
267 }
268 
269 void
270 inp_comment_add_char(char ch)
271 {
272     inp_comment_check_size(1);
273     *inbuf.save_com_e++ = ch;
274 }
275 
276 void
277 inp_comment_add_range(const char *s, const char *e)
278 {
279     size_t len = (size_t)(e - s);
280     inp_comment_check_size(len);
281     memcpy(inbuf.save_com_e, s, len);
282     inbuf.save_com_e += len;
283 }
284 
285 bool
286 inp_comment_complete_block(void)
287 {
288     return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
289 }
290 
291 bool
292 inp_comment_seen(void)
293 {
294     return inbuf.save_com_e != NULL;
295 }
296 
297 void
298 inp_comment_rtrim_blank(void)
299 {
300     while (inbuf.save_com_e > inbuf.save_com_s &&
301 	    ch_isblank(inbuf.save_com_e[-1]))
302 	inbuf.save_com_e--;
303 }
304 
305 void
306 inp_comment_rtrim_newline(void)
307 {
308     while (inbuf.save_com_e > inbuf.save_com_s &&
309 	    inbuf.save_com_e[-1] == '\n')
310 	inbuf.save_com_e--;
311 }
312 
313 /*
314  * Switch the input to come from save_com, replaying the copied tokens while
315  * looking for the next '{'.
316  */
317 void
318 inp_from_comment(void)
319 {
320     debug_inp("before inp_from_comment");
321     inbuf.saved_inp_s = inbuf.inp.s;
322     inbuf.saved_inp_e = inbuf.inp.e;
323 
324     inbuf.inp.s = inbuf.save_com_s;
325     inbuf.inp.e = inbuf.save_com_e;
326     inbuf.save_com_s = NULL;
327     inbuf.save_com_e = NULL;
328     debug_inp("after inp_from_comment");
329 }
330 
331 /*
332  * After having read from save_com, continue with the rest of the input line
333  * before reading the next line from the input file.
334  */
335 static bool
336 inp_from_file(void)
337 {
338     if (inbuf.saved_inp_s == NULL)
339 	return false;
340 
341     inbuf.inp.s = inbuf.saved_inp_s;
342     inbuf.inp.e = inbuf.saved_inp_e;
343     inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
344     debug_println("switched inp.s back to saved_inp_s");
345     return inbuf.inp.s < inbuf.inp.e;
346 }
347 
348 void
349 inp_comment_insert_lbrace(void)
350 {
351     assert(inbuf.save_com_s[0] == ' ');	/* see inp_comment_init_newline */
352     inbuf.save_com_s[0] = '{';
353 }
354 
355 static void
356 inp_add(char ch)
357 {
358     if (inbuf.inp.e >= inbuf.inp.l) {
359 	size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
360 	size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
361 	inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
362 	inbuf.inp.s = inbuf.inp.buf;
363 	inbuf.inp.e = inbuf.inp.buf + offset;
364 	inbuf.inp.l = inbuf.inp.buf + new_size - 2;
365     }
366     *inbuf.inp.e++ = ch;
367 }
368 
369 static void
370 inp_read_next_line(FILE *f)
371 {
372     inbuf.inp.s = inbuf.inp.buf;
373     inbuf.inp.e = inbuf.inp.buf;
374 
375     for (;;) {
376 	int ch = getc(f);
377 	if (ch == EOF) {
378 	    if (!inhibit_formatting) {
379 		inp_add(' ');
380 		inp_add('\n');
381 	    }
382 	    had_eof = true;
383 	    break;
384 	}
385 
386 	if (ch != '\0')
387 	    inp_add((char)ch);
388 	if (ch == '\n')
389 	    break;
390     }
391 }
392 
393 static void
394 output_char(char ch)
395 {
396     fputc(ch, output);
397     debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
398 }
399 
400 static void
401 output_range(const char *s, const char *e)
402 {
403     fwrite(s, 1, (size_t)(e - s), output);
404     debug_vis_range("output_range \"", s, e, "\"\n");
405 }
406 
407 static int
408 output_indent(int old_ind, int new_ind)
409 {
410     int ind = old_ind;
411 
412     if (opt.use_tabs) {
413 	int tabsize = opt.tabsize;
414 	int n = new_ind / tabsize - ind / tabsize;
415 	if (n > 0)
416 	    ind -= ind % tabsize;
417 	for (int i = 0; i < n; i++) {
418 	    fputc('\t', output);
419 	    ind += tabsize;
420 	}
421     }
422 
423     for (; ind < new_ind; ind++)
424 	fputc(' ', output);
425 
426     debug_println("output_indent %d", ind);
427     return ind;
428 }
429 
430 static int
431 output_line_label(void)
432 {
433     int ind;
434 
435     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
436 	lab.e--;
437     *lab.e = '\0';
438 
439     ind = output_indent(0, compute_label_indent());
440     output_range(lab.s, lab.e);
441     ind = ind_add(ind, lab.s, lab.e);
442 
443     ps.is_case_label = false;
444     return ind;
445 }
446 
447 static int
448 output_line_code(int ind)
449 {
450 
451     int target_ind = compute_code_indent();
452     for (int i = 0; i < ps.p_l_follow; i++) {
453 	if (ps.paren_indents[i] >= 0) {
454 	    int paren_ind = ps.paren_indents[i];
455 	    ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
456 	    debug_println(
457 		"setting paren_indents[%d] from %d to %d for column %d",
458 		i, paren_ind, ps.paren_indents[i], target_ind + 1);
459 	}
460     }
461 
462     ind = output_indent(ind, target_ind);
463     output_range(code.s, code.e);
464     return ind_add(ind, code.s, code.e);
465 }
466 
467 static void
468 output_line_comment(int ind)
469 {
470     int target_ind = ps.com_ind;
471     const char *p = com.s;
472 
473     target_ind += ps.comment_delta;
474 
475     /* consider original indentation in case this is a box comment */
476     for (; *p == '\t'; p++)
477 	target_ind += opt.tabsize;
478 
479     for (; target_ind < 0; p++) {
480 	if (*p == ' ')
481 	    target_ind++;
482 	else if (*p == '\t')
483 	    target_ind = next_tab(target_ind);
484 	else {
485 	    target_ind = 0;
486 	    break;
487 	}
488     }
489 
490     /* if comment can't fit on this line, put it on the next line */
491     if (ind > target_ind) {
492 	output_char('\n');
493 	ind = 0;
494 	ps.stats.lines++;
495     }
496 
497     while (com.e > p && ch_isspace(com.e[-1]))
498 	com.e--;
499 
500     (void)output_indent(ind, target_ind);
501     output_range(p, com.e);
502 
503     ps.comment_delta = ps.n_comment_delta;
504     ps.stats.comment_lines++;
505 }
506 
/*
 * Write a line of formatted source to the output file. The line consists of
 * the label, the code and the comment.
 *
 * An entirely empty line is not written; it is only counted in
 * blank_lines_to_output, to be written before the next non-empty line. While
 * formatting is inhibited, a non-empty line is not written here either (the
 * raw input is copied to the output by inp_read_line instead).
 *
 * In all cases, the label, code and comment buffers are reset, and the
 * per-line parser state is prepared for the next line.
 *
 * line_terminator is the character that is written after the line.
 */
static void
output_complete_line(char line_terminator)
{
    static bool first_line = true;

    ps.is_function_definition = false;

    if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
	/*
	 * The line is empty. It counts as a pending blank line, except for
	 * a single empty line after formatting was switched on again.
	 */
	if (suppress_blanklines)
	    suppress_blanklines = false;
	else
	    blank_lines_to_output++;

    } else if (!inhibit_formatting) {
	suppress_blanklines = false;
	if (blank_line_before && !first_line) {
	    /*
	     * Depending on the option, either drop a single pending blank
	     * line or make sure that there is at least one.
	     */
	    if (opt.swallow_optional_blanklines) {
		if (blank_lines_to_output == 1)
		    blank_lines_to_output = 0;
	    } else {
		if (blank_lines_to_output == 0)
		    blank_lines_to_output = 1;
	    }
	}

	/* write the blank lines that have accumulated before this line */
	for (; blank_lines_to_output > 0; blank_lines_to_output--)
	    output_char('\n');

	if (ps.ind_level == 0)
	    ps.in_stmt_cont = false;	/* this is a class A kludge */

	if (lab.e != lab.s || code.e != code.s)
	    ps.stats.code_lines++;

	/* each part starts at the indentation where the previous one ended */
	int ind = 0;
	if (lab.e != lab.s)
	    ind = output_line_label();
	if (code.e != code.s)
	    ind = output_line_code(ind);
	if (com.e != com.s)
	    output_line_comment(ind);

	output_char(line_terminator);
	ps.stats.lines++;

	/* decide whether a blank line is wanted before the next line */
	/* TODO: rename to blank_line_after_decl */
	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
	    blank_line_before = true;
	    ps.just_saw_decl = 0;
	} else
	    blank_line_before = blank_line_after;
	blank_line_after = false;
    }

    ps.decl_on_line = ps.in_decl;	/* for proper comment indentation */
    ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
    ps.decl_indent_done = false;

    *(lab.e = lab.s) = '\0';	/* reset buffers */
    *(code.e = code.s) = '\0';
    *(com.e = com.s = com.buf + 1) = '\0';

    /* the '_follow' values take effect for the next line */
    ps.ind_level = ps.ind_level_follow;
    ps.paren_level = ps.p_l_follow;

    if (ps.paren_level > 0) {
	/*
	 * This reverses the '-1 - x' transformation that output_line_code
	 * applies to the entries of ps.paren_indents.
	 */
	/* TODO: explain what negative indentation means */
	paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
	debug_println("paren_indent is now %d", paren_indent);
    }

    first_line = false;
}
584 
/* Write the current line to the output, terminated by a newline. */
void
output_line(void)
{
    const char terminator = '\n';

    output_complete_line(terminator);
}
590 
/* Write the current line to the output, terminated by a form feed. */
void
output_line_ff(void)
{
    const char terminator = '\f';

    output_complete_line(terminator);
}
596 
597 static int
598 compute_code_indent_lineup(int base_ind)
599 {
600     int ti = paren_indent;
601     int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
602     if (overflow < 0)
603 	return ti;
604 
605     if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
606 	ti -= overflow + 2;
607 	if (ti > base_ind)
608 	    return ti;
609 	return base_ind;
610     }
611 
612     return ti;
613 }
614 
615 int
616 compute_code_indent(void)
617 {
618     int base_ind = ps.ind_level * opt.indent_size;
619 
620     if (ps.paren_level == 0) {
621 	if (ps.in_stmt_cont)
622 	    return base_ind + opt.continuation_indent;
623 	return base_ind;
624     }
625 
626     if (opt.lineup_to_parens) {
627 	if (opt.lineup_to_parens_always)
628 	    return paren_indent;
629 	return compute_code_indent_lineup(base_ind);
630     }
631 
632     if (2 * opt.continuation_indent == opt.indent_size)
633 	return base_ind + opt.continuation_indent;
634     else
635 	return base_ind + opt.continuation_indent * ps.paren_level;
636 }
637 
638 int
639 compute_label_indent(void)
640 {
641     if (ps.is_case_label)
642 	return (int)(case_ind * (float)opt.indent_size);
643     if (lab.s[0] == '#')
644 	return 0;
645     return opt.indent_size * (ps.ind_level - 2);
646 }
647 
/*
 * Advance *pp past any blanks (as determined by ch_isblank). The text must
 * contain a non-blank character at or after *pp, as there is no other limit.
 */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
654 
/*
 * If the text at *pp starts with the string s, advance *pp past it and
 * return true. Otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t n = strlen(s);

    if (strncmp(*pp, s, n) != 0)
	return false;

    *pp += n;
    return true;
}
665 
666 static void
667 parse_indent_comment(void)
668 {
669     bool on;
670 
671     const char *p = inbuf.inp.buf;
672 
673     skip_blank(&p);
674     if (!skip_string(&p, "/*"))
675 	return;
676     skip_blank(&p);
677     if (!skip_string(&p, "INDENT"))
678 	return;
679 
680     skip_blank(&p);
681     if (*p == '*' || skip_string(&p, "ON"))
682 	on = true;
683     else if (skip_string(&p, "OFF"))
684 	on = false;
685     else
686 	return;
687 
688     skip_blank(&p);
689     if (!skip_string(&p, "*/\n"))
690 	return;
691 
692     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
693 	output_line();
694 
695     inhibit_formatting = !on;
696     if (on) {
697 	/*
698 	 * XXX: Does this make sense? Is the handling of blank lines above
699 	 * INDENT OFF comments essentially the same?
700 	 */
701 	blank_lines_to_output = 0;
702 	blank_line_after = false;
703 	blank_line_before = false;
704 	suppress_blanklines = true;
705     }
706 }
707 
708 void
709 inp_read_line(void)
710 {
711     if (inp_from_file())
712 	return;
713 
714     inp_read_next_line(input);
715 
716     parse_indent_comment();
717 
718     if (inhibit_formatting)
719 	output_range(inbuf.inp.s, inbuf.inp.e);
720 }
721