xref: /netbsd-src/usr.bin/indent/indent.c (revision 4170684f22077e3779c5c14826430de0dec964b2)
1 /*	$NetBSD: indent.c,v 1.310 2023/05/23 18:16:28 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8  * Copyright (c) 1980, 1993
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: indent.c,v 1.310 2023/05/23 18:16:28 rillig Exp $");
42 
43 #include <sys/param.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 
52 #include "indent.h"
53 
/*
 * The formatting options, preset to their default values.  Members that are
 * not listed here start out as zero or false; parse_command_line fills in
 * some of them based on other options after the command line has been read.
 */
struct options opt = {
	.brace_same_line = true,
	.comment_delimiter_on_blankline = true,
	.cuddle_else = true,
	.comment_column = 33,
	.decl_indent = 16,
	.else_if = true,
	.function_brace_split = true,
	.format_col1_comments = true,
	.format_block_comments = true,
	.indent_parameters = true,
	.indent_size = 8,
	.local_decl_indent = -1,	/* negative: same as decl_indent */
	.lineup_to_parens = true,
	.procnames_start_line = true,
	.star_comment_cont = true,
	.tabsize = 8,
	.max_line_length = 78,
	.use_tabs = true,
};
74 
/*
 * The state of the parser.  A copy is saved at each '#if' and restored at
 * the corresponding '#else' or '#elif', see process_preprocessing.
 */
struct parser_state ps;

/* The text of the token that is currently being processed. */
struct buffer token;

/*
 * The pieces of the line that is being assembled: 'lab' takes labels and
 * whole preprocessing lines, 'code' takes the ordinary tokens, and 'com'
 * holds text that move_com_to_code may append to 'code'.
 */
struct buffer lab;
struct buffer code;
struct buffer com;

bool found_err;			/* set by diag for errors; selects the exit
				 * status in process_eof */
bool break_comma;		/* consulted in process_newline and
				 * process_comma; not assigned in this file */
float case_ind;			/* not used in this file */
bool had_eof;			/* consulted in read_preprocessing_line; not
				 * assigned in this file */
int line_no = 1;		/* the current input line, for diagnostics */
enum indent_enabled indent_enabled;

/* The number of saved parser states, and the states themselves. */
static int ifdef_level;
static struct parser_state state_stack[5];

FILE *input;
FILE *output;

/* The file names for diagnostics, and the name of the backup file. */
static const char *in_name = "Standard Input";
static const char *out_name = "Standard Output";
static const char *backup_suffix = ".BAK";
static char bakfile[MAXPATHLEN] = "";
100 
101 
/*
 * Return 'p' unchanged if it is not a null pointer; otherwise terminate the
 * program via err(3) with EXIT_FAILURE.  Intended for wrapping allocations.
 */
void *
nonnull(void *p)
{
	if (p != NULL)
		return p;
	err(EXIT_FAILURE, NULL);
	/* NOTREACHED */
}
109 
110 static void
111 buf_expand(struct buffer *buf, size_t add_size)
112 {
113 	buf->cap = buf->cap + add_size + 400;
114 	buf->mem = nonnull(realloc(buf->mem, buf->cap));
115 	buf->st = buf->mem;
116 }
117 
118 void
119 buf_add_char(struct buffer *buf, char ch)
120 {
121 	if (buf->len == buf->cap)
122 		buf_expand(buf, 1);
123 	buf->mem[buf->len++] = ch;
124 }
125 
126 void
127 buf_add_chars(struct buffer *buf, const char *s, size_t len)
128 {
129 	if (len == 0)
130 		return;
131 	if (len > buf->cap - buf->len)
132 		buf_expand(buf, len);
133 	memcpy(buf->mem + buf->len, s, len);
134 	buf->len += len;
135 }
136 
137 static void
138 buf_add_buf(struct buffer *buf, const struct buffer *add)
139 {
140 	buf_add_chars(buf, add->st, add->len);
141 }
142 
143 void
144 diag(int level, const char *msg, ...)
145 {
146 	va_list ap;
147 
148 	if (level != 0)
149 		found_err = true;
150 
151 	va_start(ap, msg);
152 	fprintf(stderr, "%s: %s:%d: ",
153 	    level == 0 ? "warning" : "error", in_name, line_no);
154 	vfprintf(stderr, msg, ap);
155 	fprintf(stderr, "\n");
156 	va_end(ap);
157 }
158 
/*
 * Compute the indentation that results from starting at 'ind' and then
 * adding the 'len' characters starting at 's'.  A newline resets the
 * indentation to 0, a tab advances to the next tab stop, a backspace takes
 * one column back, and every other character advances by one column.
 */
int
ind_add(int ind, const char *s, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		switch (s[i]) {
		case '\n':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
178 
179 static void
180 init_globals(void)
181 {
182 	ps.s_sym[0] = psym_stmt_list;
183 	ps.prev_token = lsym_semicolon;
184 	ps.next_col_1 = true;
185 
186 	const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
187 	if (suffix != NULL)
188 		backup_suffix = suffix;
189 }
190 
191 /*
192  * Copy the input file to the backup file, then make the backup file the input
193  * and the original input file the output.
194  */
195 static void
196 bakcopy(void)
197 {
198 	ssize_t n;
199 	int bak_fd;
200 	char buff[8 * 1024];
201 
202 	const char *last_slash = strrchr(in_name, '/');
203 	snprintf(bakfile, sizeof(bakfile), "%s%s",
204 	    last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
205 
206 	/* copy in_name to backup file */
207 	bak_fd = creat(bakfile, 0600);
208 	if (bak_fd < 0)
209 		err(1, "%s", bakfile);
210 
211 	while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
212 		if (write(bak_fd, buff, (size_t)n) != n)
213 			err(1, "%s", bakfile);
214 	if (n < 0)
215 		err(1, "%s", in_name);
216 
217 	close(bak_fd);
218 	(void)fclose(input);
219 
220 	/* re-open backup file as the input file */
221 	input = fopen(bakfile, "r");
222 	if (input == NULL)
223 		err(1, "%s", bakfile);
224 	/* now the original input file will be the output */
225 	output = fopen(in_name, "w");
226 	if (output == NULL) {
227 		unlink(bakfile);
228 		err(1, "%s", in_name);
229 	}
230 }
231 
/*
 * Load the profile files, unless the command line contains '-npro'.  The
 * last option of the form '-Pname' with a non-empty name selects the profile
 * name that is passed on; without such an option, NULL is passed.
 */
static void
load_profiles(int argc, char **argv)
{
	const char *profile_name = NULL;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i++];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			profile_name = &arg[2];
	}

	load_profile_files(profile_name);
}
248 
249 static void
250 parse_command_line(int argc, char **argv)
251 {
252 	for (int i = 1; i < argc; ++i) {
253 		const char *arg = argv[i];
254 
255 		if (arg[0] == '-') {
256 			set_option(arg, "Command line");
257 
258 		} else if (input == NULL) {
259 			in_name = arg;
260 			if ((input = fopen(in_name, "r")) == NULL)
261 				err(1, "%s", in_name);
262 
263 		} else if (output == NULL) {
264 			out_name = arg;
265 			if (strcmp(in_name, out_name) == 0)
266 				errx(1, "input and output files "
267 				    "must be different");
268 			if ((output = fopen(out_name, "w")) == NULL)
269 				err(1, "%s", out_name);
270 
271 		} else
272 			errx(1, "too many arguments: %s", arg);
273 	}
274 
275 	if (input == NULL) {
276 		input = stdin;
277 		output = stdout;
278 	} else if (output == NULL) {
279 		out_name = in_name;
280 		bakcopy();
281 	}
282 
283 	if (opt.comment_column <= 1)
284 		opt.comment_column = 2;	/* don't put normal comments in column
285 					 * 1, see opt.format_col1_comments */
286 	if (opt.block_comment_max_line_length <= 0)
287 		opt.block_comment_max_line_length = opt.max_line_length;
288 	if (opt.local_decl_indent < 0)
289 		opt.local_decl_indent = opt.decl_indent;
290 	if (opt.decl_comment_column <= 0)
291 		opt.decl_comment_column = opt.ljust_decl
292 		    ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
293 		    : opt.comment_column;
294 	if (opt.continuation_indent == 0)
295 		opt.continuation_indent = opt.indent_size;
296 }
297 
298 static void
299 set_initial_indentation(void)
300 {
301 	inp_read_line();
302 
303 	int ind = 0;
304 	for (const char *p = inp.st;; p++) {
305 		if (*p == ' ')
306 			ind++;
307 		else if (*p == '\t')
308 			ind = next_tab(ind);
309 		else
310 			break;
311 	}
312 
313 	ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
314 }
315 
316 static void
317 code_add_decl_indent(int decl_ind, bool tabs_to_var)
318 {
319 	int base = ps.ind_level * opt.indent_size;
320 	int ind = base + (int)code.len;
321 	int target = base + decl_ind;
322 	size_t orig_code_len = code.len;
323 
324 	if (tabs_to_var)
325 		for (int next; (next = next_tab(ind)) <= target; ind = next)
326 			buf_add_char(&code, '\t');
327 
328 	for (; ind < target; ind++)
329 		buf_add_char(&code, ' ');
330 
331 	if (code.len == orig_code_len && ps.want_blank) {
332 		buf_add_char(&code, ' ');
333 		ps.want_blank = false;
334 	}
335 }
336 
337 static void
338 update_ps_decl_ptr(lexer_symbol lsym)
339 {
340 	switch (ps.decl_ptr) {
341 	case dp_start:
342 		if (lsym == lsym_storage_class)
343 			ps.decl_ptr = dp_start;
344 		else if (lsym == lsym_type_outside_parentheses)
345 			ps.decl_ptr = dp_word;
346 		else if (lsym == lsym_word)
347 			ps.decl_ptr = dp_word;
348 		else
349 			ps.decl_ptr = dp_other;
350 		break;
351 	case dp_word:
352 		if (lsym == lsym_unary_op && token.st[0] == '*')
353 			ps.decl_ptr = dp_word_asterisk;
354 		else
355 			ps.decl_ptr = dp_other;
356 		break;
357 	case dp_word_asterisk:
358 		if (lsym == lsym_unary_op && token.st[0] == '*')
359 			ps.decl_ptr = dp_word_asterisk;
360 		else
361 			ps.decl_ptr = dp_other;
362 		break;
363 	case dp_other:
364 		if (lsym == lsym_semicolon || lsym == lsym_rbrace)
365 			ps.decl_ptr = dp_start;
366 		if (lsym == lsym_lparen_or_lbracket
367 		    && ps.prev_token == lsym_for)
368 			ps.decl_ptr = dp_start;
369 		if (lsym == lsym_comma && ps.in_decl)
370 			ps.decl_ptr = dp_start;
371 		break;
372 	}
373 }
374 
375 static void
376 update_ps_in_enum(lexer_symbol lsym)
377 {
378 	switch (ps.in_enum) {
379 	case in_enum_no:
380 		if (lsym == lsym_tag && token.st[0] == 'e')
381 			ps.in_enum = in_enum_enum;
382 		break;
383 	case in_enum_enum:
384 		if (lsym == lsym_type_outside_parentheses
385 		    || lsym == lsym_type_in_parentheses)
386 			ps.in_enum = in_enum_type;
387 		else if (lsym == lsym_lbrace)
388 			ps.in_enum = in_enum_brace;
389 		else
390 			ps.in_enum = in_enum_no;
391 		break;
392 	case in_enum_type:
393 		if (lsym == lsym_lbrace)
394 			ps.in_enum = in_enum_brace;
395 		else
396 			ps.in_enum = in_enum_no;
397 		break;
398 	case in_enum_brace:
399 		if (lsym == lsym_rbrace)
400 			ps.in_enum = in_enum_no;
401 		break;
402 	}
403 }
404 
405 static int
406 process_eof(void)
407 {
408 	if (lab.len > 0 || code.len > 0 || com.len > 0)
409 		output_line();
410 	if (indent_enabled != indent_on) {
411 		indent_enabled = indent_last_off_line;
412 		output_line();
413 	}
414 
415 	if (ps.tos > 1)		/* check for balanced braces */
416 		diag(1, "Stuff missing from end of file");
417 
418 	fflush(output);
419 	return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
420 }
421 
422 static void
423 maybe_break_line(lexer_symbol lsym)
424 {
425 	if (!ps.force_nl)
426 		return;
427 	if (lsym == lsym_semicolon)
428 		return;
429 	if (lsym == lsym_lbrace && opt.brace_same_line)
430 		return;
431 
432 	if (opt.verbose)
433 		diag(0, "Line broken");
434 	output_line();
435 	ps.force_nl = false;
436 }
437 
438 static void
439 move_com_to_code(lexer_symbol lsym)
440 {
441 	if (ps.want_blank)
442 		buf_add_char(&code, ' ');
443 	buf_add_buf(&code, &com);
444 	if (lsym != lsym_rparen_or_rbracket)
445 		buf_add_char(&code, ' ');
446 	com.len = 0;
447 	ps.want_blank = false;
448 }
449 
450 static void
451 process_newline(void)
452 {
453 	if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init &&
454 	    !opt.break_after_comma && break_comma &&
455 	    com.len == 0)
456 		goto stay_in_line;
457 
458 	output_line();
459 
460 stay_in_line:
461 	++line_no;
462 }
463 
464 static bool
465 is_function_pointer_declaration(void)
466 {
467 	return token.st[0] == '('
468 	    && ps.in_decl
469 	    && !ps.block_init
470 	    && !ps.decl_indent_done
471 	    && !ps.is_function_definition
472 	    && ps.line_start_nparen == 0;
473 }
474 
475 static bool
476 want_blank_before_lparen(void)
477 {
478 	if (!ps.want_blank)
479 		return false;
480 	if (opt.proc_calls_space)
481 		return true;
482 	if (ps.prev_token == lsym_rparen_or_rbracket)
483 		return false;
484 	if (ps.prev_token == lsym_offsetof)
485 		return false;
486 	if (ps.prev_token == lsym_sizeof)
487 		return opt.blank_after_sizeof;
488 	if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
489 		return false;
490 	return true;
491 }
492 
493 static bool
494 want_blank_before_lbracket(void)
495 {
496 	if (code.len == 0)
497 		return false;
498 	if (ps.prev_token == lsym_comma)
499 		return true;
500 	if (ps.prev_token == lsym_binary_op)
501 		return true;
502 	return false;
503 }
504 
/*
 * Handle a '(' or '[': open a new parenthesis level, append the token to the
 * code buffer, preceded by the declaration indentation or a blank where
 * needed, and record the indentation and the cast state of the new level in
 * ps.paren.
 */
static void
process_lparen_or_lbracket(void)
{
	/*
	 * When the limit is reached, the increment is undone, so the entry
	 * that is written at the end of this function overwrites the one of
	 * the enclosing level.
	 */
	if (++ps.nparen == array_length(ps.paren)) {
		diag(0, "Reached internal limit of %zu unclosed parentheses",
		    array_length(ps.paren));
		ps.nparen--;
	}

	if (is_function_pointer_declaration()) {
		code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
		ps.decl_indent_done = true;
	} else if (token.st[0] == '('
	    ? want_blank_before_lparen() : want_blank_before_lbracket())
		buf_add_char(&code, ' ');
	ps.want_blank = false;
	buf_add_char(&code, token.st[0]);

	/* the width of the code up to and including the parenthesis */
	int indent = ind_add(0, code.st, code.len);
	enum paren_level_cast cast = cast_unknown;

	/* The rest of this function only cares about the outermost
	 * parenthesis after a keyword such as 'if' or 'while'. */
	if (opt.extra_expr_indent && !opt.lineup_to_parens
	    && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
	    && opt.continuation_indent == opt.indent_size)
		ps.extra_expr_indent = eei_yes;

	if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
	    && ps.nparen == 1 && indent < 2 * opt.indent_size)
		indent = 2 * opt.indent_size;

	if (ps.init_or_struct && *token.st == '(' && ps.tos <= 2) {
		/* this is a kluge to make sure that declarations will be
		 * aligned right if proc decl has an explicit type on it, i.e.
		 * "int a(x) {..." */
		parse(psym_0);
		ps.init_or_struct = false;
	}

	/* These parentheses are known not to contain a cast. */
	if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof
	    || ps.is_function_definition)
		cast = cast_no;

	ps.paren[ps.nparen - 1].indent = indent;
	ps.paren[ps.nparen - 1].cast = cast;
	debug_println("paren_indents[%d] is now %s%d",
	    ps.nparen - 1, paren_level_cast_name[cast], indent);
}
552 
/*
 * Handle a ')' or ']': close the innermost parenthesis level, decide whether
 * a blank is wanted after the token, and append the token to the code
 * buffer.  If this closes the controlling expression of a statement such as
 * 'if (...)' or 'while (...)', the parser is told about the statement head.
 */
static void
process_rparen_or_rbracket(void)
{
	if (ps.nparen == 0) {
		/* There is no level to close, the token is only copied. */
		diag(0, "Extra '%c'", *token.st);
		goto unbalanced;
	}

	enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
	if (ps.decl_on_line && !ps.block_init)
		cast = cast_no;

	if (cast == cast_maybe) {
		/* After a cast, an operator such as '-' is a unary one. */
		ps.next_unary = true;
		ps.want_blank = opt.space_after_cast;
	} else
		ps.want_blank = true;

	if (code.len == 0)	/* if the paren starts the line */
		ps.line_start_nparen = ps.nparen;	/* then indent it */

unbalanced:
	buf_add_char(&code, token.st[0]);

	/* the end of the parenthesized part of 'if', 'for', 'switch' or
	 * 'while' */
	if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
		if (ps.extra_expr_indent == eei_yes)
			ps.extra_expr_indent = eei_last;
		ps.force_nl = true;
		ps.next_unary = true;
		ps.in_stmt_or_decl = false;
		parse(ps.spaced_expr_psym);
		ps.spaced_expr_psym = psym_0;
		ps.want_blank = true;
		out.line_kind = lk_stmt_head;
	}
}
589 
590 static bool
591 want_blank_before_unary_op(void)
592 {
593 	if (ps.want_blank)
594 		return true;
595 	if (token.st[0] == '+' || token.st[0] == '-')
596 		return code.len > 0 && code.mem[code.len - 1] == token.st[0];
597 	return false;
598 }
599 
600 static void
601 process_unary_op(void)
602 {
603 	if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
604 	    !ps.is_function_definition && ps.line_start_nparen == 0) {
605 		/* pointer declarations */
606 		code_add_decl_indent(ps.decl_ind - (int)token.len,
607 		    ps.tabs_to_var);
608 		ps.decl_indent_done = true;
609 	} else if (want_blank_before_unary_op())
610 		buf_add_char(&code, ' ');
611 
612 	buf_add_buf(&code, &token);
613 	ps.want_blank = false;
614 }
615 
616 static void
617 process_binary_op(void)
618 {
619 	if (code.len > 0 && ps.want_blank)
620 		buf_add_char(&code, ' ');
621 	buf_add_buf(&code, &token);
622 	ps.want_blank = true;
623 }
624 
625 static void
626 process_postfix_op(void)
627 {
628 	buf_add_buf(&code, &token);
629 	ps.want_blank = true;
630 }
631 
632 static void
633 process_question(void)
634 {
635 	ps.quest_level++;
636 	if (code.len == 0) {
637 		ps.in_stmt_cont = true;
638 		ps.in_stmt_or_decl = true;
639 		ps.in_decl = false;
640 	}
641 	if (ps.want_blank)
642 		buf_add_char(&code, ' ');
643 	buf_add_char(&code, '?');
644 	ps.want_blank = true;
645 }
646 
647 static void
648 process_colon(void)
649 {
650 	if (ps.quest_level > 0) {	/* part of a '?:' operator */
651 		ps.quest_level--;
652 		if (code.len == 0) {
653 			ps.in_stmt_cont = true;
654 			ps.in_stmt_or_decl = true;
655 			ps.in_decl = false;
656 		}
657 		if (ps.want_blank)
658 			buf_add_char(&code, ' ');
659 		buf_add_char(&code, ':');
660 		ps.want_blank = true;
661 		return;
662 	}
663 
664 	if (ps.init_or_struct) {	/* bit-field */
665 		buf_add_char(&code, ':');
666 		ps.want_blank = false;
667 		return;
668 	}
669 
670 	buf_add_buf(&lab, &code);	/* 'case' or 'default' or named label
671 					 */
672 	buf_add_char(&lab, ':');
673 	code.len = 0;
674 
675 	ps.in_stmt_or_decl = false;
676 	ps.is_case_label = ps.seen_case;
677 	ps.force_nl = ps.seen_case;
678 	ps.seen_case = false;
679 	ps.want_blank = false;
680 }
681 
/*
 * Handle a ';': reset the state that belongs to the statement or declaration
 * that ends here, append the semicolon to the code buffer and, unless the
 * semicolon is part of a statement head such as 'for (;;)', tell the parser
 * about the end of the statement.
 */
static void
process_semicolon(void)
{
	if (ps.decl_level == 0)
		ps.init_or_struct = false;
	ps.seen_case = false;	/* only needs to be reset on error */
	ps.quest_level = 0;	/* only needs to be reset on error */
	if (ps.prev_token == lsym_rparen_or_rbracket)
		ps.in_func_def_params = false;
	ps.block_init = false;
	ps.block_init_level = 0;
	ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;

	/* Since ps.block_init has just been reset, it does not influence
	 * the following condition. */
	if (ps.in_decl && code.len == 0 && !ps.block_init &&
	    !ps.decl_indent_done && ps.line_start_nparen == 0) {
		/* indent stray semicolons in declarations */
		code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
		ps.decl_indent_done = true;
	}

	/* if we were in a first level structure declaration before, we
	 * aren't anymore */
	ps.in_decl = ps.decl_level > 0;

	if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
		/* There were unbalanced parentheses in the statement. It is a
		 * bit complicated, because the semicolon might be in a for
		 * statement. */
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		if (ps.spaced_expr_psym != psym_0) {
			parse(ps.spaced_expr_psym);
			ps.spaced_expr_psym = psym_0;
		}
	}
	buf_add_char(&code, ';');
	ps.want_blank = true;
	/* only still true for the semicolons inside 'for (...)' */
	ps.in_stmt_or_decl = ps.nparen > 0;
	ps.decl_ind = 0;

	if (ps.spaced_expr_psym == psym_0) {
		parse(psym_0);	/* let parser know about end of stmt */
		ps.force_nl = true;
	}
}
727 
/*
 * Handle a '{': decide whether it goes on a line of its own, recover from
 * unbalanced parentheses, keep track of nested struct declarations and
 * initializers, tell the parser about the brace and append it to the code
 * buffer.
 */
static void
process_lbrace(void)
{
	ps.in_stmt_or_decl = false;	/* don't indent the {} */

	/* Outside of initializers, the brace ends the line; inside, the
	 * nesting level of the braces is counted. */
	if (!ps.block_init)
		ps.force_nl = true;
	else if (ps.block_init_level <= 0)
		ps.block_init_level = 1;
	else
		ps.block_init_level++;

	if (code.len > 0 && !ps.block_init) {
		if (!opt.brace_same_line ||
		    (code.len > 0 && code.mem[code.len - 1] == '}'))
			output_line();
		else if (ps.in_func_def_params && !ps.init_or_struct) {
			ps.ind_level_follow = 0;
			if (opt.function_brace_split)
				output_line();
			else
				ps.want_blank = true;
		}
	}

	if (ps.nparen > 0) {
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		if (ps.spaced_expr_psym != psym_0) {
			parse(ps.spaced_expr_psym);
			ps.spaced_expr_psym = psym_0;
			ps.ind_level = ps.ind_level_follow;
		}
	}

	/* don't indent the '{' itself */
	if (code.len == 0)
		ps.in_stmt_cont = false;

	if (ps.in_decl && ps.init_or_struct) {
		/* Remember the indentation of the enclosing declaration;
		 * process_rbrace restores it. */
		ps.di_stack[ps.decl_level] = ps.decl_ind;
		if (++ps.decl_level == (int)array_length(ps.di_stack)) {
			diag(0, "Reached internal limit of %d struct levels",
			    (int)array_length(ps.di_stack));
			ps.decl_level--;
		}
	} else {
		/* we can't be in the middle of a declaration, so don't do
		 * special indentation of comments */
		ps.decl_on_line = false;
		ps.in_func_def_params = false;
		ps.in_decl = false;
	}

	ps.decl_ind = 0;
	parse(psym_lbrace);
	if (ps.want_blank)
		buf_add_char(&code, ' ');
	ps.want_blank = false;
	buf_add_char(&code, '{');
	ps.declaration = decl_no;
}
790 
/*
 * Handle a '}': recover from unbalanced parentheses, start a new line for
 * the brace unless it is part of an initializer, append it to the code
 * buffer, restore the state of an enclosing struct declaration and tell the
 * parser about the brace.
 */
static void
process_rbrace(void)
{
	if (ps.nparen > 0) {	/* check for unclosed if, for, else. */
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		ps.spaced_expr_psym = psym_0;
	}

	ps.declaration = decl_no;
	/* May become negative; process_lbrace and process_comma treat all
	 * values <= 0 alike. */
	ps.block_init_level--;

	if (code.len > 0 && !ps.block_init) {
		if (opt.verbose)
			diag(0, "Line broken");
		output_line();
	}

	buf_add_char(&code, '}');
	ps.want_blank = true;
	ps.in_stmt_or_decl = false;
	ps.in_stmt_cont = false;

	if (ps.decl_level > 0) {	/* multi-level structure declaration */
		/* Restore the indentation that process_lbrace has saved. */
		ps.decl_ind = ps.di_stack[--ps.decl_level];
		if (ps.decl_level == 0 && !ps.in_func_def_params) {
			ps.declaration = decl_begin;
			ps.decl_ind = ps.ind_level == 0
			    ? opt.decl_indent : opt.local_decl_indent;
		}
		ps.in_decl = true;
	}

	/* Evaluated before the parser processes the brace. */
	if (ps.tos == 2)
		out.line_kind = lk_func_end;

	parse(psym_rbrace);
}
829 
830 static void
831 process_do(void)
832 {
833 	ps.in_stmt_or_decl = false;
834 
835 	if (code.len > 0) {	/* make sure this starts a line */
836 		if (opt.verbose)
837 			diag(0, "Line broken");
838 		output_line();
839 	}
840 
841 	ps.force_nl = true;
842 	parse(psym_do);
843 }
844 
845 static void
846 process_else(void)
847 {
848 	ps.in_stmt_or_decl = false;
849 
850 	if (code.len > 0
851 	    && !(opt.cuddle_else && code.mem[code.len - 1] == '}')) {
852 		if (opt.verbose)
853 			diag(0, "Line broken");
854 		output_line();
855 	}
856 
857 	ps.force_nl = true;
858 	parse(psym_else);
859 }
860 
861 static void
862 process_type(void)
863 {
864 	parse(psym_decl);	/* let the parser worry about indentation */
865 
866 	if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) {
867 		if (code.len > 0)
868 			output_line();
869 	}
870 
871 	if (ps.in_func_def_params && opt.indent_parameters &&
872 	    ps.decl_level == 0) {
873 		ps.ind_level = ps.ind_level_follow = 1;
874 		ps.in_stmt_cont = false;
875 	}
876 
877 	ps.init_or_struct = /* maybe */ true;
878 	ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef;
879 	if (ps.decl_level <= 0)
880 		ps.declaration = decl_begin;
881 
882 	int len = (int)token.len + 1;
883 	int ind = ps.ind_level == 0 || ps.decl_level > 0
884 	    ? opt.decl_indent	/* global variable or local member */
885 	    : opt.local_decl_indent;	/* local variable */
886 	ps.decl_ind = ind > 0 ? ind : len;
887 	ps.tabs_to_var = opt.use_tabs && ind > 0;
888 }
889 
890 static void
891 process_ident(lexer_symbol lsym)
892 {
893 	if (ps.in_decl) {
894 		if (lsym == lsym_funcname) {
895 			ps.in_decl = false;
896 			if (opt.procnames_start_line && code.len > 0)
897 				output_line();
898 			else if (ps.want_blank)
899 				buf_add_char(&code, ' ');
900 			ps.want_blank = false;
901 
902 		} else if (!ps.block_init && !ps.decl_indent_done &&
903 		    ps.line_start_nparen == 0) {
904 			if (opt.decl_indent == 0
905 			    && code.len > 0 && code.mem[code.len - 1] == '}')
906 				ps.decl_ind =
907 				    ind_add(0, code.st, code.len) + 1;
908 			code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
909 			ps.decl_indent_done = true;
910 			ps.want_blank = false;
911 		}
912 
913 	} else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
914 		ps.force_nl = true;
915 		ps.next_unary = true;
916 		ps.in_stmt_or_decl = false;
917 		parse(ps.spaced_expr_psym);
918 		ps.spaced_expr_psym = psym_0;
919 	}
920 }
921 
922 static void
923 process_period(void)
924 {
925 	if (code.len > 0 && code.mem[code.len - 1] == ',')
926 		buf_add_char(&code, ' ');
927 	buf_add_char(&code, '.');
928 	ps.want_blank = false;
929 }
930 
931 static void
932 process_comma(void)
933 {
934 	ps.want_blank = code.len > 0;	/* only put blank after comma if comma
935 					 * does not start the line */
936 
937 	if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
938 	    !ps.decl_indent_done && ps.line_start_nparen == 0) {
939 		/* indent leading commas and not the actual identifiers */
940 		code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
941 		ps.decl_indent_done = true;
942 	}
943 
944 	buf_add_char(&code, ',');
945 
946 	if (ps.nparen == 0) {
947 		if (ps.block_init_level <= 0)
948 			ps.block_init = false;
949 		int typical_varname_length = 8;
950 		if (break_comma && (opt.break_after_comma ||
951 		    ind_add(compute_code_indent(), code.st, code.len)
952 		    >= opt.max_line_length - typical_varname_length))
953 			ps.force_nl = true;
954 	}
955 }
956 
/*
 * Move the whole preprocessing line to the 'label' buffer, starting with a
 * '#' and excluding the final newline and any blanks at the end.
 *
 * The line normally ends at the first newline.  Inside a block comment
 * though, newlines are copied as well, until the comment ends or the end of
 * the input is reached.
 */
static void
read_preprocessing_line(void)
{
	/* whether the current position is in ordinary text, in a string
	 * literal, in a character constant or in a block comment */
	enum {
		PLAIN, STR, CHR, COMM
	} state = PLAIN;

	buf_add_char(&lab, '#');

	while (ch_isblank(inp.st[0]))
		buf_add_char(&lab, *inp.st++);

	while (inp.st[0] != '\n' || (state == COMM && !had_eof)) {
		buf_add_char(&lab, inp_next());
		switch (lab.mem[lab.len - 1]) {
		case '\\':
			/* Outside of comments, the character after the
			 * backslash is copied as-is, be it a quote or a
			 * newline. */
			if (state != COMM)
				buf_add_char(&lab, inp_next());
			break;
		case '/':
			/* the start of a block comment */
			if (inp.st[0] == '*' && state == PLAIN) {
				state = COMM;
				buf_add_char(&lab, *inp.st++);
			}
			break;
		case '"':
			if (state == STR)
				state = PLAIN;
			else if (state == PLAIN)
				state = STR;
			break;
		case '\'':
			if (state == CHR)
				state = PLAIN;
			else if (state == PLAIN)
				state = CHR;
			break;
		case '*':
			/* the end of a block comment */
			if (inp.st[0] == '/' && state == COMM) {
				state = PLAIN;
				buf_add_char(&lab, *inp.st++);
			}
			break;
		}
	}

	/* remove the blanks at the end of the line */
	while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1]))
		lab.len--;
}
1007 
1008 static void
1009 process_preprocessing(void)
1010 {
1011 	if (lab.len > 0 || code.len > 0 || com.len > 0)
1012 		output_line();
1013 
1014 	read_preprocessing_line();
1015 
1016 	ps.is_case_label = false;
1017 
1018 	const char *end = lab.mem + lab.len;
1019 	const char *dir = lab.st + 1;
1020 	while (dir < end && ch_isblank(*dir))
1021 		dir++;
1022 	size_t dir_len = 0;
1023 	while (dir + dir_len < end && ch_isalpha(dir[dir_len]))
1024 		dir_len++;
1025 
1026 	if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) {
1027 		if ((size_t)ifdef_level < array_length(state_stack))
1028 			state_stack[ifdef_level++] = ps;
1029 		else
1030 			diag(1, "#if stack overflow");
1031 		out.line_kind = lk_if;
1032 
1033 	} else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) {
1034 		if (ifdef_level <= 0)
1035 			diag(1, dir[2] == 'i'
1036 			    ? "Unmatched #elif" : "Unmatched #else");
1037 		else
1038 			ps = state_stack[ifdef_level - 1];
1039 
1040 	} else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) {
1041 		if (ifdef_level <= 0)
1042 			diag(1, "Unmatched #endif");
1043 		else
1044 			ifdef_level--;
1045 		out.line_kind = lk_endif;
1046 	}
1047 
1048 	/* subsequent processing of the newline character will cause the line
1049 	 * to be printed */
1050 }
1051 
/*
 * Process a single token, based on its kind.
 *
 * Punctuation and operators are handled completely by their own functions.
 * Keywords and words only update the parser state here; they all end up at
 * the label 'copy_token', where the text of the token is appended to the
 * code buffer.  Token kinds that are not listed are ignored.
 */
static void
process_lsym(lexer_symbol lsym)
{
	switch (lsym) {

	case lsym_newline:
		process_newline();
		break;

	case lsym_lparen_or_lbracket:
		process_lparen_or_lbracket();
		break;

	case lsym_rparen_or_rbracket:
		process_rparen_or_rbracket();
		break;

	case lsym_unary_op:
		process_unary_op();
		break;

	case lsym_binary_op:
		process_binary_op();
		break;

	case lsym_postfix_op:
		process_postfix_op();
		break;

	case lsym_question:
		process_question();
		break;

	case lsym_case_label:
		/* remembered until the ':' that ends the label */
		ps.seen_case = true;
		goto copy_token;

	case lsym_colon:
		process_colon();
		break;

	case lsym_semicolon:
		process_semicolon();
		break;

	case lsym_lbrace:
		process_lbrace();
		break;

	case lsym_rbrace:
		process_rbrace();
		break;

	/*
	 * For the following keywords, the parser is only told about the
	 * statement after the parenthesized expression has been completed.
	 */
	case lsym_switch:
		ps.spaced_expr_psym = psym_switch_expr;
		goto copy_token;

	case lsym_for:
		ps.spaced_expr_psym = psym_for_exprs;
		goto copy_token;

	case lsym_if:
		ps.spaced_expr_psym = psym_if_expr;
		goto copy_token;

	case lsym_while:
		ps.spaced_expr_psym = psym_while_expr;
		goto copy_token;

	case lsym_do:
		process_do();
		goto copy_token;

	case lsym_else:
		process_else();
		goto copy_token;

	case lsym_typedef:
	case lsym_storage_class:
		goto copy_token;

	case lsym_tag:
		/* Inside parentheses, a tag does not start a declaration. */
		if (ps.nparen > 0)
			goto copy_token;
		/* FALLTHROUGH */
	case lsym_type_outside_parentheses:
		process_type();
		goto copy_token;

	case lsym_type_in_parentheses:
	case lsym_offsetof:
	case lsym_sizeof:
	case lsym_word:
	case lsym_funcname:
	case lsym_return:
		process_ident(lsym);
	copy_token:
		if (ps.want_blank)
			buf_add_char(&code, ' ');
		buf_add_buf(&code, &token);
		/* A function name is directly followed by its '('. */
		if (lsym != lsym_funcname)
			ps.want_blank = true;
		break;

	case lsym_period:
		process_period();
		break;

	case lsym_comma:
		process_comma();
		break;

	case lsym_preprocessing:
		process_preprocessing();
		break;

	case lsym_comment:
		process_comment();
		break;

	default:
		break;
	}
}
1176 
/*
 * The main loop of the program: read the tokens from the lexer one by one
 * and process each of them, until the end of the input is reached.
 *
 * Returns the exit status of the program, as determined by process_eof.
 */
static int
indent(void)
{
	debug_parser_state();

	for (;;) {		/* loop until we reach eof */
		lexer_symbol lsym = lexi();

		debug_blank_line();
		debug_printf("line %d: %s", line_no, lsym_name[lsym]);
		debug_buffers();
		debug_blank_line();

		if (lsym == lsym_eof)
			return process_eof();

		/* With the option else_if, keep 'else if' on a single line,
		 * by cancelling the line break requested by process_else. */
		if (lsym == lsym_if && ps.prev_token == lsym_else
		    && opt.else_if)
			ps.force_nl = false;

		if (lsym == lsym_newline || lsym == lsym_preprocessing)
			ps.force_nl = false;
		else if (lsym == lsym_comment) {
			/* no special processing */
		} else {
			maybe_break_line(lsym);
			/*
			 * Add an extra level of indentation; turned off again
			 * by a ';' or '}'.
			 */
			ps.in_stmt_or_decl = true;
			if (com.len > 0)
				move_com_to_code(lsym);
			update_ps_decl_ptr(lsym);
			update_ps_in_enum(lsym);
		}

		process_lsym(lsym);

		debug_parser_state();

		/* Comments, newlines and preprocessing lines do not count
		 * as the previous token. */
		if (lsym != lsym_comment && lsym != lsym_newline &&
		    lsym != lsym_preprocessing)
			ps.prev_token = lsym;
	}
}
1223 
/*
 * The entry point of the program: set up the global state, load the
 * profiles, process the command line and then format the input.  The exit
 * status tells whether any error has been reported.
 */
int
main(int argc, char **argv)
{
	int status;

	init_globals();
	load_profiles(argc, argv);
	parse_command_line(argc, argv);
	set_initial_indentation();

	status = indent();
	return status;
}
1233