xref: /netbsd-src/usr.bin/indent/parse.c (revision 4724848cf0da353df257f730694b7882798e5daf)
1 /*	$NetBSD: parse.c,v 1.49 2022/04/22 21:21:20 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if 0
41 static char sccsid[] = "@(#)parse.c	8.1 (Berkeley) 6/6/93";
42 #endif
43 
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: parse.c,v 1.49 2022/04/22 21:21:20 rillig Exp $");
47 #else
48 __FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $");
49 #endif
50 
51 #include <assert.h>
52 #include <err.h>
53 #include <stdio.h>
54 
55 #include "indent.h"
56 
57 static void reduce(void);
58 
59 #ifdef debug
60 static const char *
61 psym_name(parser_symbol psym)
62 {
63     static const char *const name[] = {
64 	"semicolon",
65 	"lbrace",
66 	"rbrace",
67 	"decl",
68 	"stmt",
69 	"stmt_list",
70 	"for_exprs",
71 	"if_expr",
72 	"if_expr_stmt",
73 	"if_expr_stmt_else",
74 	"else",
75 	"switch_expr",
76 	"do",
77 	"do_stmt",
78 	"while_expr",
79     };
80 
81     assert(array_length(name) == (int)psym_while_expr + 1);
82 
83     return name[psym];
84 }
85 #endif
86 
87 static int
88 decl_level(void)
89 {
90     int level = 0;
91     for (int i = ps.tos - 1; i > 0; i--)
92 	if (ps.s_sym[i] == psym_decl)
93 	    level++;
94     return level;
95 }
96 
97 /*
98  * Shift the token onto the parser stack, or reduce it by combining it with
99  * previous tokens.
100  */
101 void
102 parse(parser_symbol psym)
103 {
104     debug_println("parse token: %s", psym_name(psym));
105 
106     if (psym != psym_else) {
107 	while (ps.s_sym[ps.tos] == psym_if_expr_stmt) {
108 	    ps.s_sym[ps.tos] = psym_stmt;
109 	    reduce();
110 	}
111     }
112 
113     switch (psym) {
114 
115     case psym_decl:
116 	ps.search_stmt = opt.brace_same_line;
117 	/* indicate that following brace should be on same line */
118 
119 	if (ps.s_sym[ps.tos] == psym_decl)
120 	    break;		/* only put one declaration onto stack */
121 
122 	break_comma = true;	/* while in a declaration, force a newline
123 				 * after comma */
124 	ps.s_sym[++ps.tos] = psym_decl;
125 	ps.s_ind_level[ps.tos] = ps.ind_level_follow;
126 
127 	if (opt.ljust_decl)
128 	    ps.ind_level_follow = ps.ind_level = decl_level();
129 	break;
130 
131     case psym_if_expr:
132 	if (ps.s_sym[ps.tos] == psym_if_expr_stmt_else && opt.else_if) {
133 	    /*
134 	     * Reduce "else if" to "if". This saves a lot of stack space in
135 	     * case of a long "if-else-if ... else-if" sequence.
136 	     */
137 	    ps.ind_level_follow = ps.s_ind_level[ps.tos--];
138 	}
139 	/* FALLTHROUGH */
140     case psym_do:
141     case psym_for_exprs:
142 	ps.s_sym[++ps.tos] = psym;
143 	ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
144 	++ps.ind_level_follow;	/* subsequent statements should be indented 1 */
145 	ps.search_stmt = opt.brace_same_line;
146 	break;
147 
148     case psym_lbrace:
149 	break_comma = false;	/* don't break comma in an initializer list */
150 	if (ps.s_sym[ps.tos] == psym_stmt || ps.s_sym[ps.tos] == psym_decl
151 		|| ps.s_sym[ps.tos] == psym_stmt_list)
152 	    ++ps.ind_level_follow;	/* it is a random, isolated stmt group
153 					 * or a declaration */
154 	else {
155 	    if (code.s == code.e) {
156 		/* it is a group as part of a while, for, etc. */
157 		--ps.ind_level;
158 
159 		/*
160 		 * for a switch, brace should be two levels out from the code
161 		 */
162 		if (ps.s_sym[ps.tos] == psym_switch_expr &&
163 			opt.case_indent >= 1.0F)
164 		    --ps.ind_level;
165 	    }
166 	}
167 
168 	ps.s_sym[++ps.tos] = psym_lbrace;
169 	ps.s_ind_level[ps.tos] = ps.ind_level;
170 	ps.s_sym[++ps.tos] = psym_stmt;
171 	ps.s_ind_level[ps.tos] = ps.ind_level_follow;
172 	break;
173 
174     case psym_while_expr:
175 	if (ps.s_sym[ps.tos] == psym_do_stmt) {
176 	    /* it is matched with do stmt */
177 	    ps.ind_level = ps.ind_level_follow = ps.s_ind_level[ps.tos];
178 	    ps.s_sym[++ps.tos] = psym_while_expr;
179 	    ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
180 
181 	} else {		/* it is a while loop */
182 	    ps.s_sym[++ps.tos] = psym_while_expr;
183 	    ps.s_ind_level[ps.tos] = ps.ind_level_follow;
184 	    ++ps.ind_level_follow;
185 	    ps.search_stmt = opt.brace_same_line;
186 	}
187 
188 	break;
189 
190     case psym_else:
191 	if (ps.s_sym[ps.tos] != psym_if_expr_stmt)
192 	    diag(1, "Unmatched 'else'");
193 	else {
194 	    ps.ind_level = ps.s_ind_level[ps.tos];
195 	    ps.ind_level_follow = ps.ind_level + 1;
196 	    ps.s_sym[ps.tos] = psym_if_expr_stmt_else;
197 
198 	    ps.search_stmt = opt.brace_same_line || opt.else_if;
199 	}
200 	break;
201 
202     case psym_rbrace:
203 	/* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
204 	if (ps.tos > 0 && ps.s_sym[ps.tos - 1] == psym_lbrace) {
205 	    ps.ind_level = ps.ind_level_follow = ps.s_ind_level[--ps.tos];
206 	    ps.s_sym[ps.tos] = psym_stmt;
207 	} else
208 	    diag(1, "Statement nesting error");
209 	break;
210 
211     case psym_switch_expr:
212 	ps.s_sym[++ps.tos] = psym_switch_expr;
213 	ps.s_case_ind_level[ps.tos] = case_ind;
214 	ps.s_ind_level[ps.tos] = ps.ind_level_follow;
215 	case_ind = (float)ps.ind_level_follow + opt.case_indent;
216 	ps.ind_level_follow += (int)opt.case_indent + 1;
217 	ps.search_stmt = opt.brace_same_line;
218 	break;
219 
220     case psym_semicolon:	/* a simple statement */
221 	break_comma = false;	/* don't break after comma in a declaration */
222 	ps.s_sym[++ps.tos] = psym_stmt;
223 	ps.s_ind_level[ps.tos] = ps.ind_level;
224 	break;
225 
226     default:
227 	diag(1, "Unknown code to parser");
228 	return;
229     }
230 
231     if (ps.tos >= STACKSIZE - 1)
232 	errx(1, "Parser stack overflow");
233 
234     reduce();			/* see if any reduction can be done */
235 
236 #ifdef debug
237     printf("parse stack:");
238     for (int i = 1; i <= ps.tos; ++i)
239 	printf(" %s %d", psym_name(ps.s_sym[i]), ps.s_ind_level[i]);
240     if (ps.tos == 0)
241 	printf(" empty");
242     printf("\n");
243 #endif
244 }
245 
246 void
247 parse_stmt_head(stmt_head hd)
248 {
249     static const parser_symbol psym[] = {
250 	[hd_for] = psym_for_exprs,
251 	[hd_if] = psym_if_expr,
252 	[hd_switch] = psym_switch_expr,
253 	[hd_while] = psym_while_expr
254     };
255     parse(psym[hd]);
256 }
257 
258 /*
259  * Try to combine the statement on the top of the parse stack with the symbol
260  * directly below it, replacing these two symbols with a single symbol.
261  */
262 static bool
263 reduce_stmt(void)
264 {
265     switch (ps.s_sym[ps.tos - 1]) {
266 
267     case psym_stmt:
268     case psym_stmt_list:
269 	ps.s_sym[--ps.tos] = psym_stmt_list;
270 	return true;
271 
272     case psym_do:
273 	ps.s_sym[--ps.tos] = psym_do_stmt;
274 	ps.ind_level_follow = ps.s_ind_level[ps.tos];
275 	return true;
276 
277     case psym_if_expr:
278 	ps.s_sym[--ps.tos] = psym_if_expr_stmt;
279 	int i = ps.tos - 1;
280 	while (ps.s_sym[i] != psym_stmt &&
281 		ps.s_sym[i] != psym_stmt_list &&
282 		ps.s_sym[i] != psym_lbrace)
283 	    --i;
284 	ps.ind_level_follow = ps.s_ind_level[i];
285 	/*
286 	 * For the time being, assume that there is no 'else' on this 'if',
287 	 * and set the indentation level accordingly. If an 'else' is scanned,
288 	 * it will be fixed up later.
289 	 */
290 	return true;
291 
292     case psym_switch_expr:
293 	case_ind = ps.s_case_ind_level[ps.tos - 1];
294 	/* FALLTHROUGH */
295     case psym_decl:		/* finish of a declaration */
296     case psym_if_expr_stmt_else:
297     case psym_for_exprs:
298     case psym_while_expr:
299 	ps.s_sym[--ps.tos] = psym_stmt;
300 	ps.ind_level_follow = ps.s_ind_level[ps.tos];
301 	return true;
302 
303     default:
304 	return false;
305     }
306 }
307 
308 /*
309  * Repeatedly try to reduce the top two symbols on the parse stack to a
310  * single symbol, until no more reductions are possible.
311  */
312 static void
313 reduce(void)
314 {
315 again:
316     if (ps.s_sym[ps.tos] == psym_stmt && reduce_stmt())
317 	goto again;
318     if (ps.s_sym[ps.tos] == psym_while_expr &&
319 	    ps.s_sym[ps.tos - 1] == psym_do_stmt) {
320 	ps.tos -= 2;
321 	goto again;
322     }
323 }
324