xref: /netbsd-src/usr.bin/indent/parse.c (revision 53d1339bf7f9c7367b35a9e1ebe693f9b047a47b)
1 /*	$NetBSD: parse.c,v 1.18 2021/03/12 23:10:18 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if 0
41 #ifndef lint
42 static char sccsid[] = "@(#)parse.c	8.1 (Berkeley) 6/6/93";
43 #endif /* not lint */
44 #endif
45 
46 #include <sys/cdefs.h>
47 #ifndef lint
48 #if defined(__NetBSD__)
49 __RCSID("$FreeBSD$");
50 #else
51 __FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $");
52 #endif
53 #endif
54 
55 #include <err.h>
56 #include <stdio.h>
57 
58 #include "indent.h"
59 
60 static void reduce(void);
61 
62 void
63 parse(token_type tk)		/* tk: the code for the construct scanned */
64 {
65     int         i;
66 
67 #ifdef debug
68     printf("parse token: '%s' \"%s\"\n", token_type_name(tk), token);
69 #endif
70 
71     while (ps.p_stack[ps.tos] == if_expr_stmt && tk != keyword_else) {
72 	/* true if we have an if without an else */
73 	ps.p_stack[ps.tos] = stmt;	/* apply the if(..) stmt ::= stmt
74 					 * reduction */
75 	reduce();		/* see if this allows any reduction */
76     }
77 
78 
79     switch (tk) {		/* go on and figure out what to do with the
80 				 * input */
81 
82     case decl:			/* scanned a declaration word */
83 	ps.search_brace = opt.btype_2;
84 	/* indicate that following brace should be on same line */
85 	if (ps.p_stack[ps.tos] != decl) {	/* only put one declaration
86 						 * onto stack */
87 	    break_comma = true;	/* while in declaration, newline should be
88 				 * forced after comma */
89 	    ps.p_stack[++ps.tos] = decl;
90 	    ps.il[ps.tos] = ps.i_l_follow;
91 
92 	    if (opt.ljust_decl) {/* only do if we want left justified
93 				 * declarations */
94 		ps.ind_level = 0;
95 		for (i = ps.tos - 1; i > 0; --i)
96 		    if (ps.p_stack[i] == decl)
97 			++ps.ind_level;	/* indentation is number of
98 					 * declaration levels deep we are */
99 		ps.i_l_follow = ps.ind_level;
100 	    }
101 	}
102 	break;
103 
104     case if_expr:		/* 'if' '(' <expr> ')' */
105 	if (ps.p_stack[ps.tos] == if_expr_stmt_else && opt.else_if) {
106 	    /*
107 	     * Note that the stack pointer here is decremented, effectively
108 	     * reducing "else if" to "if". This saves a lot of stack space
109 	     * in case of a long "if-else-if ... else-if" sequence.
110 	     */
111 	    ps.i_l_follow = ps.il[ps.tos--];
112 	}
113 	/* the rest is the same as for keyword_do and for_exprs */
114 	/* FALLTHROUGH */
115     case keyword_do:		/* 'do' */
116     case for_exprs:		/* 'for' (...) */
117 	ps.p_stack[++ps.tos] = tk;
118 	ps.il[ps.tos] = ps.ind_level = ps.i_l_follow;
119 	++ps.i_l_follow;	/* subsequent statements should be indented 1 */
120 	ps.search_brace = opt.btype_2;
121 	break;
122 
123     case lbrace:		/* scanned { */
124 	break_comma = false;	/* don't break comma in an initial list */
125 	if (ps.p_stack[ps.tos] == stmt || ps.p_stack[ps.tos] == decl
126 		|| ps.p_stack[ps.tos] == stmt_list)
127 	    ++ps.i_l_follow;	/* it is a random, isolated stmt group or a
128 				 * declaration */
129 	else {
130 	    if (s_code == e_code) {
131 		/*
132 		 * only do this if there is nothing on the line
133 		 */
134 		--ps.ind_level;
135 		/*
136 		 * it is a group as part of a while, for, etc.
137 		 */
138 		if (ps.p_stack[ps.tos] == switch_expr && opt.case_indent >= 1)
139 		    --ps.ind_level;
140 		/*
141 		 * for a switch, brace should be two levels out from the code
142 		 */
143 	    }
144 	}
145 
146 	ps.p_stack[++ps.tos] = lbrace;
147 	ps.il[ps.tos] = ps.ind_level;
148 	ps.p_stack[++ps.tos] = stmt;
149 	/* allow null stmt between braces */
150 	ps.il[ps.tos] = ps.i_l_follow;
151 	break;
152 
153     case while_expr:		/* 'while' '(' <expr> ')' */
154 	if (ps.p_stack[ps.tos] == do_stmt) {
155 	    /* it is matched with do stmt */
156 	    ps.ind_level = ps.i_l_follow = ps.il[ps.tos];
157 	    ps.p_stack[++ps.tos] = while_expr;
158 	    ps.il[ps.tos] = ps.ind_level = ps.i_l_follow;
159 	} else {		/* it is a while loop */
160 	    ps.p_stack[++ps.tos] = while_expr;
161 	    ps.il[ps.tos] = ps.i_l_follow;
162 	    ++ps.i_l_follow;
163 	    ps.search_brace = opt.btype_2;
164 	}
165 
166 	break;
167 
168     case keyword_else:
169 	if (ps.p_stack[ps.tos] != if_expr_stmt)
170 	    diag(1, "Unmatched 'else'");
171 	else {
172 	    ps.ind_level = ps.il[ps.tos];	/* indentation for else should
173 						 * be same as for if */
174 	    ps.i_l_follow = ps.ind_level + 1;	/* everything following should
175 						 * be in 1 level */
176 	    ps.p_stack[ps.tos] = if_expr_stmt_else;
177 	    /* remember if with else */
178 	    ps.search_brace = opt.btype_2 | opt.else_if;
179 	}
180 	break;
181 
182     case rbrace:		/* scanned a } */
183 	/* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
184 	if (ps.tos > 0 && ps.p_stack[ps.tos - 1] == lbrace) {
185 	    ps.ind_level = ps.i_l_follow = ps.il[--ps.tos];
186 	    ps.p_stack[ps.tos] = stmt;
187 	} else
188 	    diag(1, "Statement nesting error");
189 	break;
190 
191     case switch_expr:		/* had switch (...) */
192 	ps.p_stack[++ps.tos] = switch_expr;
193 	ps.cstk[ps.tos] = case_ind;
194 	/* save current case indent level */
195 	ps.il[ps.tos] = ps.i_l_follow;
196 	case_ind = ps.i_l_follow + opt.case_indent;	/* cases should be one
197 							 * level down from
198 							 * switch */
199 	ps.i_l_follow += opt.case_indent + 1;	/* statements should be two
200 						 * levels in */
201 	ps.search_brace = opt.btype_2;
202 	break;
203 
204     case semicolon:		/* this indicates a simple stmt */
205 	break_comma = false;	/* turn off flag to break after commas in a
206 				 * declaration */
207 	ps.p_stack[++ps.tos] = stmt;
208 	ps.il[ps.tos] = ps.ind_level;
209 	break;
210 
211     default:			/* this is an error */
212 	diag(1, "Unknown code to parser");
213 	return;
214 
215 
216     }				/* end of switch */
217 
218     if (ps.tos >= STACKSIZE - 1)
219 	errx(1, "Parser stack overflow");
220 
221     reduce();			/* see if any reduction can be done */
222 
223 #ifdef debug
224     printf("parse stack:");
225     for (i = 1; i <= ps.tos; ++i)
226 	printf(" ('%s' at %d)", token_type_name(ps.p_stack[i]), ps.il[i]);
227     if (ps.tos == 0)
228         printf(" empty");
229     printf("\n");
230 #endif
231 }
232 
233 /*----------------------------------------------*\
234 |   REDUCTION PHASE				 |
235 \*----------------------------------------------*/
236 
237 /*
238  * Try to combine the statement on the top of the parse stack with the symbol
239  * directly below it, replacing these two symbols with a single symbol.
240  */
241 static int
242 reduce_stmt(void)
243 {
244     switch (ps.p_stack[ps.tos - 1]) {
245 
246     case stmt:		/* stmt stmt */
247     case stmt_list:	/* stmt_list stmt */
248 	ps.p_stack[--ps.tos] = stmt_list;
249 	return true;
250 
251     case keyword_do:	/* 'do' <stmt> */
252 	ps.p_stack[--ps.tos] = do_stmt;
253 	ps.i_l_follow = ps.il[ps.tos];
254 	return true;
255 
256     case if_expr:	/* 'if' '(' <expr> ')' <stmt> */
257 	ps.p_stack[--ps.tos] = if_expr_stmt;
258 	int i = ps.tos - 1;
259 	while (ps.p_stack[i] != stmt &&
260 	       ps.p_stack[i] != stmt_list &&
261 	       ps.p_stack[i] != lbrace)
262 	    --i;
263 	ps.i_l_follow = ps.il[i];
264 	/*
265 	 * for the time being, we will assume that there is no else on
266 	 * this if, and set the indentation level accordingly. If an
267 	 * else is scanned, it will be fixed up later
268 	 */
269 	return true;
270 
271     case switch_expr:	/* 'switch' '(' <expr> ')' <stmt> */
272 	case_ind = ps.cstk[ps.tos - 1];
273 	/* FALLTHROUGH */
274     case decl:		/* finish of a declaration */
275     case if_expr_stmt_else: /* 'if' '(' <expr> ')' <stmt> 'else' <stmt> */
276     case for_exprs:	/* 'for' '(' ... ')' <stmt> */
277     case while_expr:	/* 'while' '(' <expr> ')' <stmt> */
278 	ps.p_stack[--ps.tos] = stmt;
279 	ps.i_l_follow = ps.il[ps.tos];
280 	return true;
281 
282     default:		/* <anything else> <stmt> */
283 	return false;
284     }
285 }
286 
287 /*
288  * Repeatedly try to reduce the top two symbols on the parse stack to a
289  * single symbol, until no more reductions are possible.
290  *
291  * On each reduction, ps.i_l_follow (the indentation for the following line)
292  * is set to the indentation level associated with the old TOS.
293  */
294 static void
295 reduce(void)
296 {
297 again:
298     if (ps.p_stack[ps.tos] == stmt) {
299 	if (reduce_stmt())
300 	    goto again;
301     } else if (ps.p_stack[ps.tos] == while_expr) {
302 	if (ps.p_stack[ps.tos - 1] == do_stmt) {
303 	    ps.tos -= 2;
304 	    goto again;
305 	}
306     }
307 }
308