xref: /netbsd-src/usr.bin/indent/parse.c (revision 0e2e28bced52bda3788c857106bde6c44d2df3b8)
1 /*	$NetBSD: parse.c,v 1.79 2023/06/18 06:56:32 rillig Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (c) 1985 Sun Microsystems, Inc.
7  * Copyright (c) 1980, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: parse.c,v 1.79 2023/06/18 06:56:32 rillig Exp $");
42 
43 #include <stdlib.h>
44 
45 #include "indent.h"
46 
47 /*
48  * Try to combine the statement on the top of the parse stack with the symbol
49  * directly below it, replacing these two symbols with a single symbol.
50  */
51 static bool
52 psyms_reduce_stmt(void)
53 {
54 	switch (ps.psyms.sym[ps.psyms.len - 2]) {
55 
56 	case psym_stmt:
57 		ps.psyms.len--;
58 		ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
59 		return true;
60 
61 	case psym_do:
62 		ps.psyms.len--;
63 		ps.psyms.sym[ps.psyms.len - 1] = psym_do_stmt;
64 		ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
65 		return true;
66 
67 	case psym_if_expr:
68 		ps.psyms.len--;
69 		ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt;
70 		/* For the time being, assume that there is no 'else' on this
71 		 * 'if', and set the indentation level accordingly. If an
72 		 * 'else' is scanned, it will be fixed up later. */
73 		size_t i = ps.psyms.len - 2;
74 		while (ps.psyms.sym[i] != psym_stmt
75 		    && ps.psyms.sym[i] != psym_lbrace_block)
76 			i--;
77 		ps.ind_level_follow = ps.psyms.ind_level[i];
78 		return true;
79 
80 	case psym_switch_expr:
81 	case psym_decl:
82 	case psym_if_expr_stmt_else:
83 	case psym_for_exprs:
84 	case psym_while_expr:
85 		ps.psyms.len--;
86 		ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
87 		ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
88 		return true;
89 
90 	default:
91 		return false;
92 	}
93 }
94 
95 static int
96 left_justify_decl_level(void)
97 {
98 	int level = 0;
99 	for (size_t i = ps.psyms.len - 2; i > 0; i--)
100 		if (ps.psyms.sym[i] == psym_decl)
101 			level++;
102 	return level;
103 }
104 
105 void
106 ps_push(parser_symbol psym, bool follow)
107 {
108 	if (ps.psyms.len == ps.psyms.cap) {
109 		ps.psyms.cap += 16;
110 		ps.psyms.sym = nonnull(realloc(ps.psyms.sym,
111 			sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
112 		ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level,
113 			sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
114 	}
115 	ps.psyms.len++;
116 	ps.psyms.sym[ps.psyms.len - 1] = psym;
117 	ps.psyms.ind_level[ps.psyms.len - 1] =
118 	    follow ? ps.ind_level_follow : ps.ind_level;
119 }
120 
121 /*
122  * Repeatedly try to reduce the top two symbols on the parse stack to a single
123  * symbol, until no more reductions are possible.
124  */
125 static void
126 psyms_reduce(void)
127 {
128 again:
129 	if (ps.psyms.len >= 2 && ps.psyms.sym[ps.psyms.len - 1] == psym_stmt
130 	    && psyms_reduce_stmt())
131 		goto again;
132 	if (ps.psyms.sym[ps.psyms.len - 1] == psym_while_expr &&
133 	    ps.psyms.sym[ps.psyms.len - 2] == psym_do_stmt) {
134 		ps.psyms.len -= 2;
135 		goto again;
136 	}
137 }
138 
139 static bool
140 is_lbrace(parser_symbol psym)
141 {
142 	return psym == psym_lbrace_block
143 	    || psym == psym_lbrace_struct
144 	    || psym == psym_lbrace_union
145 	    || psym == psym_lbrace_enum;
146 }
147 
148 /*
149  * Shift the token onto the parser stack, then try to reduce it by combining it with
150  * previous tokens.
151  */
152 void
153 parse(parser_symbol psym)
154 {
155 	debug_blank_line();
156 	debug_println("parse token: %s", psym_name[psym]);
157 
158 	if (psym != psym_else) {
159 		while (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt) {
160 			ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
161 			psyms_reduce();
162 		}
163 	}
164 
165 	switch (psym) {
166 
167 	case psym_lbrace_block:
168 	case psym_lbrace_struct:
169 	case psym_lbrace_union:
170 	case psym_lbrace_enum:
171 		ps.break_after_comma = false;
172 		if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl
173 		    || ps.psyms.sym[ps.psyms.len - 1] == psym_stmt)
174 			ps.ind_level_follow++;
175 		else if (code.len == 0) {
176 			/* It is part of a while, for, etc. */
177 			ps.ind_level--;
178 
179 			/* for a switch, brace should be two levels out from
180 			 * the code */
181 			if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr
182 			    && opt.case_indent >= 1.0F)
183 				ps.ind_level--;
184 		}
185 
186 		ps_push(psym, false);
187 		ps_push(psym_stmt, true);
188 		break;
189 
190 	case psym_rbrace:
191 		/* stack should have <lbrace> <stmt> or <lbrace> <decl> */
192 		if (!(ps.psyms.len >= 2
193 			&& is_lbrace(ps.psyms.sym[ps.psyms.len - 2]))) {
194 			diag(1, "Statement nesting error");
195 			break;
196 		}
197 		ps.psyms.len--;
198 		ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
199 		ps.ind_level_follow = ps.ind_level;
200 		ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
201 		break;
202 
203 	case psym_decl:
204 		if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl)
205 			break;	/* only put one declaration onto stack */
206 
207 		ps.break_after_comma = true;
208 		ps_push(psym_decl, true);
209 
210 		if (opt.left_justify_decl) {
211 			ps.ind_level = left_justify_decl_level();
212 			ps.ind_level_follow = ps.ind_level;
213 		}
214 		break;
215 
216 	case psym_stmt:
217 		ps.break_after_comma = false;
218 		ps_push(psym_stmt, false);
219 		break;
220 
221 	case psym_if_expr:
222 		if (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt_else
223 		    && opt.else_if_in_same_line) {
224 			ps.ind_level_follow =
225 			    ps.psyms.ind_level[ps.psyms.len - 1];
226 			ps.psyms.len--;
227 		}
228 		/* FALLTHROUGH */
229 	case psym_do:
230 	case psym_for_exprs:
231 		ps.ind_level = ps.ind_level_follow;
232 		ps.ind_level_follow = ps.ind_level + 1;
233 		ps_push(psym, false);
234 		break;
235 
236 	case psym_else:
237 		if (ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt) {
238 			diag(1, "Unmatched 'else'");
239 			break;
240 		}
241 		ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
242 		ps.ind_level_follow = ps.ind_level + 1;
243 		ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt_else;
244 		break;
245 
246 	case psym_switch_expr:
247 		ps_push(psym_switch_expr, true);
248 		ps.ind_level_follow += (int)opt.case_indent + 1;
249 		break;
250 
251 	case psym_while_expr:
252 		if (ps.psyms.sym[ps.psyms.len - 1] == psym_do_stmt) {
253 			ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
254 			ps.ind_level_follow = ps.ind_level;
255 			ps_push(psym_while_expr, false);
256 		} else {
257 			ps_push(psym_while_expr, true);
258 			ps.ind_level_follow++;
259 		}
260 		break;
261 
262 	default:
263 		diag(1, "Unknown code to parser");
264 		return;
265 	}
266 
267 	debug_psyms_stack("before reduction");
268 	psyms_reduce();
269 	debug_psyms_stack("after reduction");
270 }
271