xref: /csrg-svn/usr.bin/pascal/src/yycosts.c (revision 62223)
148116Sbostic /*-
2*62223Sbostic  * Copyright (c) 1980, 1993
3*62223Sbostic  *	The Regents of the University of California.  All rights reserved.
448116Sbostic  *
548116Sbostic  * %sccs.include.redist.c%
622203Sdist  */
7783Speter 
814747Sthien #ifndef lint
9*62223Sbostic static char sccsid[] = "@(#)yycosts.c	8.1 (Berkeley) 06/06/93";
1048116Sbostic #endif /* not lint */
11783Speter 
12783Speter #include "whoami.h"
13783Speter #include "0.h"
1414747Sthien #include "tree_ty.h"	/* must be included for yy.h */
15783Speter #include "yy.h"
16783Speter 
17783Speter /*
18783Speter  * Symbol costs for Pascal.
19783Speter  *
20783Speter  * Cost strategy of August 14, 1977.
21783Speter  *
22783Speter  * The costs determined by the routines in this file are used by
23783Speter  * the recovery in choosing appropriate corrections.
24783Speter  * The cost vectors and the error productions in the grammar
25783Speter  * work together to define the corrective capacity of the grammar.
26783Speter  *
27783Speter  * The costs here largely derive from those given in Steve Rhode's
28783Speter  * thesis for the Pascal-I error correcting parser which he implemented.
29783Speter  * Some minor changes have been made to adjust for the fact that
30783Speter  * the current error recovery is not as "smart", both because of the
31783Speter  * limited forward move and because of the lack of any type information
32783Speter  * about identifiers.
33783Speter  *
34783Speter  * These adjustments largely take the form of increased costs for certain
35783Speter  * tokens, noticeably keywords which are major brackets such as "begin"
36783Speter  * "label", "procedure", etc.
37783Speter  *
38783Speter  * The overall weighting strategy is still similar to Rhodes' strategy.
39783Speter  * The costs can be considered:
40783Speter  *
41783Speter  *	LOW	<= 3
42783Speter  *	MEDIUM	4 or 5
43783Speter  *	HIGH	>= 6
44783Speter  */
45783Speter 
46783Speter /*
47783Speter  * Insertion costs
48783Speter  *
49783Speter  * In addition to the normal symbol insertion costs,
50783Speter  * there are zero cost insertions here.
51783Speter  * The current error recovery system treats symbols
52783Speter  * which have zero insertion cost in a special way,
53783Speter  * inserting them but suppressing diagnostics.
54783Speter  * This allows the system to hold of on bracketing
55783Speter  * error diagnostics about missing end's until the
56783Speter  * reduction occurs which knows the line number of the
57783Speter  * corresponding "begin", "repeat", etc.
58783Speter  * A more intelligent and useful diagnostic can then
59783Speter  * be printed.
60783Speter  *
61783Speter  * Although this routine never allows the insertion
62783Speter  * of the keyword begin, it can be inserted after a
63783Speter  * procedure or function body starts, if it was omitted
64783Speter  * by a special case in the panic routine, which notices
65783Speter  * the keywords in the statement body of the procedure
66783Speter  * and inserts the begin to recover.
67783Speter  *
68783Speter  * Similarly, we do not insert end-of-file, but
69783Speter  * the fact that end-of-file is the unique input
70783Speter  * is noticed by the recovery routines as a special
71783Speter  * case and handled there.
72783Speter  */
inscost(sy,before)73783Speter inscost(sy, before)
74783Speter 	register int sy, before;
75783Speter {
76783Speter 
77783Speter 	switch (before) {
78783Speter 		case YEND:
79783Speter 			if (sy == YEND)
80783Speter 				break;
81783Speter 		case YPROCEDURE:
82783Speter 		case YFUNCTION:
83783Speter 			if (sy == YUNTIL || sy == YEND)
84783Speter 				return (0);
85783Speter 	}
86783Speter 	switch (sy) {
87783Speter 		case ';':
88783Speter 			return (1);
89783Speter 		case ',':
90783Speter 		case ':':
91783Speter 		case YOF:
92783Speter 		case YDO:
93783Speter 			return (2);
94783Speter 		case YARRAY:
95783Speter 		case '+':
96783Speter 		case '*':
97783Speter 			return (3);
98783Speter 		default:
99783Speter 			return (4);
100783Speter 		case '^':
101783Speter 		case YNOT:
102783Speter 		case YLABEL:
103783Speter 		case YCONST:
104783Speter 		case YTYPE:
105783Speter 		case YVAR:
106783Speter 		case YUNTIL:
107783Speter 		case '(':
108783Speter 		case '[':
109783Speter 		case YWHILE:
110783Speter 		case YWITH:
111783Speter 			return (5);
112783Speter 		case YPROCEDURE:
113783Speter 		case YFUNCTION:
114783Speter 		case YCASE:
115783Speter 			return (6);
116783Speter 		case YEND:
117783Speter 			return (8);
118783Speter 		case YBEGIN:
119783Speter 		case YEOF:
120783Speter 		case YREPEAT:
121783Speter 		case YRECORD:
122783Speter 			return (INFINITY);
123783Speter 	}
124783Speter }
125783Speter 
126783Speter /*
127783Speter  * Replacement costs
128783Speter  *
129783Speter  * Most replacement costs are the same as an insertion
130783Speter  * plus a deletion cost.  One special case is the replacement
131783Speter  * of a large number of keywords by an identifier.
132783Speter  * These are given lower costs, especially the keyword "to".
133783Speter  */
repcost(what,with)134783Speter repcost(what, with)
135783Speter 	register int what, with;
136783Speter {
137783Speter 	register int c;
138783Speter 
139783Speter 	if (with == what)
140783Speter 		return (INFINITY);
141783Speter 	if (with == YID && what > ERROR)
142783Speter 		switch (what) {
143783Speter 			case YID:
144783Speter 			case YDOTDOT:
145783Speter 			case YINT:
146783Speter 			case YBINT:
147783Speter 			case YSTRING:
148783Speter 			case YNUMB:
149783Speter 				break;
150783Speter 			case YTO:
151783Speter 				return (3);
152783Speter 			default:
153783Speter 				return (5);
154783Speter 			case YRECORD:
155783Speter 			case YTHEN:
156783Speter 				return (6);
157783Speter 			case YBEGIN:
158783Speter 				break;
159783Speter 		}
1607999Smckusick 	if (what == YID && with == YFORWARD)
1617999Smckusick 		return(5);
162783Speter 	if (what == ';' && (with == ',' || with == '.'))
163783Speter 		return (CLIMIT - 1);
16414747Sthien 	c = delcost(what) + inscost(with, what);
165783Speter 	/*
166783Speter 	 * It costs extra to replace something which has
167783Speter 	 * semantics by something which doesn't.
168783Speter 	 */
169783Speter 	if (nullsem(what) == NIL && nullsem(with) != NIL)
1703084Smckusic 		c += 4;
171783Speter 	return (c);
172783Speter }
173783Speter 
174783Speter /*
175783Speter  * Deletion costs
176783Speter  */
delcost(what)177783Speter delcost(what)
178783Speter 	int what;
179783Speter {
180783Speter 
181783Speter 	switch (what) {
182783Speter 		case '.':
183783Speter 		case ':':
184783Speter 		case ',':
185783Speter 		case '=':
186783Speter 		case '(':
187783Speter 			return (3);
188783Speter 		case YELSE:
189783Speter 		case YTHEN:
190783Speter 			return (4);
191783Speter 		default:
192783Speter 			return (5);
193783Speter 		case YLABEL:
194783Speter 		case YCONST:
195783Speter 		case YTYPE:
196783Speter 		case YVAR:
197783Speter 			return (10);
198783Speter 		case YPROCEDURE:
199783Speter 		case YFUNCTION:
200783Speter 		case YBEGIN:
201783Speter 		case YEND:
202783Speter 			return ((CLIMIT * 3) / 4);
203783Speter 		case ';':
204783Speter 		case YEOF:
205783Speter 			return (INFINITY);
206783Speter 	}
207783Speter }
208783Speter #ifdef DEBUG
209783Speter 
210783Speter /*
211783Speter  * Routine to print out costs with "-K" option.
212783Speter  */
2133084Smckusic char	yysyms[] = ";,:=*+/-|&()[]<>~^";
214783Speter 
215783Speter 
yycosts()216783Speter yycosts()
217783Speter {
218783Speter 	register int c;
219783Speter 	register char *cp;
220783Speter 
221783Speter 	printf("Insert\tDelete\tRep(ID)\tSymbol\n");
222783Speter 	for (cp = yysyms; *cp; cp++)
223783Speter 		yydocost(*cp);
224783Speter 	for (c = ERROR + 1; c < YLAST; c++)
225783Speter 		yydocost(c);
226783Speter #ifdef PXP
227783Speter 	flush();
228783Speter #endif
229783Speter }
230783Speter 
yydocost(c)231783Speter yydocost(c)
232783Speter 	int c;
233783Speter {
234783Speter 
235783Speter 	printf("%4d\t", inscost(c, -1));
236783Speter 	printf("%4d\t", delcost(c));
23714747Sthien 	if (repcost(c, YID) != inscost(YID, c) + delcost(c))
238783Speter 		printf("%4d", repcost(c, YID));
23930802Sbostic 	printf("\t%s\n", charname(c,1));
240783Speter }
241783Speter #endif
242