xref: /csrg-svn/usr.bin/pascal/src/yycosts.c (revision 14747)
1783Speter /* Copyright (c) 1979 Regents of the University of California */
2783Speter 
3*14747Sthien #ifndef lint
4*14747Sthien static char sccsid[] = "@(#)yycosts.c 1.5 08/19/83";
5*14747Sthien #endif
6783Speter 
7783Speter #include "whoami.h"
8783Speter #include "0.h"
9*14747Sthien #include "tree_ty.h"	/* must be included for yy.h */
10783Speter #include "yy.h"
11783Speter 
12783Speter /*
13783Speter  * Symbol costs for Pascal.
14783Speter  *
15783Speter  * Cost strategy of August 14, 1977.
16783Speter  *
17783Speter  * The costs determined by the routines in this file are used by
18783Speter  * the recovery in choosing appropriate corrections.
19783Speter  * The cost vectors and the error productions in the grammar
20783Speter  * work together to define the corrective capacity of the grammar.
21783Speter  *
22783Speter  * The costs here largely derive from those given in Steve Rhode's
23783Speter  * thesis for the Pascal-I error correcting parser which he implemented.
24783Speter  * Some minor changes have been made to adjust for the fact that
25783Speter  * the current error recovery is not as "smart", both because of the
26783Speter  * limited forward move and because of the lack of any type information
27783Speter  * about identifiers.
28783Speter  *
29783Speter  * These adjustments largely take the form of increased costs for certain
30783Speter  * tokens, noticeably keywords which are major brackets such as "begin"
31783Speter  * "label", "procedure", etc.
32783Speter  *
33783Speter  * The overall weighting strategy is still similar to Rhodes' strategy.
34783Speter  * The costs can be considered:
35783Speter  *
36783Speter  *	LOW	<= 3
37783Speter  *	MEDIUM	4 or 5
38783Speter  *	HIGH	>= 6
39783Speter  */
40783Speter 
41783Speter /*
42783Speter  * Insertion costs
43783Speter  *
44783Speter  * In addition to the normal symbol insertion costs,
45783Speter  * there are zero cost insertions here.
46783Speter  * The current error recovery system treats symbols
47783Speter  * which have zero insertion cost in a special way,
48783Speter  * inserting them but suppressing diagnostics.
49783Speter  * This allows the system to hold of on bracketing
50783Speter  * error diagnostics about missing end's until the
51783Speter  * reduction occurs which knows the line number of the
52783Speter  * corresponding "begin", "repeat", etc.
53783Speter  * A more intelligent and useful diagnostic can then
54783Speter  * be printed.
55783Speter  *
56783Speter  * Although this routine never allows the insertion
57783Speter  * of the keyword begin, it can be inserted after a
58783Speter  * procedure or function body starts, if it was omitted
59783Speter  * by a special case in the panic routine, which notices
60783Speter  * the keywords in the statement body of the procedure
61783Speter  * and inserts the begin to recover.
62783Speter  *
63783Speter  * Similarly, we do not insert end-of-file, but
64783Speter  * the fact that end-of-file is the unique input
65783Speter  * is noticed by the recovery routines as a special
66783Speter  * case and handled there.
67783Speter  */
68783Speter inscost(sy, before)
69783Speter 	register int sy, before;
70783Speter {
71783Speter 
72783Speter 	switch (before) {
73783Speter 		case YEND:
74783Speter 			if (sy == YEND)
75783Speter 				break;
76783Speter 		case YPROCEDURE:
77783Speter 		case YFUNCTION:
78783Speter 			if (sy == YUNTIL || sy == YEND)
79783Speter 				return (0);
80783Speter 	}
81783Speter 	switch (sy) {
82783Speter 		case ';':
83783Speter 			return (1);
84783Speter 		case ',':
85783Speter 		case ':':
86783Speter 		case YOF:
87783Speter 		case YDO:
88783Speter 			return (2);
89783Speter 		case YARRAY:
90783Speter 		case '+':
91783Speter 		case '*':
92783Speter 			return (3);
93783Speter 		default:
94783Speter 			return (4);
95783Speter 		case '^':
96783Speter 		case YNOT:
97783Speter 		case YLABEL:
98783Speter 		case YCONST:
99783Speter 		case YTYPE:
100783Speter 		case YVAR:
101783Speter 		case YUNTIL:
102783Speter 		case '(':
103783Speter 		case '[':
104783Speter 		case YWHILE:
105783Speter 		case YWITH:
106783Speter 			return (5);
107783Speter 		case YPROCEDURE:
108783Speter 		case YFUNCTION:
109783Speter 		case YCASE:
110783Speter 			return (6);
111783Speter 		case YEND:
112783Speter 			return (8);
113783Speter 		case YBEGIN:
114783Speter 		case YEOF:
115783Speter 		case YREPEAT:
116783Speter 		case YRECORD:
117783Speter 			return (INFINITY);
118783Speter 	}
119783Speter }
120783Speter 
121783Speter /*
122783Speter  * Replacement costs
123783Speter  *
124783Speter  * Most replacement costs are the same as an insertion
125783Speter  * plus a deletion cost.  One special case is the replacement
126783Speter  * of a large number of keywords by an identifier.
127783Speter  * These are given lower costs, especially the keyword "to".
128783Speter  */
129783Speter repcost(what, with)
130783Speter 	register int what, with;
131783Speter {
132783Speter 	register int c;
133783Speter 
134783Speter 	if (with == what)
135783Speter 		return (INFINITY);
136783Speter 	if (with == YID && what > ERROR)
137783Speter 		switch (what) {
138783Speter 			case YID:
139783Speter 			case YDOTDOT:
140783Speter 			case YINT:
141783Speter 			case YBINT:
142783Speter 			case YSTRING:
143783Speter 			case YNUMB:
144783Speter 				break;
145783Speter 			case YTO:
146783Speter 				return (3);
147783Speter 			default:
148783Speter 				return (5);
149783Speter 			case YRECORD:
150783Speter 			case YTHEN:
151783Speter 				return (6);
152783Speter 			case YBEGIN:
153783Speter 				break;
154783Speter 		}
1557999Smckusick 	if (what == YID && with == YFORWARD)
1567999Smckusick 		return(5);
157783Speter 	if (what == ';' && (with == ',' || with == '.'))
158783Speter 		return (CLIMIT - 1);
159*14747Sthien 	c = delcost(what) + inscost(with, what);
160783Speter 	/*
161783Speter 	 * It costs extra to replace something which has
162783Speter 	 * semantics by something which doesn't.
163783Speter 	 */
164783Speter 	if (nullsem(what) == NIL && nullsem(with) != NIL)
1653084Smckusic 		c += 4;
166783Speter 	return (c);
167783Speter }
168783Speter 
169783Speter /*
170783Speter  * Deletion costs
171783Speter  */
172783Speter delcost(what)
173783Speter 	int what;
174783Speter {
175783Speter 
176783Speter 	switch (what) {
177783Speter 		case '.':
178783Speter 		case ':':
179783Speter 		case ',':
180783Speter 		case '=':
181783Speter 		case '(':
182783Speter 			return (3);
183783Speter 		case YELSE:
184783Speter 		case YTHEN:
185783Speter 			return (4);
186783Speter 		default:
187783Speter 			return (5);
188783Speter 		case YLABEL:
189783Speter 		case YCONST:
190783Speter 		case YTYPE:
191783Speter 		case YVAR:
192783Speter 			return (10);
193783Speter 		case YPROCEDURE:
194783Speter 		case YFUNCTION:
195783Speter 		case YBEGIN:
196783Speter 		case YEND:
197783Speter 			return ((CLIMIT * 3) / 4);
198783Speter 		case ';':
199783Speter 		case YEOF:
200783Speter 			return (INFINITY);
201783Speter 	}
202783Speter }
203783Speter #ifdef DEBUG
204783Speter 
205783Speter /*
206783Speter  * Routine to print out costs with "-K" option.
207783Speter  */
2083084Smckusic char	yysyms[] = ";,:=*+/-|&()[]<>~^";
209783Speter 
210783Speter 
211783Speter yycosts()
212783Speter {
213783Speter 	register int c;
214783Speter 	register char *cp;
215783Speter 
216783Speter 	printf("Insert\tDelete\tRep(ID)\tSymbol\n");
217783Speter 	for (cp = yysyms; *cp; cp++)
218783Speter 		yydocost(*cp);
219783Speter 	for (c = ERROR + 1; c < YLAST; c++)
220783Speter 		yydocost(c);
221783Speter #ifdef PXP
222783Speter 	flush();
223783Speter #endif
224783Speter }
225783Speter 
226783Speter yydocost(c)
227783Speter 	int c;
228783Speter {
229783Speter 
230783Speter 	printf("%4d\t", inscost(c, -1));
231783Speter 	printf("%4d\t", delcost(c));
232*14747Sthien 	if (repcost(c, YID) != inscost(YID, c) + delcost(c))
233783Speter 		printf("%4d", repcost(c, YID));
234*14747Sthien 	printf("\t%s%s\n", charname(c,1));
235783Speter }
236783Speter #endif
237