xref: /csrg-svn/usr.bin/pascal/src/yycosts.c (revision 30802)
122203Sdist /*
222203Sdist  * Copyright (c) 1980 Regents of the University of California.
322203Sdist  * All rights reserved.  The Berkeley software License Agreement
422203Sdist  * specifies the terms and conditions for redistribution.
522203Sdist  */
6783Speter 
714747Sthien #ifndef lint
8*30802Sbostic static char sccsid[] = "@(#)yycosts.c	5.2 (Berkeley) 04/06/87";
922203Sdist #endif not lint
10783Speter 
11783Speter #include "whoami.h"
12783Speter #include "0.h"
1314747Sthien #include "tree_ty.h"	/* must be included for yy.h */
14783Speter #include "yy.h"
15783Speter 
16783Speter /*
17783Speter  * Symbol costs for Pascal.
18783Speter  *
19783Speter  * Cost strategy of August 14, 1977.
20783Speter  *
21783Speter  * The costs determined by the routines in this file are used by
22783Speter  * the recovery in choosing appropriate corrections.
23783Speter  * The cost vectors and the error productions in the grammar
24783Speter  * work together to define the corrective capacity of the grammar.
25783Speter  *
26783Speter  * The costs here largely derive from those given in Steve Rhode's
27783Speter  * thesis for the Pascal-I error correcting parser which he implemented.
28783Speter  * Some minor changes have been made to adjust for the fact that
29783Speter  * the current error recovery is not as "smart", both because of the
30783Speter  * limited forward move and because of the lack of any type information
31783Speter  * about identifiers.
32783Speter  *
33783Speter  * These adjustments largely take the form of increased costs for certain
34783Speter  * tokens, noticeably keywords which are major brackets such as "begin"
35783Speter  * "label", "procedure", etc.
36783Speter  *
37783Speter  * The overall weighting strategy is still similar to Rhodes' strategy.
38783Speter  * The costs can be considered:
39783Speter  *
40783Speter  *	LOW	<= 3
41783Speter  *	MEDIUM	4 or 5
42783Speter  *	HIGH	>= 6
43783Speter  */
44783Speter 
45783Speter /*
46783Speter  * Insertion costs
47783Speter  *
48783Speter  * In addition to the normal symbol insertion costs,
49783Speter  * there are zero cost insertions here.
50783Speter  * The current error recovery system treats symbols
51783Speter  * which have zero insertion cost in a special way,
52783Speter  * inserting them but suppressing diagnostics.
53783Speter  * This allows the system to hold of on bracketing
54783Speter  * error diagnostics about missing end's until the
55783Speter  * reduction occurs which knows the line number of the
56783Speter  * corresponding "begin", "repeat", etc.
57783Speter  * A more intelligent and useful diagnostic can then
58783Speter  * be printed.
59783Speter  *
60783Speter  * Although this routine never allows the insertion
61783Speter  * of the keyword begin, it can be inserted after a
62783Speter  * procedure or function body starts, if it was omitted
63783Speter  * by a special case in the panic routine, which notices
64783Speter  * the keywords in the statement body of the procedure
65783Speter  * and inserts the begin to recover.
66783Speter  *
67783Speter  * Similarly, we do not insert end-of-file, but
68783Speter  * the fact that end-of-file is the unique input
69783Speter  * is noticed by the recovery routines as a special
70783Speter  * case and handled there.
71783Speter  */
72783Speter inscost(sy, before)
73783Speter 	register int sy, before;
74783Speter {
75783Speter 
76783Speter 	switch (before) {
77783Speter 		case YEND:
78783Speter 			if (sy == YEND)
79783Speter 				break;
80783Speter 		case YPROCEDURE:
81783Speter 		case YFUNCTION:
82783Speter 			if (sy == YUNTIL || sy == YEND)
83783Speter 				return (0);
84783Speter 	}
85783Speter 	switch (sy) {
86783Speter 		case ';':
87783Speter 			return (1);
88783Speter 		case ',':
89783Speter 		case ':':
90783Speter 		case YOF:
91783Speter 		case YDO:
92783Speter 			return (2);
93783Speter 		case YARRAY:
94783Speter 		case '+':
95783Speter 		case '*':
96783Speter 			return (3);
97783Speter 		default:
98783Speter 			return (4);
99783Speter 		case '^':
100783Speter 		case YNOT:
101783Speter 		case YLABEL:
102783Speter 		case YCONST:
103783Speter 		case YTYPE:
104783Speter 		case YVAR:
105783Speter 		case YUNTIL:
106783Speter 		case '(':
107783Speter 		case '[':
108783Speter 		case YWHILE:
109783Speter 		case YWITH:
110783Speter 			return (5);
111783Speter 		case YPROCEDURE:
112783Speter 		case YFUNCTION:
113783Speter 		case YCASE:
114783Speter 			return (6);
115783Speter 		case YEND:
116783Speter 			return (8);
117783Speter 		case YBEGIN:
118783Speter 		case YEOF:
119783Speter 		case YREPEAT:
120783Speter 		case YRECORD:
121783Speter 			return (INFINITY);
122783Speter 	}
123783Speter }
124783Speter 
125783Speter /*
126783Speter  * Replacement costs
127783Speter  *
128783Speter  * Most replacement costs are the same as an insertion
129783Speter  * plus a deletion cost.  One special case is the replacement
130783Speter  * of a large number of keywords by an identifier.
131783Speter  * These are given lower costs, especially the keyword "to".
132783Speter  */
133783Speter repcost(what, with)
134783Speter 	register int what, with;
135783Speter {
136783Speter 	register int c;
137783Speter 
138783Speter 	if (with == what)
139783Speter 		return (INFINITY);
140783Speter 	if (with == YID && what > ERROR)
141783Speter 		switch (what) {
142783Speter 			case YID:
143783Speter 			case YDOTDOT:
144783Speter 			case YINT:
145783Speter 			case YBINT:
146783Speter 			case YSTRING:
147783Speter 			case YNUMB:
148783Speter 				break;
149783Speter 			case YTO:
150783Speter 				return (3);
151783Speter 			default:
152783Speter 				return (5);
153783Speter 			case YRECORD:
154783Speter 			case YTHEN:
155783Speter 				return (6);
156783Speter 			case YBEGIN:
157783Speter 				break;
158783Speter 		}
1597999Smckusick 	if (what == YID && with == YFORWARD)
1607999Smckusick 		return(5);
161783Speter 	if (what == ';' && (with == ',' || with == '.'))
162783Speter 		return (CLIMIT - 1);
16314747Sthien 	c = delcost(what) + inscost(with, what);
164783Speter 	/*
165783Speter 	 * It costs extra to replace something which has
166783Speter 	 * semantics by something which doesn't.
167783Speter 	 */
168783Speter 	if (nullsem(what) == NIL && nullsem(with) != NIL)
1693084Smckusic 		c += 4;
170783Speter 	return (c);
171783Speter }
172783Speter 
173783Speter /*
174783Speter  * Deletion costs
175783Speter  */
176783Speter delcost(what)
177783Speter 	int what;
178783Speter {
179783Speter 
180783Speter 	switch (what) {
181783Speter 		case '.':
182783Speter 		case ':':
183783Speter 		case ',':
184783Speter 		case '=':
185783Speter 		case '(':
186783Speter 			return (3);
187783Speter 		case YELSE:
188783Speter 		case YTHEN:
189783Speter 			return (4);
190783Speter 		default:
191783Speter 			return (5);
192783Speter 		case YLABEL:
193783Speter 		case YCONST:
194783Speter 		case YTYPE:
195783Speter 		case YVAR:
196783Speter 			return (10);
197783Speter 		case YPROCEDURE:
198783Speter 		case YFUNCTION:
199783Speter 		case YBEGIN:
200783Speter 		case YEND:
201783Speter 			return ((CLIMIT * 3) / 4);
202783Speter 		case ';':
203783Speter 		case YEOF:
204783Speter 			return (INFINITY);
205783Speter 	}
206783Speter }
207783Speter #ifdef DEBUG
208783Speter 
209783Speter /*
210783Speter  * Routine to print out costs with "-K" option.
211783Speter  */
2123084Smckusic char	yysyms[] = ";,:=*+/-|&()[]<>~^";
213783Speter 
214783Speter 
215783Speter yycosts()
216783Speter {
217783Speter 	register int c;
218783Speter 	register char *cp;
219783Speter 
220783Speter 	printf("Insert\tDelete\tRep(ID)\tSymbol\n");
221783Speter 	for (cp = yysyms; *cp; cp++)
222783Speter 		yydocost(*cp);
223783Speter 	for (c = ERROR + 1; c < YLAST; c++)
224783Speter 		yydocost(c);
225783Speter #ifdef PXP
226783Speter 	flush();
227783Speter #endif
228783Speter }
229783Speter 
230783Speter yydocost(c)
231783Speter 	int c;
232783Speter {
233783Speter 
234783Speter 	printf("%4d\t", inscost(c, -1));
235783Speter 	printf("%4d\t", delcost(c));
23614747Sthien 	if (repcost(c, YID) != inscost(YID, c) + delcost(c))
237783Speter 		printf("%4d", repcost(c, YID));
238*30802Sbostic 	printf("\t%s\n", charname(c,1));
239783Speter }
240783Speter #endif
241