1*783Speter /* Copyright (c) 1979 Regents of the University of California */
2*783Speter 
3*783Speter static	char sccsid[] = "@(#)yycosts.c 1.1 08/27/80";
4*783Speter 
5*783Speter #include "whoami.h"
6*783Speter #include "0.h"
7*783Speter #include "yy.h"
8*783Speter 
9*783Speter /*
10*783Speter  * Symbol costs for Pascal.
11*783Speter  *
12*783Speter  * Cost strategy of August 14, 1977.
13*783Speter  *
14*783Speter  * The costs determined by the routines in this file are used by
15*783Speter  * the recovery in choosing appropriate corrections.
16*783Speter  * The cost vectors and the error productions in the grammar
17*783Speter  * work together to define the corrective capacity of the grammar.
18*783Speter  *
19*783Speter  * The costs here largely derive from those given in Steve Rhode's
20*783Speter  * thesis for the Pascal-I error correcting parser which he implemented.
21*783Speter  * Some minor changes have been made to adjust for the fact that
22*783Speter  * the current error recovery is not as "smart", both because of the
23*783Speter  * limited forward move and because of the lack of any type information
24*783Speter  * about identifiers.
25*783Speter  *
26*783Speter  * These adjustments largely take the form of increased costs for certain
27*783Speter  * tokens, noticeably keywords which are major brackets such as "begin"
28*783Speter  * "label", "procedure", etc.
29*783Speter  *
30*783Speter  * The overall weighting strategy is still similar to Rhodes' strategy.
31*783Speter  * The costs can be considered:
32*783Speter  *
33*783Speter  *	LOW	<= 3
34*783Speter  *	MEDIUM	4 or 5
35*783Speter  *	HIGH	>= 6
36*783Speter  */
37*783Speter 
38*783Speter /*
39*783Speter  * Insertion costs
40*783Speter  *
41*783Speter  * In addition to the normal symbol insertion costs,
42*783Speter  * there are zero cost insertions here.
43*783Speter  * The current error recovery system treats symbols
44*783Speter  * which have zero insertion cost in a special way,
45*783Speter  * inserting them but suppressing diagnostics.
46*783Speter  * This allows the system to hold of on bracketing
47*783Speter  * error diagnostics about missing end's until the
48*783Speter  * reduction occurs which knows the line number of the
49*783Speter  * corresponding "begin", "repeat", etc.
50*783Speter  * A more intelligent and useful diagnostic can then
51*783Speter  * be printed.
52*783Speter  *
53*783Speter  * Although this routine never allows the insertion
54*783Speter  * of the keyword begin, it can be inserted after a
55*783Speter  * procedure or function body starts, if it was omitted
56*783Speter  * by a special case in the panic routine, which notices
57*783Speter  * the keywords in the statement body of the procedure
58*783Speter  * and inserts the begin to recover.
59*783Speter  *
60*783Speter  * Similarly, we do not insert end-of-file, but
61*783Speter  * the fact that end-of-file is the unique input
62*783Speter  * is noticed by the recovery routines as a special
63*783Speter  * case and handled there.
64*783Speter  */
65*783Speter inscost(sy, before)
66*783Speter 	register int sy, before;
67*783Speter {
68*783Speter 
69*783Speter 	switch (before) {
70*783Speter 		case YEND:
71*783Speter 			if (sy == YEND)
72*783Speter 				break;
73*783Speter 		case YPROCEDURE:
74*783Speter 		case YFUNCTION:
75*783Speter 			if (sy == YUNTIL || sy == YEND)
76*783Speter 				return (0);
77*783Speter 	}
78*783Speter 	switch (sy) {
79*783Speter 		case ';':
80*783Speter 			return (1);
81*783Speter 		case ',':
82*783Speter 		case ':':
83*783Speter 		case YOF:
84*783Speter 		case YDO:
85*783Speter 			return (2);
86*783Speter 		case YARRAY:
87*783Speter 		case '+':
88*783Speter 		case '*':
89*783Speter 			return (3);
90*783Speter 		default:
91*783Speter 			return (4);
92*783Speter 		case '^':
93*783Speter 		case YNOT:
94*783Speter 		case YLABEL:
95*783Speter 		case YCONST:
96*783Speter 		case YTYPE:
97*783Speter 		case YVAR:
98*783Speter 		case YUNTIL:
99*783Speter 		case '(':
100*783Speter 		case '[':
101*783Speter 		case YWHILE:
102*783Speter 		case YWITH:
103*783Speter 		case YASSERT:
104*783Speter 			return (5);
105*783Speter 		case YPROCEDURE:
106*783Speter 		case YFUNCTION:
107*783Speter 		case YCASE:
108*783Speter 			return (6);
109*783Speter 		case YEND:
110*783Speter 			return (8);
111*783Speter 		case YBEGIN:
112*783Speter 		case YEOF:
113*783Speter 		case YREPEAT:
114*783Speter 		case YRECORD:
115*783Speter 			return (INFINITY);
116*783Speter 	}
117*783Speter }
118*783Speter 
119*783Speter /*
120*783Speter  * Replacement costs
121*783Speter  *
122*783Speter  * Most replacement costs are the same as an insertion
123*783Speter  * plus a deletion cost.  One special case is the replacement
124*783Speter  * of a large number of keywords by an identifier.
125*783Speter  * These are given lower costs, especially the keyword "to".
126*783Speter  */
127*783Speter repcost(what, with)
128*783Speter 	register int what, with;
129*783Speter {
130*783Speter 	register int c;
131*783Speter 
132*783Speter 	if (with == what)
133*783Speter 		return (INFINITY);
134*783Speter 	if (with == YID && what > ERROR)
135*783Speter 		switch (what) {
136*783Speter 			case YID:
137*783Speter 			case YDOTDOT:
138*783Speter 			case YINT:
139*783Speter 			case YBINT:
140*783Speter 			case YSTRING:
141*783Speter 			case YNUMB:
142*783Speter 				break;
143*783Speter 			case YTO:
144*783Speter 				return (3);
145*783Speter 			default:
146*783Speter 				return (5);
147*783Speter 			case YRECORD:
148*783Speter 			case YTHEN:
149*783Speter 				return (6);
150*783Speter 			case YBEGIN:
151*783Speter 				break;
152*783Speter 		}
153*783Speter 	if (what == ';' && (with == ',' || with == '.'))
154*783Speter 		return (CLIMIT - 1);
155*783Speter 	c = delcost(what) + inscost(with);
156*783Speter 	/*
157*783Speter 	 * It costs extra to replace something which has
158*783Speter 	 * semantics by something which doesn't.
159*783Speter 	 */
160*783Speter 	if (nullsem(what) == NIL && nullsem(with) != NIL)
161*783Speter 		c =+ 4;
162*783Speter 	return (c);
163*783Speter }
164*783Speter 
165*783Speter /*
166*783Speter  * Deletion costs
167*783Speter  */
168*783Speter delcost(what)
169*783Speter 	int what;
170*783Speter {
171*783Speter 
172*783Speter 	switch (what) {
173*783Speter 		case '.':
174*783Speter 		case ':':
175*783Speter 		case ',':
176*783Speter 		case '=':
177*783Speter 		case '(':
178*783Speter 			return (3);
179*783Speter 		case YELSE:
180*783Speter 		case YTHEN:
181*783Speter 			return (4);
182*783Speter 		default:
183*783Speter 			return (5);
184*783Speter 		case YLABEL:
185*783Speter 		case YCONST:
186*783Speter 		case YTYPE:
187*783Speter 		case YVAR:
188*783Speter 			return (10);
189*783Speter 		case YPROCEDURE:
190*783Speter 		case YFUNCTION:
191*783Speter 		case YBEGIN:
192*783Speter 		case YEND:
193*783Speter 			return ((CLIMIT * 3) / 4);
194*783Speter 		case ';':
195*783Speter 		case YEOF:
196*783Speter 			return (INFINITY);
197*783Speter 	}
198*783Speter }
199*783Speter #ifdef DEBUG
200*783Speter 
201*783Speter /*
202*783Speter  * Routine to print out costs with "-K" option.
203*783Speter  */
204*783Speter char	yysyms[]	";,:=*+/-|&()[]<>~^";
205*783Speter 
206*783Speter 
207*783Speter yycosts()
208*783Speter {
209*783Speter 	register int c;
210*783Speter 	register char *cp;
211*783Speter 
212*783Speter 	printf("Insert\tDelete\tRep(ID)\tSymbol\n");
213*783Speter 	for (cp = yysyms; *cp; cp++)
214*783Speter 		yydocost(*cp);
215*783Speter 	for (c = ERROR + 1; c < YLAST; c++)
216*783Speter 		yydocost(c);
217*783Speter #ifdef PXP
218*783Speter 	flush();
219*783Speter #endif
220*783Speter }
221*783Speter 
222*783Speter yydocost(c)
223*783Speter 	int c;
224*783Speter {
225*783Speter 
226*783Speter 	printf("%4d\t", inscost(c, -1));
227*783Speter 	printf("%4d\t", delcost(c));
228*783Speter 	if (repcost(c, YID) != inscost(YID) + delcost(c))
229*783Speter 		printf("%4d", repcost(c, YID));
230*783Speter 	printf("\t%s%s\n", charname(c));
231*783Speter }
232*783Speter #endif
233