148116Sbostic /*-
2*62223Sbostic * Copyright (c) 1980, 1993
3*62223Sbostic * The Regents of the University of California. All rights reserved.
448116Sbostic *
548116Sbostic * %sccs.include.redist.c%
622203Sdist */
7783Speter
814747Sthien #ifndef lint
9*62223Sbostic static char sccsid[] = "@(#)yycosts.c 8.1 (Berkeley) 06/06/93";
1048116Sbostic #endif /* not lint */
11783Speter
12783Speter #include "whoami.h"
13783Speter #include "0.h"
1414747Sthien #include "tree_ty.h" /* must be included for yy.h */
15783Speter #include "yy.h"
16783Speter
17783Speter /*
18783Speter * Symbol costs for Pascal.
19783Speter *
20783Speter * Cost strategy of August 14, 1977.
21783Speter *
22783Speter * The costs determined by the routines in this file are used by
23783Speter * the recovery in choosing appropriate corrections.
24783Speter * The cost vectors and the error productions in the grammar
25783Speter * work together to define the corrective capacity of the grammar.
26783Speter *
27783Speter * The costs here largely derive from those given in Steve Rhode's
28783Speter * thesis for the Pascal-I error correcting parser which he implemented.
29783Speter * Some minor changes have been made to adjust for the fact that
30783Speter * the current error recovery is not as "smart", both because of the
31783Speter * limited forward move and because of the lack of any type information
32783Speter * about identifiers.
33783Speter *
34783Speter * These adjustments largely take the form of increased costs for certain
35783Speter * tokens, noticeably keywords which are major brackets such as "begin"
36783Speter * "label", "procedure", etc.
37783Speter *
38783Speter * The overall weighting strategy is still similar to Rhodes' strategy.
39783Speter * The costs can be considered:
40783Speter *
41783Speter * LOW <= 3
42783Speter * MEDIUM 4 or 5
43783Speter * HIGH >= 6
44783Speter */
45783Speter
46783Speter /*
47783Speter * Insertion costs
48783Speter *
49783Speter * In addition to the normal symbol insertion costs,
50783Speter * there are zero cost insertions here.
51783Speter * The current error recovery system treats symbols
52783Speter * which have zero insertion cost in a special way,
53783Speter * inserting them but suppressing diagnostics.
54783Speter * This allows the system to hold of on bracketing
55783Speter * error diagnostics about missing end's until the
56783Speter * reduction occurs which knows the line number of the
57783Speter * corresponding "begin", "repeat", etc.
58783Speter * A more intelligent and useful diagnostic can then
59783Speter * be printed.
60783Speter *
61783Speter * Although this routine never allows the insertion
62783Speter * of the keyword begin, it can be inserted after a
63783Speter * procedure or function body starts, if it was omitted
64783Speter * by a special case in the panic routine, which notices
65783Speter * the keywords in the statement body of the procedure
66783Speter * and inserts the begin to recover.
67783Speter *
68783Speter * Similarly, we do not insert end-of-file, but
69783Speter * the fact that end-of-file is the unique input
70783Speter * is noticed by the recovery routines as a special
71783Speter * case and handled there.
72783Speter */
inscost(sy,before)73783Speter inscost(sy, before)
74783Speter register int sy, before;
75783Speter {
76783Speter
77783Speter switch (before) {
78783Speter case YEND:
79783Speter if (sy == YEND)
80783Speter break;
81783Speter case YPROCEDURE:
82783Speter case YFUNCTION:
83783Speter if (sy == YUNTIL || sy == YEND)
84783Speter return (0);
85783Speter }
86783Speter switch (sy) {
87783Speter case ';':
88783Speter return (1);
89783Speter case ',':
90783Speter case ':':
91783Speter case YOF:
92783Speter case YDO:
93783Speter return (2);
94783Speter case YARRAY:
95783Speter case '+':
96783Speter case '*':
97783Speter return (3);
98783Speter default:
99783Speter return (4);
100783Speter case '^':
101783Speter case YNOT:
102783Speter case YLABEL:
103783Speter case YCONST:
104783Speter case YTYPE:
105783Speter case YVAR:
106783Speter case YUNTIL:
107783Speter case '(':
108783Speter case '[':
109783Speter case YWHILE:
110783Speter case YWITH:
111783Speter return (5);
112783Speter case YPROCEDURE:
113783Speter case YFUNCTION:
114783Speter case YCASE:
115783Speter return (6);
116783Speter case YEND:
117783Speter return (8);
118783Speter case YBEGIN:
119783Speter case YEOF:
120783Speter case YREPEAT:
121783Speter case YRECORD:
122783Speter return (INFINITY);
123783Speter }
124783Speter }
125783Speter
126783Speter /*
127783Speter * Replacement costs
128783Speter *
129783Speter * Most replacement costs are the same as an insertion
130783Speter * plus a deletion cost. One special case is the replacement
131783Speter * of a large number of keywords by an identifier.
132783Speter * These are given lower costs, especially the keyword "to".
133783Speter */
repcost(what,with)134783Speter repcost(what, with)
135783Speter register int what, with;
136783Speter {
137783Speter register int c;
138783Speter
139783Speter if (with == what)
140783Speter return (INFINITY);
141783Speter if (with == YID && what > ERROR)
142783Speter switch (what) {
143783Speter case YID:
144783Speter case YDOTDOT:
145783Speter case YINT:
146783Speter case YBINT:
147783Speter case YSTRING:
148783Speter case YNUMB:
149783Speter break;
150783Speter case YTO:
151783Speter return (3);
152783Speter default:
153783Speter return (5);
154783Speter case YRECORD:
155783Speter case YTHEN:
156783Speter return (6);
157783Speter case YBEGIN:
158783Speter break;
159783Speter }
1607999Smckusick if (what == YID && with == YFORWARD)
1617999Smckusick return(5);
162783Speter if (what == ';' && (with == ',' || with == '.'))
163783Speter return (CLIMIT - 1);
16414747Sthien c = delcost(what) + inscost(with, what);
165783Speter /*
166783Speter * It costs extra to replace something which has
167783Speter * semantics by something which doesn't.
168783Speter */
169783Speter if (nullsem(what) == NIL && nullsem(with) != NIL)
1703084Smckusic c += 4;
171783Speter return (c);
172783Speter }
173783Speter
174783Speter /*
175783Speter * Deletion costs
176783Speter */
delcost(what)177783Speter delcost(what)
178783Speter int what;
179783Speter {
180783Speter
181783Speter switch (what) {
182783Speter case '.':
183783Speter case ':':
184783Speter case ',':
185783Speter case '=':
186783Speter case '(':
187783Speter return (3);
188783Speter case YELSE:
189783Speter case YTHEN:
190783Speter return (4);
191783Speter default:
192783Speter return (5);
193783Speter case YLABEL:
194783Speter case YCONST:
195783Speter case YTYPE:
196783Speter case YVAR:
197783Speter return (10);
198783Speter case YPROCEDURE:
199783Speter case YFUNCTION:
200783Speter case YBEGIN:
201783Speter case YEND:
202783Speter return ((CLIMIT * 3) / 4);
203783Speter case ';':
204783Speter case YEOF:
205783Speter return (INFINITY);
206783Speter }
207783Speter }
208783Speter #ifdef DEBUG
209783Speter
210783Speter /*
211783Speter * Routine to print out costs with "-K" option.
212783Speter */
2133084Smckusic char yysyms[] = ";,:=*+/-|&()[]<>~^";
214783Speter
215783Speter
yycosts()216783Speter yycosts()
217783Speter {
218783Speter register int c;
219783Speter register char *cp;
220783Speter
221783Speter printf("Insert\tDelete\tRep(ID)\tSymbol\n");
222783Speter for (cp = yysyms; *cp; cp++)
223783Speter yydocost(*cp);
224783Speter for (c = ERROR + 1; c < YLAST; c++)
225783Speter yydocost(c);
226783Speter #ifdef PXP
227783Speter flush();
228783Speter #endif
229783Speter }
230783Speter
yydocost(c)231783Speter yydocost(c)
232783Speter int c;
233783Speter {
234783Speter
235783Speter printf("%4d\t", inscost(c, -1));
236783Speter printf("%4d\t", delcost(c));
23714747Sthien if (repcost(c, YID) != inscost(YID, c) + delcost(c))
238783Speter printf("%4d", repcost(c, YID));
23930802Sbostic printf("\t%s\n", charname(c,1));
240783Speter }
241783Speter #endif
242