xref: /plan9/sys/src/cmd/awk/tran.c (revision 219b2ee8daee37f4aad58d63f21287faa8e4ffdc)
1 /*
2 Copyright (c) 1989 AT&T
3 	All Rights Reserved
4 
5 THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T.
6 
7 The copyright notice above does not evidence any
8 actual or intended publication of such source code.
9 */
10 
11 #define	DEBUG
12 #include <stdio.h>
13 #include <math.h>
14 #include <ctype.h>
15 #include <string.h>
16 #include <stdlib.h>
17 #include "awk.h"
18 #include "y.tab.h"
19 
20 #define	FULLTAB	2	/* setsymtab rehashes once nelem exceeds FULLTAB * size */
21 #define	GROWTAB 4	/* rehash multiplies the bucket count by this factor */
22 
23 Array	*symtab;	/* main symbol table; syminit enters the builtin variables here */
24 
25 uchar	**FS;		/* field sep: address of the "FS" cell's sval; starts as " " */
26 uchar	**RS;		/* record sep: address of the "RS" cell's sval; starts as "\n" */
27 uchar	**OFS;		/* output field sep; starts as " " */
28 uchar	**ORS;		/* output record sep; starts as "\n" */
29 uchar	**OFMT;		/* output format for numbers; starts as "%.6g" */
30 uchar	**CONVFMT;	/* format r_getsval uses for non-integral numbers; starts as "%.6g" */
31 Awkfloat *NF;		/* number of fields in current record (fval of the "NF" cell) */
32 Awkfloat *NR;		/* number of current record (fval of the "NR" cell) */
33 Awkfloat *FNR;		/* number of current record in current file (fval of the "FNR" cell) */
34 uchar	**FILENAME;	/* current filename argument; starts as "" */
35 Awkfloat *ARGC;		/* number of arguments from command line; set by arginit */
36 uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
37 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
38 Awkfloat *RLENGTH;	/* length of same */
39 
40 Cell	*recloc;	/* location of record; syminit points it at fldtab[0] */
41 Cell	*nrloc;		/* the "NR" cell */
42 Cell	*nfloc;		/* the "NF" cell */
43 Cell	*fnrloc;	/* the "FNR" cell */
44 Array	*ARGVtab;	/* symbol table containing ARGV[...]; built by arginit */
45 Array	*ENVtab;	/* symbol table containing ENVIRON[...]; built by envinit */
46 Cell	*rstartloc;	/* the "RSTART" cell */
47 Cell	*rlengthloc;	/* the "RLENGTH" cell */
48 Cell	*symtabloc;	/* the "SYMTAB" cell; its sval holds symtab itself */
49 
50 Cell	*nullloc;	/* a guaranteed empty cell (entered as "$zero&null") */
51 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
52 
53 extern Cell *fldtab;	/* defined elsewhere; element 0 is used as the record cell (see recloc) */
54 
55 void syminit(void)	/* initialize symbol table with builtin vars */
56 {
57 	setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
58 	/* this is used for if(x)... tests: */
59 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
60 	nullnode = valtonode(nullloc, CCON);
61 
62 	/* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */
63 	/* has been done elsewhere */
64 	recloc = &fldtab[0];
65 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
66 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
67 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
68 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
69 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
70 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
71 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
72 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
73 	NF = &nfloc->fval;
74 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
75 	NR = &nrloc->fval;
76 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
77 	FNR = &fnrloc->fval;
78 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
79 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
80 	RSTART = &rstartloc->fval;
81 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
82 	RLENGTH = &rlengthloc->fval;
83 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
84 	symtabloc->sval = (uchar *) symtab;
85 }
86 
87 void arginit(int ac, uchar *av[])	/* set up ARGV and ARGC */
88 {
89 	Cell *cp;
90 	int i;
91 	uchar temp[5];
92 
93 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
94 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
95 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
96 	cp->sval = (uchar *) ARGVtab;
97 	for (i = 0; i < ac; i++) {
98 		sprintf((char *)temp, "%d", i);
99 		if (isnumber(*av))
100 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
101 		else
102 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
103 		av++;
104 	}
105 }
106 
107 void envinit(uchar **envp)	/* set up ENVIRON variable */
108 {
109 	Cell *cp;
110 	uchar *p;
111 
112 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
113 	ENVtab = makesymtab(NSYMTAB);
114 	cp->sval = (uchar *) ENVtab;
115 	for ( ; *envp; envp++) {
116 		if ((p = (uchar *) strchr((char *) *envp, '=')) == NULL)
117 			continue;
118 		*p++ = 0;	/* split into two strings at = */
119 		if (isnumber(p))
120 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
121 		else
122 			setsymtab(*envp, p, 0.0, STR, ENVtab);
123 		p[-1] = '=';	/* restore in case env is passed down to a shell */
124 	}
125 }
126 
127 Array *makesymtab(int n)	/* make a new symbol table */
128 {
129 	Array *ap;
130 	Cell **tp;
131 
132 	ap = (Array *) malloc(sizeof(Array));
133 	tp = (Cell **) calloc(n, sizeof(Cell *));
134 	if (ap == NULL || tp == NULL)
135 		ERROR "out of space in makesymtab" FATAL;
136 	ap->nelem = 0;
137 	ap->size = n;
138 	ap->tab = tp;
139 	return(ap);
140 }
141 
142 void freesymtab(Cell *ap)	/* free a symbol table */
143 {
144 	Cell *cp, *temp;
145 	Array *tp;
146 	int i;
147 
148 	if (!isarr(ap))
149 		return;
150 	tp = (Array *) ap->sval;
151 	if (tp == NULL)
152 		return;
153 	for (i = 0; i < tp->size; i++) {
154 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
155 			xfree(cp->nval);
156 			if (freeable(cp))
157 				xfree(cp->sval);
158 			temp = cp->cnext;	/* avoids freeing then using */
159 			free(cp);
160 		}
161 	}
162 	free(tp->tab);
163 	free(tp);
164 }
165 
166 void freeelem(Cell *ap, uchar *s)	/* free elem s from ap (i.e., ap["s"] */
167 {
168 	Array *tp;
169 	Cell *p, *prev = NULL;
170 	int h;
171 
172 	tp = (Array *) ap->sval;
173 	h = hash(s, tp->size);
174 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
175 		if (strcmp((char *) s, (char *) p->nval) == 0) {
176 			if (prev == NULL)	/* 1st one */
177 				tp->tab[h] = p->cnext;
178 			else			/* middle somewhere */
179 				prev->cnext = p->cnext;
180 			if (freeable(p))
181 				xfree(p->sval);
182 			free(p->nval);
183 			free(p);
184 			tp->nelem--;
185 			return;
186 		}
187 }
188 
189 Cell *setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned t, Array *tp)
190 {
191 	register int h;
192 	register Cell *p;
193 
194 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
195 		dprintf( ("setsymtab found %o: n=%s s=\"%s\" f=%g t=%o\n",
196 			p, p->nval, p->sval, p->fval, p->tval) );
197 		return(p);
198 	}
199 	p = (Cell *) malloc(sizeof(Cell));
200 	if (p == NULL)
201 		ERROR "out of space for symbol table at %s", n FATAL;
202 	p->nval = tostring(n);
203 	p->sval = s ? tostring(s) : tostring("");
204 	p->fval = f;
205 	p->tval = t;
206 	tp->nelem++;
207 	if (tp->nelem > FULLTAB * tp->size)
208 		rehash(tp);
209 	h = hash(n, tp->size);
210 	p->cnext = tp->tab[h];
211 	tp->tab[h] = p;
212 	dprintf( ("setsymtab set %o: n=%s s=\"%s\" f=%g t=%o\n",
213 		p, p->nval, p->sval, p->fval, p->tval) );
214 	return(p);
215 }
216 
217 hash(uchar *s, int n)	/* form hash value for string s */
218 {
219 	register unsigned hashval;
220 
221 	for (hashval = 0; *s != '\0'; s++)
222 		hashval = (*s + 31 * hashval);
223 	return hashval % n;
224 }
225 
226 void rehash(Array *tp)	/* rehash items in small table into big one */
227 {
228 	int i, nh, nsz;
229 	Cell *cp, *op, **np;
230 
231 	nsz = GROWTAB * tp->size;
232 	np = (Cell **) calloc(nsz, sizeof(Cell *));
233 	if (np == NULL)		/* can't do it, but can keep running. */
234 		return;		/* someone else will run out later. */
235 	for (i = 0; i < tp->size; i++) {
236 		for (cp = tp->tab[i]; cp; cp = op) {
237 			op = cp->cnext;
238 			nh = hash(cp->nval, nsz);
239 			cp->cnext = np[nh];
240 			np[nh] = cp;
241 		}
242 	}
243 	free(tp->tab);
244 	tp->tab = np;
245 	tp->size = nsz;
246 }
247 
248 Cell *lookup(uchar *s, Array *tp)	/* look for s in tp */
249 {
250 	register Cell *p, *prev = NULL;
251 	int h;
252 
253 	h = hash(s, tp->size);
254 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
255 		if (strcmp((char *) s, (char *) p->nval) == 0)
256 			return(p);	/* found it */
257 	return(NULL);			/* not found */
258 }
259 
260 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
261 {
262 	if ((vp->tval & (NUM | STR)) == 0)
263 		funnyvar(vp, "assign to");
264 	if (vp->tval & FLD) {
265 		donerec = 0;	/* mark $0 invalid */
266 		if (vp-fldtab > *NF)
267 			newfld(vp-fldtab);
268 		dprintf( ("setting field %d to %g\n", vp-fldtab, f) );
269 	} else if (vp->tval & REC) {
270 		donefld = 0;	/* mark $1... invalid */
271 		donerec = 1;
272 	}
273 	vp->tval &= ~STR;	/* mark string invalid */
274 	vp->tval |= NUM;	/* mark number ok */
275 	dprintf( ("setfval %o: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
276 	return vp->fval = f;
277 }
278 
279 void funnyvar(Cell *vp, char *rw)
280 {
281 	if (vp->tval & ARR)
282 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
283 	if (vp->tval & FCN)
284 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
285 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
286 		vp, vp->nval, vp->sval, vp->fval, vp->tval WARNING;
287 }
288 
289 uchar *setsval(Cell *vp, uchar *s)	/* set string val of a Cell */
290 {
291 	char *t;
292 
293 	if ((vp->tval & (NUM | STR)) == 0)
294 		funnyvar(vp, "assign to");
295 	if (vp->tval & FLD) {
296 		donerec = 0;	/* mark $0 invalid */
297 		if (vp-fldtab > *NF)
298 			newfld(vp-fldtab);
299 		dprintf( ("setting field %d to %s (%o)\n", vp-fldtab, s, s) );
300 	} else if (vp->tval & REC) {
301 		donefld = 0;	/* mark $1... invalid */
302 		donerec = 1;
303 	}
304 	t = tostring(s);	/* in case it's self-assign */
305 	vp->tval &= ~NUM;
306 	vp->tval |= STR;
307 	if (freeable(vp))
308 		xfree(vp->sval);
309 	vp->tval &= ~DONTFREE;
310 	dprintf( ("setsval %o: %s = \"%s (%o)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
311 	return(vp->sval = t);
312 }
313 
314 Awkfloat r_getfval(Cell *vp)	/* get float val of a Cell */
315 {
316 	if ((vp->tval & (NUM | STR)) == 0)
317 		funnyvar(vp, "read value of");
318 	if ((vp->tval & FLD) && donefld == 0)
319 		fldbld();
320 	else if ((vp->tval & REC) && donerec == 0)
321 		recbld();
322 	if (!isnum(vp)) {	/* not a number */
323 		vp->fval = atof(vp->sval);	/* best guess */
324 		if (isnumber(vp->sval) && !(vp->tval&CON))
325 			vp->tval |= NUM;	/* make NUM only sparingly */
326 	}
327 	dprintf( ("getfval %o: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
328 	return(vp->fval);
329 }
330 
331 uchar *r_getsval(Cell *vp)	/* get string val of a Cell */
332 {
333 	uchar s[100];
334 	double dtemp;
335 
336 	if ((vp->tval & (NUM | STR)) == 0)
337 		funnyvar(vp, "read value of");
338 	if ((vp->tval & FLD) && donefld == 0)
339 		fldbld();
340 	else if ((vp->tval & REC) && donerec == 0)
341 		recbld();
342 	if ((vp->tval & STR) == 0) {
343 		if (!(vp->tval&DONTFREE))
344 			xfree(vp->sval);
345 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
346 			sprintf((char *)s, "%.20g", vp->fval);
347 		else
348 			sprintf((char *)s, (char *)*CONVFMT, vp->fval);
349 		vp->sval = tostring(s);
350 		vp->tval &= ~DONTFREE;
351 		vp->tval |= STR;
352 	}
353 	dprintf( ("getsval %o: %s = \"%s (%o)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
354 	return(vp->sval);
355 }
356 
357 uchar *tostring(uchar *s)	/* make a copy of string s */
358 {
359 	register uchar *p;
360 
361 	p = (uchar *) malloc(strlen((char *) s)+1);
362 	if (p == NULL)
363 		ERROR "out of space in tostring on %s", s FATAL;
364 	strcpy((char *) p, (char *) s);
365 	return(p);
366 }
367 
368 uchar *qstring(uchar *s, int delim)	/* collect string up to next delim */
369 {
370 	uchar *q;
371 	int c, n;
372 
373 	for (q = cbuf; (c = *s) != delim; s++) {
374 		if (q >= cbuf + CBUFLEN - 1)
375 			ERROR "string %.10s... too long", cbuf SYNTAX;
376 		else if (c == '\n')
377 			ERROR "newline in string %.10s...", cbuf SYNTAX;
378 		else if (c != '\\')
379 			*q++ = c;
380 		else	/* \something */
381 			switch (c = *++s) {
382 			case '\\':	*q++ = '\\'; break;
383 			case 'n':	*q++ = '\n'; break;
384 			case 't':	*q++ = '\t'; break;
385 			case 'b':	*q++ = '\b'; break;
386 			case 'f':	*q++ = '\f'; break;
387 			case 'r':	*q++ = '\r'; break;
388 			default:
389 				if (!isdigit(c)) {
390 					*q++ = c;
391 					break;
392 				}
393 				n = c - '0';
394 				if (isdigit(s[1])) {
395 					n = 8 * n + *++s - '0';
396 					if (isdigit(s[1]))
397 						n = 8 * n + *++s - '0';
398 				}
399 				*q++ = n;
400 				break;
401 			}
402 	}
403 	*q = '\0';
404 	return cbuf;
405 }
406