xref: /inferno-os/utils/awk/tran.c (revision ecc9caba0e344ed50c05ee8156b2734f4d76e463)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #define	DEBUG
26 #include <stdio.h>
27 #include <math.h>
28 #include <ctype.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include "awk.h"
32 #include "ytab.h"
33 
#define	FULLTAB	2	/* setsymtab rehashes once nelem exceeds FULLTAB * size */
#define	GROWTAB 4	/* rehash multiplies the table size by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers into the value slots of the built-in variables' cells.
 * Each is set by syminit() (ARGC by arginit()) to the sval or fval
 * member of the cell entered in symtab under the same name.
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* The cells themselves, for code that needs more than the value slot. */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* cell for the constant "0", entered by syminit */

extern Cell **fldtab;	/* defined in another translation unit */
68 
69 void syminit(void)	/* initialize symbol table with builtin vars */
70 {
71 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
72 	/* this is used for if(x)... tests: */
73 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	nullnode = celltonode(nullloc, CCON);
75 
76 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
77 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
78 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
79 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
80 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
81 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
83 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
84 	NF = &nfloc->fval;
85 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
86 	NR = &nrloc->fval;
87 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
88 	FNR = &fnrloc->fval;
89 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
90 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
91 	RSTART = &rstartloc->fval;
92 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
93 	RLENGTH = &rlengthloc->fval;
94 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
95 	symtabloc->sval = (char *) symtab;
96 }
97 
98 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
99 {
100 	Cell *cp;
101 	int i;
102 	char temp[50];
103 
104 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
105 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
106 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
107 	cp->sval = (char *) ARGVtab;
108 	for (i = 0; i < ac; i++) {
109 		sprintf(temp, "%d", i);
110 		if (is_number(*av))
111 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
112 		else
113 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
114 		av++;
115 	}
116 }
117 
118 void envinit(char **envp)	/* set up ENVIRON variable */
119 {
120 	Cell *cp;
121 	char *p;
122 
123 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
124 	ENVtab = makesymtab(NSYMTAB);
125 	cp->sval = (char *) ENVtab;
126 	for ( ; *envp; envp++) {
127 		if ((p = strchr(*envp, '=')) == NULL)
128 			continue;
129 		if( p == *envp ) /* no left hand side name in env string */
130 			continue;
131 		*p++ = 0;	/* split into two strings at = */
132 		if (is_number(p))
133 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
134 		else
135 			setsymtab(*envp, p, 0.0, STR, ENVtab);
136 		p[-1] = '=';	/* restore in case env is passed down to a shell */
137 	}
138 }
139 
140 Array *makesymtab(int n)	/* make a new symbol table */
141 {
142 	Array *ap;
143 	Cell **tp;
144 
145 	ap = (Array *) malloc(sizeof(Array));
146 	tp = (Cell **) calloc(n, sizeof(Cell *));
147 	if (ap == NULL || tp == NULL)
148 		FATAL("out of space in makesymtab");
149 	ap->nelem = 0;
150 	ap->size = n;
151 	ap->tab = tp;
152 	return(ap);
153 }
154 
155 void freesymtab(Cell *ap)	/* free a symbol table */
156 {
157 	Cell *cp, *temp;
158 	Array *tp;
159 	int i;
160 
161 	if (!isarr(ap))
162 		return;
163 	tp = (Array *) ap->sval;
164 	if (tp == NULL)
165 		return;
166 	for (i = 0; i < tp->size; i++) {
167 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
168 			xfree(cp->nval);
169 			if (freeable(cp))
170 				xfree(cp->sval);
171 			temp = cp->cnext;	/* avoids freeing then using */
172 			free(cp);
173 		}
174 		tp->tab[i] = 0;
175 	}
176 	free(tp->tab);
177 	free(tp);
178 }
179 
180 void freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"] */
181 {
182 	Array *tp;
183 	Cell *p, *prev = NULL;
184 	int h;
185 
186 	tp = (Array *) ap->sval;
187 	h = hash(s, tp->size);
188 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
189 		if (strcmp(s, p->nval) == 0) {
190 			if (prev == NULL)	/* 1st one */
191 				tp->tab[h] = p->cnext;
192 			else			/* middle somewhere */
193 				prev->cnext = p->cnext;
194 			if (freeable(p))
195 				xfree(p->sval);
196 			free(p->nval);
197 			free(p);
198 			tp->nelem--;
199 			return;
200 		}
201 }
202 
203 Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
204 {
205 	int h;
206 	Cell *p;
207 
208 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
209 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
210 			p, p->nval, p->sval, p->fval, p->tval) );
211 		return(p);
212 	}
213 	p = (Cell *) malloc(sizeof(Cell));
214 	if (p == NULL)
215 		FATAL("out of space for symbol table at %s", n);
216 	p->nval = tostring(n);
217 	p->sval = s ? tostring(s) : tostring("");
218 	p->fval = f;
219 	p->tval = t;
220 	p->csub = CUNK;
221 	p->ctype = OCELL;
222 	tp->nelem++;
223 	if (tp->nelem > FULLTAB * tp->size)
224 		rehash(tp);
225 	h = hash(n, tp->size);
226 	p->cnext = tp->tab[h];
227 	tp->tab[h] = p;
228 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
229 		p, p->nval, p->sval, p->fval, p->tval) );
230 	return(p);
231 }
232 
/*
 * hash: map string s to a bucket index in the range 0..n-1.
 * The value is the base-31 polynomial of the characters, computed in
 * unsigned arithmetic and reduced modulo n.  n must be non-zero.
 */
int hash(char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return acc % n;
}
241 
242 void rehash(Array *tp)	/* rehash items in small table into big one */
243 {
244 	int i, nh, nsz;
245 	Cell *cp, *op, **np;
246 
247 	nsz = GROWTAB * tp->size;
248 	np = (Cell **) calloc(nsz, sizeof(Cell *));
249 	if (np == NULL)		/* can't do it, but can keep running. */
250 		return;		/* someone else will run out later. */
251 	for (i = 0; i < tp->size; i++) {
252 		for (cp = tp->tab[i]; cp; cp = op) {
253 			op = cp->cnext;
254 			nh = hash(cp->nval, nsz);
255 			cp->cnext = np[nh];
256 			np[nh] = cp;
257 		}
258 	}
259 	free(tp->tab);
260 	tp->tab = np;
261 	tp->size = nsz;
262 }
263 
264 Cell *lookup(char *s, Array *tp)	/* look for s in tp */
265 {
266 	Cell *p;
267 	int h;
268 
269 	h = hash(s, tp->size);
270 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
271 		if (strcmp(s, p->nval) == 0)
272 			return(p);	/* found it */
273 	return(NULL);			/* not found */
274 }
275 
276 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
277 {
278 	int fldno;
279 
280 	if ((vp->tval & (NUM | STR)) == 0)
281 		funnyvar(vp, "assign to");
282 	if (isfld(vp)) {
283 		donerec = 0;	/* mark $0 invalid */
284 		fldno = atoi(vp->nval);
285 		if (fldno > *NF)
286 			newfld(fldno);
287 		   dprintf( ("setting field %d to %g\n", fldno, f) );
288 	} else if (isrec(vp)) {
289 		donefld = 0;	/* mark $1... invalid */
290 		donerec = 1;
291 	}
292 	if (freeable(vp))
293 		xfree(vp->sval); /* free any previous string */
294 	vp->tval &= ~STR;	/* mark string invalid */
295 	vp->tval |= NUM;	/* mark number ok */
296 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
297 	return vp->fval = f;
298 }
299 
300 void funnyvar(Cell *vp, char *rw)
301 {
302 	if (isarr(vp))
303 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
304 	if (vp->tval & FCN)
305 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
306 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
307 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
308 }
309 
310 char *setsval(Cell *vp, char *s)	/* set string val of a Cell */
311 {
312 	char *t;
313 	int fldno;
314 
315 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
316 	if ((vp->tval & (NUM | STR)) == 0)
317 		funnyvar(vp, "assign to");
318 	if (isfld(vp)) {
319 		donerec = 0;	/* mark $0 invalid */
320 		fldno = atoi(vp->nval);
321 		if (fldno > *NF)
322 			newfld(fldno);
323 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
324 	} else if (isrec(vp)) {
325 		donefld = 0;	/* mark $1... invalid */
326 		donerec = 1;
327 	}
328 	t = tostring(s);	/* in case it's self-assign */
329 	vp->tval &= ~NUM;
330 	vp->tval |= STR;
331 	if (freeable(vp))
332 		xfree(vp->sval);
333 	vp->tval &= ~DONTFREE;
334 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
335 	return(vp->sval = t);
336 }
337 
338 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
339 {
340 	if ((vp->tval & (NUM | STR)) == 0)
341 		funnyvar(vp, "read value of");
342 	if (isfld(vp) && donefld == 0)
343 		fldbld();
344 	else if (isrec(vp) && donerec == 0)
345 		recbld();
346 	if (!isnum(vp)) {	/* not a number */
347 		vp->fval = atof(vp->sval);	/* best guess */
348 		if (is_number(vp->sval) && !(vp->tval&CON))
349 			vp->tval |= NUM;	/* make NUM only sparingly */
350 	}
351 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
352 	return(vp->fval);
353 }
354 
355 char *getsval(Cell *vp)	/* get string val of a Cell */
356 {
357 	char s[100];	/* BUG: unchecked */
358 	double dtemp;
359 
360 	if ((vp->tval & (NUM | STR)) == 0)
361 		funnyvar(vp, "read value of");
362 	if (isfld(vp) && donefld == 0)
363 		fldbld();
364 	else if (isrec(vp) && donerec == 0)
365 		recbld();
366 	if (isstr(vp) == 0) {
367 		if (freeable(vp))
368 			xfree(vp->sval);
369 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
370 			sprintf(s, "%.30g", vp->fval);
371 		else
372 			sprintf(s, *CONVFMT, vp->fval);
373 		vp->sval = tostring(s);
374 		vp->tval &= ~DONTFREE;
375 		vp->tval |= STR;
376 	}
377 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
378 	return(vp->sval);
379 }
380 
/*
 * tostring: return a freshly malloc'ed copy of string s.
 * Calls FATAL if the copy cannot be allocated.
 */
char *tostring(char *s)
{
	char *copy = (char *) malloc(strlen(s)+1);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return strcpy(copy, s);
}
391 
392 char *qstring(char *is, int delim)	/* collect string up to next delim */
393 {
394 	char *os = is;
395 	int c, n;
396 	uschar *s = (uschar *) is;
397 	uschar *buf, *bp;
398 
399 	if ((buf = (uschar *) malloc(strlen(s)+3)) == NULL)
400 		FATAL( "out of space in qstring(%s)", s);
401 	for (bp = buf; (c = *s) != delim; s++) {
402 		if (c == '\n')
403 			SYNTAX( "newline in string %.20s...", os );
404 		else if (c != '\\')
405 			*bp++ = c;
406 		else {	/* \something */
407 			c = *++s;
408 			if (c == 0) {	/* \ at end */
409 				*bp++ = '\\';
410 				break;	/* for loop */
411 			}
412 			switch (c) {
413 			case '\\':	*bp++ = '\\'; break;
414 			case 'n':	*bp++ = '\n'; break;
415 			case 't':	*bp++ = '\t'; break;
416 			case 'b':	*bp++ = '\b'; break;
417 			case 'f':	*bp++ = '\f'; break;
418 			case 'r':	*bp++ = '\r'; break;
419 			default:
420 				if (!isdigit(c)) {
421 					*bp++ = c;
422 					break;
423 				}
424 				n = c - '0';
425 				if (isdigit(s[1])) {
426 					n = 8 * n + *++s - '0';
427 					if (isdigit(s[1]))
428 						n = 8 * n + *++s - '0';
429 				}
430 				*bp++ = n;
431 				break;
432 			}
433 		}
434 	}
435 	*bp++ = 0;
436 	return (char *) buf;
437 }
438