xref: /netbsd-src/external/historical/nawk/dist/tran.c (revision 9ddb6ab554e70fb9bbd90c3d96b812bc57755a14)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28 
29 #define	DEBUG
30 #include <stdio.h>
31 #include <math.h>
32 #include <ctype.h>
33 #include <string.h>
34 #include <stdlib.h>
35 #include "awk.h"
36 #include "awkgram.h"
37 
38 #define	FULLTAB	2	/* rehash when table gets this x full */
39 #define	GROWTAB 4	/* grow table by this factor */
40 
41 Array	*symtab;	/* main symbol table */
42 
43 char	**FS;		/* initial field sep */
44 char	**RS;		/* initial record sep */
45 char	**OFS;		/* output field sep */
46 char	**ORS;		/* output record sep */
47 char	**OFMT;		/* output format for numbers */
48 char	**CONVFMT;	/* format for conversions in getsval */
49 Awkfloat *NF;		/* number of fields in current record */
50 Awkfloat *NR;		/* number of current record */
51 Awkfloat *FNR;		/* number of current record in current file */
52 char	**FILENAME;	/* current filename argument */
53 Awkfloat *ARGC;		/* number of arguments from command line */
54 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
55 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
56 Awkfloat *RLENGTH;	/* length of same */
57 
58 Cell	*fsloc;		/* FS */
59 Cell	*nrloc;		/* NR */
60 Cell	*nfloc;		/* NF */
61 Cell	*fnrloc;	/* FNR */
62 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
63 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
64 Cell	*rstartloc;	/* RSTART */
65 Cell	*rlengthloc;	/* RLENGTH */
66 Cell	*symtabloc;	/* SYMTAB */
67 
68 Cell	*nullloc;	/* a guaranteed empty cell */
69 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
70 Cell	*literal0;	/* cell for the constant "0"; entered into symtab by syminit */
71 
72 extern Cell **fldtab;
73 
74 void syminit(void)	/* initialize symbol table with builtin vars */
75 {
76 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
77 	/* this is used for if(x)... tests: */
78 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
79 	nullnode = celltonode(nullloc, CCON);
80 
81 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
82 	FS = &fsloc->sval;
83 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
84 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
85 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
86 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
87 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
88 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
89 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
90 	NF = &nfloc->fval;
91 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
92 	NR = &nrloc->fval;
93 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
94 	FNR = &fnrloc->fval;
95 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
96 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
97 	RSTART = &rstartloc->fval;
98 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
99 	RLENGTH = &rlengthloc->fval;
100 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
101 	symtabloc->sval = (char *) symtab;
102 }
103 
104 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
105 {
106 	Cell *cp;
107 	int i;
108 	char temp[50];
109 
110 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
111 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
112 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
113 	cp->sval = (char *) ARGVtab;
114 	for (i = 0; i < ac; i++) {
115 		snprintf(temp, sizeof(temp), "%d", i);
116 		if (is_number(*av))
117 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
118 		else
119 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
120 		av++;
121 	}
122 }
123 
124 void envinit(char **envp)	/* set up ENVIRON variable */
125 {
126 	Cell *cp;
127 	char *p;
128 
129 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
130 	ENVtab = makesymtab(NSYMTAB);
131 	cp->sval = (char *) ENVtab;
132 	for ( ; *envp; envp++) {
133 		if ((p = strchr(*envp, '=')) == NULL)
134 			continue;
135 		if( p == *envp ) /* no left hand side name in env string */
136 			continue;
137 		*p++ = 0;	/* split into two strings at = */
138 		if (is_number(p))
139 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
140 		else
141 			setsymtab(*envp, p, 0.0, STR, ENVtab);
142 		p[-1] = '=';	/* restore in case env is passed down to a shell */
143 	}
144 }
145 
146 Array *makesymtab(int n)	/* make a new symbol table */
147 {
148 	Array *ap;
149 	Cell **tp;
150 
151 	ap = malloc(sizeof(*ap));
152 	tp = calloc(n, sizeof(*tp));
153 	if (ap == NULL || tp == NULL)
154 		FATAL("out of space in makesymtab");
155 	ap->nelem = 0;
156 	ap->size = n;
157 	ap->tab = tp;
158 	return(ap);
159 }
160 
161 void freesymtab(Cell *ap)	/* free a symbol table */
162 {
163 	Cell *cp, *temp;
164 	Array *tp;
165 	int i;
166 
167 	if (!isarr(ap))
168 		return;
169 	tp = (Array *) ap->sval;
170 	if (tp == NULL)
171 		return;
172 	for (i = 0; i < tp->size; i++) {
173 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
174 			xfree(cp->nval);
175 			if (freeable(cp))
176 				xfree(cp->sval);
177 			temp = cp->cnext;	/* avoids freeing then using */
178 			free(cp);
179 			tp->nelem--;
180 		}
181 		tp->tab[i] = 0;
182 	}
183 	if (tp->nelem != 0)
184 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
185 	free(tp->tab);
186 	free(tp);
187 }
188 
189 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
190 {
191 	Array *tp;
192 	Cell *p, *prev = NULL;
193 	int h;
194 
195 	tp = (Array *) ap->sval;
196 	h = hash(s, tp->size);
197 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
198 		if (strcmp(s, p->nval) == 0) {
199 			if (prev == NULL)	/* 1st one */
200 				tp->tab[h] = p->cnext;
201 			else			/* middle somewhere */
202 				prev->cnext = p->cnext;
203 			if (freeable(p))
204 				xfree(p->sval);
205 			free(p->nval);
206 			free(p);
207 			tp->nelem--;
208 			return;
209 		}
210 }
211 
212 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
213 {
214 	int h;
215 	Cell *p;
216 
217 	if (n == NULL)
218 		n = "";
219 
220 	if ((p = lookup(n, tp)) != NULL) {
221 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
222 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
223 		return(p);
224 	}
225 	p = malloc(sizeof(*p));
226 	if (p == NULL)
227 		FATAL("out of space for symbol table at %s", n);
228 	p->nval = tostring(n);
229 	p->sval = s ? tostring(s) : tostring("");
230 	p->fval = f;
231 	p->tval = t;
232 	p->csub = CUNK;
233 	p->ctype = OCELL;
234 	tp->nelem++;
235 	if (tp->nelem > FULLTAB * tp->size)
236 		rehash(tp);
237 	h = hash(n, tp->size);
238 	p->cnext = tp->tab[h];
239 	tp->tab[h] = p;
240 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
241 		p, p->nval, p->sval, p->fval, p->tval) );
242 	return(p);
243 }
244 
/*
 * hash: form the hash value of string s for a table of n buckets.
 * The characters are folded into an unsigned accumulator, multiplying by
 * 31 at each step, and the result is reduced modulo n.  n must be nonzero.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = acc * 31 + *cp;
		cp++;
	}
	return acc % n;
}
253 
254 void rehash(Array *tp)	/* rehash items in small table into big one */
255 {
256 	int i, nh, nsz;
257 	Cell *cp, *op, **np;
258 
259 	nsz = GROWTAB * tp->size;
260 	np = calloc(nsz, sizeof(*np));
261 	if (np == NULL)		/* can't do it, but can keep running. */
262 		return;		/* someone else will run out later. */
263 	for (i = 0; i < tp->size; i++) {
264 		for (cp = tp->tab[i]; cp; cp = op) {
265 			op = cp->cnext;
266 			nh = hash(cp->nval, nsz);
267 			cp->cnext = np[nh];
268 			np[nh] = cp;
269 		}
270 	}
271 	free(tp->tab);
272 	tp->tab = np;
273 	tp->size = nsz;
274 }
275 
276 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
277 {
278 	Cell *p;
279 	int h;
280 
281 	h = hash(s, tp->size);
282 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
283 		if (strcmp(s, p->nval) == 0)
284 			return(p);	/* found it */
285 	return(NULL);			/* not found */
286 }
287 
288 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
289 {
290 	int fldno;
291 
292 	f += 0.0;		/* normalise negative zero to positive zero */
293 	if ((vp->tval & (NUM | STR)) == 0)
294 		funnyvar(vp, "assign to");
295 	if (isfld(vp)) {
296 		donerec = 0;	/* mark $0 invalid */
297 		fldno = atoi(vp->nval);
298 		if (fldno > *NF)
299 			newfld(fldno);
300 		   dprintf( ("setting field %d to %g\n", fldno, f) );
301 	} else if (&vp->fval == NF) {
302 		donerec = 0;	/* mark $0 invalid */
303 		setlastfld(f);
304 		dprintf( ("setting NF to %g\n", f) );
305 	} else if (isrec(vp)) {
306 		donefld = 0;	/* mark $1... invalid */
307 		donerec = 1;
308 	}
309 	if (freeable(vp))
310 		xfree(vp->sval); /* free any previous string */
311 	vp->tval &= ~STR;	/* mark string invalid */
312 	vp->tval |= NUM;	/* mark number ok */
313 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
314 	return vp->fval = f;
315 }
316 
317 void funnyvar(Cell *vp, const char *rw)
318 {
319 	if (isarr(vp))
320 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
321 	if (vp->tval & FCN)
322 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
323 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
324 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
325 }
326 
327 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
328 {
329 	char *t;
330 	int fldno;
331 	Awkfloat f;
332 
333 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
334 		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
335 	if ((vp->tval & (NUM | STR)) == 0)
336 		funnyvar(vp, "assign to");
337 	if (isfld(vp)) {
338 		donerec = 0;	/* mark $0 invalid */
339 		fldno = atoi(vp->nval);
340 		if (fldno > *NF)
341 			newfld(fldno);
342 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
343 	} else if (isrec(vp)) {
344 		donefld = 0;	/* mark $1... invalid */
345 		donerec = 1;
346 	}
347 	t = tostring(s);	/* in case it's self-assign */
348 	if (freeable(vp))
349 		xfree(vp->sval);
350 	vp->tval &= ~NUM;
351 	vp->tval |= STR;
352 	vp->tval &= ~DONTFREE;
353 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
354 		vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
355 
356 	vp->sval = t;
357 	if (&vp->fval == NF) {
358 		donerec = 0;	/* mark $0 invalid */
359 		f = getfval(vp);
360 		setlastfld(f);
361 		dprintf( ("setting NF to %g\n", f) );
362 	}
363 
364 	return(vp->sval);
365 }
366 
367 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
368 {
369 	if ((vp->tval & (NUM | STR)) == 0)
370 		funnyvar(vp, "read value of");
371 	if (isfld(vp) && donefld == 0)
372 		fldbld();
373 	else if (isrec(vp) && donerec == 0)
374 		recbld();
375 	if (!isnum(vp)) {	/* not a number */
376 		vp->fval = atof(vp->sval);	/* best guess */
377 		if (is_number(vp->sval) && !(vp->tval&CON))
378 			vp->tval |= NUM;	/* make NUM only sparingly */
379 	}
380 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
381 	return(vp->fval);
382 }
383 
384 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
385 {
386 	char s[100];
387 	double dtemp;
388 
389 	if ((vp->tval & (NUM | STR)) == 0)
390 		funnyvar(vp, "read value of");
391 	if (isfld(vp) && donefld == 0)
392 		fldbld();
393 	else if (isrec(vp) && donerec == 0)
394 		recbld();
395 	if (isstr(vp) == 0) {
396 		if (freeable(vp))
397 			xfree(vp->sval);
398 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
399 			snprintf(s, sizeof(s), "%.30g", vp->fval);
400 		else
401 			snprintf(s, sizeof(s), *fmt, vp->fval);
402 		vp->sval = tostring(s);
403 		vp->tval &= ~DONTFREE;
404 		vp->tval |= STR;
405 	}
406 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
407 	return(vp->sval);
408 }
409 
410 char *getsval(Cell *vp)       /* get string val of a Cell */
411 {
412       return get_str_val(vp, CONVFMT);
413 }
414 
415 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
416 {
417       return get_str_val(vp, OFMT);
418 }
419 
420 
/*
 * tostring: return a freshly allocated copy of string s.
 * Calls FATAL when the copy cannot be allocated.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (!copy)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
430 
431 Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */
432 {
433 	Cell *c;
434 	char *p;
435 	char *sa = getsval(a);
436 	char *sb = getsval(b);
437 	size_t l = strlen(sa) + strlen(sb) + 1;
438 	p = malloc(l);
439 	if (p == NULL)
440 		FATAL("out of space concatenating %s and %s", sa, sb);
441 	snprintf(p, l, "%s%s", sa, sb);
442 	c = setsymtab(p, p, 0.0, CON|STR|DONTFREE, symtab);
443 	free(p);
444 	return c;
445 }
446 
/*
 * tostringN: return a copy of string s in a freshly allocated buffer.
 *   s  string to copy
 *   n  requested buffer size in bytes
 * The buffer is n bytes long, or strlen(s) + 1 if that is larger, so the
 * copy always fits.  (Allocating exactly n and copying with strcpy wrote
 * past the end of the buffer whenever n was too small, including n == 0.)
 * Calls FATAL when the buffer cannot be allocated.
 */
char *tostringN(const char *s, size_t n)
{
	size_t need = strlen(s) + 1;	/* bytes the copy itself occupies */
	char *p;

	if (n < need)
		n = need;
	p = malloc(n);
	if (p == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(p, s, need);
	return(p);
}
457 
458 char *qstring(const char *is, int delim)	/* collect string up to next delim */
459 {
460 	const char *os = is;
461 	int c, n;
462 	const uschar *s = (const uschar *) is;
463 	uschar *buf, *bp;
464 
465 	if ((buf = malloc(strlen(is)+3)) == NULL)
466 		FATAL( "out of space in qstring(%s)", s);
467 	for (bp = buf; (c = *s) != delim; s++) {
468 		if (c == '\n')
469 			SYNTAX( "newline in string %.20s...", os );
470 		else if (c != '\\')
471 			*bp++ = c;
472 		else {	/* \something */
473 			c = *++s;
474 			if (c == 0) {	/* \ at end */
475 				*bp++ = '\\';
476 				break;	/* for loop */
477 			}
478 			switch (c) {
479 			case '\\':	*bp++ = '\\'; break;
480 			case 'n':	*bp++ = '\n'; break;
481 			case 't':	*bp++ = '\t'; break;
482 			case 'b':	*bp++ = '\b'; break;
483 			case 'f':	*bp++ = '\f'; break;
484 			case 'r':	*bp++ = '\r'; break;
485 			default:
486 				if (!isdigit(c)) {
487 					*bp++ = c;
488 					break;
489 				}
490 				n = c - '0';
491 				if (isdigit(s[1])) {
492 					n = 8 * n + *++s - '0';
493 					if (isdigit(s[1]))
494 						n = 8 * n + *++s - '0';
495 				}
496 				*bp++ = n;
497 				break;
498 			}
499 		}
500 	}
501 	*bp++ = 0;
502 	return (char *) buf;
503 }
504