xref: /openbsd-src/usr.bin/awk/tran.c (revision 8445c53715e7030056b779e8ab40efb7820981f2)
1 /*	$OpenBSD: tran.c,v 1.7 2001/09/08 00:12:40 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
/* symbol-table sizing: setsymtab() calls rehash() once nelem > FULLTAB * size */
#define	FULLTAB	2	/* rehash when table gets this x full */
#define	GROWTAB 4	/* grow table by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers to the value slots of the built-in variables.  syminit() and
 * arginit() point each one at the sval (char **) or fval (Awkfloat *)
 * member of the corresponding cell in symtab.
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* the cells themselves, for the variables whose Cell is needed directly */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* cell for the constant "0", entered by syminit() */

extern Cell **fldtab;	/* defined in another file */
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
78 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
79 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
80 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
84 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
85 	NF = &nfloc->fval;
86 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
87 	NR = &nrloc->fval;
88 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
89 	FNR = &fnrloc->fval;
90 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
91 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
92 	RSTART = &rstartloc->fval;
93 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
94 	RLENGTH = &rlengthloc->fval;
95 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
96 	symtabloc->sval = (char *) symtab;
97 }
98 
99 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
100 {
101 	Cell *cp;
102 	int i;
103 	char temp[50];
104 
105 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
106 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
107 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
108 	cp->sval = (char *) ARGVtab;
109 	for (i = 0; i < ac; i++) {
110 		sprintf(temp, "%d", i);
111 		if (is_number(*av))
112 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
113 		else
114 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
115 		av++;
116 	}
117 }
118 
119 void envinit(char **envp)	/* set up ENVIRON variable */
120 {
121 	Cell *cp;
122 	char *p;
123 
124 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
125 	ENVtab = makesymtab(NSYMTAB);
126 	cp->sval = (char *) ENVtab;
127 	for ( ; *envp; envp++) {
128 		if ((p = strchr(*envp, '=')) == NULL)
129 			continue;
130 		if( p == *envp ) /* no left hand side name in env string */
131 			continue;
132 		*p++ = 0;	/* split into two strings at = */
133 		if (is_number(p))
134 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
135 		else
136 			setsymtab(*envp, p, 0.0, STR, ENVtab);
137 		p[-1] = '=';	/* restore in case env is passed down to a shell */
138 	}
139 }
140 
141 Array *makesymtab(int n)	/* make a new symbol table */
142 {
143 	Array *ap;
144 	Cell **tp;
145 
146 	ap = (Array *) malloc(sizeof(Array));
147 	tp = (Cell **) calloc(n, sizeof(Cell *));
148 	if (ap == NULL || tp == NULL)
149 		FATAL("out of space in makesymtab");
150 	ap->nelem = 0;
151 	ap->size = n;
152 	ap->tab = tp;
153 	return(ap);
154 }
155 
156 void freesymtab(Cell *ap)	/* free a symbol table */
157 {
158 	Cell *cp, *temp;
159 	Array *tp;
160 	int i;
161 
162 	if (!isarr(ap))
163 		return;
164 	tp = (Array *) ap->sval;
165 	if (tp == NULL)
166 		return;
167 	for (i = 0; i < tp->size; i++) {
168 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
169 			xfree(cp->nval);
170 			if (freeable(cp))
171 				xfree(cp->sval);
172 			temp = cp->cnext;	/* avoids freeing then using */
173 			free(cp);
174 		}
175 		tp->tab[i] = 0;
176 	}
177 	free(tp->tab);
178 	free(tp);
179 }
180 
181 void freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"] */
182 {
183 	Array *tp;
184 	Cell *p, *prev = NULL;
185 	int h;
186 
187 	tp = (Array *) ap->sval;
188 	h = hash(s, tp->size);
189 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
190 		if (strcmp(s, p->nval) == 0) {
191 			if (prev == NULL)	/* 1st one */
192 				tp->tab[h] = p->cnext;
193 			else			/* middle somewhere */
194 				prev->cnext = p->cnext;
195 			if (freeable(p))
196 				xfree(p->sval);
197 			free(p->nval);
198 			free(p);
199 			tp->nelem--;
200 			return;
201 		}
202 }
203 
204 Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
205 {
206 	int h;
207 	Cell *p;
208 
209 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
210 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
211 			p, p->nval, p->sval, p->fval, p->tval) );
212 		return(p);
213 	}
214 	p = (Cell *) malloc(sizeof(Cell));
215 	if (p == NULL)
216 		FATAL("out of space for symbol table at %s", n);
217 	p->nval = tostring(n);
218 	p->sval = s ? tostring(s) : tostring("");
219 	p->fval = f;
220 	p->tval = t;
221 	p->csub = CUNK;
222 	p->ctype = OCELL;
223 	tp->nelem++;
224 	if (tp->nelem > FULLTAB * tp->size)
225 		rehash(tp);
226 	h = hash(n, tp->size);
227 	p->cnext = tp->tab[h];
228 	tp->tab[h] = p;
229 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
230 		p, p->nval, p->sval, p->fval, p->tval) );
231 	return(p);
232 }
233 
/*
 * hash: form the hash value for string s, reduced modulo n.
 * Each character is folded in as acc = 31 * acc + c using unsigned
 * arithmetic.  n must be non-zero.
 */
int hash(char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return acc % n;
}
242 
243 void rehash(Array *tp)	/* rehash items in small table into big one */
244 {
245 	int i, nh, nsz;
246 	Cell *cp, *op, **np;
247 
248 	nsz = GROWTAB * tp->size;
249 	np = (Cell **) calloc(nsz, sizeof(Cell *));
250 	if (np == NULL)		/* can't do it, but can keep running. */
251 		return;		/* someone else will run out later. */
252 	for (i = 0; i < tp->size; i++) {
253 		for (cp = tp->tab[i]; cp; cp = op) {
254 			op = cp->cnext;
255 			nh = hash(cp->nval, nsz);
256 			cp->cnext = np[nh];
257 			np[nh] = cp;
258 		}
259 	}
260 	free(tp->tab);
261 	tp->tab = np;
262 	tp->size = nsz;
263 }
264 
265 Cell *lookup(char *s, Array *tp)	/* look for s in tp */
266 {
267 	Cell *p;
268 	int h;
269 
270 	h = hash(s, tp->size);
271 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
272 		if (strcmp(s, p->nval) == 0)
273 			return(p);	/* found it */
274 	return(NULL);			/* not found */
275 }
276 
277 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
278 {
279 	int fldno;
280 
281 	if ((vp->tval & (NUM | STR)) == 0)
282 		funnyvar(vp, "assign to");
283 	if (isfld(vp)) {
284 		donerec = 0;	/* mark $0 invalid */
285 		fldno = atoi(vp->nval);
286 		if (fldno > *NF)
287 			newfld(fldno);
288 		   dprintf( ("setting field %d to %g\n", fldno, f) );
289 	} else if (isrec(vp)) {
290 		donefld = 0;	/* mark $1... invalid */
291 		donerec = 1;
292 	}
293 	if (freeable(vp))
294 		xfree(vp->sval); /* free any previous string */
295 	vp->tval &= ~STR;	/* mark string invalid */
296 	vp->tval |= NUM;	/* mark number ok */
297 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
298 	return vp->fval = f;
299 }
300 
301 void funnyvar(Cell *vp, char *rw)
302 {
303 	if (isarr(vp))
304 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
305 	if (vp->tval & FCN)
306 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
307 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
308 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
309 }
310 
311 char *setsval(Cell *vp, char *s)	/* set string val of a Cell */
312 {
313 	char *t;
314 	int fldno;
315 
316 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
317 	if ((vp->tval & (NUM | STR)) == 0)
318 		funnyvar(vp, "assign to");
319 	if (isfld(vp)) {
320 		donerec = 0;	/* mark $0 invalid */
321 		fldno = atoi(vp->nval);
322 		if (fldno > *NF)
323 			newfld(fldno);
324 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
325 	} else if (isrec(vp)) {
326 		donefld = 0;	/* mark $1... invalid */
327 		donerec = 1;
328 	}
329 	t = tostring(s);	/* in case it's self-assign */
330 	vp->tval &= ~NUM;
331 	vp->tval |= STR;
332 	if (freeable(vp))
333 		xfree(vp->sval);
334 	vp->tval &= ~DONTFREE;
335 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
336 	return(vp->sval = t);
337 }
338 
339 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
340 {
341 	if ((vp->tval & (NUM | STR)) == 0)
342 		funnyvar(vp, "read value of");
343 	if (isfld(vp) && donefld == 0)
344 		fldbld();
345 	else if (isrec(vp) && donerec == 0)
346 		recbld();
347 	if (!isnum(vp)) {	/* not a number */
348 		vp->fval = atof(vp->sval);	/* best guess */
349 		if (is_number(vp->sval) && !(vp->tval&CON))
350 			vp->tval |= NUM;	/* make NUM only sparingly */
351 	}
352 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
353 	return(vp->fval);
354 }
355 
356 char *getsval(Cell *vp)	/* get string val of a Cell */
357 {
358 	char s[100];	/* BUG: unchecked */
359 	double dtemp;
360 
361 	if ((vp->tval & (NUM | STR)) == 0)
362 		funnyvar(vp, "read value of");
363 	if (isfld(vp) && donefld == 0)
364 		fldbld();
365 	else if (isrec(vp) && donerec == 0)
366 		recbld();
367 	if (isstr(vp) == 0) {
368 		if (freeable(vp))
369 			xfree(vp->sval);
370 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
371 			sprintf(s, "%.30g", vp->fval);
372 		else
373 			sprintf(s, *CONVFMT, vp->fval);
374 		vp->sval = tostring(s);
375 		vp->tval &= ~DONTFREE;
376 		vp->tval |= STR;
377 	}
378 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
379 	return(vp->sval);
380 }
381 
/*
 * tostring: return a newly allocated copy of string s.
 * Calls FATAL when the allocation fails.
 */
char *tostring(char *s)
{
	size_t nbytes = strlen(s) + 1;	/* text plus terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
392 
393 char *qstring(char *is, int delim)	/* collect string up to next delim */
394 {
395 	char *os = is;
396 	int c, n;
397 	uschar *s = (uschar *) is;
398 	uschar *buf, *bp;
399 
400 	if ((buf = (uschar *) malloc(strlen(s)+3)) == NULL)
401 		FATAL( "out of space in qstring(%s)", s);
402 	for (bp = buf; (c = *s) != delim; s++) {
403 		if (c == '\n')
404 			SYNTAX( "newline in string %.20s...", os );
405 		else if (c != '\\')
406 			*bp++ = c;
407 		else {	/* \something */
408 			c = *++s;
409 			if (c == 0) {	/* \ at end */
410 				*bp++ = '\\';
411 				break;	/* for loop */
412 			}
413 			switch (c) {
414 			case '\\':	*bp++ = '\\'; break;
415 			case 'n':	*bp++ = '\n'; break;
416 			case 't':	*bp++ = '\t'; break;
417 			case 'b':	*bp++ = '\b'; break;
418 			case 'f':	*bp++ = '\f'; break;
419 			case 'r':	*bp++ = '\r'; break;
420 			default:
421 				if (!isdigit(c)) {
422 					*bp++ = c;
423 					break;
424 				}
425 				n = c - '0';
426 				if (isdigit(s[1])) {
427 					n = 8 * n + *++s - '0';
428 					if (isdigit(s[1]))
429 						n = 8 * n + *++s - '0';
430 				}
431 				*bp++ = n;
432 				break;
433 			}
434 		}
435 	}
436 	*bp++ = 0;
437 	return (char *) buf;
438 }
439