xref: /openbsd-src/usr.bin/awk/tran.c (revision db3296cf5c1dd9058ceecc3a29fe4aaa0bd26000)
1 /*	$OpenBSD: tran.c,v 1.10 2003/04/06 06:12:01 pvalchev Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
/* Symbol-table sizing policy, used by setsymtab() and rehash(). */
#define	FULLTAB	2	/* rehash once nelem exceeds FULLTAB * size */
#define	GROWTAB 4	/* rehash multiplies the bucket count by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Pointers into the value slots of the built-in variable cells.
 * All except ARGC are set by syminit(); ARGC is set by arginit().
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* The cells themselves, for code that needs more than the value slot. */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* cell for the constant "0", entered by syminit() */

/* defined outside this file; presumably the table of field cells -- confirm */
extern Cell **fldtab;
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
78 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
79 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
80 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
84 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
85 	NF = &nfloc->fval;
86 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
87 	NR = &nrloc->fval;
88 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
89 	FNR = &fnrloc->fval;
90 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
91 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
92 	RSTART = &rstartloc->fval;
93 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
94 	RLENGTH = &rlengthloc->fval;
95 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
96 	symtabloc->sval = (char *) symtab;
97 }
98 
99 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
100 {
101 	Cell *cp;
102 	int i;
103 	char temp[50];
104 
105 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
106 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
107 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
108 	cp->sval = (char *) ARGVtab;
109 	for (i = 0; i < ac; i++) {
110 		snprintf(temp, sizeof temp, "%d", i);
111 		if (is_number(*av))
112 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
113 		else
114 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
115 		av++;
116 	}
117 }
118 
119 void envinit(char **envp)	/* set up ENVIRON variable */
120 {
121 	Cell *cp;
122 	char *p;
123 
124 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
125 	ENVtab = makesymtab(NSYMTAB);
126 	cp->sval = (char *) ENVtab;
127 	for ( ; *envp; envp++) {
128 		if ((p = strchr(*envp, '=')) == NULL)
129 			continue;
130 		if( p == *envp ) /* no left hand side name in env string */
131 			continue;
132 		*p++ = 0;	/* split into two strings at = */
133 		if (is_number(p))
134 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
135 		else
136 			setsymtab(*envp, p, 0.0, STR, ENVtab);
137 		p[-1] = '=';	/* restore in case env is passed down to a shell */
138 	}
139 }
140 
141 Array *makesymtab(int n)	/* make a new symbol table */
142 {
143 	Array *ap;
144 	Cell **tp;
145 
146 	ap = (Array *) malloc(sizeof(Array));
147 	tp = (Cell **) calloc(n, sizeof(Cell *));
148 	if (ap == NULL || tp == NULL)
149 		FATAL("out of space in makesymtab");
150 	ap->nelem = 0;
151 	ap->size = n;
152 	ap->tab = tp;
153 	return(ap);
154 }
155 
156 void freesymtab(Cell *ap)	/* free a symbol table */
157 {
158 	Cell *cp, *temp;
159 	Array *tp;
160 	int i;
161 
162 	if (!isarr(ap))
163 		return;
164 	tp = (Array *) ap->sval;
165 	if (tp == NULL)
166 		return;
167 	for (i = 0; i < tp->size; i++) {
168 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
169 			xfree(cp->nval);
170 			if (freeable(cp))
171 				xfree(cp->sval);
172 			temp = cp->cnext;	/* avoids freeing then using */
173 			free(cp);
174 			tp->nelem--;
175 		}
176 		tp->tab[i] = 0;
177 	}
178 	if (tp->nelem != 0)
179 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
180 	free(tp->tab);
181 	free(tp);
182 }
183 
184 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
185 {
186 	Array *tp;
187 	Cell *p, *prev = NULL;
188 	int h;
189 
190 	tp = (Array *) ap->sval;
191 	h = hash(s, tp->size);
192 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
193 		if (strcmp(s, p->nval) == 0) {
194 			if (prev == NULL)	/* 1st one */
195 				tp->tab[h] = p->cnext;
196 			else			/* middle somewhere */
197 				prev->cnext = p->cnext;
198 			if (freeable(p))
199 				xfree(p->sval);
200 			free(p->nval);
201 			free(p);
202 			tp->nelem--;
203 			return;
204 		}
205 }
206 
207 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
208 {
209 	int h;
210 	Cell *p;
211 
212 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
213 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
214 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
215 		return(p);
216 	}
217 	p = (Cell *) malloc(sizeof(Cell));
218 	if (p == NULL)
219 		FATAL("out of space for symbol table at %s", n);
220 	p->nval = tostring(n);
221 	p->sval = s ? tostring(s) : tostring("");
222 	p->fval = f;
223 	p->tval = t;
224 	p->csub = CUNK;
225 	p->ctype = OCELL;
226 	tp->nelem++;
227 	if (tp->nelem > FULLTAB * tp->size)
228 		rehash(tp);
229 	h = hash(n, tp->size);
230 	p->cnext = tp->tab[h];
231 	tp->tab[h] = p;
232 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
233 		p, p->nval, p->sval, p->fval, p->tval) );
234 	return(p);
235 }
236 
/*
 * hash - map string s to a bucket index in the range [0, n).
 *
 * The value is built by multiplying the running total by 31 and adding
 * the next character, in unsigned arithmetic, then reduced modulo n.
 * n must be non-zero.
 */
int hash(const char *s, int n)
{
	unsigned total = 0;
	const char *p = s;

	while (*p != '\0') {
		total = total * 31 + *p;
		p++;
	}
	return total % n;
}
245 
246 void rehash(Array *tp)	/* rehash items in small table into big one */
247 {
248 	int i, nh, nsz;
249 	Cell *cp, *op, **np;
250 
251 	nsz = GROWTAB * tp->size;
252 	np = (Cell **) calloc(nsz, sizeof(Cell *));
253 	if (np == NULL)		/* can't do it, but can keep running. */
254 		return;		/* someone else will run out later. */
255 	for (i = 0; i < tp->size; i++) {
256 		for (cp = tp->tab[i]; cp; cp = op) {
257 			op = cp->cnext;
258 			nh = hash(cp->nval, nsz);
259 			cp->cnext = np[nh];
260 			np[nh] = cp;
261 		}
262 	}
263 	free(tp->tab);
264 	tp->tab = np;
265 	tp->size = nsz;
266 }
267 
268 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
269 {
270 	Cell *p;
271 	int h;
272 
273 	h = hash(s, tp->size);
274 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
275 		if (strcmp(s, p->nval) == 0)
276 			return(p);	/* found it */
277 	return(NULL);			/* not found */
278 }
279 
280 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
281 {
282 	int fldno;
283 
284 	if ((vp->tval & (NUM | STR)) == 0)
285 		funnyvar(vp, "assign to");
286 	if (isfld(vp)) {
287 		donerec = 0;	/* mark $0 invalid */
288 		fldno = atoi(vp->nval);
289 		if (fldno > *NF)
290 			newfld(fldno);
291 		   dprintf( ("setting field %d to %g\n", fldno, f) );
292 	} else if (isrec(vp)) {
293 		donefld = 0;	/* mark $1... invalid */
294 		donerec = 1;
295 	}
296 	if (freeable(vp))
297 		xfree(vp->sval); /* free any previous string */
298 	vp->tval &= ~STR;	/* mark string invalid */
299 	vp->tval |= NUM;	/* mark number ok */
300 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
301 	return vp->fval = f;
302 }
303 
304 void funnyvar(Cell *vp, const char *rw)
305 {
306 	if (isarr(vp))
307 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
308 	if (vp->tval & FCN)
309 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
310 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
311 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
312 }
313 
314 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
315 {
316 	char *t;
317 	int fldno;
318 
319 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, NN(vp->nval), s, vp->tval) );
320 	if ((vp->tval & (NUM | STR)) == 0)
321 		funnyvar(vp, "assign to");
322 	if (isfld(vp)) {
323 		donerec = 0;	/* mark $0 invalid */
324 		fldno = atoi(vp->nval);
325 		if (fldno > *NF)
326 			newfld(fldno);
327 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
328 	} else if (isrec(vp)) {
329 		donefld = 0;	/* mark $1... invalid */
330 		donerec = 1;
331 	}
332 	t = tostring(s);	/* in case it's self-assign */
333 	vp->tval &= ~NUM;
334 	vp->tval |= STR;
335 	if (freeable(vp))
336 		xfree(vp->sval);
337 	vp->tval &= ~DONTFREE;
338 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), t,t, vp->tval) );
339 	return(vp->sval = t);
340 }
341 
342 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
343 {
344 	if ((vp->tval & (NUM | STR)) == 0)
345 		funnyvar(vp, "read value of");
346 	if (isfld(vp) && donefld == 0)
347 		fldbld();
348 	else if (isrec(vp) && donerec == 0)
349 		recbld();
350 	if (!isnum(vp)) {	/* not a number */
351 		vp->fval = atof(vp->sval);	/* best guess */
352 		if (is_number(vp->sval) && !(vp->tval&CON))
353 			vp->tval |= NUM;	/* make NUM only sparingly */
354 	}
355 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
356 	return(vp->fval);
357 }
358 
359  static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
360 
361 {
362 	char s[100];	/* BUG: unchecked */
363 	double dtemp;
364 
365 	if ((vp->tval & (NUM | STR)) == 0)
366 		funnyvar(vp, "read value of");
367 	if (isfld(vp) && donefld == 0)
368 		fldbld();
369 	else if (isrec(vp) && donerec == 0)
370 		recbld();
371 	if (isstr(vp) == 0) {
372 		if (freeable(vp))
373 			xfree(vp->sval);
374 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
375 			snprintf(s, sizeof(s), "%.30g", vp->fval);
376 		else
377 			snprintf(s, sizeof(s), *fmt, vp->fval);
378 		vp->sval = tostring(s);
379 		vp->tval &= ~DONTFREE;
380 		vp->tval |= STR;
381 	}
382 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
383 	return(vp->sval);
384 }
385 
386 char *getsval(Cell *vp)       /* get string val of a Cell */
387 {
388       return get_str_val(vp, CONVFMT);
389 }
390 
391 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
392 {
393       return get_str_val(vp, OFMT);
394 }
395 
396 
/*
 * tostring - return a freshly allocated copy of string s.
 *
 * s must not be NULL.  The caller owns the result.  Running out of memory
 * is reported with FATAL, as the other allocators in this file do, rather
 * than handing a NULL pointer back to callers that store it as a cell's
 * name or value without checking.
 */
char *tostring(const char *s)
{
	char *copy;

	copy = strdup(s);
	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
401 
402 char *qstring(const char *is, int delim)	/* collect string up to next delim */
403 {
404 	const char *os = is;
405 	int c, n;
406 	uschar *s = (uschar *) is;
407 	uschar *buf, *bp;
408 
409 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
410 		FATAL( "out of space in qstring(%s)", s);
411 	for (bp = buf; (c = *s) != delim; s++) {
412 		if (c == '\n')
413 			SYNTAX( "newline in string %.20s...", os );
414 		else if (c != '\\')
415 			*bp++ = c;
416 		else {	/* \something */
417 			c = *++s;
418 			if (c == 0) {	/* \ at end */
419 				*bp++ = '\\';
420 				break;	/* for loop */
421 			}
422 			switch (c) {
423 			case '\\':	*bp++ = '\\'; break;
424 			case 'n':	*bp++ = '\n'; break;
425 			case 't':	*bp++ = '\t'; break;
426 			case 'b':	*bp++ = '\b'; break;
427 			case 'f':	*bp++ = '\f'; break;
428 			case 'r':	*bp++ = '\r'; break;
429 			default:
430 				if (!isdigit(c)) {
431 					*bp++ = c;
432 					break;
433 				}
434 				n = c - '0';
435 				if (isdigit(s[1])) {
436 					n = 8 * n + *++s - '0';
437 					if (isdigit(s[1]))
438 						n = 8 * n + *++s - '0';
439 				}
440 				*bp++ = n;
441 				break;
442 			}
443 		}
444 	}
445 	*bp++ = 0;
446 	return (char *) buf;
447 }
448