xref: /openbsd-src/usr.bin/awk/tran.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: tran.c,v 1.14 2006/11/04 19:10:15 jmc Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
35 #define	FULLTAB	2	/* setsymtab rehashes once nelem exceeds this multiple of the table size */
36 #define	GROWTAB 4	/* rehash multiplies the table size by this factor */
37 
38 Array	*symtab;	/* main symbol table */
39 
40 char	**FS;		/* field sep; points at sval of the FS cell (set in syminit) */
41 char	**RS;		/* record sep; points at sval of the RS cell (set in syminit) */
42 char	**OFS;		/* output field sep */
43 char	**ORS;		/* output record sep */
44 char	**OFMT;		/* output format for numbers */
45 char	**CONVFMT;	/* format for conversions in getsval */
46 Awkfloat *NF;		/* number of fields in current record */
47 Awkfloat *NR;		/* number of current record */
48 Awkfloat *FNR;		/* number of current record in current file */
49 char	**FILENAME;	/* current filename argument */
50 Awkfloat *ARGC;		/* number of arguments from command line (set in arginit) */
51 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
52 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
53 Awkfloat *RLENGTH;	/* length of same */
54 
55 Cell	*fsloc;		/* cell for FS */
56 Cell	*nrloc;		/* cell for NR */
57 Cell	*nfloc;		/* cell for NF */
58 Cell	*fnrloc;	/* cell for FNR */
59 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
60 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
61 Cell	*rstartloc;	/* cell for RSTART */
62 Cell	*rlengthloc;	/* cell for RLENGTH */
63 Cell	*symtabloc;	/* cell for SYMTAB; its sval holds the symtab pointer */
64 
65 Cell	*nullloc;	/* a guaranteed empty cell */
66 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
67 Cell	*literal0;	/* constant cell named "0" with string "0" and value 0.0 (set in syminit) */
68 
69 extern Cell **fldtab;	/* defined in another source file */
70 
71 void syminit(void)	/* initialize symbol table with builtin vars */
72 {
73 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	/* this is used for if(x)... tests: */
75 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
76 	nullnode = celltonode(nullloc, CCON);
77 
78 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
79 	FS = &fsloc->sval;
80 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
82 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
85 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
86 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
87 	NF = &nfloc->fval;
88 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
89 	NR = &nrloc->fval;
90 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
91 	FNR = &fnrloc->fval;
92 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
93 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
94 	RSTART = &rstartloc->fval;
95 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
96 	RLENGTH = &rlengthloc->fval;
97 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
98 	symtabloc->sval = (char *) symtab;
99 }
100 
101 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
102 {
103 	Cell *cp;
104 	int i;
105 	char temp[50];
106 
107 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
108 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
109 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
110 	cp->sval = (char *) ARGVtab;
111 	for (i = 0; i < ac; i++) {
112 		snprintf(temp, sizeof temp, "%d", i);
113 		if (is_number(*av))
114 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
115 		else
116 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
117 		av++;
118 	}
119 }
120 
121 void envinit(char **envp)	/* set up ENVIRON variable */
122 {
123 	Cell *cp;
124 	char *p;
125 
126 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
127 	ENVtab = makesymtab(NSYMTAB);
128 	cp->sval = (char *) ENVtab;
129 	for ( ; *envp; envp++) {
130 		if ((p = strchr(*envp, '=')) == NULL)
131 			continue;
132 		if( p == *envp ) /* no left hand side name in env string */
133 			continue;
134 		*p++ = 0;	/* split into two strings at = */
135 		if (is_number(p))
136 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
137 		else
138 			setsymtab(*envp, p, 0.0, STR, ENVtab);
139 		p[-1] = '=';	/* restore in case env is passed down to a shell */
140 	}
141 }
142 
143 Array *makesymtab(int n)	/* make a new symbol table */
144 {
145 	Array *ap;
146 	Cell **tp;
147 
148 	ap = (Array *) malloc(sizeof(Array));
149 	tp = (Cell **) calloc(n, sizeof(Cell *));
150 	if (ap == NULL || tp == NULL)
151 		FATAL("out of space in makesymtab");
152 	ap->nelem = 0;
153 	ap->size = n;
154 	ap->tab = tp;
155 	return(ap);
156 }
157 
158 void freesymtab(Cell *ap)	/* free a symbol table */
159 {
160 	Cell *cp, *temp;
161 	Array *tp;
162 	int i;
163 
164 	if (!isarr(ap))
165 		return;
166 	tp = (Array *) ap->sval;
167 	if (tp == NULL)
168 		return;
169 	for (i = 0; i < tp->size; i++) {
170 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
171 			xfree(cp->nval);
172 			if (freeable(cp))
173 				xfree(cp->sval);
174 			temp = cp->cnext;	/* avoids freeing then using */
175 			free(cp);
176 			tp->nelem--;
177 		}
178 		tp->tab[i] = 0;
179 	}
180 	if (tp->nelem != 0)
181 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
182 	free(tp->tab);
183 	free(tp);
184 }
185 
186 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
187 {
188 	Array *tp;
189 	Cell *p, *prev = NULL;
190 	int h;
191 
192 	tp = (Array *) ap->sval;
193 	h = hash(s, tp->size);
194 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
195 		if (strcmp(s, p->nval) == 0) {
196 			if (prev == NULL)	/* 1st one */
197 				tp->tab[h] = p->cnext;
198 			else			/* middle somewhere */
199 				prev->cnext = p->cnext;
200 			if (freeable(p))
201 				xfree(p->sval);
202 			free(p->nval);
203 			free(p);
204 			tp->nelem--;
205 			return;
206 		}
207 }
208 
209 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
210 {
211 	int h;
212 	Cell *p;
213 
214 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
215 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
216 			p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
217 		return(p);
218 	}
219 	p = (Cell *) malloc(sizeof(Cell));
220 	if (p == NULL)
221 		FATAL("out of space for symbol table at %s", n);
222 	p->nval = tostring(n);
223 	p->sval = s ? tostring(s) : tostring("");
224 	p->fval = f;
225 	p->tval = t;
226 	p->csub = CUNK;
227 	p->ctype = OCELL;
228 	tp->nelem++;
229 	if (tp->nelem > FULLTAB * tp->size)
230 		rehash(tp);
231 	h = hash(n, tp->size);
232 	p->cnext = tp->tab[h];
233 	tp->tab[h] = p;
234 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
235 		p, p->nval, p->sval, p->fval, p->tval) );
236 	return(p);
237 }
238 
/*
 * hash: map string s to a bucket index in the range 0..n-1.
 *
 * The value is accumulated in unsigned arithmetic as
 * acc = 31 * acc + character, one character at a time, and then reduced
 * modulo n.  The empty string hashes to 0.
 */
int hash(const char *s, int n)	/* form hash value for string s */
{
	unsigned acc = 0;
	const char *cp = s;

	while (*cp != '\0') {
		acc = 31 * acc + *cp;
		cp++;
	}
	return acc % n;
}
247 
248 void rehash(Array *tp)	/* rehash items in small table into big one */
249 {
250 	int i, nh, nsz;
251 	Cell *cp, *op, **np;
252 
253 	nsz = GROWTAB * tp->size;
254 	np = (Cell **) calloc(nsz, sizeof(Cell *));
255 	if (np == NULL)		/* can't do it, but can keep running. */
256 		return;		/* someone else will run out later. */
257 	for (i = 0; i < tp->size; i++) {
258 		for (cp = tp->tab[i]; cp; cp = op) {
259 			op = cp->cnext;
260 			nh = hash(cp->nval, nsz);
261 			cp->cnext = np[nh];
262 			np[nh] = cp;
263 		}
264 	}
265 	free(tp->tab);
266 	tp->tab = np;
267 	tp->size = nsz;
268 }
269 
270 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
271 {
272 	Cell *p;
273 	int h;
274 
275 	h = hash(s, tp->size);
276 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
277 		if (strcmp(s, p->nval) == 0)
278 			return(p);	/* found it */
279 	return(NULL);			/* not found */
280 }
281 
282 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
283 {
284 	int fldno;
285 
286 	if ((vp->tval & (NUM | STR)) == 0)
287 		funnyvar(vp, "assign to");
288 	if (isfld(vp)) {
289 		donerec = 0;	/* mark $0 invalid */
290 		fldno = atoi(vp->nval);
291 		if (fldno > *NF)
292 			newfld(fldno);
293 		   dprintf( ("setting field %d to %g\n", fldno, f) );
294 	} else if (isrec(vp)) {
295 		donefld = 0;	/* mark $1... invalid */
296 		donerec = 1;
297 	}
298 	if (freeable(vp))
299 		xfree(vp->sval); /* free any previous string */
300 	vp->tval &= ~STR;	/* mark string invalid */
301 	vp->tval |= NUM;	/* mark number ok */
302 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), f, vp->tval) );
303 	return vp->fval = f;
304 }
305 
306 void funnyvar(Cell *vp, const char *rw)
307 {
308 	if (isarr(vp))
309 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
310 	if (vp->tval & FCN)
311 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
312 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
313 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
314 }
315 
316 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
317 {
318 	char *t;
319 	int fldno;
320 
321 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
322 		vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
323 	if ((vp->tval & (NUM | STR)) == 0)
324 		funnyvar(vp, "assign to");
325 	if (isfld(vp)) {
326 		donerec = 0;	/* mark $0 invalid */
327 		fldno = atoi(vp->nval);
328 		if (fldno > *NF)
329 			newfld(fldno);
330 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
331 	} else if (isrec(vp)) {
332 		donefld = 0;	/* mark $1... invalid */
333 		donerec = 1;
334 	}
335 	t = tostring(s);	/* in case it's self-assign */
336 	if (freeable(vp))
337 		xfree(vp->sval);
338 	vp->tval &= ~NUM;
339 	vp->tval |= STR;
340 	vp->tval &= ~DONTFREE;
341 	   dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
342 		vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
343 	return(vp->sval = t);
344 }
345 
346 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
347 {
348 	if ((vp->tval & (NUM | STR)) == 0)
349 		funnyvar(vp, "read value of");
350 	if (isfld(vp) && donefld == 0)
351 		fldbld();
352 	else if (isrec(vp) && donerec == 0)
353 		recbld();
354 	if (!isnum(vp)) {	/* not a number */
355 		vp->fval = atof(vp->sval);	/* best guess */
356 		if (is_number(vp->sval) && !(vp->tval&CON))
357 			vp->tval |= NUM;	/* make NUM only sparingly */
358 	}
359 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, NN(vp->nval), vp->fval, vp->tval) );
360 	return(vp->fval);
361 }
362 
363 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
364 {
365 	int n;
366 	double dtemp;
367 
368 	if ((vp->tval & (NUM | STR)) == 0)
369 		funnyvar(vp, "read value of");
370 	if (isfld(vp) && donefld == 0)
371 		fldbld();
372 	else if (isrec(vp) && donerec == 0)
373 		recbld();
374 	if (isstr(vp) == 0) {
375 		if (freeable(vp))
376 			xfree(vp->sval);
377 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
378 			n = asprintf(&vp->sval, "%.30g", vp->fval);
379 		else
380 			n = asprintf(&vp->sval, *fmt, vp->fval);
381 		if (n == -1)
382 			FATAL("out of space in get_str_val");
383 		vp->tval &= ~DONTFREE;
384 		vp->tval |= STR;
385 	}
386 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
387 	return(vp->sval);
388 }
389 
390 char *getsval(Cell *vp)       /* get string val of a Cell */
391 {
392       return get_str_val(vp, CONVFMT);
393 }
394 
395 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
396 {
397       return get_str_val(vp, OFMT);
398 }
399 
400 
/*
 * tostring: return a freshly allocated copy of string s.
 *
 * The copy is made with strdup(); failure is reported through FATAL().
 * The caller owns the returned storage.
 */
char *tostring(const char *s)	/* make a copy of string s */
{
	char *copy = strdup(s);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
410 
411 char *qstring(const char *is, int delim)	/* collect string up to next delim */
412 {
413 	const char *os = is;
414 	int c, n;
415 	uschar *s = (uschar *) is;
416 	uschar *buf, *bp;
417 
418 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
419 		FATAL( "out of space in qstring(%s)", s);
420 	for (bp = buf; (c = *s) != delim; s++) {
421 		if (c == '\n')
422 			SYNTAX( "newline in string %.20s...", os );
423 		else if (c != '\\')
424 			*bp++ = c;
425 		else {	/* \something */
426 			c = *++s;
427 			if (c == 0) {	/* \ at end */
428 				*bp++ = '\\';
429 				break;	/* for loop */
430 			}
431 			switch (c) {
432 			case '\\':	*bp++ = '\\'; break;
433 			case 'n':	*bp++ = '\n'; break;
434 			case 't':	*bp++ = '\t'; break;
435 			case 'b':	*bp++ = '\b'; break;
436 			case 'f':	*bp++ = '\f'; break;
437 			case 'r':	*bp++ = '\r'; break;
438 			default:
439 				if (!isdigit(c)) {
440 					*bp++ = c;
441 					break;
442 				}
443 				n = c - '0';
444 				if (isdigit(s[1])) {
445 					n = 8 * n + *++s - '0';
446 					if (isdigit(s[1]))
447 						n = 8 * n + *++s - '0';
448 				}
449 				*bp++ = n;
450 				break;
451 			}
452 		}
453 	}
454 	*bp++ = 0;
455 	return (char *) buf;
456 }
457