xref: /openbsd-src/usr.bin/awk/tran.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: tran.c,v 1.6 1999/12/08 23:09:46 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
35 #define	FULLTAB	2	/* rehash when table gets this x full */
36 #define	GROWTAB 4	/* grow table by this factor */
37 
38 Array	*symtab;	/* main symbol table */
39 
40 char	**FS;		/* initial field sep */
41 char	**RS;		/* initial record sep */
42 char	**OFS;		/* output field sep */
43 char	**ORS;		/* output record sep */
44 char	**OFMT;		/* output format for numbers */
45 char	**CONVFMT;	/* format for conversions in getsval */
46 Awkfloat *NF;		/* number of fields in current record */
47 Awkfloat *NR;		/* number of current record */
48 Awkfloat *FNR;		/* number of current record in current file */
49 char	**FILENAME;	/* current filename argument */
50 Awkfloat *ARGC;		/* number of arguments from command line */
51 char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
52 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
53 Awkfloat *RLENGTH;	/* length of same */
54 
55 Cell	*nrloc;		/* NR */
56 Cell	*nfloc;		/* NF */
57 Cell	*fnrloc;	/* FNR */
58 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
59 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
60 Cell	*rstartloc;	/* RSTART */
61 Cell	*rlengthloc;	/* RLENGTH */
62 Cell	*symtabloc;	/* SYMTAB */
63 
64 Cell	*nullloc;	/* a guaranteed empty cell */
65 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
66 Cell	*literal0;
67 
68 extern Cell **fldtab;
69 
70 void syminit(void)	/* initialize symbol table with builtin vars */
71 {
72 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
73 	/* this is used for if(x)... tests: */
74 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
75 	nullnode = celltonode(nullloc, CCON);
76 
77 	FS = &setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab)->sval;
78 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
79 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
80 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
82 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
83 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
84 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
85 	NF = &nfloc->fval;
86 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
87 	NR = &nrloc->fval;
88 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
89 	FNR = &fnrloc->fval;
90 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
91 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
92 	RSTART = &rstartloc->fval;
93 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
94 	RLENGTH = &rlengthloc->fval;
95 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
96 	symtabloc->sval = (char *) symtab;
97 }
98 
99 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
100 {
101 	Cell *cp;
102 	int i;
103 	char temp[50];
104 
105 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
106 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
107 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
108 	cp->sval = (char *) ARGVtab;
109 	for (i = 0; i < ac; i++) {
110 		sprintf(temp, "%d", i);
111 		if (is_number(*av))
112 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
113 		else
114 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
115 		av++;
116 	}
117 }
118 
119 void envinit(char **envp)	/* set up ENVIRON variable */
120 {
121 	Cell *cp;
122 	char *p;
123 
124 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
125 	ENVtab = makesymtab(NSYMTAB);
126 	cp->sval = (char *) ENVtab;
127 	for ( ; *envp; envp++) {
128 		if ((p = strchr(*envp, '=')) == NULL)
129 			continue;
130 		*p++ = 0;	/* split into two strings at = */
131 		if (is_number(p))
132 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
133 		else
134 			setsymtab(*envp, p, 0.0, STR, ENVtab);
135 		p[-1] = '=';	/* restore in case env is passed down to a shell */
136 	}
137 }
138 
139 Array *makesymtab(int n)	/* make a new symbol table */
140 {
141 	Array *ap;
142 	Cell **tp;
143 
144 	ap = (Array *) malloc(sizeof(Array));
145 	tp = (Cell **) calloc(n, sizeof(Cell *));
146 	if (ap == NULL || tp == NULL)
147 		FATAL("out of space in makesymtab");
148 	ap->nelem = 0;
149 	ap->size = n;
150 	ap->tab = tp;
151 	return(ap);
152 }
153 
154 void freesymtab(Cell *ap)	/* free a symbol table */
155 {
156 	Cell *cp, *temp;
157 	Array *tp;
158 	int i;
159 
160 	if (!isarr(ap))
161 		return;
162 	tp = (Array *) ap->sval;
163 	if (tp == NULL)
164 		return;
165 	for (i = 0; i < tp->size; i++) {
166 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
167 			xfree(cp->nval);
168 			if (freeable(cp))
169 				xfree(cp->sval);
170 			temp = cp->cnext;	/* avoids freeing then using */
171 			free(cp);
172 		}
173 		tp->tab[i] = 0;
174 	}
175 	free(tp->tab);
176 	free(tp);
177 }
178 
179 void freeelem(Cell *ap, char *s)	/* free elem s from ap (i.e., ap["s"] */
180 {
181 	Array *tp;
182 	Cell *p, *prev = NULL;
183 	int h;
184 
185 	tp = (Array *) ap->sval;
186 	h = hash(s, tp->size);
187 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
188 		if (strcmp(s, p->nval) == 0) {
189 			if (prev == NULL)	/* 1st one */
190 				tp->tab[h] = p->cnext;
191 			else			/* middle somewhere */
192 				prev->cnext = p->cnext;
193 			if (freeable(p))
194 				xfree(p->sval);
195 			free(p->nval);
196 			free(p);
197 			tp->nelem--;
198 			return;
199 		}
200 }
201 
202 Cell *setsymtab(char *n, char *s, Awkfloat f, unsigned t, Array *tp)
203 {
204 	int h;
205 	Cell *p;
206 
207 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
208 		   dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
209 			p, p->nval, p->sval, p->fval, p->tval) );
210 		return(p);
211 	}
212 	p = (Cell *) malloc(sizeof(Cell));
213 	if (p == NULL)
214 		FATAL("out of space for symbol table at %s", n);
215 	p->nval = tostring(n);
216 	p->sval = s ? tostring(s) : tostring("");
217 	p->fval = f;
218 	p->tval = t;
219 	p->csub = CUNK;
220 	p->ctype = OCELL;
221 	tp->nelem++;
222 	if (tp->nelem > FULLTAB * tp->size)
223 		rehash(tp);
224 	h = hash(n, tp->size);
225 	p->cnext = tp->tab[h];
226 	tp->tab[h] = p;
227 	   dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
228 		p, p->nval, p->sval, p->fval, p->tval) );
229 	return(p);
230 }
231 
/*
 * hash: form the hash value for string s, reduced to the range 0..n-1.
 *
 * Each character is folded in as acc = 31 * acc + character, using
 * unsigned arithmetic.  n is the number of buckets and must not be zero.
 */
int hash(char *s, int n)
{
	unsigned acc = 0;

	while (*s != '\0') {
		acc = 31 * acc + *s;
		s++;
	}
	return acc % n;
}
240 
241 void rehash(Array *tp)	/* rehash items in small table into big one */
242 {
243 	int i, nh, nsz;
244 	Cell *cp, *op, **np;
245 
246 	nsz = GROWTAB * tp->size;
247 	np = (Cell **) calloc(nsz, sizeof(Cell *));
248 	if (np == NULL)		/* can't do it, but can keep running. */
249 		return;		/* someone else will run out later. */
250 	for (i = 0; i < tp->size; i++) {
251 		for (cp = tp->tab[i]; cp; cp = op) {
252 			op = cp->cnext;
253 			nh = hash(cp->nval, nsz);
254 			cp->cnext = np[nh];
255 			np[nh] = cp;
256 		}
257 	}
258 	free(tp->tab);
259 	tp->tab = np;
260 	tp->size = nsz;
261 }
262 
263 Cell *lookup(char *s, Array *tp)	/* look for s in tp */
264 {
265 	Cell *p;
266 	int h;
267 
268 	h = hash(s, tp->size);
269 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
270 		if (strcmp(s, p->nval) == 0)
271 			return(p);	/* found it */
272 	return(NULL);			/* not found */
273 }
274 
275 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
276 {
277 	int fldno;
278 
279 	if ((vp->tval & (NUM | STR)) == 0)
280 		funnyvar(vp, "assign to");
281 	if (isfld(vp)) {
282 		donerec = 0;	/* mark $0 invalid */
283 		fldno = atoi(vp->nval);
284 		if (fldno > *NF)
285 			newfld(fldno);
286 		   dprintf( ("setting field %d to %g\n", fldno, f) );
287 	} else if (isrec(vp)) {
288 		donefld = 0;	/* mark $1... invalid */
289 		donerec = 1;
290 	}
291 	if (freeable(vp))
292 		xfree(vp->sval); /* free any previous string */
293 	vp->tval &= ~STR;	/* mark string invalid */
294 	vp->tval |= NUM;	/* mark number ok */
295 	   dprintf( ("setfval %p: %s = %g, t=%o\n", vp, vp->nval, f, vp->tval) );
296 	return vp->fval = f;
297 }
298 
299 void funnyvar(Cell *vp, char *rw)
300 {
301 	if (isarr(vp))
302 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
303 	if (vp->tval & FCN)
304 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
305 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
306 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
307 }
308 
309 char *setsval(Cell *vp, char *s)	/* set string val of a Cell */
310 {
311 	char *t;
312 	int fldno;
313 
314 	   dprintf( ("starting setsval %p: %s = \"%s\", t=%o\n", vp, vp->nval, s, vp->tval) );
315 	if ((vp->tval & (NUM | STR)) == 0)
316 		funnyvar(vp, "assign to");
317 	if (isfld(vp)) {
318 		donerec = 0;	/* mark $0 invalid */
319 		fldno = atoi(vp->nval);
320 		if (fldno > *NF)
321 			newfld(fldno);
322 		   dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
323 	} else if (isrec(vp)) {
324 		donefld = 0;	/* mark $1... invalid */
325 		donerec = 1;
326 	}
327 	t = tostring(s);	/* in case it's self-assign */
328 	vp->tval &= ~NUM;
329 	vp->tval |= STR;
330 	if (freeable(vp))
331 		xfree(vp->sval);
332 	vp->tval &= ~DONTFREE;
333 	   dprintf( ("setsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, t,t, vp->tval) );
334 	return(vp->sval = t);
335 }
336 
337 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
338 {
339 	if ((vp->tval & (NUM | STR)) == 0)
340 		funnyvar(vp, "read value of");
341 	if (isfld(vp) && donefld == 0)
342 		fldbld();
343 	else if (isrec(vp) && donerec == 0)
344 		recbld();
345 	if (!isnum(vp)) {	/* not a number */
346 		vp->fval = atof(vp->sval);	/* best guess */
347 		if (is_number(vp->sval) && !(vp->tval&CON))
348 			vp->tval |= NUM;	/* make NUM only sparingly */
349 	}
350 	   dprintf( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
351 	return(vp->fval);
352 }
353 
354 char *getsval(Cell *vp)	/* get string val of a Cell */
355 {
356 	char s[100];	/* BUG: unchecked */
357 	double dtemp;
358 
359 	if ((vp->tval & (NUM | STR)) == 0)
360 		funnyvar(vp, "read value of");
361 	if (isfld(vp) && donefld == 0)
362 		fldbld();
363 	else if (isrec(vp) && donerec == 0)
364 		recbld();
365 	if (isstr(vp) == 0) {
366 		if (freeable(vp))
367 			xfree(vp->sval);
368 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
369 			sprintf(s, "%.30g", vp->fval);
370 		else
371 			sprintf(s, *CONVFMT, vp->fval);
372 		vp->sval = tostring(s);
373 		vp->tval &= ~DONTFREE;
374 		vp->tval |= STR;
375 	}
376 	   dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", vp, vp->nval, vp->sval, vp->sval, vp->tval) );
377 	return(vp->sval);
378 }
379 
/*
 * tostring: return a freshly malloc'd copy of string s.
 *
 * s must not be NULL.  Calls FATAL() if the memory cannot be obtained.
 */
char *tostring(char *s)
{
	size_t nbytes = strlen(s) + 1;	/* include the terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	memcpy(copy, s, nbytes);
	return copy;
}
390 
/*
 * qstring: collect string up to next delim, translating backslash escapes.
 *
 * Characters of s are copied into a freshly malloc'd buffer until delim
 * (or the end of s, whichever comes first) is reached.  Recognized
 * escapes are \\ \n \t \b \f \r and a backslash followed by one to three
 * digits, taken as an octal character code.  A backslash followed by any
 * other character yields that character; a backslash at the very end of s
 * is kept as a backslash.  A raw newline is reported with SYNTAX() and
 * left out of the result.
 *
 * Every output character uses up at least one input character, so the
 * strlen(s)+3 byte buffer is always large enough.  Calls FATAL() if the
 * buffer cannot be allocated.  Returns the NUL-terminated buffer, which is
 * not freed here.
 */
char *qstring(char *s, int delim)
{
	char *os = s;
	int c, n;
	char *buf, *bp;

	if ((buf = (char *) malloc(strlen(s)+3)) == NULL)
		FATAL( "out of space in qstring(%s)", s);
	/* also stop at NUL so that a missing delim cannot run off the end of s */
	for (bp = buf; (c = *s) != delim && c != '\0'; s++) {
		if (c == '\n')
			SYNTAX( "newline in string %.20s...", os );
		else if (c != '\\')
			*bp++ = c;
		else {	/* \something */
			c = *++s;
			if (c == 0) {	/* \ at end */
				*bp++ = '\\';
				break;	/* for loop */
			}
			switch (c) {
			case '\\':	*bp++ = '\\'; break;
			case 'n':	*bp++ = '\n'; break;
			case 't':	*bp++ = '\t'; break;
			case 'b':	*bp++ = '\b'; break;
			case 'f':	*bp++ = '\f'; break;
			case 'r':	*bp++ = '\r'; break;
			default:
				/* <ctype.h> functions need an unsigned char value */
				if (!isdigit((unsigned char) c)) {
					*bp++ = c;
					break;
				}
				n = c - '0';
				if (isdigit((unsigned char) s[1])) {
					n = 8 * n + *++s - '0';
					if (isdigit((unsigned char) s[1]))
						n = 8 * n + *++s - '0';
				}
				*bp++ = n;
				break;
			}
		}
	}
	*bp = '\0';
	return buf;
}
435 }
436