xref: /openbsd-src/usr.bin/awk/tran.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: tran.c,v 1.18 2020/02/27 21:43:46 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define	DEBUG
27 #include <stdio.h>
28 #include <math.h>
29 #include <ctype.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include "awk.h"
33 #include "ytab.h"
34 
#define	FULLTAB	2	/* rehash when table gets this x full */
#define	GROWTAB 4	/* grow table by this factor */

Array	*symtab;	/* main symbol table */

/*
 * Shortcuts to the builtin variables: each pointer below aims at the
 * sval (strings) or fval (numbers) slot of the variable's Cell.  They are
 * wired up in syminit(), except ARGC which is set in arginit().
 */
char	**FS;		/* initial field sep */
char	**RS;		/* initial record sep */
char	**OFS;		/* output field sep */
char	**ORS;		/* output record sep */
char	**OFMT;		/* output format for numbers */
char	**CONVFMT;	/* format for conversions in getsval */
Awkfloat *NF;		/* number of fields in current record */
Awkfloat *NR;		/* number of current record */
Awkfloat *FNR;		/* number of current record in current file */
char	**FILENAME;	/* current filename argument */
Awkfloat *ARGC;		/* number of arguments from command line */
char	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
Awkfloat *RLENGTH;	/* length of same */

/* Cells and tables of selected builtins, kept for direct access. */
Cell	*fsloc;		/* FS */
Cell	*nrloc;		/* NR */
Cell	*nfloc;		/* NF */
Cell	*fnrloc;	/* FNR */
Array	*ARGVtab;	/* symbol table containing ARGV[...] */
Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
Cell	*rstartloc;	/* RSTART */
Cell	*rlengthloc;	/* RLENGTH */
Cell	*symtabloc;	/* SYMTAB */

Cell	*nullloc;	/* a guaranteed empty cell */
Node	*nullnode;	/* zero&null, converted into a node for comparisons */
Cell	*literal0;	/* cell for the constant "0"; created in syminit() */

extern Cell **fldtab;	/* defined outside this file; presumably the table of field cells */
70 
71 void syminit(void)	/* initialize symbol table with builtin vars */
72 {
73 	literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
74 	/* this is used for if(x)... tests: */
75 	nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
76 	nullnode = celltonode(nullloc, CCON);
77 
78 	fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
79 	FS = &fsloc->sval;
80 	RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
81 	OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
82 	ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
83 	OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
84 	CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
85 	FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
86 	nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
87 	NF = &nfloc->fval;
88 	nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
89 	NR = &nrloc->fval;
90 	fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
91 	FNR = &fnrloc->fval;
92 	SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
93 	rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
94 	RSTART = &rstartloc->fval;
95 	rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
96 	RLENGTH = &rlengthloc->fval;
97 	symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
98 	symtabloc->sval = (char *) symtab;
99 }
100 
101 void arginit(int ac, char **av)	/* set up ARGV and ARGC */
102 {
103 	Cell *cp;
104 	int i;
105 	char temp[50];
106 
107 	ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
108 	cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
109 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
110 	cp->sval = (char *) ARGVtab;
111 	for (i = 0; i < ac; i++) {
112 		snprintf(temp, sizeof temp, "%d", i);
113 		if (is_number(*av))
114 			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
115 		else
116 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
117 		av++;
118 	}
119 }
120 
121 void envinit(char **envp)	/* set up ENVIRON variable */
122 {
123 	Cell *cp;
124 	char *p;
125 
126 	cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
127 	ENVtab = makesymtab(NSYMTAB);
128 	cp->sval = (char *) ENVtab;
129 	for ( ; *envp; envp++) {
130 		if ((p = strchr(*envp, '=')) == NULL)
131 			continue;
132 		if( p == *envp ) /* no left hand side name in env string */
133 			continue;
134 		*p++ = 0;	/* split into two strings at = */
135 		if (is_number(p))
136 			setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
137 		else
138 			setsymtab(*envp, p, 0.0, STR, ENVtab);
139 		p[-1] = '=';	/* restore in case env is passed down to a shell */
140 	}
141 }
142 
143 Array *makesymtab(int n)	/* make a new symbol table */
144 {
145 	Array *ap;
146 	Cell **tp;
147 
148 	ap = (Array *) malloc(sizeof(Array));
149 	tp = (Cell **) calloc(n, sizeof(Cell *));
150 	if (ap == NULL || tp == NULL)
151 		FATAL("out of space in makesymtab");
152 	ap->nelem = 0;
153 	ap->size = n;
154 	ap->tab = tp;
155 	return(ap);
156 }
157 
158 void freesymtab(Cell *ap)	/* free a symbol table */
159 {
160 	Cell *cp, *temp;
161 	Array *tp;
162 	int i;
163 
164 	if (!isarr(ap))
165 		return;
166 	tp = (Array *) ap->sval;
167 	if (tp == NULL)
168 		return;
169 	for (i = 0; i < tp->size; i++) {
170 		for (cp = tp->tab[i]; cp != NULL; cp = temp) {
171 			xfree(cp->nval);
172 			if (freeable(cp))
173 				xfree(cp->sval);
174 			temp = cp->cnext;	/* avoids freeing then using */
175 			free(cp);
176 			tp->nelem--;
177 		}
178 		tp->tab[i] = 0;
179 	}
180 	if (tp->nelem != 0)
181 		WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
182 	free(tp->tab);
183 	free(tp);
184 }
185 
186 void freeelem(Cell *ap, const char *s)	/* free elem s from ap (i.e., ap["s"] */
187 {
188 	Array *tp;
189 	Cell *p, *prev = NULL;
190 	int h;
191 
192 	tp = (Array *) ap->sval;
193 	h = hash(s, tp->size);
194 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
195 		if (strcmp(s, p->nval) == 0) {
196 			if (prev == NULL)	/* 1st one */
197 				tp->tab[h] = p->cnext;
198 			else			/* middle somewhere */
199 				prev->cnext = p->cnext;
200 			if (freeable(p))
201 				xfree(p->sval);
202 			free(p->nval);
203 			free(p);
204 			tp->nelem--;
205 			return;
206 		}
207 }
208 
209 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
210 {
211 	int h;
212 	Cell *p;
213 
214 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
215 		   DPRINTF( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
216 			(void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
217 		return(p);
218 	}
219 	p = (Cell *) malloc(sizeof(Cell));
220 	if (p == NULL)
221 		FATAL("out of space for symbol table at %s", n);
222 	p->nval = tostring(n);
223 	p->sval = s ? tostring(s) : tostring("");
224 	p->fval = f;
225 	p->tval = t;
226 	p->csub = CUNK;
227 	p->ctype = OCELL;
228 	tp->nelem++;
229 	if (tp->nelem > FULLTAB * tp->size)
230 		rehash(tp);
231 	h = hash(n, tp->size);
232 	p->cnext = tp->tab[h];
233 	tp->tab[h] = p;
234 	   DPRINTF( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
235 		(void*)p, p->nval, p->sval, p->fval, p->tval) );
236 	return(p);
237 }
238 
/*
 * hash: map string s to a bucket index in the range [0, n).
 *
 * Multiplicative hash with factor 31, accumulated in unsigned arithmetic
 * and reduced modulo the bucket count n.
 */
int hash(const char *s, int n)
{
	unsigned acc = 0;
	const char *p;

	for (p = s; *p != '\0'; p++)
		acc = 31 * acc + *p;
	return acc % n;
}
247 
248 void rehash(Array *tp)	/* rehash items in small table into big one */
249 {
250 	int i, nh, nsz;
251 	Cell *cp, *op, **np;
252 
253 	nsz = GROWTAB * tp->size;
254 	np = (Cell **) calloc(nsz, sizeof(Cell *));
255 	if (np == NULL)		/* can't do it, but can keep running. */
256 		return;		/* someone else will run out later. */
257 	for (i = 0; i < tp->size; i++) {
258 		for (cp = tp->tab[i]; cp; cp = op) {
259 			op = cp->cnext;
260 			nh = hash(cp->nval, nsz);
261 			cp->cnext = np[nh];
262 			np[nh] = cp;
263 		}
264 	}
265 	free(tp->tab);
266 	tp->tab = np;
267 	tp->size = nsz;
268 }
269 
270 Cell *lookup(const char *s, Array *tp)	/* look for s in tp */
271 {
272 	Cell *p;
273 	int h;
274 
275 	h = hash(s, tp->size);
276 	for (p = tp->tab[h]; p != NULL; p = p->cnext)
277 		if (strcmp(s, p->nval) == 0)
278 			return(p);	/* found it */
279 	return(NULL);			/* not found */
280 }
281 
282 Awkfloat setfval(Cell *vp, Awkfloat f)	/* set float val of a Cell */
283 {
284 	int fldno;
285 
286 	if ((vp->tval & (NUM | STR)) == 0)
287 		funnyvar(vp, "assign to");
288 	if (isfld(vp)) {
289 		donerec = 0;	/* mark $0 invalid */
290 		fldno = atoi(vp->nval);
291 		if (fldno > *NF)
292 			newfld(fldno);
293 		   DPRINTF( ("setting field %d to %g\n", fldno, f) );
294 	} else if (isrec(vp)) {
295 		donefld = 0;	/* mark $1... invalid */
296 		donerec = 1;
297 	}
298 	if (freeable(vp))
299 		xfree(vp->sval); /* free any previous string */
300 	vp->tval &= ~STR;	/* mark string invalid */
301 	vp->tval |= NUM;	/* mark number ok */
302 	if (f == -0)  /* who would have thought this possible? */
303 		f = 0;
304 	   DPRINTF( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
305 	return vp->fval = f;
306 }
307 
308 void funnyvar(Cell *vp, const char *rw)
309 {
310 	if (isarr(vp))
311 		FATAL("can't %s %s; it's an array name.", rw, vp->nval);
312 	if (vp->tval & FCN)
313 		FATAL("can't %s %s; it's a function.", rw, vp->nval);
314 	WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
315 		vp, vp->nval, vp->sval, vp->fval, vp->tval);
316 }
317 
318 char *setsval(Cell *vp, const char *s)	/* set string val of a Cell */
319 {
320 	char *t;
321 	int fldno;
322 
323 	   DPRINTF( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n",
324 		(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
325 	if ((vp->tval & (NUM | STR)) == 0)
326 		funnyvar(vp, "assign to");
327 	if (isfld(vp)) {
328 		donerec = 0;	/* mark $0 invalid */
329 		fldno = atoi(vp->nval);
330 		if (fldno > *NF)
331 			newfld(fldno);
332 		   DPRINTF( ("setting field %d to %s (%p)\n", fldno, s, s) );
333 	} else if (isrec(vp)) {
334 		donefld = 0;	/* mark $1... invalid */
335 		donerec = 1;
336 	}
337 	t = tostring(s);	/* in case it's self-assign */
338 	if (freeable(vp))
339 		xfree(vp->sval);
340 	vp->tval &= ~NUM;
341 	vp->tval |= STR;
342 	vp->tval &= ~DONTFREE;
343 	   DPRINTF( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n",
344 		(void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
345 	return(vp->sval = t);
346 }
347 
348 Awkfloat getfval(Cell *vp)	/* get float val of a Cell */
349 {
350 	if ((vp->tval & (NUM | STR)) == 0)
351 		funnyvar(vp, "read value of");
352 	if (isfld(vp) && donefld == 0)
353 		fldbld();
354 	else if (isrec(vp) && donerec == 0)
355 		recbld();
356 	if (!isnum(vp)) {	/* not a number */
357 		vp->fval = atof(vp->sval);	/* best guess */
358 		if (is_number(vp->sval) && !(vp->tval&CON))
359 			vp->tval |= NUM;	/* make NUM only sparingly */
360 	}
361 	   DPRINTF( ("getfval %p: %s = %g, t=%o\n",
362 		(void*)vp, NN(vp->nval), vp->fval, vp->tval) );
363 	return(vp->fval);
364 }
365 
366 static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
367 {
368 	int n;
369 	double dtemp;
370 
371 	if ((vp->tval & (NUM | STR)) == 0)
372 		funnyvar(vp, "read value of");
373 	if (isfld(vp) && donefld == 0)
374 		fldbld();
375 	else if (isrec(vp) && donerec == 0)
376 		recbld();
377 	if (isstr(vp) == 0) {
378 		if (freeable(vp))
379 			xfree(vp->sval);
380 		if (modf(vp->fval, &dtemp) == 0)	/* it's integral */
381 			n = asprintf(&vp->sval, "%.30g", vp->fval);
382 		else
383 			n = asprintf(&vp->sval, *fmt, vp->fval);
384 		if (n == -1)
385 			FATAL("out of space in get_str_val");
386 		vp->tval &= ~DONTFREE;
387 		vp->tval |= STR;
388 	}
389 	   DPRINTF( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
390 		(void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
391 	return(vp->sval);
392 }
393 
394 char *getsval(Cell *vp)       /* get string val of a Cell */
395 {
396       return get_str_val(vp, CONVFMT);
397 }
398 
399 char *getpssval(Cell *vp)     /* get string val of a Cell for print */
400 {
401       return get_str_val(vp, OFMT);
402 }
403 
404 
/*
 * tostring: return a freshly allocated copy of string s.
 * Running out of memory is reported through FATAL.
 */
char *tostring(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		FATAL("out of space in tostring on %s", s);
	return copy;
}
414 
415 char *qstring(const char *is, int delim)	/* collect string up to next delim */
416 {
417 	const char *os = is;
418 	int c, n;
419 	uschar *s = (uschar *) is;
420 	uschar *buf, *bp;
421 
422 	if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
423 		FATAL( "out of space in qstring(%s)", s);
424 	for (bp = buf; (c = *s) != delim; s++) {
425 		if (c == '\n')
426 			SYNTAX( "newline in string %.20s...", os );
427 		else if (c != '\\')
428 			*bp++ = c;
429 		else {	/* \something */
430 			c = *++s;
431 			if (c == 0) {	/* \ at end */
432 				*bp++ = '\\';
433 				break;	/* for loop */
434 			}
435 			switch (c) {
436 			case '\\':	*bp++ = '\\'; break;
437 			case 'n':	*bp++ = '\n'; break;
438 			case 't':	*bp++ = '\t'; break;
439 			case 'v':	*bp++ = '\v'; break;
440 			case 'b':	*bp++ = '\b'; break;
441 			case 'f':	*bp++ = '\f'; break;
442 			case 'r':	*bp++ = '\r'; break;
443 			case 'a':	*bp++ = '\007'; break;
444 			default:
445 				if (!isdigit(c)) {
446 					*bp++ = c;
447 					break;
448 				}
449 				n = c - '0';
450 				if (isdigit(s[1])) {
451 					n = 8 * n + *++s - '0';
452 					if (isdigit(s[1]))
453 						n = 8 * n + *++s - '0';
454 				}
455 				*bp++ = n;
456 				break;
457 			}
458 		}
459 	}
460 	*bp++ = 0;
461 	return (char *) buf;
462 }
463