xref: /openbsd-src/usr.bin/awk/run.c (revision 1a8dbaac879b9f3335ad7fb25429ce63ac1d6bac)
1 /*	$OpenBSD: run.c,v 1.68 2020/08/28 16:29:16 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <fcntl.h>
33 #include <setjmp.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <string.h>
37 #include <stdlib.h>
38 #include <time.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41 #include "awk.h"
42 #include "awkgram.tab.h"
43 
44 static void stdinit(void);
45 static void flush_all(void);
46 
/*
 * tempfree(x): hand the cell x to tfree() when istemp(x) holds; any other
 * cell is left untouched.
 *
 * The macro form is the one that is compiled (#if 1).  Note that it
 * evaluates its argument twice, so the argument must be free of side
 * effects.  The disabled function form additionally warns when an OCELL
 * cell carries a csub outside the CUNK..CFREE range before doing the same
 * conditional tfree().
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
59 
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /*  */
67 /* #ifndef	FOPEN_MAX */
68 /* #define	FOPEN_MAX	40 */	/* max number of open files */
69 /* #endif */
70 /*  */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
73 /* #endif */
74 
jmp_buf env;			/* setjmp target set in program(); jump() longjmps to it for EXIT */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Shared, statically allocated result cells.  Functions in this file return
 * pointers to these instead of allocating: True/False for boolean results
 * (ctype OBOOL) and the j* cells for control transfers (ctype OJUMP), with
 * csub identifying which one.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template that gettemp() copies into every temporary cell it hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
101 
102 /* buffer memory management */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
/* Grow a heap buffer so that it holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 disables rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error,
 *          or NULL if the caller wants to handle failure itself
 *
 * Nothing happens when the buffer is already large enough.  Otherwise the
 * buffer is realloc'ed; *pbuf, *psiz and (if given) *pbptr are updated to
 * describe the new storage, with *pbptr keeping its offset into the buffer.
 *
 * return   0 for realloc failure, !=0 for success
 */
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		/* round up to next multiple of quantum */
		if (rminlen) {
			int pad = quantum - rminlen;
			/*
			 * Skip the rounding when it would push minlen past
			 * INT_MAX; the unrounded size still satisfies the
			 * caller and avoids signed overflow.
			 */
			if (pad <= 0 || minlen <= INT_MAX - pad)
				minlen += pad;
		}
		tbuf = realloc(*pbuf, minlen);
		/* whatrtn may legitimately be NULL, so never hand it raw to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;	/* *pbuf is still valid and unchanged */
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
136 
137 void run(Node *a)	/* execution of parse tree starts here */
138 {
139 
140 	stdinit();
141 	execute(a);
142 	closeall();
143 }
144 
145 Cell *execute(Node *u)	/* execute a node of the parse tree */
146 {
147 	Cell *(*proc)(Node **, int);
148 	Cell *x;
149 	Node *a;
150 
151 	if (u == NULL)
152 		return(True);
153 	for (a = u; ; a = a->nnext) {
154 		curnode = a;
155 		if (isvalue(a)) {
156 			x = (Cell *) (a->narg[0]);
157 			if (isfld(x) && !donefld)
158 				fldbld();
159 			else if (isrec(x) && !donerec)
160 				recbld();
161 			return(x);
162 		}
163 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
164 			FATAL("illegal statement");
165 		proc = proctab[a->nobj-FIRSTTOKEN];
166 		x = (*proc)(a->narg, a->nobj);
167 		if (isfld(x) && !donefld)
168 			fldbld();
169 		else if (isrec(x) && !donerec)
170 			recbld();
171 		if (isexpr(a))
172 			return(x);
173 		if (isjump(x))
174 			return(x);
175 		if (a->nnext == NULL)
176 			return(x);
177 		tempfree(x);
178 	}
179 }
180 
181 
/*
 * program: run the three parts of an awk program in order.
 *
 * a[0] is the BEGIN action, a[1] the main body, a[2] the END action; any of
 * them may be NULL.  The main body is executed once per record for as long
 * as getrec() returns a positive value, and is only entered when a body or
 * an END action exists.  Always returns True.
 *
 * Two setjmp(env) points catch non-local exits (for instance the
 * longjmp(env, 1) done by the EXIT case of jump()): one armed before BEGIN
 * and the main loop, which resumes at the END action, and one armed before
 * END, which resumes at the final return.
 */
Cell *program(Node **a, int n)	/* execute an awk program */
{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
	Cell *x;

	/* a longjmp during BEGIN or the main loop lands here and skips to END */
	if (setjmp(env) != 0)
		goto ex;
	if (a[0]) {		/* BEGIN */
		x = execute(a[0]);
		if (isexit(x))
			return(True);
		if (isjump(x))
			FATAL("illegal break, continue, next or nextfile from BEGIN");
		tempfree(x);
	}
	if (a[1] || a[2])
		/* one pass over the body per input record */
		while (getrec(&record, &recsize, true) > 0) {
			x = execute(a[1]);
			if (isexit(x))
				break;
			tempfree(x);
		}
  ex:
	/* re-arm env so that a longjmp from inside END just finishes */
	if (setjmp(env) != 0)	/* handles exit within END */
		goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}
215 
/*
 * One Frame is pushed by call() for every active awk function invocation;
 * arg() and the RETURN case of jump() read the current one through frp.
 */
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

/* The frame array is allocated on first use in call() and grown 100 entries at a time. */
struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
228 
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's cell, a[1] is the list of actual
 * argument expressions.  The function body is the Node tree stored in
 * fcn->sval and the number of declared parameters is stored in fcn->fval.
 *
 * Arguments that are arrays are passed by reference; everything else is
 * copied with copycell().  Declared parameters that were not supplied are
 * filled with fresh CCOPY temporaries and act as locals.  A new Frame is
 * pushed for the duration of the body, and the frame's retval cell (filled
 * in by the RETURN case of jump()) is what the call returns.
 *
 * Fatal errors: calling something that is not a function, passing a
 * function as an argument, ncall + ndef exceeding NARGS, or running out of
 * memory for the frame stack.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	/* first call ever: allocate the initial frame stack */
	if (frame == NULL) {
		frp = frame = calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* args[] and oargs[] hold NARGS entries; refuse anything that might not fit */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for the copy-back below */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		/* frame stack is full: grow it and re-derive frp from its index */
		int dfp = frp - frame;	/* old index */
		frame = reallocarray(frame, (nframe += 100), sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/* release the parameter cells now that the body has finished */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			/* a CCOPY cell that is now an array was turned into one inside the body */
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* it was a local: discard its table */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* it was a real argument: give the table to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			/* the body's result is this parameter cell; free it once, here */
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	/* NOTE(review): this early return happens before frp-- below — confirm that is intended */
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
327 
328 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
329 {
330 	Cell *y;
331 
332 	/* copy is not constant or field */
333 
334 	y = gettemp();
335 	y->tval = x->tval & ~(CON|FLD|REC);
336 	y->csub = CCOPY;	/* prevents freeing until call is over */
337 	y->nval = x->nval;	/* BUG? */
338 	if (isstr(x) /* || x->ctype == OCELL */) {
339 		y->sval = tostring(x->sval);
340 		y->tval &= ~DONTFREE;
341 	} else
342 		y->tval |= DONTFREE;
343 	y->fval = x->fval;
344 	return y;
345 }
346 
347 Cell *arg(Node **a, int n)	/* nth argument of a function */
348 {
349 
350 	n = ptoi(a[0]);	/* argument number, counting from 0 */
351 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352 	if (n+1 > frp->nargs)
353 		FATAL("argument #%d of function %s was not supplied",
354 			n+1, frp->fcncell->nval);
355 	return frp->args[n];
356 }
357 
358 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
359 {
360 	Cell *y;
361 
362 	switch (n) {
363 	case EXIT:
364 		if (a[0] != NULL) {
365 			y = execute(a[0]);
366 			errorflag = (int) getfval(y);
367 			tempfree(y);
368 		}
369 		longjmp(env, 1);
370 	case RETURN:
371 		if (a[0] != NULL) {
372 			y = execute(a[0]);
373 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 				setsval(frp->retval, getsval(y));
375 				frp->retval->fval = getfval(y);
376 				frp->retval->tval |= NUM;
377 			}
378 			else if (y->tval & STR)
379 				setsval(frp->retval, getsval(y));
380 			else if (y->tval & NUM)
381 				setfval(frp->retval, getfval(y));
382 			else		/* can't happen */
383 				FATAL("bad type variable %d", y->tval);
384 			tempfree(y);
385 		}
386 		return(jret);
387 	case NEXT:
388 		return(jnext);
389 	case NEXTFILE:
390 		nextfile();
391 		return(jnextfile);
392 	case BREAK:
393 		return(jbreak);
394 	case CONTINUE:
395 		return(jcont);
396 	default:	/* can't happen */
397 		FATAL("illegal jump type %d", n);
398 	}
399 	return 0;	/* not reached */
400 }
401 
402 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
403 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
404 	Cell *r, *x;
405 	extern Cell **fldtab;
406 	FILE *fp;
407 	char *buf;
408 	int bufsize = recsize;
409 	int mode;
410 	bool newflag;
411 
412 	if ((buf = malloc(bufsize)) == NULL)
413 		FATAL("out of memory in getline");
414 
415 	fflush(stdout);	/* in case someone is waiting for a prompt */
416 	r = gettemp();
417 	if (a[1] != NULL) {		/* getline < file */
418 		x = execute(a[2]);		/* filename */
419 		mode = ptoi(a[1]);
420 		if (mode == '|')		/* input pipe */
421 			mode = LE;	/* arbitrary flag */
422 		fp = openfile(mode, getsval(x), &newflag);
423 		tempfree(x);
424 		if (fp == NULL)
425 			n = -1;
426 		else
427 			n = readrec(&buf, &bufsize, fp, newflag);
428 		if (n <= 0) {
429 			;
430 		} else if (a[0] != NULL) {	/* getline var <file */
431 			x = execute(a[0]);
432 			setsval(x, buf);
433 			if (is_number(x->sval)) {
434 				x->fval = atof(x->sval);
435 				x->tval |= NUM;
436 			}
437 			tempfree(x);
438 		} else {			/* getline <file */
439 			setsval(fldtab[0], buf);
440 			if (is_number(fldtab[0]->sval)) {
441 				fldtab[0]->fval = atof(fldtab[0]->sval);
442 				fldtab[0]->tval |= NUM;
443 			}
444 		}
445 	} else {			/* bare getline; use current input */
446 		if (a[0] == NULL)	/* getline */
447 			n = getrec(&record, &recsize, true);
448 		else {			/* getline var */
449 			n = getrec(&buf, &bufsize, false);
450 			x = execute(a[0]);
451 			setsval(x, buf);
452 			if (is_number(x->sval)) {
453 				x->fval = atof(x->sval);
454 				x->tval |= NUM;
455 			}
456 			tempfree(x);
457 		}
458 	}
459 	setfval(r, (Awkfloat) n);
460 	free(buf);
461 	return r;
462 }
463 
464 Cell *getnf(Node **a, int n)	/* get NF */
465 {
466 	if (!donefld)
467 		fldbld();
468 	return (Cell *) a[0];
469 }
470 
471 static char *
472 makearraystring(Node *p, const char *func)
473 {
474 	char *buf;
475 	int bufsz = recsize;
476 	size_t blen;
477 
478 	if ((buf = malloc(bufsz)) == NULL) {
479 		FATAL("%s: out of memory", func);
480 	}
481 
482 	blen = 0;
483 	buf[blen] = '\0';
484 
485 	for (; p; p = p->nnext) {
486 		Cell *x = execute(p);	/* expr */
487 		char *s = getsval(x);
488 		size_t seplen = strlen(getsval(subseploc));
489 		size_t nsub = p->nnext ? seplen : 0;
490 		size_t slen = strlen(s);
491 		size_t tlen = blen + slen + nsub;
492 
493 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
494 			FATAL("%s: out of memory %s[%s...]",
495 			    func, x->nval, buf);
496 		}
497 		memcpy(buf + blen, s, slen);
498 		if (nsub) {
499 			memcpy(buf + blen + slen, *SUBSEP, nsub);
500 		}
501 		buf[tlen] = '\0';
502 		blen = tlen;
503 		tempfree(x);
504 	}
505 	return buf;
506 }
507 
508 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
509 {
510 	Cell *x, *z;
511 	char *buf;
512 
513 	x = execute(a[0]);	/* Cell* for symbol table */
514 	buf = makearraystring(a[1], __func__);
515 	if (!isarr(x)) {
516 		DPRINTF("making %s into an array\n", NN(x->nval));
517 		if (freeable(x))
518 			xfree(x->sval);
519 		x->tval &= ~(STR|NUM|DONTFREE);
520 		x->tval |= ARR;
521 		x->sval = (char *) makesymtab(NSYMTAB);
522 	}
523 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
524 	z->ctype = OCELL;
525 	z->csub = CVAR;
526 	tempfree(x);
527 	free(buf);
528 	return(z);
529 }
530 
531 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
532 {
533 	Cell *x;
534 
535 	x = execute(a[0]);	/* Cell* for symbol table */
536 	if (x == symtabloc) {
537 		FATAL("cannot delete SYMTAB or its elements");
538 	}
539 	if (!isarr(x))
540 		return True;
541 	if (a[1] == NULL) {	/* delete the elements, not the table */
542 		freesymtab(x);
543 		x->tval &= ~STR;
544 		x->tval |= ARR;
545 		x->sval = (char *) makesymtab(NSYMTAB);
546 	} else {
547 		char *buf = makearraystring(a[1], __func__);
548 		freeelem(x, buf);
549 		free(buf);
550 	}
551 	tempfree(x);
552 	return True;
553 }
554 
555 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
556 {
557 	Cell *ap, *k;
558 	char *buf;
559 
560 	ap = execute(a[1]);	/* array name */
561 	if (!isarr(ap)) {
562 		DPRINTF("making %s into an array\n", ap->nval);
563 		if (freeable(ap))
564 			xfree(ap->sval);
565 		ap->tval &= ~(STR|NUM|DONTFREE);
566 		ap->tval |= ARR;
567 		ap->sval = (char *) makesymtab(NSYMTAB);
568 	}
569 	buf = makearraystring(a[0], __func__);
570 	k = lookup(buf, (Array *) ap->sval);
571 	tempfree(ap);
572 	free(buf);
573 	if (k == NULL)
574 		return(False);
575 	else
576 		return(True);
577 }
578 
579 
580 Cell *matchop(Node **a, int n)	/* ~ and match() */
581 {
582 	Cell *x, *y;
583 	char *s, *t;
584 	int i;
585 	fa *pfa;
586 	int (*mf)(fa *, const char *) = match, mode = 0;
587 
588 	if (n == MATCHFCN) {
589 		mf = pmatch;
590 		mode = 1;
591 	}
592 	x = execute(a[1]);	/* a[1] = target text */
593 	s = getsval(x);
594 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
595 		i = (*mf)((fa *) a[2], s);
596 	else {
597 		y = execute(a[2]);	/* a[2] = regular expr */
598 		t = getsval(y);
599 		pfa = makedfa(t, mode);
600 		i = (*mf)(pfa, s);
601 		tempfree(y);
602 	}
603 	tempfree(x);
604 	if (n == MATCHFCN) {
605 		int start = patbeg - s + 1;
606 		if (patlen < 0)
607 			start = 0;
608 		setfval(rstartloc, (Awkfloat) start);
609 		setfval(rlengthloc, (Awkfloat) patlen);
610 		x = gettemp();
611 		x->tval = NUM;
612 		x->fval = start;
613 		return x;
614 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
615 		return(True);
616 	else
617 		return(False);
618 }
619 
620 
621 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
622 {
623 	Cell *x, *y;
624 	int i;
625 
626 	x = execute(a[0]);
627 	i = istrue(x);
628 	tempfree(x);
629 	switch (n) {
630 	case BOR:
631 		if (i) return(True);
632 		y = execute(a[1]);
633 		i = istrue(y);
634 		tempfree(y);
635 		if (i) return(True);
636 		else return(False);
637 	case AND:
638 		if ( !i ) return(False);
639 		y = execute(a[1]);
640 		i = istrue(y);
641 		tempfree(y);
642 		if (i) return(True);
643 		else return(False);
644 	case NOT:
645 		if (i) return(False);
646 		else return(True);
647 	default:	/* can't happen */
648 		FATAL("unknown boolean operator %d", n);
649 	}
650 	return 0;	/*NOTREACHED*/
651 }
652 
653 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
654 {
655 	int i;
656 	Cell *x, *y;
657 	Awkfloat j;
658 
659 	x = execute(a[0]);
660 	y = execute(a[1]);
661 	if (x->tval&NUM && y->tval&NUM) {
662 		j = x->fval - y->fval;
663 		i = j<0? -1: (j>0? 1: 0);
664 	} else {
665 		i = strcmp(getsval(x), getsval(y));
666 	}
667 	tempfree(x);
668 	tempfree(y);
669 	switch (n) {
670 	case LT:	if (i<0) return(True);
671 			else return(False);
672 	case LE:	if (i<=0) return(True);
673 			else return(False);
674 	case NE:	if (i!=0) return(True);
675 			else return(False);
676 	case EQ:	if (i == 0) return(True);
677 			else return(False);
678 	case GE:	if (i>=0) return(True);
679 			else return(False);
680 	case GT:	if (i>0) return(True);
681 			else return(False);
682 	default:	/* can't happen */
683 		FATAL("unknown relational operator %d", n);
684 	}
685 	return 0;	/*NOTREACHED*/
686 }
687 
688 void tfree(Cell *a)	/* free a tempcell */
689 {
690 	if (freeable(a)) {
691 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
692 		xfree(a->sval);
693 	}
694 	if (a == tmps)
695 		FATAL("tempcell list is curdled");
696 	a->cnext = tmps;
697 	tmps = a;
698 }
699 
700 Cell *gettemp(void)	/* get a tempcell */
701 {	int i;
702 	Cell *x;
703 
704 	if (!tmps) {
705 		tmps = calloc(100, sizeof(*tmps));
706 		if (!tmps)
707 			FATAL("out of space for temporaries");
708 		for (i = 1; i < 100; i++)
709 			tmps[i-1].cnext = &tmps[i];
710 		tmps[i-1].cnext = NULL;
711 	}
712 	x = tmps;
713 	tmps = x->cnext;
714 	*x = tempcell;
715 	return(x);
716 }
717 
718 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
719 {
720 	Awkfloat val;
721 	Cell *x;
722 	int m;
723 	char *s;
724 
725 	x = execute(a[0]);
726 	val = getfval(x);	/* freebsd: defend against super large field numbers */
727 	if ((Awkfloat)INT_MAX < val)
728 		FATAL("trying to access out of range field %s", x->nval);
729 	m = (int) val;
730 	if (m == 0 && !is_number(s = getsval(x)))	/* suspicion! */
731 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
732 		/* BUG: can x->nval ever be null??? */
733 	tempfree(x);
734 	x = fieldadr(m);
735 	x->ctype = OCELL;	/* BUG?  why are these needed? */
736 	x->csub = CFLD;
737 	return(x);
738 }
739 
740 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
741 {
742 	int k, m, n;
743 	char *s;
744 	int temp;
745 	Cell *x, *y, *z = NULL;
746 
747 	x = execute(a[0]);
748 	y = execute(a[1]);
749 	if (a[2] != NULL)
750 		z = execute(a[2]);
751 	s = getsval(x);
752 	k = strlen(s) + 1;
753 	if (k <= 1) {
754 		tempfree(x);
755 		tempfree(y);
756 		if (a[2] != NULL) {
757 			tempfree(z);
758 		}
759 		x = gettemp();
760 		setsval(x, "");
761 		return(x);
762 	}
763 	m = (int) getfval(y);
764 	if (m <= 0)
765 		m = 1;
766 	else if (m > k)
767 		m = k;
768 	tempfree(y);
769 	if (a[2] != NULL) {
770 		n = (int) getfval(z);
771 		tempfree(z);
772 	} else
773 		n = k - 1;
774 	if (n < 0)
775 		n = 0;
776 	else if (n > k - m)
777 		n = k - m;
778 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
779 	y = gettemp();
780 	temp = s[n+m-1];	/* with thanks to John Linderman */
781 	s[n+m-1] = '\0';
782 	setsval(y, s + m - 1);
783 	s[n+m-1] = temp;
784 	tempfree(x);
785 	return(y);
786 }
787 
788 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
789 {
790 	Cell *x, *y, *z;
791 	char *s1, *s2, *p1, *p2, *q;
792 	Awkfloat v = 0.0;
793 
794 	x = execute(a[0]);
795 	s1 = getsval(x);
796 	y = execute(a[1]);
797 	s2 = getsval(y);
798 
799 	z = gettemp();
800 	for (p1 = s1; *p1 != '\0'; p1++) {
801 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
802 			continue;
803 		if (*p2 == '\0') {
804 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
805 			break;
806 		}
807 	}
808 	tempfree(x);
809 	tempfree(y);
810 	setfval(z, v);
811 	return(z);
812 }
813 
814 #define	MAXNUMSIZE	50
815 
816 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
817 {
818 	char *fmt;
819 	char *p, *t;
820 	const char *os;
821 	Cell *x;
822 	int flag = 0, n;
823 	int fmtwd; /* format width */
824 	int fmtsz = recsize;
825 	char *buf = *pbuf;
826 	int bufsize = *pbufsize;
827 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
828 #define BUFSZ(a)   (bufsize - ((a) - buf))
829 
830 	static bool first = true;
831 	static bool have_a_format = false;
832 
833 	if (first) {
834 		char xbuf[100];
835 
836 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
837 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
838 		first = false;
839 	}
840 
841 	os = s;
842 	p = buf;
843 	if ((fmt = malloc(fmtsz)) == NULL)
844 		FATAL("out of memory in format()");
845 	while (*s) {
846 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
847 		if (*s != '%') {
848 			*p++ = *s++;
849 			continue;
850 		}
851 		if (*(s+1) == '%') {
852 			*p++ = '%';
853 			s += 2;
854 			continue;
855 		}
856 		/* have to be real careful in case this is a huge number, eg, %100000d */
857 		fmtwd = atoi(s+1);
858 		if (fmtwd < 0)
859 			fmtwd = -fmtwd;
860 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
861 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
862 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
863 				FATAL("format item %.30s... ran format() out of memory", os);
864 			/* Ignore size specifiers */
865 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
866 				t--;
867 				continue;
868 			}
869 			if (isalpha((uschar)*s))
870 				break;
871 			if (*s == '$') {
872 				FATAL("'$' not permitted in awk formats");
873 			}
874 			if (*s == '*') {
875 				if (a == NULL) {
876 					FATAL("not enough args in printf(%s)", os);
877 				}
878 				x = execute(a);
879 				a = a->nnext;
880 				snprintf(t - 1, FMTSZ(t - 1),
881 				    "%d", fmtwd=(int) getfval(x));
882 				if (fmtwd < 0)
883 					fmtwd = -fmtwd;
884 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
885 				t = fmt + strlen(fmt);
886 				tempfree(x);
887 			}
888 		}
889 		*t = '\0';
890 		if (fmtwd < 0)
891 			fmtwd = -fmtwd;
892 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
893 		switch (*s) {
894 		case 'a': case 'A':
895 			if (have_a_format)
896 				flag = *s;
897 			else
898 				flag = 'f';
899 			break;
900 		case 'f': case 'e': case 'g': case 'E': case 'G':
901 			flag = 'f';
902 			break;
903 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
904 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
905 			*(t-1) = 'j';
906 			*t = *s;
907 			*++t = '\0';
908 			break;
909 		case 's':
910 			flag = 's';
911 			break;
912 		case 'c':
913 			flag = 'c';
914 			break;
915 		default:
916 			WARNING("weird printf conversion %s", fmt);
917 			flag = '?';
918 			break;
919 		}
920 		if (a == NULL)
921 			FATAL("not enough args in printf(%s)", os);
922 		x = execute(a);
923 		a = a->nnext;
924 		n = MAXNUMSIZE;
925 		if (fmtwd > n)
926 			n = fmtwd;
927 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
928 		switch (flag) {
929 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
930 			t = getsval(x);
931 			n = strlen(t);
932 			if (fmtwd > n)
933 				n = fmtwd;
934 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
935 			p += strlen(p);
936 			snprintf(p, BUFSZ(p), "%s", t);
937 			break;
938 		case 'a':
939 		case 'A':
940 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
941 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
942 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
943 		case 's':
944 			t = getsval(x);
945 			n = strlen(t);
946 			if (fmtwd > n)
947 				n = fmtwd;
948 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
949 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
950 			snprintf(p, BUFSZ(p), fmt, t);
951 			break;
952 		case 'c':
953 			if (isnum(x)) {
954 				if ((int)getfval(x))
955 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
956 				else {
957 					*p++ = '\0'; /* explicit null byte */
958 					*p = '\0';   /* next output will start here */
959 				}
960 			} else
961 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
962 			break;
963 		default:
964 			FATAL("can't happen: bad conversion %c in format()", flag);
965 		}
966 		tempfree(x);
967 		p += strlen(p);
968 		s++;
969 	}
970 	*p = '\0';
971 	free(fmt);
972 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
973 		execute(a);
974 	*pbuf = buf;
975 	*pbufsize = bufsize;
976 	return p - buf;
977 }
978 
979 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
980 {
981 	Cell *x;
982 	Node *y;
983 	char *buf;
984 	int bufsz=3*recsize;
985 
986 	if ((buf = malloc(bufsz)) == NULL)
987 		FATAL("out of memory in awksprintf");
988 	y = a[0]->nnext;
989 	x = execute(a[0]);
990 	if (format(&buf, &bufsz, getsval(x), y) == -1)
991 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
992 	tempfree(x);
993 	x = gettemp();
994 	x->sval = buf;
995 	x->tval = STR;
996 	return(x);
997 }
998 
999 Cell *awkprintf(Node **a, int n)		/* printf */
1000 {	/* a[0] is list of args, starting with format string */
1001 	/* a[1] is redirection operator, a[2] is redirection file */
1002 	FILE *fp;
1003 	Cell *x;
1004 	Node *y;
1005 	char *buf;
1006 	int len;
1007 	int bufsz=3*recsize;
1008 
1009 	if ((buf = malloc(bufsz)) == NULL)
1010 		FATAL("out of memory in awkprintf");
1011 	y = a[0]->nnext;
1012 	x = execute(a[0]);
1013 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1014 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1015 	tempfree(x);
1016 	if (a[1] == NULL) {
1017 		/* fputs(buf, stdout); */
1018 		fwrite(buf, len, 1, stdout);
1019 		if (ferror(stdout))
1020 			FATAL("write error on stdout");
1021 	} else {
1022 		fp = redirect(ptoi(a[1]), a[2]);
1023 		/* fputs(buf, fp); */
1024 		fwrite(buf, len, 1, fp);
1025 		fflush(fp);
1026 		if (ferror(fp))
1027 			FATAL("write error on %s", filename(fp));
1028 	}
1029 	free(buf);
1030 	return(True);
1031 }
1032 
1033 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1034 {
1035 	Awkfloat i, j = 0;
1036 	double v;
1037 	Cell *x, *y, *z;
1038 
1039 	x = execute(a[0]);
1040 	i = getfval(x);
1041 	tempfree(x);
1042 	if (n != UMINUS && n != UPLUS) {
1043 		y = execute(a[1]);
1044 		j = getfval(y);
1045 		tempfree(y);
1046 	}
1047 	z = gettemp();
1048 	switch (n) {
1049 	case ADD:
1050 		i += j;
1051 		break;
1052 	case MINUS:
1053 		i -= j;
1054 		break;
1055 	case MULT:
1056 		i *= j;
1057 		break;
1058 	case DIVIDE:
1059 		if (j == 0)
1060 			FATAL("division by zero");
1061 		i /= j;
1062 		break;
1063 	case MOD:
1064 		if (j == 0)
1065 			FATAL("division by zero in mod");
1066 		modf(i/j, &v);
1067 		i = i - j * v;
1068 		break;
1069 	case UMINUS:
1070 		i = -i;
1071 		break;
1072 	case UPLUS: /* handled by getfval(), above */
1073 		break;
1074 	case POWER:
1075 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1076 			i = ipow(i, (int) j);
1077                else {
1078 			errno = 0;
1079 			i = errcheck(pow(i, j), "pow");
1080                }
1081 		break;
1082 	default:	/* can't happen */
1083 		FATAL("illegal arithmetic operator %d", n);
1084 	}
1085 	setfval(z, i);
1086 	return(z);
1087 }
1088 
/*
 * ipow: raise x to the integer power n by repeated squaring.
 * Any n <= 0 yields 1.  (Ought to be done by pow, but isn't always.)
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;

	half = ipow(x, n/2);
	/* odd exponents need one extra factor of x */
	return (n % 2 != 0) ? x * half * half : half * half;
}
1101 
1102 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1103 {
1104 	Cell *x, *z;
1105 	int k;
1106 	Awkfloat xf;
1107 
1108 	x = execute(a[0]);
1109 	xf = getfval(x);
1110 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1111 	if (n == PREINCR || n == PREDECR) {
1112 		setfval(x, xf + k);
1113 		return(x);
1114 	}
1115 	z = gettemp();
1116 	setfval(z, xf);
1117 	setfval(x, xf + k);
1118 	tempfree(x);
1119 	return(z);
1120 }
1121 
/*
 * assign: implement a[0] = a[1] and the compound forms += -= *= /= %= ^=.
 *
 * The right-hand side is evaluated before the left-hand side.  Plain
 * assignment copies the value as a string, a number, or both, following the
 * type bits of the source cell; assigning a cell to itself is skipped
 * unless the cell is a field, the record, or NF.  The compound forms
 * convert both sides to numbers, apply the operator, and store the result
 * with setfval().  Division or modulus by zero is fatal.  Returns the
 * left-hand cell.
 */
Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
{		/* this is subtle; don't muck with it. */
	Cell *x, *y;
	Awkfloat xf, yf;
	double v;

	y = execute(a[1]);	/* source first ... */
	x = execute(a[0]);	/* ... then destination */
	if (n == ASSIGN) {	/* ordinary assignment */
		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
			;	/* self-assignment: leave alone unless it's a field or NF */
		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
			/* source has both representations: carry both over */
			setsval(x, getsval(y));
			x->fval = getfval(y);
			x->tval |= NUM;
		}
		else if (isstr(y))
			setsval(x, getsval(y));
		else if (isnum(y))
			setfval(x, getfval(y));
		else
			funnyvar(y, "read value of");
		tempfree(y);
		return(x);
	}
	/* compound assignment: everything below works on numeric values */
	xf = getfval(x);
	yf = getfval(y);
	switch (n) {
	case ADDEQ:
		xf += yf;
		break;
	case SUBEQ:
		xf -= yf;
		break;
	case MULTEQ:
		xf *= yf;
		break;
	case DIVEQ:
		if (yf == 0)
			FATAL("division by zero in /=");
		xf /= yf;
		break;
	case MODEQ:
		if (yf == 0)
			FATAL("division by zero in %%=");
		/* remainder = xf - yf * trunc(xf / yf) */
		modf(xf/yf, &v);
		xf = xf - yf * v;
		break;
	case POWEQ:
		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
			xf = ipow(xf, (int) yf);
               else {
			errno = 0;
			xf = errcheck(pow(xf, yf), "pow");
               }
		break;
	default:
		FATAL("illegal assignment operator %d", n);
		break;
	}
	tempfree(y);
	setfval(x, xf);
	return(x);
}
1186 
1187 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1188 {
1189 	Cell *x, *y, *z;
1190 	int n1, n2;
1191 	char *s = NULL;
1192 	int ssz = 0;
1193 
1194 	x = execute(a[0]);
1195 	n1 = strlen(getsval(x));
1196 	adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
1197 	memcpy(s, x->sval, n1);
1198 
1199 	y = execute(a[1]);
1200 	n2 = strlen(getsval(y));
1201 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1202 	memcpy(s + n1, y->sval, n2);
1203 	s[n1 + n2] = '\0';
1204 
1205 	tempfree(x);
1206 	tempfree(y);
1207 
1208 	z = gettemp();
1209 	z->sval = s;
1210 	z->tval = STR;
1211 
1212 	return(z);
1213 }
1214 
1215 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1216 {
1217 	Cell *x;
1218 
1219 	if (a[0] == NULL)
1220 		x = execute(a[1]);
1221 	else {
1222 		x = execute(a[0]);
1223 		if (istrue(x)) {
1224 			tempfree(x);
1225 			x = execute(a[1]);
1226 		}
1227 	}
1228 	return x;
1229 }
1230 
1231 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1232 {
1233 	Cell *x;
1234 	int pair;
1235 
1236 	pair = ptoi(a[3]);
1237 	if (pairstack[pair] == 0) {
1238 		x = execute(a[0]);
1239 		if (istrue(x))
1240 			pairstack[pair] = 1;
1241 		tempfree(x);
1242 	}
1243 	if (pairstack[pair] == 1) {
1244 		x = execute(a[1]);
1245 		if (istrue(x))
1246 			pairstack[pair] = 0;
1247 		tempfree(x);
1248 		x = execute(a[2]);
1249 		return(x);
1250 	}
1251 	return(False);
1252 }
1253 
1254 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1255 {
1256 	Cell *x = NULL, *y, *ap;
1257 	const char *s, *origs, *t;
1258 	const char *fs = NULL;
1259 	char *origfs = NULL;
1260 	int sep;
1261 	char temp, num[50];
1262 	int n, tempstat, arg3type;
1263 
1264 	y = execute(a[0]);	/* source string */
1265 	origs = s = strdup(getsval(y));
1266 	if (s == NULL)
1267 		FATAL("out of space in split");
1268 	arg3type = ptoi(a[3]);
1269 	if (a[2] == NULL)		/* fs string */
1270 		fs = getsval(fsloc);
1271 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1272 		x = execute(a[2]);
1273 		fs = origfs = strdup(getsval(x));
1274 		if (fs == NULL)
1275 			FATAL("out of space in split");
1276 		tempfree(x);
1277 	} else if (arg3type == REGEXPR)
1278 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1279 	else
1280 		FATAL("illegal type of split");
1281 	sep = *fs;
1282 	ap = execute(a[1]);	/* array name */
1283 	freesymtab(ap);
1284 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1285 	ap->tval &= ~STR;
1286 	ap->tval |= ARR;
1287 	ap->sval = (char *) makesymtab(NSYMTAB);
1288 
1289 	n = 0;
1290         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1291 		/* split(s, a, //); have to arrange that it looks like empty sep */
1292 		arg3type = 0;
1293 		fs = "";
1294 		sep = 0;
1295 	}
1296 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1297 		fa *pfa;
1298 		if (arg3type == REGEXPR) {	/* it's ready already */
1299 			pfa = (fa *) a[2];
1300 		} else {
1301 			pfa = makedfa(fs, 1);
1302 		}
1303 		if (nematch(pfa,s)) {
1304 			tempstat = pfa->initstat;
1305 			pfa->initstat = 2;
1306 			do {
1307 				n++;
1308 				snprintf(num, sizeof(num), "%d", n);
1309 				temp = *patbeg;
1310 				setptr(patbeg, '\0');
1311 				if (is_number(s))
1312 					setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1313 				else
1314 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1315 				setptr(patbeg, temp);
1316 				s = patbeg + patlen;
1317 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1318 					n++;
1319 					snprintf(num, sizeof(num), "%d", n);
1320 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1321 					pfa->initstat = tempstat;
1322 					goto spdone;
1323 				}
1324 			} while (nematch(pfa,s));
1325 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1326 							/* cf gsub and refldbld */
1327 		}
1328 		n++;
1329 		snprintf(num, sizeof(num), "%d", n);
1330 		if (is_number(s))
1331 			setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1332 		else
1333 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1334   spdone:
1335 		pfa = NULL;
1336 	} else if (sep == ' ') {
1337 		for (n = 0; ; ) {
1338 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1339 			while (ISWS(*s))
1340 				s++;
1341 			if (*s == '\0')
1342 				break;
1343 			n++;
1344 			t = s;
1345 			do
1346 				s++;
1347 			while (*s != '\0' && !ISWS(*s));
1348 			temp = *s;
1349 			setptr(s, '\0');
1350 			snprintf(num, sizeof(num), "%d", n);
1351 			if (is_number(t))
1352 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1353 			else
1354 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1355 			setptr(s, temp);
1356 			if (*s != '\0')
1357 				s++;
1358 		}
1359 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1360 		for (n = 0; *s != '\0'; s++) {
1361 			char buf[2];
1362 			n++;
1363 			snprintf(num, sizeof(num), "%d", n);
1364 			buf[0] = *s;
1365 			buf[1] = '\0';
1366 			if (isdigit((uschar)buf[0]))
1367 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1368 			else
1369 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1370 		}
1371 	} else if (*s != '\0') {
1372 		for (;;) {
1373 			n++;
1374 			t = s;
1375 			while (*s != sep && *s != '\n' && *s != '\0')
1376 				s++;
1377 			temp = *s;
1378 			setptr(s, '\0');
1379 			snprintf(num, sizeof(num), "%d", n);
1380 			if (is_number(t))
1381 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1382 			else
1383 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1384 			setptr(s, temp);
1385 			if (*s++ == '\0')
1386 				break;
1387 		}
1388 	}
1389 	tempfree(ap);
1390 	tempfree(y);
1391 	xfree(origs);
1392 	xfree(origfs);
1393 	x = gettemp();
1394 	x->tval = NUM;
1395 	x->fval = n;
1396 	return(x);
1397 }
1398 
1399 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1400 {
1401 	Cell *x;
1402 
1403 	x = execute(a[0]);
1404 	if (istrue(x)) {
1405 		tempfree(x);
1406 		x = execute(a[1]);
1407 	} else {
1408 		tempfree(x);
1409 		x = execute(a[2]);
1410 	}
1411 	return(x);
1412 }
1413 
1414 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1415 {
1416 	Cell *x;
1417 
1418 	x = execute(a[0]);
1419 	if (istrue(x)) {
1420 		tempfree(x);
1421 		x = execute(a[1]);
1422 	} else if (a[2] != NULL) {
1423 		tempfree(x);
1424 		x = execute(a[2]);
1425 	}
1426 	return(x);
1427 }
1428 
1429 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1430 {
1431 	Cell *x;
1432 
1433 	for (;;) {
1434 		x = execute(a[0]);
1435 		if (!istrue(x))
1436 			return(x);
1437 		tempfree(x);
1438 		x = execute(a[1]);
1439 		if (isbreak(x)) {
1440 			x = True;
1441 			return(x);
1442 		}
1443 		if (isnext(x) || isexit(x) || isret(x))
1444 			return(x);
1445 		tempfree(x);
1446 	}
1447 }
1448 
1449 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1450 {
1451 	Cell *x;
1452 
1453 	for (;;) {
1454 		x = execute(a[0]);
1455 		if (isbreak(x))
1456 			return True;
1457 		if (isnext(x) || isexit(x) || isret(x))
1458 			return(x);
1459 		tempfree(x);
1460 		x = execute(a[1]);
1461 		if (!istrue(x))
1462 			return(x);
1463 		tempfree(x);
1464 	}
1465 }
1466 
1467 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1468 {
1469 	Cell *x;
1470 
1471 	x = execute(a[0]);
1472 	tempfree(x);
1473 	for (;;) {
1474 		if (a[1]!=NULL) {
1475 			x = execute(a[1]);
1476 			if (!istrue(x)) return(x);
1477 			else tempfree(x);
1478 		}
1479 		x = execute(a[3]);
1480 		if (isbreak(x))		/* turn off break */
1481 			return True;
1482 		if (isnext(x) || isexit(x) || isret(x))
1483 			return(x);
1484 		tempfree(x);
1485 		x = execute(a[2]);
1486 		tempfree(x);
1487 	}
1488 }
1489 
1490 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1491 {
1492 	Cell *x, *vp, *arrayp, *cp, *ncp;
1493 	Array *tp;
1494 	int i;
1495 
1496 	vp = execute(a[0]);
1497 	arrayp = execute(a[1]);
1498 	if (!isarr(arrayp)) {
1499 		return True;
1500 	}
1501 	tp = (Array *) arrayp->sval;
1502 	tempfree(arrayp);
1503 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1504 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1505 			setsval(vp, cp->nval);
1506 			ncp = cp->cnext;
1507 			x = execute(a[2]);
1508 			if (isbreak(x)) {
1509 				tempfree(vp);
1510 				return True;
1511 			}
1512 			if (isnext(x) || isexit(x) || isret(x)) {
1513 				tempfree(vp);
1514 				return(x);
1515 			}
1516 			tempfree(x);
1517 		}
1518 	}
1519 	return True;
1520 }
1521 
1522 static char *nawk_convert(const char *s, int (*fun_c)(int),
1523     wint_t (*fun_wc)(wint_t))
1524 {
1525 	char *buf      = NULL;
1526 	char *pbuf     = NULL;
1527 	const char *ps = NULL;
1528 	size_t n       = 0;
1529 	wchar_t wc;
1530 	size_t sz = MB_CUR_MAX;
1531 
1532 	if (sz == 1) {
1533 		buf = tostring(s);
1534 
1535 		for (pbuf = buf; *pbuf; pbuf++)
1536 			*pbuf = fun_c((uschar)*pbuf);
1537 
1538 		return buf;
1539 	} else {
1540 		/* upper/lower character may be shorter/longer */
1541 		buf = tostringN(s, strlen(s) * sz + 1);
1542 
1543 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1544 		/*
1545 		 * Reset internal state here too.
1546 		 * Assign result to avoid a compiler warning. (Casting to void
1547 		 * doesn't work.)
1548 		 * Increment said variable to avoid a different warning.
1549 		 */
1550 		int unused = wctomb(NULL, L'\0');
1551 		unused++;
1552 
1553 		ps   = s;
1554 		pbuf = buf;
1555 		while (n = mbtowc(&wc, ps, sz),
1556 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1557 		{
1558 			ps += n;
1559 
1560 			n = wctomb(pbuf, fun_wc(wc));
1561 			if (n == (size_t)-1)
1562 				FATAL("illegal wide character %s", s);
1563 
1564 			pbuf += n;
1565 		}
1566 
1567 		*pbuf = '\0';
1568 
1569 		if (n)
1570 			FATAL("illegal byte sequence %s", s);
1571 
1572 		return buf;
1573 	}
1574 }
1575 
/* upper-case conversion of s via nawk_convert() */
static char *nawk_toupper(const char *s)
{
	char *upper;

	upper = nawk_convert(s, toupper, towupper);
	return upper;
}
1580 
/* lower-case conversion of s via nawk_convert() */
static char *nawk_tolower(const char *s)
{
	char *lower;

	lower = nawk_convert(s, tolower, towlower);
	return lower;
}
1585 
1586 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1587 {
1588 	Cell *x, *y;
1589 	Awkfloat u;
1590 	int t, sz;
1591 	Awkfloat tmp;
1592 	char *buf, *fmt;
1593 	Node *nextarg;
1594 	FILE *fp;
1595 	int status = 0;
1596 	time_t tv;
1597 	struct tm *tm, tmbuf;
1598 
1599 	t = ptoi(a[0]);
1600 	x = execute(a[1]);
1601 	nextarg = a[1]->nnext;
1602 	switch (t) {
1603 	case FLENGTH:
1604 		if (isarr(x))
1605 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1606 		else
1607 			u = strlen(getsval(x));
1608 		break;
1609 	case FLOG:
1610 		errno = 0;
1611 		u = errcheck(log(getfval(x)), "log");
1612 		break;
1613 	case FINT:
1614 		modf(getfval(x), &u); break;
1615 	case FEXP:
1616 		errno = 0;
1617 		u = errcheck(exp(getfval(x)), "exp");
1618 		break;
1619 	case FSQRT:
1620 		errno = 0;
1621 		u = errcheck(sqrt(getfval(x)), "sqrt");
1622 		break;
1623 	case FSIN:
1624 		u = sin(getfval(x)); break;
1625 	case FCOS:
1626 		u = cos(getfval(x)); break;
1627 	case FATAN:
1628 		if (nextarg == NULL) {
1629 			WARNING("atan2 requires two arguments; returning 1.0");
1630 			u = 1.0;
1631 		} else {
1632 			y = execute(a[1]->nnext);
1633 			u = atan2(getfval(x), getfval(y));
1634 			tempfree(y);
1635 			nextarg = nextarg->nnext;
1636 		}
1637 		break;
1638 	case FCOMPL:
1639 		u = ~((int)getfval(x));
1640 		break;
1641 	case FAND:
1642 		if (nextarg == 0) {
1643 			WARNING("and requires two arguments; returning 0");
1644 			u = 0;
1645 			break;
1646 		}
1647 		y = execute(a[1]->nnext);
1648 		u = ((int)getfval(x)) & ((int)getfval(y));
1649 		tempfree(y);
1650 		nextarg = nextarg->nnext;
1651 		break;
1652 	case FFOR:
1653 		if (nextarg == 0) {
1654 			WARNING("or requires two arguments; returning 0");
1655 			u = 0;
1656 			break;
1657 		}
1658 		y = execute(a[1]->nnext);
1659 		u = ((int)getfval(x)) | ((int)getfval(y));
1660 		tempfree(y);
1661 		nextarg = nextarg->nnext;
1662 		break;
1663 	case FXOR:
1664 		if (nextarg == 0) {
1665 			WARNING("xor requires two arguments; returning 0");
1666 			u = 0;
1667 			break;
1668 		}
1669 		y = execute(a[1]->nnext);
1670 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1671 		tempfree(y);
1672 		nextarg = nextarg->nnext;
1673 		break;
1674 	case FLSHIFT:
1675 		if (nextarg == 0) {
1676 			WARNING("lshift requires two arguments; returning 0");
1677 			u = 0;
1678 			break;
1679 		}
1680 		y = execute(a[1]->nnext);
1681 		u = ((int)getfval(x)) << ((int)getfval(y));
1682 		tempfree(y);
1683 		nextarg = nextarg->nnext;
1684 		break;
1685 	case FRSHIFT:
1686 		if (nextarg == 0) {
1687 			WARNING("rshift requires two arguments; returning 0");
1688 			u = 0;
1689 			break;
1690 		}
1691 		y = execute(a[1]->nnext);
1692 		u = ((int)getfval(x)) >> ((int)getfval(y));
1693 		tempfree(y);
1694 		nextarg = nextarg->nnext;
1695 		break;
1696 	case FSYSTEM:
1697 		fflush(stdout);		/* in case something is buffered already */
1698 		status = system(getsval(x));
1699 		u = status;
1700 		if (status != -1) {
1701 			if (WIFEXITED(status)) {
1702 				u = WEXITSTATUS(status);
1703 			} else if (WIFSIGNALED(status)) {
1704 				u = WTERMSIG(status) + 256;
1705 #ifdef WCOREDUMP
1706 				if (WCOREDUMP(status))
1707 					u += 256;
1708 #endif
1709 			} else	/* something else?!? */
1710 				u = 0;
1711 		}
1712 		break;
1713 	case FRAND:
1714 		/* random() returns numbers in [0..2^31-1]
1715 		 * in order to get a number in [0, 1), divide it by 2^31
1716 		 */
1717 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1718 		break;
1719 	case FSRAND:
1720 		if (isrec(x)) {		/* no argument provided */
1721 			u = time(NULL);
1722 			tmp = u;
1723 			srandom((unsigned int) u);
1724 		} else {
1725 			u = getfval(x);
1726 			tmp = u;
1727 			srandom_deterministic((unsigned int) u);
1728 		}
1729 		u = srand_seed;
1730 		srand_seed = tmp;
1731 		break;
1732 	case FTOUPPER:
1733 	case FTOLOWER:
1734 		if (t == FTOUPPER)
1735 			buf = nawk_toupper(getsval(x));
1736 		else
1737 			buf = nawk_tolower(getsval(x));
1738 		tempfree(x);
1739 		x = gettemp();
1740 		setsval(x, buf);
1741 		free(buf);
1742 		return x;
1743 	case FFLUSH:
1744 		if (isrec(x) || strlen(getsval(x)) == 0) {
1745 			flush_all();	/* fflush() or fflush("") -> all */
1746 			u = 0;
1747 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1748 			u = EOF;
1749 		else
1750 			u = fflush(fp);
1751 		break;
1752 	case FMKTIME:
1753 		memset(&tmbuf, 0, sizeof(tmbuf));
1754 		tm = &tmbuf;
1755 		t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
1756 		    &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
1757 		    &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
1758 		switch (t) {
1759 		case 6:
1760 			tm->tm_isdst = -1;	/* let mktime figure it out */
1761 			/* FALLTHROUGH */
1762 		case 7:
1763 			tm->tm_year -= 1900;
1764 			tm->tm_mon--;
1765 			u = mktime(tm);
1766 			break;
1767 		default:
1768 			u = -1;
1769 			break;
1770 		}
1771 		break;
1772 	case FSYSTIME:
1773 		u = time((time_t *) 0);
1774 		break;
1775 	case FSTRFTIME:
1776 		/* strftime([format [,timestamp]]) */
1777 		if (nextarg) {
1778 			y = execute(nextarg);
1779 			nextarg = nextarg->nnext;
1780 			tv = (time_t) getfval(y);
1781 			tempfree(y);
1782 		} else
1783 			tv = time((time_t *) 0);
1784 		tm = localtime(&tv);
1785 		if (tm == NULL)
1786 			FATAL("bad time %ld", (long)tv);
1787 
1788 		if (isrec(x)) {
1789 			/* format argument not provided, use default */
1790 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1791 		} else
1792 			fmt = tostring(getsval(x));
1793 
1794 		sz = 32;
1795 		buf = NULL;
1796 		do {
1797 			if ((buf = reallocarray(buf, 2, sz)) == NULL)
1798 				FATAL("out of memory in strftime");
1799 			sz *= 2;
1800 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1801 
1802 		y = gettemp();
1803 		setsval(y, buf);
1804 		free(fmt);
1805 		free(buf);
1806 
1807 		return y;
1808 	default:	/* can't happen */
1809 		FATAL("illegal function type %d", t);
1810 		break;
1811 	}
1812 	tempfree(x);
1813 	x = gettemp();
1814 	setfval(x, u);
1815 	if (nextarg != NULL) {
1816 		WARNING("warning: function has too many arguments");
1817 		for ( ; nextarg; nextarg = nextarg->nnext)
1818 			execute(nextarg);
1819 	}
1820 	return(x);
1821 }
1822 
1823 Cell *printstat(Node **a, int n)	/* print a[0] */
1824 {
1825 	Node *x;
1826 	Cell *y;
1827 	FILE *fp;
1828 
1829 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1830 		fp = stdout;
1831 	else
1832 		fp = redirect(ptoi(a[1]), a[2]);
1833 	for (x = a[0]; x != NULL; x = x->nnext) {
1834 		y = execute(x);
1835 		fputs(getpssval(y), fp);
1836 		tempfree(y);
1837 		if (x->nnext == NULL)
1838 			fputs(getsval(orsloc), fp);
1839 		else
1840 			fputs(getsval(ofsloc), fp);
1841 	}
1842 	if (a[1] != NULL)
1843 		fflush(fp);
1844 	if (ferror(fp))
1845 		FATAL("write error on %s", filename(fp));
1846 	return(True);
1847 }
1848 
1849 Cell *nullproc(Node **a, int n)
1850 {
1851 	return 0;
1852 }
1853 
1854 
1855 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
1856 {
1857 	FILE *fp;
1858 	Cell *x;
1859 	char *fname;
1860 
1861 	x = execute(b);
1862 	fname = getsval(x);
1863 	fp = openfile(a, fname, NULL);
1864 	if (fp == NULL)
1865 		FATAL("can't open file %s", fname);
1866 	tempfree(x);
1867 	return fp;
1868 }
1869 
/* one entry per stream tracked by the i/o routines below */
struct files {
	FILE	*fp;		/* the open stream */
	const char	*fname;	/* name it was opened under */
	int	mode;		/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* table of tracked streams */

size_t nfiles;		/* number of slots in files */
1877 
1878 static void stdinit(void)	/* in case stdin, etc., are not constants */
1879 {
1880 	nfiles = FOPEN_MAX;
1881 	files = calloc(nfiles, sizeof(*files));
1882 	if (files == NULL)
1883 		FATAL("can't allocate file memory for %zu files", nfiles);
1884         files[0].fp = stdin;
1885 	files[0].fname = "/dev/stdin";
1886 	files[0].mode = LT;
1887         files[1].fp = stdout;
1888 	files[1].fname = "/dev/stdout";
1889 	files[1].mode = GT;
1890         files[2].fp = stderr;
1891 	files[2].fname = "/dev/stderr";
1892 	files[2].mode = GT;
1893 }
1894 
1895 FILE *openfile(int a, const char *us, bool *pnewflag)
1896 {
1897 	const char *s = us;
1898 	size_t i;
1899 	int m;
1900 	FILE *fp = NULL;
1901 
1902 	if (*s == '\0')
1903 		FATAL("null file name in print or getline");
1904 	for (i = 0; i < nfiles; i++)
1905 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1906 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1907 		     a == FFLUSH)) {
1908 			if (pnewflag)
1909 				*pnewflag = false;
1910 			return files[i].fp;
1911 		}
1912 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1913 		return NULL;
1914 
1915 	for (i = 0; i < nfiles; i++)
1916 		if (files[i].fp == NULL)
1917 			break;
1918 	if (i >= nfiles) {
1919 		struct files *nf;
1920 		size_t nnf = nfiles + FOPEN_MAX;
1921 		nf = reallocarray(files, nnf, sizeof(*nf));
1922 		if (nf == NULL)
1923 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1924 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1925 		nfiles = nnf;
1926 		files = nf;
1927 	}
1928 	fflush(stdout);	/* force a semblance of order */
1929 	m = a;
1930 	if (a == GT) {
1931 		fp = fopen(s, "w");
1932 	} else if (a == APPEND) {
1933 		fp = fopen(s, "a");
1934 		m = GT;	/* so can mix > and >> */
1935 	} else if (a == '|') {	/* output pipe */
1936 		fp = popen(s, "w");
1937 	} else if (a == LE) {	/* input pipe */
1938 		fp = popen(s, "r");
1939 	} else if (a == LT) {	/* getline <file */
1940 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1941 	} else	/* can't happen */
1942 		FATAL("illegal redirection %d", a);
1943 	if (fp != NULL) {
1944 		files[i].fname = tostring(s);
1945 		files[i].fp = fp;
1946 		files[i].mode = m;
1947 		if (pnewflag)
1948 			*pnewflag = true;
1949 		if (fp != stdin && fp != stdout && fp != stderr)
1950 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1951 	}
1952 	return fp;
1953 }
1954 
1955 const char *filename(FILE *fp)
1956 {
1957 	size_t i;
1958 
1959 	for (i = 0; i < nfiles; i++)
1960 		if (fp == files[i].fp)
1961 			return files[i].fname;
1962 	return "???";
1963 }
1964 
1965  Cell *closefile(Node **a, int n)
1966  {
1967  	Cell *x;
1968 	size_t i;
1969 	bool stat;
1970 
1971  	x = execute(a[0]);
1972  	getsval(x);
1973 	stat = true;
1974  	for (i = 0; i < nfiles; i++) {
1975 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1976 			continue;
1977 		if (ferror(files[i].fp))
1978 			FATAL("i/o error occurred on %s", files[i].fname);
1979 		if (files[i].fp == stdin || files[i].fp == stdout ||
1980 		    files[i].fp == stderr)
1981 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
1982 		else if (files[i].mode == '|' || files[i].mode == LE)
1983 			stat = pclose(files[i].fp) == -1;
1984 		else
1985 			stat = fclose(files[i].fp) == EOF;
1986 		if (stat)
1987 			FATAL("i/o error occurred closing %s", files[i].fname);
1988 		if (i > 2)	/* don't do /dev/std... */
1989 			xfree(files[i].fname);
1990 		files[i].fname = NULL;	/* watch out for ref thru this */
1991 		files[i].fp = NULL;
1992 		break;
1993  	}
1994  	tempfree(x);
1995  	x = gettemp();
1996 	setfval(x, (Awkfloat) (stat ? -1 : 0));
1997  	return(x);
1998  }
1999 
2000 void closeall(void)
2001 {
2002 	size_t i;
2003 	bool stat = false;
2004 
2005 	for (i = 0; i < nfiles; i++) {
2006 		if (! files[i].fp)
2007 			continue;
2008 		if (ferror(files[i].fp))
2009 			FATAL( "i/o error occurred on %s", files[i].fname );
2010 		if (files[i].fp == stdin)
2011 			continue;
2012 		if (files[i].mode == '|' || files[i].mode == LE)
2013 			stat = pclose(files[i].fp) == -1;
2014 		else if (files[i].fp == stdout || files[i].fp == stderr)
2015 			stat = fflush(files[i].fp) == EOF;
2016 		else
2017 			stat = fclose(files[i].fp) == EOF;
2018 		if (stat)
2019 			FATAL( "i/o error occurred while closing %s", files[i].fname );
2020 	}
2021 }
2022 
2023 static void flush_all(void)
2024 {
2025 	size_t i;
2026 
2027 	for (i = 0; i < nfiles; i++)
2028 		if (files[i].fp)
2029 			fflush(files[i].fp);
2030 }
2031 
2032 void backsub(char **pb_ptr, const char **sptr_ptr);
2033 
2034 Cell *sub(Node **a, int nnn)	/* substitute command */
2035 {
2036 	const char *sptr, *q;
2037 	Cell *x, *y, *result;
2038 	char *t, *buf, *pb;
2039 	fa *pfa;
2040 	int bufsz = recsize;
2041 
2042 	if ((buf = malloc(bufsz)) == NULL)
2043 		FATAL("out of memory in sub");
2044 	x = execute(a[3]);	/* target string */
2045 	t = getsval(x);
2046 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2047 		pfa = (fa *) a[1];	/* regular expression */
2048 	else {
2049 		y = execute(a[1]);
2050 		pfa = makedfa(getsval(y), 1);
2051 		tempfree(y);
2052 	}
2053 	y = execute(a[2]);	/* replacement string */
2054 	result = False;
2055 	if (pmatch(pfa, t)) {
2056 		sptr = t;
2057 		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
2058 		pb = buf;
2059 		while (sptr < patbeg)
2060 			*pb++ = *sptr++;
2061 		sptr = getsval(y);
2062 		while (*sptr != '\0') {
2063 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
2064 			if (*sptr == '\\') {
2065 				backsub(&pb, &sptr);
2066 			} else if (*sptr == '&') {
2067 				sptr++;
2068 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
2069 				for (q = patbeg; q < patbeg+patlen; )
2070 					*pb++ = *q++;
2071 			} else
2072 				*pb++ = *sptr++;
2073 		}
2074 		*pb = '\0';
2075 		if (pb > buf + bufsz)
2076 			FATAL("sub result1 %.30s too big; can't happen", buf);
2077 		sptr = patbeg + patlen;
2078 		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
2079 			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
2080 			while ((*pb++ = *sptr++) != '\0')
2081 				continue;
2082 		}
2083 		if (pb > buf + bufsz)
2084 			FATAL("sub result2 %.30s too big; can't happen", buf);
2085 		setsval(x, buf);	/* BUG: should be able to avoid copy */
2086 		result = True;
2087 	}
2088 	tempfree(x);
2089 	tempfree(y);
2090 	free(buf);
2091 	return result;
2092 }
2093 
2094 Cell *gsub(Node **a, int nnn)	/* global substitute */
2095 {
2096 	Cell *x, *y;
2097 	char *rptr, *pb;
2098 	const char *q, *t, *sptr;
2099 	char *buf;
2100 	fa *pfa;
2101 	int mflag, tempstat, num;
2102 	int bufsz = recsize;
2103 
2104 	if ((buf = malloc(bufsz)) == NULL)
2105 		FATAL("out of memory in gsub");
2106 	mflag = 0;	/* if mflag == 0, can replace empty string */
2107 	num = 0;
2108 	x = execute(a[3]);	/* target string */
2109 	t = getsval(x);
2110 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2111 		pfa = (fa *) a[1];	/* regular expression */
2112 	else {
2113 		y = execute(a[1]);
2114 		pfa = makedfa(getsval(y), 1);
2115 		tempfree(y);
2116 	}
2117 	y = execute(a[2]);	/* replacement string */
2118 	if (pmatch(pfa, t)) {
2119 		tempstat = pfa->initstat;
2120 		pfa->initstat = 2;
2121 		pb = buf;
2122 		rptr = getsval(y);
2123 		do {
2124 			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
2125 				if (mflag == 0) {	/* can replace empty */
2126 					num++;
2127 					sptr = rptr;
2128 					while (*sptr != '\0') {
2129 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2130 						if (*sptr == '\\') {
2131 							backsub(&pb, &sptr);
2132 						} else if (*sptr == '&') {
2133 							sptr++;
2134 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2135 							for (q = patbeg; q < patbeg+patlen; )
2136 								*pb++ = *q++;
2137 						} else
2138 							*pb++ = *sptr++;
2139 					}
2140 				}
2141 				if (*t == '\0')	/* at end */
2142 					goto done;
2143 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2144 				*pb++ = *t++;
2145 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2146 					FATAL("gsub result0 %.30s too big; can't happen", buf);
2147 				mflag = 0;
2148 			}
2149 			else {	/* matched nonempty string */
2150 				num++;
2151 				sptr = t;
2152 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2153 				while (sptr < patbeg)
2154 					*pb++ = *sptr++;
2155 				sptr = rptr;
2156 				while (*sptr != '\0') {
2157 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2158 					if (*sptr == '\\') {
2159 						backsub(&pb, &sptr);
2160 					} else if (*sptr == '&') {
2161 						sptr++;
2162 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2163 						for (q = patbeg; q < patbeg+patlen; )
2164 							*pb++ = *q++;
2165 					} else
2166 						*pb++ = *sptr++;
2167 				}
2168 				t = patbeg + patlen;
2169 				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
2170 					goto done;
2171 				if (pb > buf + bufsz)
2172 					FATAL("gsub result1 %.30s too big; can't happen", buf);
2173 				mflag = 1;
2174 			}
2175 		} while (pmatch(pfa,t));
2176 		sptr = t;
2177 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2178 		while ((*pb++ = *sptr++) != '\0')
2179 			continue;
2180 	done:	if (pb < buf + bufsz)
2181 			*pb = '\0';
2182 		else if (*(pb-1) != '\0')
2183 			FATAL("gsub result2 %.30s truncated; can't happen", buf);
2184 		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
2185 		pfa->initstat = tempstat;
2186 	}
2187 	tempfree(x);
2188 	tempfree(y);
2189 	x = gettemp();
2190 	x->tval = NUM;
2191 	x->fval = num;
2192 	free(buf);
2193 	return(x);
2194 }
2195 
2196 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2197 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2198 {
2199 	Cell *x, *y, *res, *h;
2200 	char *rptr;
2201 	const char *sptr;
2202 	char *buf, *pb;
2203 	const char *t, *q;
2204 	fa *pfa;
2205 	int mflag, tempstat, num, whichm;
2206 	int bufsz = recsize;
2207 
2208 	if ((buf = malloc(bufsz)) == NULL)
2209 		FATAL("out of memory in gensub");
2210 	mflag = 0;	/* if mflag == 0, can replace empty string */
2211 	num = 0;
2212 	x = execute(a[4]);	/* source string */
2213 	t = getsval(x);
2214 	res = copycell(x);	/* target string - initially copy of source */
2215 	res->csub = CTEMP;	/* result values are temporary */
2216 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2217 		pfa = (fa *) a[1];	/* regular expression */
2218 	else {
2219 		y = execute(a[1]);
2220 		pfa = makedfa(getsval(y), 1);
2221 		tempfree(y);
2222 	}
2223 	y = execute(a[2]);	/* replacement string */
2224 	h = execute(a[3]);	/* which matches should be replaced */
2225 	sptr = getsval(h);
2226 	if (sptr[0] == 'g' || sptr[0] == 'G')
2227 		whichm = -1;
2228 	else {
2229 		/*
2230 		 * The specified number is index of replacement, starting
2231 		 * from 1. GNU awk treats index lower than 0 same as
2232 		 * 1, we do same for compatibility.
2233 		 */
2234 		whichm = (int) getfval(h) - 1;
2235 		if (whichm < 0)
2236 			whichm = 0;
2237 	}
2238 	tempfree(h);
2239 
2240 	if (pmatch(pfa, t)) {
2241 		char *sl;
2242 
2243 		tempstat = pfa->initstat;
2244 		pfa->initstat = 2;
2245 		pb = buf;
2246 		rptr = getsval(y);
2247 		/*
2248 		 * XXX if there are any backreferences in subst string,
2249 		 * complain now.
2250 		 */
2251 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2252 			if (strchr("0123456789", sl[1])) {
2253 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2254 			}
2255 		}
2256 
2257 		do {
2258 			if (whichm >= 0 && whichm != num) {
2259 				num++;
2260 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2261 
2262 				/* copy the part of string up to and including
2263 				 * match to output buffer */
2264 				while (t < patbeg + patlen)
2265 					*pb++ = *t++;
2266 				continue;
2267 			}
2268 
2269 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2270 				if (mflag == 0) {	/* can replace empty */
2271 					num++;
2272 					sptr = rptr;
2273 					while (*sptr != 0) {
2274 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2275 						if (*sptr == '\\') {
2276 							backsub(&pb, &sptr);
2277 						} else if (*sptr == '&') {
2278 							sptr++;
2279 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2280 							for (q = patbeg; q < patbeg+patlen; )
2281 								*pb++ = *q++;
2282 						} else
2283 							*pb++ = *sptr++;
2284 					}
2285 				}
2286 				if (*t == 0)	/* at end */
2287 					goto done;
2288 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2289 				*pb++ = *t++;
2290 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2291 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2292 				mflag = 0;
2293 			}
2294 			else {	/* matched nonempty string */
2295 				num++;
2296 				sptr = t;
2297 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2298 				while (sptr < patbeg)
2299 					*pb++ = *sptr++;
2300 				sptr = rptr;
2301 				while (*sptr != 0) {
2302 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2303 					if (*sptr == '\\') {
2304 						backsub(&pb, &sptr);
2305 					} else if (*sptr == '&') {
2306 						sptr++;
2307 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2308 						for (q = patbeg; q < patbeg+patlen; )
2309 							*pb++ = *q++;
2310 					} else
2311 						*pb++ = *sptr++;
2312 				}
2313 				t = patbeg + patlen;
2314 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2315 					goto done;
2316 				if (pb > buf + bufsz)
2317 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2318 				mflag = 1;
2319 			}
2320 		} while (pmatch(pfa,t));
2321 		sptr = t;
2322 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2323 		while ((*pb++ = *sptr++) != 0)
2324 			;
2325 	done:	if (pb > buf + bufsz)
2326 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2327 		*pb = '\0';
2328 		setsval(res, buf);
2329 		pfa->initstat = tempstat;
2330 	}
2331 	tempfree(x);
2332 	tempfree(y);
2333 	free(buf);
2334 	return(res);
2335 }
2336 
2337 void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
2338 {						/* sptr[0] == '\\' */
2339 	char *pb = *pb_ptr;
2340 	const char *sptr = *sptr_ptr;
2341 
2342 	if (sptr[1] == '\\') {
2343 		if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2344 			*pb++ = '\\';
2345 			*pb++ = '&';
2346 			sptr += 4;
2347 		} else if (sptr[2] == '&') {	/* \\& -> \ + matched */
2348 			*pb++ = '\\';
2349 			sptr += 2;
2350 		} else if (do_posix) {		/* \\x -> \x */
2351 			sptr++;
2352 			*pb++ = *sptr++;
2353 		} else {			/* \\x -> \\x */
2354 			*pb++ = *sptr++;
2355 			*pb++ = *sptr++;
2356 		}
2357 	} else if (sptr[1] == '&') {	/* literal & */
2358 		sptr++;
2359 		*pb++ = *sptr++;
2360 	} else				/* literal \ */
2361 		*pb++ = *sptr++;
2362 
2363 	*pb_ptr = pb;
2364 	*sptr_ptr = sptr;
2365 }
2366