xref: /freebsd-src/contrib/one-true-awk/run.c (revision 8d457988a72487b35ee3922671775d73169339e3)
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
242a55deb1SDavid E. O'Brien 
252a55deb1SDavid E. O'Brien #define DEBUG
262a55deb1SDavid E. O'Brien #include <stdio.h>
272a55deb1SDavid E. O'Brien #include <ctype.h>
28f39dd6a9SWarner Losh #include <errno.h>
29f39dd6a9SWarner Losh #include <wctype.h>
30f39dd6a9SWarner Losh #include <fcntl.h>
312a55deb1SDavid E. O'Brien #include <setjmp.h>
32c263f9bfSRuslan Ermilov #include <limits.h>
332a55deb1SDavid E. O'Brien #include <math.h>
342a55deb1SDavid E. O'Brien #include <string.h>
352a55deb1SDavid E. O'Brien #include <stdlib.h>
362a55deb1SDavid E. O'Brien #include <time.h>
37b5253557SWarner Losh #include <sys/types.h>
38b5253557SWarner Losh #include <sys/wait.h>
392a55deb1SDavid E. O'Brien #include "awk.h"
40f39dd6a9SWarner Losh #include "awkgram.tab.h"
412a55deb1SDavid E. O'Brien 
42f32a6403SWarner Losh 
/* Forward declarations of file-local helpers (definitions are outside this excerpt). */
static void stdinit(void);
static void flush_all(void);
static char *wide_char_to_byte_str(int rune, size_t *outlen);

/*
 * tempfree(x): pass cell x to tfree() only when istemp(x) holds.
 *
 * The macro form is the one compiled in (#if 1).  The function form in the
 * #else branch is a disabled alternative that additionally warns when an
 * OCELL carries a csub outside the range CUNK..CFREE.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
592a55deb1SDavid E. O'Brien 
/* do we really need these? */
/* #ifdef _NFILE */
/* #ifndef FOPEN_MAX */
/* #define FOPEN_MAX _NFILE */
/* #endif */
/* #endif */
/*  */
/* #ifndef	FOPEN_MAX */
/* #define	FOPEN_MAX	40 */	/* max number of open files */
/* #endif */
/*  */
/* #ifndef RAND_MAX */
/* #define RAND_MAX	32767 */	/* all that ansi guarantees */
/* #endif */
742a55deb1SDavid E. O'Brien 
jmp_buf env;	/* set by setjmp() in program(); jump() longjmp()s here on EXIT */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Statically allocated singleton cells.  The pointers below are what the
 * interpreter functions return to signal a boolean result (True/False) or a
 * pending jump (jbreak, jcont, jnext, jnextfile, jexit, jret); callers test
 * them with isjump()/isexit()/isnext() and friends.
 * NOTE(review): initializers are positional; the field order is defined by
 * Cell in awk.h, which is not visible here.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* presumably the template that gettemp() copies into fresh temporaries; its use is outside this excerpt */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
1012a55deb1SDavid E. O'Brien 
1022a55deb1SDavid E. O'Brien /* buffer memory management */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
/* Make sure the realloc-managed buffer *pbuf can hold at least minlen bytes.
 * Nothing happens when the buffer is already large enough.  Otherwise the
 * requested size is rounded up to a multiple of quantum, the buffer is
 * realloc'ed, and *pbuf, *psiz and (if given) *pbptr are updated.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for realloc failure, !=0 for success
 *          (a size that cannot be represented in an int after rounding is
 *          treated exactly like a realloc failure)
 */
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		/* remember the offset: realloc may move the buffer */
		int boff = pbptr ? *pbptr - *pbuf : 0;

		/* round up to next multiple of quantum */
		if (rminlen) {
			int pad = quantum - rminlen;

			/* rounding must not overflow int (signed overflow is UB) */
			if (pad > 0 && minlen > INT_MAX - pad) {
				if (whatrtn)
					FATAL("out of memory in %s", whatrtn);
				return 0;
			}
			minlen += pad;
		}
		tbuf = (char *) realloc(*pbuf, minlen);
		/* whatrtn may legitimately be NULL; never hand NULL to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;	/* *pbuf is still valid and unchanged */
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
1362a55deb1SDavid E. O'Brien 
1372a55deb1SDavid E. O'Brien void run(Node *a)	/* execution of parse tree starts here */
1382a55deb1SDavid E. O'Brien {
1392a55deb1SDavid E. O'Brien 
1402a55deb1SDavid E. O'Brien 	stdinit();
1412a55deb1SDavid E. O'Brien 	execute(a);
1422a55deb1SDavid E. O'Brien 	closeall();
1432a55deb1SDavid E. O'Brien }
1442a55deb1SDavid E. O'Brien 
1452a55deb1SDavid E. O'Brien Cell *execute(Node *u)	/* execute a node of the parse tree */
1462a55deb1SDavid E. O'Brien {
1472a55deb1SDavid E. O'Brien 	Cell *(*proc)(Node **, int);
1482a55deb1SDavid E. O'Brien 	Cell *x;
1492a55deb1SDavid E. O'Brien 	Node *a;
1502a55deb1SDavid E. O'Brien 
1512a55deb1SDavid E. O'Brien 	if (u == NULL)
1522a55deb1SDavid E. O'Brien 		return(True);
1532a55deb1SDavid E. O'Brien 	for (a = u; ; a = a->nnext) {
1542a55deb1SDavid E. O'Brien 		curnode = a;
1552a55deb1SDavid E. O'Brien 		if (isvalue(a)) {
1562a55deb1SDavid E. O'Brien 			x = (Cell *) (a->narg[0]);
1572a55deb1SDavid E. O'Brien 			if (isfld(x) && !donefld)
1582a55deb1SDavid E. O'Brien 				fldbld();
1592a55deb1SDavid E. O'Brien 			else if (isrec(x) && !donerec)
1602a55deb1SDavid E. O'Brien 				recbld();
1612a55deb1SDavid E. O'Brien 			return(x);
1622a55deb1SDavid E. O'Brien 		}
1632a55deb1SDavid E. O'Brien 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
1642a55deb1SDavid E. O'Brien 			FATAL("illegal statement");
1652a55deb1SDavid E. O'Brien 		proc = proctab[a->nobj-FIRSTTOKEN];
1662a55deb1SDavid E. O'Brien 		x = (*proc)(a->narg, a->nobj);
1672a55deb1SDavid E. O'Brien 		if (isfld(x) && !donefld)
1682a55deb1SDavid E. O'Brien 			fldbld();
1692a55deb1SDavid E. O'Brien 		else if (isrec(x) && !donerec)
1702a55deb1SDavid E. O'Brien 			recbld();
1712a55deb1SDavid E. O'Brien 		if (isexpr(a))
1722a55deb1SDavid E. O'Brien 			return(x);
1732a55deb1SDavid E. O'Brien 		if (isjump(x))
1742a55deb1SDavid E. O'Brien 			return(x);
1752a55deb1SDavid E. O'Brien 		if (a->nnext == NULL)
1762a55deb1SDavid E. O'Brien 			return(x);
1772a55deb1SDavid E. O'Brien 		tempfree(x);
1782a55deb1SDavid E. O'Brien 	}
1792a55deb1SDavid E. O'Brien }
1802a55deb1SDavid E. O'Brien 
1812a55deb1SDavid E. O'Brien 
/*
 * program: run a whole awk program: BEGIN, then the body once per input
 * record, then END.  Any of a[0], a[1], a[2] may be NULL.  Always returns
 * True.
 *
 * `exit` is delivered by jump() as longjmp(env, 1).  The first setjmp()
 * sends an exit from BEGIN or the body to label ex, so END still runs; the
 * second setjmp() re-arms env so that an exit inside END lands on ex1.
 */
Cell *program(Node **a, int n)	/* execute an awk program */
{				/* a[0] = BEGIN, a[1] = body, a[2] = END */
	Cell *x;

	if (setjmp(env) != 0)	/* non-zero: arrived here via longjmp (exit) */
		goto ex;
	if (a[0]) {		/* BEGIN */
		x = execute(a[0]);
		if (isexit(x))
			return(True);
		if (isjump(x))
			FATAL("illegal break, continue, next or nextfile from BEGIN");
		tempfree(x);
	}
	/* input is read when there is a body or an END action */
	if (a[1] || a[2])
		while (getrec(&record, &recsize, true) > 0) {
			x = execute(a[1]);	/* execute(NULL) returns True, so a missing body is fine */
			if (isexit(x))
				break;
			tempfree(x);
		}
  ex:
	if (setjmp(env) != 0)	/* handles exit within END */
		goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		/* NOTE(review): the message mentions nextfile but only break/next/continue are tested */
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}
2152a55deb1SDavid E. O'Brien 
/*
 * Call-stack bookkeeping for user-defined awk functions.  call() pushes one
 * Frame per invocation; arg() and jump() read the current frame through frp.
 */
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

/* call() rejects a call when (args passed + args declared) exceeds this */
#define	NARGS	50	/* max args in a call */

struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
2282a55deb1SDavid E. O'Brien 
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's cell; a[1] is the linked list (via nnext)
 * of argument expressions.  The function cell supplies the declared
 * parameter count in fval and the body, stored as a Node pointer, in sval.
 *
 * Steps: evaluate the call arguments (arrays are passed by reference, other
 * values are copied with copycell()), pad missing parameters with empty
 * CCOPY temporaries that act as locals, push a Frame, execute the body,
 * release the argument cells, pop the frame and return the frame's retval
 * cell.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	/* template for parameters the caller did not supply */
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	if (frame == NULL) {
		/* first call ever: allocate the frame stack (grown 100 frames at a time) */
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* keeps every index used on args[]/oargs[] below within NARGS */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell; needed if a scalar param becomes an array */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* realloc may have moved the stack */
	}
	frp->fcncell = fcn;
	frp->args = args;	/* points at this invocation's local array */
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/*
	 * Release the parameter cells.
	 * NOTE(review): only the first ndef entries are visited, so when
	 * ncall > ndef the extra copies made above are not released here.
	 */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				/* a copied/padded parameter was turned into an array inside the function */
				if (i >= ncall) {
					/* it was a local: discard the table and the cell */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* it was passed by the caller: hand the new table back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			/* the body's result cell is this parameter: free it once here, not again below */
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	/* NOTE(review): this early return leaves frp un-decremented and retval unreleased */
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
3272a55deb1SDavid E. O'Brien 
3282a55deb1SDavid E. O'Brien Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
3292a55deb1SDavid E. O'Brien {
3302a55deb1SDavid E. O'Brien 	Cell *y;
3312a55deb1SDavid E. O'Brien 
332b5253557SWarner Losh 	/* copy is not constant or field */
333b5253557SWarner Losh 
3342a55deb1SDavid E. O'Brien 	y = gettemp();
335b5253557SWarner Losh 	y->tval = x->tval & ~(CON|FLD|REC);
3362a55deb1SDavid E. O'Brien 	y->csub = CCOPY;	/* prevents freeing until call is over */
3372a55deb1SDavid E. O'Brien 	y->nval = x->nval;	/* BUG? */
338b5253557SWarner Losh 	if (isstr(x) /* || x->ctype == OCELL */) {
3392a55deb1SDavid E. O'Brien 		y->sval = tostring(x->sval);
340b5253557SWarner Losh 		y->tval &= ~DONTFREE;
341b5253557SWarner Losh 	} else
342b5253557SWarner Losh 		y->tval |= DONTFREE;
3432a55deb1SDavid E. O'Brien 	y->fval = x->fval;
3442a55deb1SDavid E. O'Brien 	return y;
3452a55deb1SDavid E. O'Brien }
3462a55deb1SDavid E. O'Brien 
3472a55deb1SDavid E. O'Brien Cell *arg(Node **a, int n)	/* nth argument of a function */
3482a55deb1SDavid E. O'Brien {
3492a55deb1SDavid E. O'Brien 
3502a55deb1SDavid E. O'Brien 	n = ptoi(a[0]);	/* argument number, counting from 0 */
351f39dd6a9SWarner Losh 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352f39dd6a9SWarner Losh 	if (n+1 > frp->nargs)
3532a55deb1SDavid E. O'Brien 		FATAL("argument #%d of function %s was not supplied",
354f39dd6a9SWarner Losh 			n+1, frp->fcncell->nval);
355f39dd6a9SWarner Losh 	return frp->args[n];
3562a55deb1SDavid E. O'Brien }
3572a55deb1SDavid E. O'Brien 
3582a55deb1SDavid E. O'Brien Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
3592a55deb1SDavid E. O'Brien {
3602a55deb1SDavid E. O'Brien 	Cell *y;
3612a55deb1SDavid E. O'Brien 
3622a55deb1SDavid E. O'Brien 	switch (n) {
3632a55deb1SDavid E. O'Brien 	case EXIT:
3642a55deb1SDavid E. O'Brien 		if (a[0] != NULL) {
3652a55deb1SDavid E. O'Brien 			y = execute(a[0]);
3662a55deb1SDavid E. O'Brien 			errorflag = (int) getfval(y);
3672a55deb1SDavid E. O'Brien 			tempfree(y);
3682a55deb1SDavid E. O'Brien 		}
3692a55deb1SDavid E. O'Brien 		longjmp(env, 1);
3702a55deb1SDavid E. O'Brien 	case RETURN:
3712a55deb1SDavid E. O'Brien 		if (a[0] != NULL) {
3722a55deb1SDavid E. O'Brien 			y = execute(a[0]);
3732a55deb1SDavid E. O'Brien 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374f39dd6a9SWarner Losh 				setsval(frp->retval, getsval(y));
375f39dd6a9SWarner Losh 				frp->retval->fval = getfval(y);
376f39dd6a9SWarner Losh 				frp->retval->tval |= NUM;
3772a55deb1SDavid E. O'Brien 			}
3782a55deb1SDavid E. O'Brien 			else if (y->tval & STR)
379f39dd6a9SWarner Losh 				setsval(frp->retval, getsval(y));
3802a55deb1SDavid E. O'Brien 			else if (y->tval & NUM)
381f39dd6a9SWarner Losh 				setfval(frp->retval, getfval(y));
3822a55deb1SDavid E. O'Brien 			else		/* can't happen */
3832a55deb1SDavid E. O'Brien 				FATAL("bad type variable %d", y->tval);
3842a55deb1SDavid E. O'Brien 			tempfree(y);
3852a55deb1SDavid E. O'Brien 		}
3862a55deb1SDavid E. O'Brien 		return(jret);
3872a55deb1SDavid E. O'Brien 	case NEXT:
3882a55deb1SDavid E. O'Brien 		return(jnext);
3892a55deb1SDavid E. O'Brien 	case NEXTFILE:
3902a55deb1SDavid E. O'Brien 		nextfile();
3912a55deb1SDavid E. O'Brien 		return(jnextfile);
3922a55deb1SDavid E. O'Brien 	case BREAK:
3932a55deb1SDavid E. O'Brien 		return(jbreak);
3942a55deb1SDavid E. O'Brien 	case CONTINUE:
3952a55deb1SDavid E. O'Brien 		return(jcont);
3962a55deb1SDavid E. O'Brien 	default:	/* can't happen */
3972a55deb1SDavid E. O'Brien 		FATAL("illegal jump type %d", n);
3982a55deb1SDavid E. O'Brien 	}
3992a55deb1SDavid E. O'Brien 	return 0;	/* not reached */
4002a55deb1SDavid E. O'Brien }
4012a55deb1SDavid E. O'Brien 
40291217c1cSRuslan Ermilov Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
4032a55deb1SDavid E. O'Brien {		/* a[0] is variable, a[1] is operator, a[2] is filename */
4042a55deb1SDavid E. O'Brien 	Cell *r, *x;
4052a55deb1SDavid E. O'Brien 	extern Cell **fldtab;
4062a55deb1SDavid E. O'Brien 	FILE *fp;
4072a55deb1SDavid E. O'Brien 	char *buf;
4082a55deb1SDavid E. O'Brien 	int bufsize = recsize;
4092a55deb1SDavid E. O'Brien 	int mode;
410f39dd6a9SWarner Losh 	bool newflag;
411f39dd6a9SWarner Losh 	double result;
4122a55deb1SDavid E. O'Brien 
4132a55deb1SDavid E. O'Brien 	if ((buf = (char *) malloc(bufsize)) == NULL)
4142a55deb1SDavid E. O'Brien 		FATAL("out of memory in getline");
4152a55deb1SDavid E. O'Brien 
4162a55deb1SDavid E. O'Brien 	fflush(stdout);	/* in case someone is waiting for a prompt */
4172a55deb1SDavid E. O'Brien 	r = gettemp();
4182a55deb1SDavid E. O'Brien 	if (a[1] != NULL) {		/* getline < file */
4192a55deb1SDavid E. O'Brien 		x = execute(a[2]);		/* filename */
4202a55deb1SDavid E. O'Brien 		mode = ptoi(a[1]);
4212a55deb1SDavid E. O'Brien 		if (mode == '|')		/* input pipe */
4222a55deb1SDavid E. O'Brien 			mode = LE;	/* arbitrary flag */
423f39dd6a9SWarner Losh 		fp = openfile(mode, getsval(x), &newflag);
4242a55deb1SDavid E. O'Brien 		tempfree(x);
4252a55deb1SDavid E. O'Brien 		if (fp == NULL)
4262a55deb1SDavid E. O'Brien 			n = -1;
4272a55deb1SDavid E. O'Brien 		else
428f39dd6a9SWarner Losh 			n = readrec(&buf, &bufsize, fp, newflag);
4292a55deb1SDavid E. O'Brien 		if (n <= 0) {
4302a55deb1SDavid E. O'Brien 			;
4312a55deb1SDavid E. O'Brien 		} else if (a[0] != NULL) {	/* getline var <file */
4322a55deb1SDavid E. O'Brien 			x = execute(a[0]);
4332a55deb1SDavid E. O'Brien 			setsval(x, buf);
434f39dd6a9SWarner Losh 			if (is_number(x->sval, & result)) {
435f39dd6a9SWarner Losh 				x->fval = result;
436b5253557SWarner Losh 				x->tval |= NUM;
437b5253557SWarner Losh 			}
4382a55deb1SDavid E. O'Brien 			tempfree(x);
4392a55deb1SDavid E. O'Brien 		} else {			/* getline <file */
4402a55deb1SDavid E. O'Brien 			setsval(fldtab[0], buf);
441f39dd6a9SWarner Losh 			if (is_number(fldtab[0]->sval, & result)) {
442f39dd6a9SWarner Losh 				fldtab[0]->fval = result;
4432a55deb1SDavid E. O'Brien 				fldtab[0]->tval |= NUM;
4442a55deb1SDavid E. O'Brien 			}
4452a55deb1SDavid E. O'Brien 		}
4462a55deb1SDavid E. O'Brien 	} else {			/* bare getline; use current input */
4472a55deb1SDavid E. O'Brien 		if (a[0] == NULL)	/* getline */
448f39dd6a9SWarner Losh 			n = getrec(&record, &recsize, true);
4492a55deb1SDavid E. O'Brien 		else {			/* getline var */
450f39dd6a9SWarner Losh 			n = getrec(&buf, &bufsize, false);
451f32a6403SWarner Losh 			if (n > 0) {
4522a55deb1SDavid E. O'Brien 				x = execute(a[0]);
4532a55deb1SDavid E. O'Brien 				setsval(x, buf);
454f39dd6a9SWarner Losh 				if (is_number(x->sval, & result)) {
455f39dd6a9SWarner Losh 					x->fval = result;
456b5253557SWarner Losh 					x->tval |= NUM;
457b5253557SWarner Losh 				}
4582a55deb1SDavid E. O'Brien 				tempfree(x);
4592a55deb1SDavid E. O'Brien 			}
4602a55deb1SDavid E. O'Brien 		}
461f32a6403SWarner Losh 	}
4622a55deb1SDavid E. O'Brien 	setfval(r, (Awkfloat) n);
4632a55deb1SDavid E. O'Brien 	free(buf);
4642a55deb1SDavid E. O'Brien 	return r;
4652a55deb1SDavid E. O'Brien }
4662a55deb1SDavid E. O'Brien 
4672a55deb1SDavid E. O'Brien Cell *getnf(Node **a, int n)	/* get NF */
4682a55deb1SDavid E. O'Brien {
469f39dd6a9SWarner Losh 	if (!donefld)
4702a55deb1SDavid E. O'Brien 		fldbld();
4712a55deb1SDavid E. O'Brien 	return (Cell *) a[0];
4722a55deb1SDavid E. O'Brien }
4732a55deb1SDavid E. O'Brien 
474f39dd6a9SWarner Losh static char *
475f39dd6a9SWarner Losh makearraystring(Node *p, const char *func)
4762a55deb1SDavid E. O'Brien {
4772a55deb1SDavid E. O'Brien 	char *buf;
4782a55deb1SDavid E. O'Brien 	int bufsz = recsize;
479f39dd6a9SWarner Losh 	size_t blen;
4802a55deb1SDavid E. O'Brien 
481f39dd6a9SWarner Losh 	if ((buf = (char *) malloc(bufsz)) == NULL) {
482f39dd6a9SWarner Losh 		FATAL("%s: out of memory", func);
483f39dd6a9SWarner Losh 	}
484f39dd6a9SWarner Losh 
485f39dd6a9SWarner Losh 	blen = 0;
486f39dd6a9SWarner Losh 	buf[blen] = '\0';
487f39dd6a9SWarner Losh 
488f39dd6a9SWarner Losh 	for (; p; p = p->nnext) {
489f39dd6a9SWarner Losh 		Cell *x = execute(p);	/* expr */
490f39dd6a9SWarner Losh 		char *s = getsval(x);
491f39dd6a9SWarner Losh 		size_t seplen = strlen(getsval(subseploc));
492f39dd6a9SWarner Losh 		size_t nsub = p->nnext ? seplen : 0;
493f39dd6a9SWarner Losh 		size_t slen = strlen(s);
494f39dd6a9SWarner Losh 		size_t tlen = blen + slen + nsub;
495f39dd6a9SWarner Losh 
496f39dd6a9SWarner Losh 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497f39dd6a9SWarner Losh 			FATAL("%s: out of memory %s[%s...]",
498f39dd6a9SWarner Losh 			    func, x->nval, buf);
499f39dd6a9SWarner Losh 		}
500f39dd6a9SWarner Losh 		memcpy(buf + blen, s, slen);
501f39dd6a9SWarner Losh 		if (nsub) {
502f39dd6a9SWarner Losh 			memcpy(buf + blen + slen, *SUBSEP, nsub);
503f39dd6a9SWarner Losh 		}
504f39dd6a9SWarner Losh 		buf[tlen] = '\0';
505f39dd6a9SWarner Losh 		blen = tlen;
506f39dd6a9SWarner Losh 		tempfree(x);
507f39dd6a9SWarner Losh 	}
508f39dd6a9SWarner Losh 	return buf;
509f39dd6a9SWarner Losh }
510f39dd6a9SWarner Losh 
511f39dd6a9SWarner Losh Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
512f39dd6a9SWarner Losh {
513f39dd6a9SWarner Losh 	Cell *x, *z;
514f39dd6a9SWarner Losh 	char *buf;
5152a55deb1SDavid E. O'Brien 
5162a55deb1SDavid E. O'Brien 	x = execute(a[0]);	/* Cell* for symbol table */
517f39dd6a9SWarner Losh 	buf = makearraystring(a[1], __func__);
5182a55deb1SDavid E. O'Brien 	if (!isarr(x)) {
519f39dd6a9SWarner Losh 		DPRINTF("making %s into an array\n", NN(x->nval));
5202a55deb1SDavid E. O'Brien 		if (freeable(x))
5212a55deb1SDavid E. O'Brien 			xfree(x->sval);
5222a55deb1SDavid E. O'Brien 		x->tval &= ~(STR|NUM|DONTFREE);
5232a55deb1SDavid E. O'Brien 		x->tval |= ARR;
5242a55deb1SDavid E. O'Brien 		x->sval = (char *) makesymtab(NSYMTAB);
5252a55deb1SDavid E. O'Brien 	}
5262a55deb1SDavid E. O'Brien 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
5272a55deb1SDavid E. O'Brien 	z->ctype = OCELL;
5282a55deb1SDavid E. O'Brien 	z->csub = CVAR;
5292a55deb1SDavid E. O'Brien 	tempfree(x);
5302a55deb1SDavid E. O'Brien 	free(buf);
5312a55deb1SDavid E. O'Brien 	return(z);
5322a55deb1SDavid E. O'Brien }
5332a55deb1SDavid E. O'Brien 
5342a55deb1SDavid E. O'Brien Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
5352a55deb1SDavid E. O'Brien {
536f39dd6a9SWarner Losh 	Cell *x;
5372a55deb1SDavid E. O'Brien 
5382a55deb1SDavid E. O'Brien 	x = execute(a[0]);	/* Cell* for symbol table */
539f39dd6a9SWarner Losh 	if (x == symtabloc) {
540f39dd6a9SWarner Losh 		FATAL("cannot delete SYMTAB or its elements");
541f39dd6a9SWarner Losh 	}
5422a55deb1SDavid E. O'Brien 	if (!isarr(x))
5432a55deb1SDavid E. O'Brien 		return True;
54410ce5b99SWarner Losh 	if (a[1] == NULL) {	/* delete the elements, not the table */
5452a55deb1SDavid E. O'Brien 		freesymtab(x);
5462a55deb1SDavid E. O'Brien 		x->tval &= ~STR;
5472a55deb1SDavid E. O'Brien 		x->tval |= ARR;
5482a55deb1SDavid E. O'Brien 		x->sval = (char *) makesymtab(NSYMTAB);
5492a55deb1SDavid E. O'Brien 	} else {
550f39dd6a9SWarner Losh 		char *buf = makearraystring(a[1], __func__);
5512a55deb1SDavid E. O'Brien 		freeelem(x, buf);
5522a55deb1SDavid E. O'Brien 		free(buf);
5532a55deb1SDavid E. O'Brien 	}
5542a55deb1SDavid E. O'Brien 	tempfree(x);
5552a55deb1SDavid E. O'Brien 	return True;
5562a55deb1SDavid E. O'Brien }
5572a55deb1SDavid E. O'Brien 
5582a55deb1SDavid E. O'Brien Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
5592a55deb1SDavid E. O'Brien {
560f39dd6a9SWarner Losh 	Cell *ap, *k;
5612a55deb1SDavid E. O'Brien 	char *buf;
5622a55deb1SDavid E. O'Brien 
5632a55deb1SDavid E. O'Brien 	ap = execute(a[1]);	/* array name */
5642a55deb1SDavid E. O'Brien 	if (!isarr(ap)) {
565f39dd6a9SWarner Losh 		DPRINTF("making %s into an array\n", ap->nval);
5662a55deb1SDavid E. O'Brien 		if (freeable(ap))
5672a55deb1SDavid E. O'Brien 			xfree(ap->sval);
5682a55deb1SDavid E. O'Brien 		ap->tval &= ~(STR|NUM|DONTFREE);
5692a55deb1SDavid E. O'Brien 		ap->tval |= ARR;
5702a55deb1SDavid E. O'Brien 		ap->sval = (char *) makesymtab(NSYMTAB);
5712a55deb1SDavid E. O'Brien 	}
572f39dd6a9SWarner Losh 	buf = makearraystring(a[0], __func__);
5732a55deb1SDavid E. O'Brien 	k = lookup(buf, (Array *) ap->sval);
5742a55deb1SDavid E. O'Brien 	tempfree(ap);
5752a55deb1SDavid E. O'Brien 	free(buf);
5762a55deb1SDavid E. O'Brien 	if (k == NULL)
5772a55deb1SDavid E. O'Brien 		return(False);
5782a55deb1SDavid E. O'Brien 	else
5792a55deb1SDavid E. O'Brien 		return(True);
5802a55deb1SDavid E. O'Brien }
5812a55deb1SDavid E. O'Brien 
5822a55deb1SDavid E. O'Brien 
/* ======== utf-8 code ========== */

/*
 * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
 * or utf-8.  u8_isutf tests whether a string starts with a valid
 * utf-8 sequence, and returns 0 if not (e.g., high bit set).
 * u8_nextlen returns length of next valid sequence, which is
 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
 * u8_strlen returns length of string in valid utf-8 sequences
 * and/or high-bit bytes.  Conversion functions go between byte
 * number and character number.
 *
 * In theory, this behaves the same as before for non-utf8 bytes.
 *
 * Limited checking! This is a potential security hole.
 */
599f32a6403SWarner Losh 
600f32a6403SWarner Losh /* is s the beginning of a valid utf-8 string? */
601f32a6403SWarner Losh /* return length 1..4 if yes, 0 if no */
602f32a6403SWarner Losh int u8_isutf(const char *s)
603f32a6403SWarner Losh {
604f32a6403SWarner Losh 	int n, ret;
605f32a6403SWarner Losh 	unsigned char c;
606f32a6403SWarner Losh 
607f32a6403SWarner Losh 	c = s[0];
608f32a6403SWarner Losh 	if (c < 128 || awk_mb_cur_max == 1)
609f32a6403SWarner Losh 		return 1; /* what if it's 0? */
610f32a6403SWarner Losh 
611f32a6403SWarner Losh 	n = strlen(s);
612f32a6403SWarner Losh 	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
613f32a6403SWarner Losh 		ret = 2; /* 110xxxxx 10xxxxxx */
614f32a6403SWarner Losh 	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
615f32a6403SWarner Losh 			 && (s[2] & 0xC0) == 0x80) {
616f32a6403SWarner Losh 		ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
617f32a6403SWarner Losh 	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
618f32a6403SWarner Losh 			 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
619f32a6403SWarner Losh 		ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
620f32a6403SWarner Losh 	} else {
621f32a6403SWarner Losh 		ret = 0;
622f32a6403SWarner Losh 	}
623f32a6403SWarner Losh 	return ret;
624f32a6403SWarner Losh }
625f32a6403SWarner Losh 
626f32a6403SWarner Losh /* Convert (prefix of) utf8 string to utf-32 rune. */
627f32a6403SWarner Losh /* Sets *rune to the value, returns the length. */
628f32a6403SWarner Losh /* No error checking: watch out. */
629f32a6403SWarner Losh int u8_rune(int *rune, const char *s)
630f32a6403SWarner Losh {
631f32a6403SWarner Losh 	int n, ret;
632f32a6403SWarner Losh 	unsigned char c;
633f32a6403SWarner Losh 
634f32a6403SWarner Losh 	c = s[0];
635f32a6403SWarner Losh 	if (c < 128 || awk_mb_cur_max == 1) {
636f32a6403SWarner Losh 		*rune = c;
637f32a6403SWarner Losh 		return 1;
638f32a6403SWarner Losh 	}
639f32a6403SWarner Losh 
640f32a6403SWarner Losh 	n = strlen(s);
641f32a6403SWarner Losh 	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
642f32a6403SWarner Losh 		*rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
643f32a6403SWarner Losh 		ret = 2;
644f32a6403SWarner Losh 	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
645f32a6403SWarner Losh 			  && (s[2] & 0xC0) == 0x80) {
646f32a6403SWarner Losh 		*rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
647f32a6403SWarner Losh 			/* 1110xxxx 10xxxxxx 10xxxxxx */
648f32a6403SWarner Losh 		ret = 3;
649f32a6403SWarner Losh 	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
650f32a6403SWarner Losh 			  && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
651f32a6403SWarner Losh 		*rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
652f32a6403SWarner Losh 			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
653f32a6403SWarner Losh 		ret = 4;
654f32a6403SWarner Losh 	} else {
655f32a6403SWarner Losh 		*rune = c;
656f32a6403SWarner Losh 		ret = 1;
657f32a6403SWarner Losh 	}
658f32a6403SWarner Losh 	return ret; /* returns one byte if sequence doesn't look like utf */
659f32a6403SWarner Losh }
660f32a6403SWarner Losh 
/*
 * Length in bytes of the next item in s: whatever u8_isutf() reports
 * (1..4), or 1 when u8_isutf() says the bytes are not valid utf-8.
 * The result is therefore never 0.
 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	return seqlen == 0 ? 1 : seqlen;
}
671f32a6403SWarner Losh 
/*
 * Return the number of items in s, where an item is one utf-8
 * sequence as measured by u8_nextlen() or one single non-utf byte.
 *
 * The string is walked up to its NUL terminator; u8_nextlen() returns
 * at least 1 and only counts continuation bytes that are present (and
 * hence non-NUL), so the walk cannot step over the terminator.
 */
int u8_strlen(const char *s)
{
	int totlen = 0;

	while (*s != '\0') {
		s += u8_nextlen(s);
		totlen++;
	}
	return totlen;
}
693f32a6403SWarner Losh 
/*
 * Convert utf-8 char number in a string to its byte offset.
 *
 * charnum is a 0-origin character index; the result is the byte
 * offset at which that character starts.  A charnum <= 0 yields 0.
 * If charnum is larger than the number of characters in s, the walk
 * stops at the terminating NUL and the byte length of s is returned,
 * so the result is always a valid index into s.
 */
int u8_char2byte(const char *s, int charnum)
{
	int bytenum = 0;

	while (charnum > 0 && s[bytenum] != '\0') {
		bytenum += u8_nextlen(s + bytenum);
		charnum--;
	}
	return bytenum;
}
708f32a6403SWarner Losh 
/*
 * Convert byte offset in s to the utf-8 char number that starts there.
 *
 * The result is 1-origin: it is the count of items (as measured by
 * u8_nextlen) that begin at byte offsets 0..bytenum, so offset 0 maps
 * to 1.  A negative bytenum gives 0, which matches start==0 meaning
 * "no match".  A bytenum beyond the terminating NUL gives -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int off, count;
	int nbytes = strlen(s);

	if (bytenum > nbytes)
		return -1; /* ??? */

	count = 0;
	off = 0;
	while (off <= bytenum) {
		off += u8_nextlen(s + off);
		count++;
	}
	return count;
}
726f32a6403SWarner Losh 
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	Bit1    = 7,
	Bitx    = 6,
	Bit2    = 5,
	Bit3    = 4,
	Bit4    = 3,
	Bit5    = 2,

	T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
	Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
	T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
	T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
	T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */
	T5      = ((1<<(Bit5+1))-1) ^ 0xFF,     /* 1111 1000 */

	Rune1   = (1<<(Bit1+0*Bitx))-1,	 	/* 0000 0000 0000 0000 0111 1111 */
	Rune2   = (1<<(Bit2+1*Bitx))-1,	 	/* 0000 0000 0000 0111 1111 1111 */
	Rune3   = (1<<(Bit3+2*Bitx))-1,	 	/* 0000 0000 1111 1111 1111 1111 */
	Rune4   = (1<<(Bit4+3*Bitx))-1,	 	/* 0001 1111 1111 1111 1111 1111 */

	Maskx   = (1<<Bitx)-1,		  	/* 0011 1111 */
	Testx   = Maskx ^ 0xFF,		 	/* 1100 0000 */

};

/*
 * Encode code point c as utf-8 into str and return the number of
 * bytes written (1..4).  str is not NUL-terminated here and must have
 * room for 4 bytes.
 *
 * A c above Runemax is replaced by Runeerror before the length is
 * chosen, so the replacement is written in its shortest form rather
 * than as an overlong three-byte sequence.
 * Any c <= Rune1, including a negative c, takes the one-byte path and
 * is stored in str[0] as-is.
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* out of range: substitute first, then encode like any other rune */
	if (c > Runemax)
		c = Runeerror;

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 |  (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx |  (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-10FFFF => T4 Tx Tx Tx */
	str[0] = T4 |  (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx |  (c & Maskx);
	return 4;
}
790f32a6403SWarner Losh 
791f32a6403SWarner Losh 
792f32a6403SWarner Losh /* ========== end of utf8 code =========== */
793f32a6403SWarner Losh 
794f32a6403SWarner Losh 
795f32a6403SWarner Losh 
7962a55deb1SDavid E. O'Brien Cell *matchop(Node **a, int n)	/* ~ and match() */
7972a55deb1SDavid E. O'Brien {
798f32a6403SWarner Losh 	Cell *x, *y, *z;
7992a55deb1SDavid E. O'Brien 	char *s, *t;
8002a55deb1SDavid E. O'Brien 	int i;
801f32a6403SWarner Losh 	int cstart, cpatlen, len;
8022a55deb1SDavid E. O'Brien 	fa *pfa;
803813da98dSDavid E. O'Brien 	int (*mf)(fa *, const char *) = match, mode = 0;
8042a55deb1SDavid E. O'Brien 
8052a55deb1SDavid E. O'Brien 	if (n == MATCHFCN) {
8062a55deb1SDavid E. O'Brien 		mf = pmatch;
8072a55deb1SDavid E. O'Brien 		mode = 1;
8082a55deb1SDavid E. O'Brien 	}
8092a55deb1SDavid E. O'Brien 	x = execute(a[1]);	/* a[1] = target text */
8102a55deb1SDavid E. O'Brien 	s = getsval(x);
81110ce5b99SWarner Losh 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
8122a55deb1SDavid E. O'Brien 		i = (*mf)((fa *) a[2], s);
8132a55deb1SDavid E. O'Brien 	else {
8142a55deb1SDavid E. O'Brien 		y = execute(a[2]);	/* a[2] = regular expr */
8152a55deb1SDavid E. O'Brien 		t = getsval(y);
8162a55deb1SDavid E. O'Brien 		pfa = makedfa(t, mode);
8172a55deb1SDavid E. O'Brien 		i = (*mf)(pfa, s);
8182a55deb1SDavid E. O'Brien 		tempfree(y);
8192a55deb1SDavid E. O'Brien 	}
820f32a6403SWarner Losh 	z = x;
8212a55deb1SDavid E. O'Brien 	if (n == MATCHFCN) {
822f32a6403SWarner Losh 		int start = patbeg - s + 1; /* origin 1 */
823f32a6403SWarner Losh 		if (patlen < 0) {
824f32a6403SWarner Losh 			start = 0; /* not found */
825f32a6403SWarner Losh 		} else {
826f32a6403SWarner Losh 			cstart = u8_byte2char(s, start-1);
827f32a6403SWarner Losh 			cpatlen = 0;
828f32a6403SWarner Losh 			for (i = 0; i < patlen; i += len) {
829f32a6403SWarner Losh 				len = u8_nextlen(patbeg+i);
830f32a6403SWarner Losh 				cpatlen++;
831f32a6403SWarner Losh 			}
832f32a6403SWarner Losh 
833f32a6403SWarner Losh 			start = cstart;
834f32a6403SWarner Losh 			patlen = cpatlen;
835f32a6403SWarner Losh 		}
836f32a6403SWarner Losh 
8372a55deb1SDavid E. O'Brien 		setfval(rstartloc, (Awkfloat) start);
8382a55deb1SDavid E. O'Brien 		setfval(rlengthloc, (Awkfloat) patlen);
8392a55deb1SDavid E. O'Brien 		x = gettemp();
8402a55deb1SDavid E. O'Brien 		x->tval = NUM;
8412a55deb1SDavid E. O'Brien 		x->fval = start;
8422a55deb1SDavid E. O'Brien 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
843f32a6403SWarner Losh 		x = True;
8442a55deb1SDavid E. O'Brien 	else
845f32a6403SWarner Losh 		x = False;
846f32a6403SWarner Losh 
847f32a6403SWarner Losh 	tempfree(z);
848f32a6403SWarner Losh 	return x;
8492a55deb1SDavid E. O'Brien }
8502a55deb1SDavid E. O'Brien 
8512a55deb1SDavid E. O'Brien 
8522a55deb1SDavid E. O'Brien Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
8532a55deb1SDavid E. O'Brien {
8542a55deb1SDavid E. O'Brien 	Cell *x, *y;
8552a55deb1SDavid E. O'Brien 	int i;
8562a55deb1SDavid E. O'Brien 
8572a55deb1SDavid E. O'Brien 	x = execute(a[0]);
8582a55deb1SDavid E. O'Brien 	i = istrue(x);
8592a55deb1SDavid E. O'Brien 	tempfree(x);
8602a55deb1SDavid E. O'Brien 	switch (n) {
8612a55deb1SDavid E. O'Brien 	case BOR:
8622a55deb1SDavid E. O'Brien 		if (i) return(True);
8632a55deb1SDavid E. O'Brien 		y = execute(a[1]);
8642a55deb1SDavid E. O'Brien 		i = istrue(y);
8652a55deb1SDavid E. O'Brien 		tempfree(y);
8662a55deb1SDavid E. O'Brien 		if (i) return(True);
8672a55deb1SDavid E. O'Brien 		else return(False);
8682a55deb1SDavid E. O'Brien 	case AND:
8692a55deb1SDavid E. O'Brien 		if ( !i ) return(False);
8702a55deb1SDavid E. O'Brien 		y = execute(a[1]);
8712a55deb1SDavid E. O'Brien 		i = istrue(y);
8722a55deb1SDavid E. O'Brien 		tempfree(y);
8732a55deb1SDavid E. O'Brien 		if (i) return(True);
8742a55deb1SDavid E. O'Brien 		else return(False);
8752a55deb1SDavid E. O'Brien 	case NOT:
8762a55deb1SDavid E. O'Brien 		if (i) return(False);
8772a55deb1SDavid E. O'Brien 		else return(True);
8782a55deb1SDavid E. O'Brien 	default:	/* can't happen */
8792a55deb1SDavid E. O'Brien 		FATAL("unknown boolean operator %d", n);
8802a55deb1SDavid E. O'Brien 	}
8812a55deb1SDavid E. O'Brien 	return 0;	/*NOTREACHED*/
8822a55deb1SDavid E. O'Brien }
8832a55deb1SDavid E. O'Brien 
8842a55deb1SDavid E. O'Brien Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
8852a55deb1SDavid E. O'Brien {
8862a55deb1SDavid E. O'Brien 	int i;
8872a55deb1SDavid E. O'Brien 	Cell *x, *y;
8882a55deb1SDavid E. O'Brien 	Awkfloat j;
889f32a6403SWarner Losh 	bool x_is_nan, y_is_nan;
8902a55deb1SDavid E. O'Brien 
8912a55deb1SDavid E. O'Brien 	x = execute(a[0]);
8922a55deb1SDavid E. O'Brien 	y = execute(a[1]);
893f32a6403SWarner Losh 	x_is_nan = isnan(x->fval);
894f32a6403SWarner Losh 	y_is_nan = isnan(y->fval);
8952a55deb1SDavid E. O'Brien 	if (x->tval&NUM && y->tval&NUM) {
896f32a6403SWarner Losh 		if ((x_is_nan || y_is_nan) && n != NE)
897f32a6403SWarner Losh 			return(False);
8982a55deb1SDavid E. O'Brien 		j = x->fval - y->fval;
8992a55deb1SDavid E. O'Brien 		i = j<0? -1: (j>0? 1: 0);
9002a55deb1SDavid E. O'Brien 	} else {
901628bd30aSWarner Losh 		i = strcmp(getsval(x), getsval(y));
9022a55deb1SDavid E. O'Brien 	}
9032a55deb1SDavid E. O'Brien 	tempfree(x);
9042a55deb1SDavid E. O'Brien 	tempfree(y);
9052a55deb1SDavid E. O'Brien 	switch (n) {
9062a55deb1SDavid E. O'Brien 	case LT:	if (i<0) return(True);
9072a55deb1SDavid E. O'Brien 			else return(False);
9082a55deb1SDavid E. O'Brien 	case LE:	if (i<=0) return(True);
9092a55deb1SDavid E. O'Brien 			else return(False);
910f32a6403SWarner Losh 	case NE:	if (x_is_nan && y_is_nan) return(True);
911f32a6403SWarner Losh 			else if (i!=0) return(True);
9122a55deb1SDavid E. O'Brien 			else return(False);
9132a55deb1SDavid E. O'Brien 	case EQ:	if (i == 0) return(True);
9142a55deb1SDavid E. O'Brien 			else return(False);
9152a55deb1SDavid E. O'Brien 	case GE:	if (i>=0) return(True);
9162a55deb1SDavid E. O'Brien 			else return(False);
9172a55deb1SDavid E. O'Brien 	case GT:	if (i>0) return(True);
9182a55deb1SDavid E. O'Brien 			else return(False);
9192a55deb1SDavid E. O'Brien 	default:	/* can't happen */
9202a55deb1SDavid E. O'Brien 		FATAL("unknown relational operator %d", n);
9212a55deb1SDavid E. O'Brien 	}
9222a55deb1SDavid E. O'Brien 	return 0;	/*NOTREACHED*/
9232a55deb1SDavid E. O'Brien }
9242a55deb1SDavid E. O'Brien 
9252a55deb1SDavid E. O'Brien void tfree(Cell *a)	/* free a tempcell */
9262a55deb1SDavid E. O'Brien {
9272a55deb1SDavid E. O'Brien 	if (freeable(a)) {
928f39dd6a9SWarner Losh 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
9292a55deb1SDavid E. O'Brien 		xfree(a->sval);
9302a55deb1SDavid E. O'Brien 	}
9312a55deb1SDavid E. O'Brien 	if (a == tmps)
9322a55deb1SDavid E. O'Brien 		FATAL("tempcell list is curdled");
9332a55deb1SDavid E. O'Brien 	a->cnext = tmps;
9342a55deb1SDavid E. O'Brien 	tmps = a;
9352a55deb1SDavid E. O'Brien }
9362a55deb1SDavid E. O'Brien 
9372a55deb1SDavid E. O'Brien Cell *gettemp(void)	/* get a tempcell */
9382a55deb1SDavid E. O'Brien {	int i;
9392a55deb1SDavid E. O'Brien 	Cell *x;
9402a55deb1SDavid E. O'Brien 
9412a55deb1SDavid E. O'Brien 	if (!tmps) {
942f39dd6a9SWarner Losh 		tmps = (Cell *) calloc(100, sizeof(*tmps));
9432a55deb1SDavid E. O'Brien 		if (!tmps)
9442a55deb1SDavid E. O'Brien 			FATAL("out of space for temporaries");
9452a55deb1SDavid E. O'Brien 		for (i = 1; i < 100; i++)
9462a55deb1SDavid E. O'Brien 			tmps[i-1].cnext = &tmps[i];
94710ce5b99SWarner Losh 		tmps[i-1].cnext = NULL;
9482a55deb1SDavid E. O'Brien 	}
9492a55deb1SDavid E. O'Brien 	x = tmps;
9502a55deb1SDavid E. O'Brien 	tmps = x->cnext;
9512a55deb1SDavid E. O'Brien 	*x = tempcell;
9522a55deb1SDavid E. O'Brien 	return(x);
9532a55deb1SDavid E. O'Brien }
9542a55deb1SDavid E. O'Brien 
9552a55deb1SDavid E. O'Brien Cell *indirect(Node **a, int n)	/* $( a[0] ) */
9562a55deb1SDavid E. O'Brien {
957c263f9bfSRuslan Ermilov 	Awkfloat val;
9582a55deb1SDavid E. O'Brien 	Cell *x;
9592a55deb1SDavid E. O'Brien 	int m;
9602a55deb1SDavid E. O'Brien 	char *s;
9612a55deb1SDavid E. O'Brien 
9622a55deb1SDavid E. O'Brien 	x = execute(a[0]);
963c263f9bfSRuslan Ermilov 	val = getfval(x);	/* freebsd: defend against super large field numbers */
964c263f9bfSRuslan Ermilov 	if ((Awkfloat)INT_MAX < val)
965c263f9bfSRuslan Ermilov 		FATAL("trying to access out of range field %s", x->nval);
966c263f9bfSRuslan Ermilov 	m = (int) val;
967f39dd6a9SWarner Losh 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
9682a55deb1SDavid E. O'Brien 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
9692a55deb1SDavid E. O'Brien 		/* BUG: can x->nval ever be null??? */
9702a55deb1SDavid E. O'Brien 	tempfree(x);
9712a55deb1SDavid E. O'Brien 	x = fieldadr(m);
9722a55deb1SDavid E. O'Brien 	x->ctype = OCELL;	/* BUG?  why are these needed? */
9732a55deb1SDavid E. O'Brien 	x->csub = CFLD;
9742a55deb1SDavid E. O'Brien 	return(x);
9752a55deb1SDavid E. O'Brien }
9762a55deb1SDavid E. O'Brien 
9772a55deb1SDavid E. O'Brien Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
9782a55deb1SDavid E. O'Brien {
9792a55deb1SDavid E. O'Brien 	int k, m, n;
980f32a6403SWarner Losh 	int mb, nb;
9812a55deb1SDavid E. O'Brien 	char *s;
9822a55deb1SDavid E. O'Brien 	int temp;
98310ce5b99SWarner Losh 	Cell *x, *y, *z = NULL;
9842a55deb1SDavid E. O'Brien 
9852a55deb1SDavid E. O'Brien 	x = execute(a[0]);
9862a55deb1SDavid E. O'Brien 	y = execute(a[1]);
98710ce5b99SWarner Losh 	if (a[2] != NULL)
9882a55deb1SDavid E. O'Brien 		z = execute(a[2]);
9892a55deb1SDavid E. O'Brien 	s = getsval(x);
990f32a6403SWarner Losh 	k = u8_strlen(s) + 1;
9912a55deb1SDavid E. O'Brien 	if (k <= 1) {
9922a55deb1SDavid E. O'Brien 		tempfree(x);
9932a55deb1SDavid E. O'Brien 		tempfree(y);
99410ce5b99SWarner Losh 		if (a[2] != NULL) {
9952a55deb1SDavid E. O'Brien 			tempfree(z);
9962a55deb1SDavid E. O'Brien 		}
9972a55deb1SDavid E. O'Brien 		x = gettemp();
9982a55deb1SDavid E. O'Brien 		setsval(x, "");
9992a55deb1SDavid E. O'Brien 		return(x);
10002a55deb1SDavid E. O'Brien 	}
10012a55deb1SDavid E. O'Brien 	m = (int) getfval(y);
10022a55deb1SDavid E. O'Brien 	if (m <= 0)
10032a55deb1SDavid E. O'Brien 		m = 1;
10042a55deb1SDavid E. O'Brien 	else if (m > k)
10052a55deb1SDavid E. O'Brien 		m = k;
10062a55deb1SDavid E. O'Brien 	tempfree(y);
100710ce5b99SWarner Losh 	if (a[2] != NULL) {
10082a55deb1SDavid E. O'Brien 		n = (int) getfval(z);
10092a55deb1SDavid E. O'Brien 		tempfree(z);
10102a55deb1SDavid E. O'Brien 	} else
10112a55deb1SDavid E. O'Brien 		n = k - 1;
10122a55deb1SDavid E. O'Brien 	if (n < 0)
10132a55deb1SDavid E. O'Brien 		n = 0;
10142a55deb1SDavid E. O'Brien 	else if (n > k - m)
10152a55deb1SDavid E. O'Brien 		n = k - m;
1016f32a6403SWarner Losh 	/* m is start, n is length from there */
1017f39dd6a9SWarner Losh 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
10182a55deb1SDavid E. O'Brien 	y = gettemp();
1019f32a6403SWarner Losh 	mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1020f32a6403SWarner Losh 	nb = u8_char2byte(s, m-1+n);  /* byte offset of end+1 char in s */
1021f32a6403SWarner Losh 
1022f32a6403SWarner Losh 	temp = s[nb];	/* with thanks to John Linderman */
1023f32a6403SWarner Losh 	s[nb] = '\0';
1024f32a6403SWarner Losh 	setsval(y, s + mb);
1025f32a6403SWarner Losh 	s[nb] = temp;
10262a55deb1SDavid E. O'Brien 	tempfree(x);
10272a55deb1SDavid E. O'Brien 	return(y);
10282a55deb1SDavid E. O'Brien }
10292a55deb1SDavid E. O'Brien 
10302a55deb1SDavid E. O'Brien Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
10312a55deb1SDavid E. O'Brien {
10322a55deb1SDavid E. O'Brien 	Cell *x, *y, *z;
10332a55deb1SDavid E. O'Brien 	char *s1, *s2, *p1, *p2, *q;
10342a55deb1SDavid E. O'Brien 	Awkfloat v = 0.0;
10352a55deb1SDavid E. O'Brien 
10362a55deb1SDavid E. O'Brien 	x = execute(a[0]);
10372a55deb1SDavid E. O'Brien 	s1 = getsval(x);
10382a55deb1SDavid E. O'Brien 	y = execute(a[1]);
10392a55deb1SDavid E. O'Brien 	s2 = getsval(y);
10402a55deb1SDavid E. O'Brien 
10412a55deb1SDavid E. O'Brien 	z = gettemp();
10422a55deb1SDavid E. O'Brien 	for (p1 = s1; *p1 != '\0'; p1++) {
10432a55deb1SDavid E. O'Brien 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1044f39dd6a9SWarner Losh 			continue;
10452a55deb1SDavid E. O'Brien 		if (*p2 == '\0') {
1046f32a6403SWarner Losh 			/* v = (Awkfloat) (p1 - s1 + 1);	 origin 1 */
1047f32a6403SWarner Losh 
1048f32a6403SWarner Losh 		   /* should be a function: used in match() as well */
1049f32a6403SWarner Losh 			int i, len;
1050f32a6403SWarner Losh 			v = 0;
1051f32a6403SWarner Losh 			for (i = 0; i < p1-s1+1; i += len) {
1052f32a6403SWarner Losh 				len = u8_nextlen(s1+i);
1053f32a6403SWarner Losh 				v++;
1054f32a6403SWarner Losh 			}
10552a55deb1SDavid E. O'Brien 			break;
10562a55deb1SDavid E. O'Brien 		}
10572a55deb1SDavid E. O'Brien 	}
10582a55deb1SDavid E. O'Brien 	tempfree(x);
10592a55deb1SDavid E. O'Brien 	tempfree(y);
10602a55deb1SDavid E. O'Brien 	setfval(z, v);
10612a55deb1SDavid E. O'Brien 	return(z);
10622a55deb1SDavid E. O'Brien }
10632a55deb1SDavid E. O'Brien 
/*
 * Return 1 if s contains any utf-8 (2 bytes or more) character as
 * measured by u8_nextlen(), 0 if every item in s is a single byte.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int step = u8_nextlen(s);

		if (step > 1)
			return 1;
		s += step;
	}
	return 0;
}
1075f32a6403SWarner Losh 
10762a55deb1SDavid E. O'Brien #define	MAXNUMSIZE	50
10772a55deb1SDavid E. O'Brien 
1078813da98dSDavid E. O'Brien int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
10792a55deb1SDavid E. O'Brien {
10802a55deb1SDavid E. O'Brien 	char *fmt;
1081813da98dSDavid E. O'Brien 	char *p, *t;
1082813da98dSDavid E. O'Brien 	const char *os;
10832a55deb1SDavid E. O'Brien 	Cell *x;
10842a55deb1SDavid E. O'Brien 	int flag = 0, n;
10852a55deb1SDavid E. O'Brien 	int fmtwd; /* format width */
10862a55deb1SDavid E. O'Brien 	int fmtsz = recsize;
10872a55deb1SDavid E. O'Brien 	char *buf = *pbuf;
10882a55deb1SDavid E. O'Brien 	int bufsize = *pbufsize;
1089f39dd6a9SWarner Losh #define FMTSZ(a)   (fmtsz - ((a) - fmt))
1090f39dd6a9SWarner Losh #define BUFSZ(a)   (bufsize - ((a) - buf))
10912a55deb1SDavid E. O'Brien 
1092f39dd6a9SWarner Losh 	static bool first = true;
1093f39dd6a9SWarner Losh 	static bool have_a_format = false;
1094b5253557SWarner Losh 
1095b5253557SWarner Losh 	if (first) {
1096f39dd6a9SWarner Losh 		char xbuf[100];
1097b5253557SWarner Losh 
1098f39dd6a9SWarner Losh 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1099f39dd6a9SWarner Losh 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1100f39dd6a9SWarner Losh 		first = false;
1101b5253557SWarner Losh 	}
1102b5253557SWarner Losh 
11032a55deb1SDavid E. O'Brien 	os = s;
11042a55deb1SDavid E. O'Brien 	p = buf;
11052a55deb1SDavid E. O'Brien 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
11062a55deb1SDavid E. O'Brien 		FATAL("out of memory in format()");
11072a55deb1SDavid E. O'Brien 	while (*s) {
1108addad6afSRong-En Fan 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
11092a55deb1SDavid E. O'Brien 		if (*s != '%') {
11102a55deb1SDavid E. O'Brien 			*p++ = *s++;
11112a55deb1SDavid E. O'Brien 			continue;
11122a55deb1SDavid E. O'Brien 		}
11132a55deb1SDavid E. O'Brien 		if (*(s+1) == '%') {
11142a55deb1SDavid E. O'Brien 			*p++ = '%';
11152a55deb1SDavid E. O'Brien 			s += 2;
11162a55deb1SDavid E. O'Brien 			continue;
11172a55deb1SDavid E. O'Brien 		}
11182a55deb1SDavid E. O'Brien 		fmtwd = atoi(s+1);
11192a55deb1SDavid E. O'Brien 		if (fmtwd < 0)
11202a55deb1SDavid E. O'Brien 			fmtwd = -fmtwd;
1121addad6afSRong-En Fan 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
11222a55deb1SDavid E. O'Brien 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
1123addad6afSRong-En Fan 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
11242a55deb1SDavid E. O'Brien 				FATAL("format item %.30s... ran format() out of memory", os);
1125f39dd6a9SWarner Losh 			/* Ignore size specifiers */
1126f39dd6a9SWarner Losh 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
1127f39dd6a9SWarner Losh 				t--;
1128f39dd6a9SWarner Losh 				continue;
1129f39dd6a9SWarner Losh 			}
1130f39dd6a9SWarner Losh 			if (isalpha((uschar)*s))
1131f39dd6a9SWarner Losh 				break;
1132b5253557SWarner Losh 			if (*s == '$') {
1133b5253557SWarner Losh 				FATAL("'$' not permitted in awk formats");
1134b5253557SWarner Losh 			}
11352a55deb1SDavid E. O'Brien 			if (*s == '*') {
1136b5253557SWarner Losh 				if (a == NULL) {
1137b5253557SWarner Losh 					FATAL("not enough args in printf(%s)", os);
1138b5253557SWarner Losh 				}
11392a55deb1SDavid E. O'Brien 				x = execute(a);
11402a55deb1SDavid E. O'Brien 				a = a->nnext;
1141f39dd6a9SWarner Losh 				snprintf(t - 1, FMTSZ(t - 1),
1142f39dd6a9SWarner Losh 				    "%d", fmtwd=(int) getfval(x));
11432a55deb1SDavid E. O'Brien 				if (fmtwd < 0)
11442a55deb1SDavid E. O'Brien 					fmtwd = -fmtwd;
11452a55deb1SDavid E. O'Brien 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
11462a55deb1SDavid E. O'Brien 				t = fmt + strlen(fmt);
11472a55deb1SDavid E. O'Brien 				tempfree(x);
11482a55deb1SDavid E. O'Brien 			}
11492a55deb1SDavid E. O'Brien 		}
11502a55deb1SDavid E. O'Brien 		*t = '\0';
11512a55deb1SDavid E. O'Brien 		if (fmtwd < 0)
11522a55deb1SDavid E. O'Brien 			fmtwd = -fmtwd;
1153addad6afSRong-En Fan 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
11542a55deb1SDavid E. O'Brien 		switch (*s) {
1155b5253557SWarner Losh 		case 'a': case 'A':
1156b5253557SWarner Losh 			if (have_a_format)
1157b5253557SWarner Losh 				flag = *s;
1158b5253557SWarner Losh 			else
1159b5253557SWarner Losh 				flag = 'f';
1160b5253557SWarner Losh 			break;
11612a55deb1SDavid E. O'Brien 		case 'f': case 'e': case 'g': case 'E': case 'G':
1162813da98dSDavid E. O'Brien 			flag = 'f';
11632a55deb1SDavid E. O'Brien 			break;
1164f39dd6a9SWarner Losh 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1165f39dd6a9SWarner Losh 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1166f39dd6a9SWarner Losh 			*(t-1) = 'j';
1167f39dd6a9SWarner Losh 			*t = *s;
11682a55deb1SDavid E. O'Brien 			*++t = '\0';
11692a55deb1SDavid E. O'Brien 			break;
11702a55deb1SDavid E. O'Brien 		case 's':
1171813da98dSDavid E. O'Brien 			flag = 's';
11722a55deb1SDavid E. O'Brien 			break;
11732a55deb1SDavid E. O'Brien 		case 'c':
1174813da98dSDavid E. O'Brien 			flag = 'c';
11752a55deb1SDavid E. O'Brien 			break;
11762a55deb1SDavid E. O'Brien 		default:
11772a55deb1SDavid E. O'Brien 			WARNING("weird printf conversion %s", fmt);
1178813da98dSDavid E. O'Brien 			flag = '?';
11792a55deb1SDavid E. O'Brien 			break;
11802a55deb1SDavid E. O'Brien 		}
11812a55deb1SDavid E. O'Brien 		if (a == NULL)
11822a55deb1SDavid E. O'Brien 			FATAL("not enough args in printf(%s)", os);
11832a55deb1SDavid E. O'Brien 		x = execute(a);
11842a55deb1SDavid E. O'Brien 		a = a->nnext;
11852a55deb1SDavid E. O'Brien 		n = MAXNUMSIZE;
11862a55deb1SDavid E. O'Brien 		if (fmtwd > n)
11872a55deb1SDavid E. O'Brien 			n = fmtwd;
1188addad6afSRong-En Fan 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
11892a55deb1SDavid E. O'Brien 		switch (flag) {
1190f32a6403SWarner Losh 		case '?':
1191f32a6403SWarner Losh 			snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
11922a55deb1SDavid E. O'Brien 			t = getsval(x);
11932a55deb1SDavid E. O'Brien 			n = strlen(t);
11942a55deb1SDavid E. O'Brien 			if (fmtwd > n)
11952a55deb1SDavid E. O'Brien 				n = fmtwd;
1196addad6afSRong-En Fan 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
11972a55deb1SDavid E. O'Brien 			p += strlen(p);
1198f39dd6a9SWarner Losh 			snprintf(p, BUFSZ(p), "%s", t);
11992a55deb1SDavid E. O'Brien 			break;
1200b5253557SWarner Losh 		case 'a':
1201b5253557SWarner Losh 		case 'A':
1202f39dd6a9SWarner Losh 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1203f39dd6a9SWarner Losh 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1204f39dd6a9SWarner Losh 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1205f32a6403SWarner Losh 
1206f32a6403SWarner Losh 		case 's': {
12072a55deb1SDavid E. O'Brien 			t = getsval(x);
12082a55deb1SDavid E. O'Brien 			n = strlen(t);
1209f32a6403SWarner Losh 			/* if simple format or no utf-8 in the string, sprintf works */
1210f32a6403SWarner Losh 			if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
12112a55deb1SDavid E. O'Brien 				if (fmtwd > n)
12122a55deb1SDavid E. O'Brien 					n = fmtwd;
1213addad6afSRong-En Fan 				if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1214f32a6403SWarner Losh 					FATAL("huge string/format (%d chars) in printf %.30s..." \
1215f32a6403SWarner Losh 						" ran format() out of memory", n, t);
1216f39dd6a9SWarner Losh 				snprintf(p, BUFSZ(p), fmt, t);
12172a55deb1SDavid E. O'Brien 				break;
1218f32a6403SWarner Losh 			}
1219f32a6403SWarner Losh 
1220f32a6403SWarner Losh 			/* get here if string has utf-8 chars and fmt is not plain %s */
1221f32a6403SWarner Losh 			/* "%-w.ps", where -, w and .p are all optional */
1222f32a6403SWarner Losh 			/* '0' before the w is a flag character */
1223f32a6403SWarner Losh 			/* fmt points at % */
1224f32a6403SWarner Losh 			int ljust = 0, wid = 0, prec = n, pad = 0;
1225f32a6403SWarner Losh 			char *f = fmt+1;
1226f32a6403SWarner Losh 			if (f[0] == '-') {
1227f32a6403SWarner Losh 				ljust = 1;
1228f32a6403SWarner Losh 				f++;
1229f32a6403SWarner Losh 			}
1230f32a6403SWarner Losh 			// flags '0' and '+' are recognized but skipped
1231f32a6403SWarner Losh 			if (f[0] == '0') {
1232f32a6403SWarner Losh 				f++;
1233f32a6403SWarner Losh 				if (f[0] == '+')
1234f32a6403SWarner Losh 					f++;
1235f32a6403SWarner Losh 			}
1236f32a6403SWarner Losh 			if (f[0] == '+') {
1237f32a6403SWarner Losh 				f++;
1238f32a6403SWarner Losh 				if (f[0] == '0')
1239f32a6403SWarner Losh 					f++;
1240f32a6403SWarner Losh 			}
1241f32a6403SWarner Losh 			if (isdigit(f[0])) { /* there is a wid */
1242f32a6403SWarner Losh 				wid = strtol(f, &f, 10);
1243f32a6403SWarner Losh 			}
1244f32a6403SWarner Losh 			if (f[0] == '.') { /* there is a .prec */
1245f32a6403SWarner Losh 				prec = strtol(++f, &f, 10);
1246f32a6403SWarner Losh 			}
1247f32a6403SWarner Losh 			if (prec > u8_strlen(t))
1248f32a6403SWarner Losh 				prec = u8_strlen(t);
1249f32a6403SWarner Losh 			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1250f32a6403SWarner Losh 			int i, k, n;
1251f32a6403SWarner Losh 
1252f32a6403SWarner Losh 			if (ljust) { // print prec chars from t, then pad blanks
1253f32a6403SWarner Losh 				n = u8_char2byte(t, prec);
1254f32a6403SWarner Losh 				for (k = 0; k < n; k++) {
1255f32a6403SWarner Losh 					//putchar(t[k]);
1256f32a6403SWarner Losh 					*p++ = t[k];
1257f32a6403SWarner Losh 				}
1258f32a6403SWarner Losh 				for (i = 0; i < pad; i++) {
1259f32a6403SWarner Losh 					//printf(" ");
1260f32a6403SWarner Losh 					*p++ = ' ';
1261f32a6403SWarner Losh 				}
1262f32a6403SWarner Losh 			} else { // print pad blanks, then prec chars from t
1263f32a6403SWarner Losh 				for (i = 0; i < pad; i++) {
1264f32a6403SWarner Losh 					//printf(" ");
1265f32a6403SWarner Losh 					*p++ = ' ';
1266f32a6403SWarner Losh 				}
1267f32a6403SWarner Losh 				n = u8_char2byte(t, prec);
1268f32a6403SWarner Losh 				for (k = 0; k < n; k++) {
1269f32a6403SWarner Losh 					//putchar(t[k]);
1270f32a6403SWarner Losh 					*p++ = t[k];
1271f32a6403SWarner Losh 				}
1272f32a6403SWarner Losh 			}
1273f32a6403SWarner Losh 			*p = 0;
1274f32a6403SWarner Losh 			break;
1275f32a6403SWarner Losh 		}
1276f32a6403SWarner Losh 
1277f32a6403SWarner Losh                case 'c': {
1278f32a6403SWarner Losh 			/*
1279f32a6403SWarner Losh 			 * If a numeric value is given, awk should just turn
1280f32a6403SWarner Losh 			 * it into a character and print it:
1281f32a6403SWarner Losh 			 *      BEGIN { printf("%c\n", 65) }
1282f32a6403SWarner Losh 			 * prints "A".
1283f32a6403SWarner Losh 			 *
1284f32a6403SWarner Losh 			 * But what if the numeric value is > 128 and
1285f32a6403SWarner Losh 			 * represents a valid Unicode code point?!? We do
1286f32a6403SWarner Losh 			 * our best to convert it back into UTF-8. If we
1287f32a6403SWarner Losh 			 * can't, we output the encoding of the Unicode
1288f32a6403SWarner Losh 			 * "invalid character", 0xFFFD.
1289f32a6403SWarner Losh 			 */
12902a55deb1SDavid E. O'Brien 			if (isnum(x)) {
1291f32a6403SWarner Losh 				int charval = (int) getfval(x);
1292f32a6403SWarner Losh 
1293f32a6403SWarner Losh 				if (charval != 0) {
1294f32a6403SWarner Losh 					if (charval < 128 || awk_mb_cur_max == 1)
1295f32a6403SWarner Losh 						snprintf(p, BUFSZ(p), fmt, charval);
1296813da98dSDavid E. O'Brien 					else {
1297f32a6403SWarner Losh 						// possible unicode character
1298f32a6403SWarner Losh 						size_t count;
1299f32a6403SWarner Losh 						char *bs = wide_char_to_byte_str(charval, &count);
1300f32a6403SWarner Losh 
1301f32a6403SWarner Losh 						if (bs == NULL)	{ // invalid character
1302f32a6403SWarner Losh 							// use unicode invalid character, 0xFFFD
1303f32a6403SWarner Losh 							static char invalid_char[] = "\357\277\275";
1304f32a6403SWarner Losh 							bs = invalid_char;
1305f32a6403SWarner Losh 							count = 3;
1306f32a6403SWarner Losh 						}
1307f32a6403SWarner Losh 						t = bs;
1308f32a6403SWarner Losh 						n = count;
1309f32a6403SWarner Losh 						goto format_percent_c;
1310f32a6403SWarner Losh 					}
1311f32a6403SWarner Losh 				} else {
1312813da98dSDavid E. O'Brien 					*p++ = '\0'; /* explicit null byte */
1313813da98dSDavid E. O'Brien 					*p = '\0';   /* next output will start here */
1314813da98dSDavid E. O'Brien 				}
1315f32a6403SWarner Losh 				break;
1316f32a6403SWarner Losh 			}
1317f32a6403SWarner Losh 			t = getsval(x);
1318f32a6403SWarner Losh 			n = u8_nextlen(t);
1319f32a6403SWarner Losh 		format_percent_c:
1320f32a6403SWarner Losh 			if (n < 2) { /* not utf8 */
1321f39dd6a9SWarner Losh 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
13222a55deb1SDavid E. O'Brien 				break;
1323f32a6403SWarner Losh 			}
1324f32a6403SWarner Losh 
1325f32a6403SWarner Losh 			// utf8 character, almost same song and dance as for %s
1326f32a6403SWarner Losh 			int ljust = 0, wid = 0, prec = n, pad = 0;
1327f32a6403SWarner Losh 			char *f = fmt+1;
1328f32a6403SWarner Losh 			if (f[0] == '-') {
1329f32a6403SWarner Losh 				ljust = 1;
1330f32a6403SWarner Losh 				f++;
1331f32a6403SWarner Losh 			}
1332f32a6403SWarner Losh 			// flags '0' and '+' are recognized but skipped
1333f32a6403SWarner Losh 			if (f[0] == '0') {
1334f32a6403SWarner Losh 				f++;
1335f32a6403SWarner Losh 				if (f[0] == '+')
1336f32a6403SWarner Losh 					f++;
1337f32a6403SWarner Losh 			}
1338f32a6403SWarner Losh 			if (f[0] == '+') {
1339f32a6403SWarner Losh 				f++;
1340f32a6403SWarner Losh 				if (f[0] == '0')
1341f32a6403SWarner Losh 					f++;
1342f32a6403SWarner Losh 			}
1343f32a6403SWarner Losh 			if (isdigit(f[0])) { /* there is a wid */
1344f32a6403SWarner Losh 				wid = strtol(f, &f, 10);
1345f32a6403SWarner Losh 			}
1346f32a6403SWarner Losh 			if (f[0] == '.') { /* there is a .prec */
1347f32a6403SWarner Losh 				prec = strtol(++f, &f, 10);
1348f32a6403SWarner Losh 			}
1349f32a6403SWarner Losh 			if (prec > 1)           // %c --> only one character
1350f32a6403SWarner Losh 				prec = 1;
1351f32a6403SWarner Losh 			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1352f32a6403SWarner Losh 			int i;
1353f32a6403SWarner Losh 
1354f32a6403SWarner Losh 			if (ljust) { // print one char from t, then pad blanks
1355f32a6403SWarner Losh 				for (i = 0; i < n; i++)
1356f32a6403SWarner Losh 					*p++ = t[i];
1357f32a6403SWarner Losh 				for (i = 0; i < pad; i++) {
1358f32a6403SWarner Losh 					//printf(" ");
1359f32a6403SWarner Losh 					*p++ = ' ';
1360f32a6403SWarner Losh 				}
1361f32a6403SWarner Losh 			} else { // print pad blanks, then prec chars from t
1362f32a6403SWarner Losh 				for (i = 0; i < pad; i++) {
1363f32a6403SWarner Losh 					//printf(" ");
1364f32a6403SWarner Losh 					*p++ = ' ';
1365f32a6403SWarner Losh 				}
1366f32a6403SWarner Losh 				for (i = 0; i < n; i++)
1367f32a6403SWarner Losh 					*p++ = t[i];
1368f32a6403SWarner Losh 			}
1369f32a6403SWarner Losh 			*p = 0;
1370f32a6403SWarner Losh 			break;
1371f32a6403SWarner Losh 		}
1372813da98dSDavid E. O'Brien 		default:
1373813da98dSDavid E. O'Brien 			FATAL("can't happen: bad conversion %c in format()", flag);
13742a55deb1SDavid E. O'Brien 		}
1375f32a6403SWarner Losh 
13762a55deb1SDavid E. O'Brien 		tempfree(x);
13772a55deb1SDavid E. O'Brien 		p += strlen(p);
13782a55deb1SDavid E. O'Brien 		s++;
13792a55deb1SDavid E. O'Brien 	}
13802a55deb1SDavid E. O'Brien 	*p = '\0';
13812a55deb1SDavid E. O'Brien 	free(fmt);
1382f32a6403SWarner Losh 	for ( ; a; a = a->nnext) {		/* evaluate any remaining args */
1383f32a6403SWarner Losh 		x = execute(a);
1384f32a6403SWarner Losh 		tempfree(x);
1385f32a6403SWarner Losh 	}
13862a55deb1SDavid E. O'Brien 	*pbuf = buf;
13872a55deb1SDavid E. O'Brien 	*pbufsize = bufsize;
13882a55deb1SDavid E. O'Brien 	return p - buf;
13892a55deb1SDavid E. O'Brien }
13902a55deb1SDavid E. O'Brien 
13912a55deb1SDavid E. O'Brien Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
13922a55deb1SDavid E. O'Brien {
13932a55deb1SDavid E. O'Brien 	Cell *x;
13942a55deb1SDavid E. O'Brien 	Node *y;
13952a55deb1SDavid E. O'Brien 	char *buf;
13962a55deb1SDavid E. O'Brien 	int bufsz=3*recsize;
13972a55deb1SDavid E. O'Brien 
13982a55deb1SDavid E. O'Brien 	if ((buf = (char *) malloc(bufsz)) == NULL)
13992a55deb1SDavid E. O'Brien 		FATAL("out of memory in awksprintf");
14002a55deb1SDavid E. O'Brien 	y = a[0]->nnext;
14012a55deb1SDavid E. O'Brien 	x = execute(a[0]);
14022a55deb1SDavid E. O'Brien 	if (format(&buf, &bufsz, getsval(x), y) == -1)
14032a55deb1SDavid E. O'Brien 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
14042a55deb1SDavid E. O'Brien 	tempfree(x);
14052a55deb1SDavid E. O'Brien 	x = gettemp();
14062a55deb1SDavid E. O'Brien 	x->sval = buf;
14072a55deb1SDavid E. O'Brien 	x->tval = STR;
14082a55deb1SDavid E. O'Brien 	return(x);
14092a55deb1SDavid E. O'Brien }
14102a55deb1SDavid E. O'Brien 
14112a55deb1SDavid E. O'Brien Cell *awkprintf(Node **a, int n)		/* printf */
14122a55deb1SDavid E. O'Brien {	/* a[0] is list of args, starting with format string */
14132a55deb1SDavid E. O'Brien 	/* a[1] is redirection operator, a[2] is redirection file */
14142a55deb1SDavid E. O'Brien 	FILE *fp;
14152a55deb1SDavid E. O'Brien 	Cell *x;
14162a55deb1SDavid E. O'Brien 	Node *y;
14172a55deb1SDavid E. O'Brien 	char *buf;
14182a55deb1SDavid E. O'Brien 	int len;
14192a55deb1SDavid E. O'Brien 	int bufsz=3*recsize;
14202a55deb1SDavid E. O'Brien 
14212a55deb1SDavid E. O'Brien 	if ((buf = (char *) malloc(bufsz)) == NULL)
14222a55deb1SDavid E. O'Brien 		FATAL("out of memory in awkprintf");
14232a55deb1SDavid E. O'Brien 	y = a[0]->nnext;
14242a55deb1SDavid E. O'Brien 	x = execute(a[0]);
14252a55deb1SDavid E. O'Brien 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
14262a55deb1SDavid E. O'Brien 		FATAL("printf string %.30s... too long.  can't happen.", buf);
14272a55deb1SDavid E. O'Brien 	tempfree(x);
14282a55deb1SDavid E. O'Brien 	if (a[1] == NULL) {
14292a55deb1SDavid E. O'Brien 		/* fputs(buf, stdout); */
14302a55deb1SDavid E. O'Brien 		fwrite(buf, len, 1, stdout);
14312a55deb1SDavid E. O'Brien 		if (ferror(stdout))
14322a55deb1SDavid E. O'Brien 			FATAL("write error on stdout");
14332a55deb1SDavid E. O'Brien 	} else {
14342a55deb1SDavid E. O'Brien 		fp = redirect(ptoi(a[1]), a[2]);
14352a55deb1SDavid E. O'Brien 		/* fputs(buf, fp); */
14362a55deb1SDavid E. O'Brien 		fwrite(buf, len, 1, fp);
14372a55deb1SDavid E. O'Brien 		fflush(fp);
14382a55deb1SDavid E. O'Brien 		if (ferror(fp))
14392a55deb1SDavid E. O'Brien 			FATAL("write error on %s", filename(fp));
14402a55deb1SDavid E. O'Brien 	}
14412a55deb1SDavid E. O'Brien 	free(buf);
14422a55deb1SDavid E. O'Brien 	return(True);
14432a55deb1SDavid E. O'Brien }
14442a55deb1SDavid E. O'Brien 
14452a55deb1SDavid E. O'Brien Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
14462a55deb1SDavid E. O'Brien {
14472a55deb1SDavid E. O'Brien 	Awkfloat i, j = 0;
14482a55deb1SDavid E. O'Brien 	double v;
14492a55deb1SDavid E. O'Brien 	Cell *x, *y, *z;
14502a55deb1SDavid E. O'Brien 
14512a55deb1SDavid E. O'Brien 	x = execute(a[0]);
14522a55deb1SDavid E. O'Brien 	i = getfval(x);
14532a55deb1SDavid E. O'Brien 	tempfree(x);
1454b5253557SWarner Losh 	if (n != UMINUS && n != UPLUS) {
14552a55deb1SDavid E. O'Brien 		y = execute(a[1]);
14562a55deb1SDavid E. O'Brien 		j = getfval(y);
14572a55deb1SDavid E. O'Brien 		tempfree(y);
14582a55deb1SDavid E. O'Brien 	}
14592a55deb1SDavid E. O'Brien 	z = gettemp();
14602a55deb1SDavid E. O'Brien 	switch (n) {
14612a55deb1SDavid E. O'Brien 	case ADD:
14622a55deb1SDavid E. O'Brien 		i += j;
14632a55deb1SDavid E. O'Brien 		break;
14642a55deb1SDavid E. O'Brien 	case MINUS:
14652a55deb1SDavid E. O'Brien 		i -= j;
14662a55deb1SDavid E. O'Brien 		break;
14672a55deb1SDavid E. O'Brien 	case MULT:
14682a55deb1SDavid E. O'Brien 		i *= j;
14692a55deb1SDavid E. O'Brien 		break;
14702a55deb1SDavid E. O'Brien 	case DIVIDE:
14712a55deb1SDavid E. O'Brien 		if (j == 0)
14722a55deb1SDavid E. O'Brien 			FATAL("division by zero");
14732a55deb1SDavid E. O'Brien 		i /= j;
14742a55deb1SDavid E. O'Brien 		break;
14752a55deb1SDavid E. O'Brien 	case MOD:
14762a55deb1SDavid E. O'Brien 		if (j == 0)
14772a55deb1SDavid E. O'Brien 			FATAL("division by zero in mod");
14782a55deb1SDavid E. O'Brien 		modf(i/j, &v);
14792a55deb1SDavid E. O'Brien 		i = i - j * v;
14802a55deb1SDavid E. O'Brien 		break;
14812a55deb1SDavid E. O'Brien 	case UMINUS:
14822a55deb1SDavid E. O'Brien 		i = -i;
14832a55deb1SDavid E. O'Brien 		break;
1484b5253557SWarner Losh 	case UPLUS: /* handled by getfval(), above */
1485b5253557SWarner Losh 		break;
14862a55deb1SDavid E. O'Brien 	case POWER:
14872a55deb1SDavid E. O'Brien 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
14882a55deb1SDavid E. O'Brien 			i = ipow(i, (int) j);
1489f39dd6a9SWarner Losh                else {
1490f39dd6a9SWarner Losh 			errno = 0;
14912a55deb1SDavid E. O'Brien 			i = errcheck(pow(i, j), "pow");
1492f39dd6a9SWarner Losh                }
14932a55deb1SDavid E. O'Brien 		break;
14942a55deb1SDavid E. O'Brien 	default:	/* can't happen */
14952a55deb1SDavid E. O'Brien 		FATAL("illegal arithmetic operator %d", n);
14962a55deb1SDavid E. O'Brien 	}
14972a55deb1SDavid E. O'Brien 	setfval(z, i);
14982a55deb1SDavid E. O'Brien 	return(z);
14992a55deb1SDavid E. O'Brien }
15002a55deb1SDavid E. O'Brien 
/*
 * ipow - raise x to the integer power n by repeated squaring.
 *
 * Returns 1 for n <= 0.  The bits of n are consumed from the most
 * significant one downwards, so the multiplications happen in the same
 * order as in the halving recursion ipow(x, n) = [x *] v * v with
 * v = ipow(x, n/2).
 */
double ipow(double x, int n)	/* x**n.  ought to be done by pow, but isn't always */
{
	unsigned int bit;
	double acc = 1;

	if (n <= 0)
		return 1;

	/* find the highest set bit of n */
	for (bit = 1; bit <= (unsigned int) n / 2; bit <<= 1)
		continue;

	for (; bit != 0; bit >>= 1) {
		if ((unsigned int) n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
15132a55deb1SDavid E. O'Brien 
15142a55deb1SDavid E. O'Brien Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
15152a55deb1SDavid E. O'Brien {
15162a55deb1SDavid E. O'Brien 	Cell *x, *z;
15172a55deb1SDavid E. O'Brien 	int k;
15182a55deb1SDavid E. O'Brien 	Awkfloat xf;
15192a55deb1SDavid E. O'Brien 
15202a55deb1SDavid E. O'Brien 	x = execute(a[0]);
15212a55deb1SDavid E. O'Brien 	xf = getfval(x);
15222a55deb1SDavid E. O'Brien 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
15232a55deb1SDavid E. O'Brien 	if (n == PREINCR || n == PREDECR) {
15242a55deb1SDavid E. O'Brien 		setfval(x, xf + k);
15252a55deb1SDavid E. O'Brien 		return(x);
15262a55deb1SDavid E. O'Brien 	}
15272a55deb1SDavid E. O'Brien 	z = gettemp();
15282a55deb1SDavid E. O'Brien 	setfval(z, xf);
15292a55deb1SDavid E. O'Brien 	setfval(x, xf + k);
15302a55deb1SDavid E. O'Brien 	tempfree(x);
15312a55deb1SDavid E. O'Brien 	return(z);
15322a55deb1SDavid E. O'Brien }
15332a55deb1SDavid E. O'Brien 
15342a55deb1SDavid E. O'Brien Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
15352a55deb1SDavid E. O'Brien {		/* this is subtle; don't muck with it. */
15362a55deb1SDavid E. O'Brien 	Cell *x, *y;
15372a55deb1SDavid E. O'Brien 	Awkfloat xf, yf;
15382a55deb1SDavid E. O'Brien 	double v;
15392a55deb1SDavid E. O'Brien 
15402a55deb1SDavid E. O'Brien 	y = execute(a[1]);
15412a55deb1SDavid E. O'Brien 	x = execute(a[0]);
15422a55deb1SDavid E. O'Brien 	if (n == ASSIGN) {	/* ordinary assignment */
1543b5253557SWarner Losh 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1544b5253557SWarner Losh 			;	/* self-assignment: leave alone unless it's a field or NF */
15452a55deb1SDavid E. O'Brien 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1546f32a6403SWarner Losh 			yf = getfval(y);
15472a55deb1SDavid E. O'Brien 			setsval(x, getsval(y));
1548f32a6403SWarner Losh 			x->fval = yf;
15492a55deb1SDavid E. O'Brien 			x->tval |= NUM;
15502a55deb1SDavid E. O'Brien 		}
15512a55deb1SDavid E. O'Brien 		else if (isstr(y))
15522a55deb1SDavid E. O'Brien 			setsval(x, getsval(y));
15532a55deb1SDavid E. O'Brien 		else if (isnum(y))
15542a55deb1SDavid E. O'Brien 			setfval(x, getfval(y));
15552a55deb1SDavid E. O'Brien 		else
15562a55deb1SDavid E. O'Brien 			funnyvar(y, "read value of");
15572a55deb1SDavid E. O'Brien 		tempfree(y);
15582a55deb1SDavid E. O'Brien 		return(x);
15592a55deb1SDavid E. O'Brien 	}
15602a55deb1SDavid E. O'Brien 	xf = getfval(x);
15612a55deb1SDavid E. O'Brien 	yf = getfval(y);
15622a55deb1SDavid E. O'Brien 	switch (n) {
15632a55deb1SDavid E. O'Brien 	case ADDEQ:
15642a55deb1SDavid E. O'Brien 		xf += yf;
15652a55deb1SDavid E. O'Brien 		break;
15662a55deb1SDavid E. O'Brien 	case SUBEQ:
15672a55deb1SDavid E. O'Brien 		xf -= yf;
15682a55deb1SDavid E. O'Brien 		break;
15692a55deb1SDavid E. O'Brien 	case MULTEQ:
15702a55deb1SDavid E. O'Brien 		xf *= yf;
15712a55deb1SDavid E. O'Brien 		break;
15722a55deb1SDavid E. O'Brien 	case DIVEQ:
15732a55deb1SDavid E. O'Brien 		if (yf == 0)
15742a55deb1SDavid E. O'Brien 			FATAL("division by zero in /=");
15752a55deb1SDavid E. O'Brien 		xf /= yf;
15762a55deb1SDavid E. O'Brien 		break;
15772a55deb1SDavid E. O'Brien 	case MODEQ:
15782a55deb1SDavid E. O'Brien 		if (yf == 0)
15792a55deb1SDavid E. O'Brien 			FATAL("division by zero in %%=");
15802a55deb1SDavid E. O'Brien 		modf(xf/yf, &v);
15812a55deb1SDavid E. O'Brien 		xf = xf - yf * v;
15822a55deb1SDavid E. O'Brien 		break;
15832a55deb1SDavid E. O'Brien 	case POWEQ:
15842a55deb1SDavid E. O'Brien 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
15852a55deb1SDavid E. O'Brien 			xf = ipow(xf, (int) yf);
1586f39dd6a9SWarner Losh                else {
1587f39dd6a9SWarner Losh 			errno = 0;
15882a55deb1SDavid E. O'Brien 			xf = errcheck(pow(xf, yf), "pow");
1589f39dd6a9SWarner Losh                }
15902a55deb1SDavid E. O'Brien 		break;
15912a55deb1SDavid E. O'Brien 	default:
15922a55deb1SDavid E. O'Brien 		FATAL("illegal assignment operator %d", n);
15932a55deb1SDavid E. O'Brien 		break;
15942a55deb1SDavid E. O'Brien 	}
15952a55deb1SDavid E. O'Brien 	tempfree(y);
15962a55deb1SDavid E. O'Brien 	setfval(x, xf);
15972a55deb1SDavid E. O'Brien 	return(x);
15982a55deb1SDavid E. O'Brien }
15992a55deb1SDavid E. O'Brien 
16002a55deb1SDavid E. O'Brien Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
16012a55deb1SDavid E. O'Brien {
16022a55deb1SDavid E. O'Brien 	Cell *x, *y, *z;
16032a55deb1SDavid E. O'Brien 	int n1, n2;
1604b5253557SWarner Losh 	char *s = NULL;
1605b5253557SWarner Losh 	int ssz = 0;
16062a55deb1SDavid E. O'Brien 
16072a55deb1SDavid E. O'Brien 	x = execute(a[0]);
1608b5253557SWarner Losh 	n1 = strlen(getsval(x));
1609f32a6403SWarner Losh 	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1610f39dd6a9SWarner Losh 	memcpy(s, x->sval, n1);
1611b5253557SWarner Losh 
1612f32a6403SWarner Losh 	tempfree(x);
1613f32a6403SWarner Losh 
16142a55deb1SDavid E. O'Brien 	y = execute(a[1]);
1615b5253557SWarner Losh 	n2 = strlen(getsval(y));
1616b5253557SWarner Losh 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1617f39dd6a9SWarner Losh 	memcpy(s + n1, y->sval, n2);
1618f39dd6a9SWarner Losh 	s[n1 + n2] = '\0';
1619b5253557SWarner Losh 
16202a55deb1SDavid E. O'Brien 	tempfree(y);
1621b5253557SWarner Losh 
16222a55deb1SDavid E. O'Brien 	z = gettemp();
16232a55deb1SDavid E. O'Brien 	z->sval = s;
16242a55deb1SDavid E. O'Brien 	z->tval = STR;
1625b5253557SWarner Losh 
16262a55deb1SDavid E. O'Brien 	return(z);
16272a55deb1SDavid E. O'Brien }
16282a55deb1SDavid E. O'Brien 
16292a55deb1SDavid E. O'Brien Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
16302a55deb1SDavid E. O'Brien {
16312a55deb1SDavid E. O'Brien 	Cell *x;
16322a55deb1SDavid E. O'Brien 
163310ce5b99SWarner Losh 	if (a[0] == NULL)
16342a55deb1SDavid E. O'Brien 		x = execute(a[1]);
16352a55deb1SDavid E. O'Brien 	else {
16362a55deb1SDavid E. O'Brien 		x = execute(a[0]);
16372a55deb1SDavid E. O'Brien 		if (istrue(x)) {
16382a55deb1SDavid E. O'Brien 			tempfree(x);
16392a55deb1SDavid E. O'Brien 			x = execute(a[1]);
16402a55deb1SDavid E. O'Brien 		}
16412a55deb1SDavid E. O'Brien 	}
16422a55deb1SDavid E. O'Brien 	return x;
16432a55deb1SDavid E. O'Brien }
16442a55deb1SDavid E. O'Brien 
16452a55deb1SDavid E. O'Brien Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
16462a55deb1SDavid E. O'Brien {
16472a55deb1SDavid E. O'Brien 	Cell *x;
16482a55deb1SDavid E. O'Brien 	int pair;
16492a55deb1SDavid E. O'Brien 
16502a55deb1SDavid E. O'Brien 	pair = ptoi(a[3]);
16512a55deb1SDavid E. O'Brien 	if (pairstack[pair] == 0) {
16522a55deb1SDavid E. O'Brien 		x = execute(a[0]);
16532a55deb1SDavid E. O'Brien 		if (istrue(x))
16542a55deb1SDavid E. O'Brien 			pairstack[pair] = 1;
16552a55deb1SDavid E. O'Brien 		tempfree(x);
16562a55deb1SDavid E. O'Brien 	}
16572a55deb1SDavid E. O'Brien 	if (pairstack[pair] == 1) {
16582a55deb1SDavid E. O'Brien 		x = execute(a[1]);
16592a55deb1SDavid E. O'Brien 		if (istrue(x))
16602a55deb1SDavid E. O'Brien 			pairstack[pair] = 0;
16612a55deb1SDavid E. O'Brien 		tempfree(x);
16622a55deb1SDavid E. O'Brien 		x = execute(a[2]);
16632a55deb1SDavid E. O'Brien 		return(x);
16642a55deb1SDavid E. O'Brien 	}
16652a55deb1SDavid E. O'Brien 	return(False);
16662a55deb1SDavid E. O'Brien }
16672a55deb1SDavid E. O'Brien 
16682a55deb1SDavid E. O'Brien Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
16692a55deb1SDavid E. O'Brien {
167010ce5b99SWarner Losh 	Cell *x = NULL, *y, *ap;
1671f39dd6a9SWarner Losh 	const char *s, *origs, *t;
1672f39dd6a9SWarner Losh 	const char *fs = NULL;
1673f39dd6a9SWarner Losh 	char *origfs = NULL;
16742a55deb1SDavid E. O'Brien 	int sep;
1675f39dd6a9SWarner Losh 	char temp, num[50];
16762a55deb1SDavid E. O'Brien 	int n, tempstat, arg3type;
1677f32a6403SWarner Losh 	int j;
1678f39dd6a9SWarner Losh 	double result;
16792a55deb1SDavid E. O'Brien 
16802a55deb1SDavid E. O'Brien 	y = execute(a[0]);	/* source string */
16810840e960SXin LI 	origs = s = strdup(getsval(y));
1682f32a6403SWarner Losh 	tempfree(y);
16832a55deb1SDavid E. O'Brien 	arg3type = ptoi(a[3]);
1684f32a6403SWarner Losh 	if (a[2] == NULL) {		/* BUG: CSV should override implicit fs but not explicit */
1685d9e8cf28SWarner Losh 		fs = getsval(fsloc);
1686f32a6403SWarner Losh 	} else if (arg3type == STRING) {	/* split(str,arr,"string") */
16872a55deb1SDavid E. O'Brien 		x = execute(a[2]);
1688f39dd6a9SWarner Losh 		fs = origfs = strdup(getsval(x));
1689b5253557SWarner Losh 		tempfree(x);
1690f32a6403SWarner Losh 	} else if (arg3type == REGEXPR) {
16912a55deb1SDavid E. O'Brien 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1692f32a6403SWarner Losh 	} else {
16932a55deb1SDavid E. O'Brien 		FATAL("illegal type of split");
1694f32a6403SWarner Losh 	}
16952a55deb1SDavid E. O'Brien 	sep = *fs;
16962a55deb1SDavid E. O'Brien 	ap = execute(a[1]);	/* array name */
1697f32a6403SWarner Losh /* BUG 7/26/22: this appears not to reset array: see C1/asplit */
16982a55deb1SDavid E. O'Brien 	freesymtab(ap);
1699f39dd6a9SWarner Losh 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
17002a55deb1SDavid E. O'Brien 	ap->tval &= ~STR;
17012a55deb1SDavid E. O'Brien 	ap->tval |= ARR;
17022a55deb1SDavid E. O'Brien 	ap->sval = (char *) makesymtab(NSYMTAB);
17032a55deb1SDavid E. O'Brien 
17042a55deb1SDavid E. O'Brien 	n = 0;
1705d86a0988SRuslan Ermilov         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1706d86a0988SRuslan Ermilov 		/* split(s, a, //); have to arrange that it looks like empty sep */
1707d86a0988SRuslan Ermilov 		arg3type = 0;
1708d86a0988SRuslan Ermilov 		fs = "";
1709d86a0988SRuslan Ermilov 		sep = 0;
1710d86a0988SRuslan Ermilov 	}
171188b8d487SRuslan Ermilov 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
17122a55deb1SDavid E. O'Brien 		fa *pfa;
17132a55deb1SDavid E. O'Brien 		if (arg3type == REGEXPR) {	/* it's ready already */
17142a55deb1SDavid E. O'Brien 			pfa = (fa *) a[2];
17152a55deb1SDavid E. O'Brien 		} else {
17162a55deb1SDavid E. O'Brien 			pfa = makedfa(fs, 1);
17172a55deb1SDavid E. O'Brien 		}
17182a55deb1SDavid E. O'Brien 		if (nematch(pfa,s)) {
17192a55deb1SDavid E. O'Brien 			tempstat = pfa->initstat;
17202a55deb1SDavid E. O'Brien 			pfa->initstat = 2;
17212a55deb1SDavid E. O'Brien 			do {
17222a55deb1SDavid E. O'Brien 				n++;
1723f39dd6a9SWarner Losh 				snprintf(num, sizeof(num), "%d", n);
17242a55deb1SDavid E. O'Brien 				temp = *patbeg;
1725f39dd6a9SWarner Losh 				setptr(patbeg, '\0');
1726f39dd6a9SWarner Losh 				if (is_number(s, & result))
1727f39dd6a9SWarner Losh 					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
17282a55deb1SDavid E. O'Brien 				else
17292a55deb1SDavid E. O'Brien 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1730f39dd6a9SWarner Losh 				setptr(patbeg, temp);
17312a55deb1SDavid E. O'Brien 				s = patbeg + patlen;
1732f39dd6a9SWarner Losh 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
17332a55deb1SDavid E. O'Brien 					n++;
1734f39dd6a9SWarner Losh 					snprintf(num, sizeof(num), "%d", n);
17352a55deb1SDavid E. O'Brien 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
17362a55deb1SDavid E. O'Brien 					pfa->initstat = tempstat;
17372a55deb1SDavid E. O'Brien 					goto spdone;
17382a55deb1SDavid E. O'Brien 				}
17392a55deb1SDavid E. O'Brien 			} while (nematch(pfa,s));
1740c263f9bfSRuslan Ermilov 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1741c263f9bfSRuslan Ermilov 							/* cf gsub and refldbld */
17422a55deb1SDavid E. O'Brien 		}
17432a55deb1SDavid E. O'Brien 		n++;
1744f39dd6a9SWarner Losh 		snprintf(num, sizeof(num), "%d", n);
1745f39dd6a9SWarner Losh 		if (is_number(s, & result))
1746f39dd6a9SWarner Losh 			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
17472a55deb1SDavid E. O'Brien 		else
17482a55deb1SDavid E. O'Brien 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
17492a55deb1SDavid E. O'Brien   spdone:
17502a55deb1SDavid E. O'Brien 		pfa = NULL;
1751f32a6403SWarner Losh 
1752f32a6403SWarner Losh 	} else if (a[2] == NULL && CSV) {	/* CSV only if no explicit separator */
1753f32a6403SWarner Losh 		char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1754f32a6403SWarner Losh 		for (;;) {
1755f32a6403SWarner Losh 			char *fr = newt;
1756f32a6403SWarner Losh 			n++;
1757f32a6403SWarner Losh 			if (*s == '"' ) { /* start of "..." */
1758f32a6403SWarner Losh 				for (s++ ; *s != '\0'; ) {
1759f32a6403SWarner Losh 					if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1760f32a6403SWarner Losh 						s += 2; /* doubled quote */
1761f32a6403SWarner Losh 						*fr++ = '"';
1762f32a6403SWarner Losh 					} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1763f32a6403SWarner Losh 						s++; /* skip over closing quote */
1764f32a6403SWarner Losh 						break;
1765f32a6403SWarner Losh 					} else {
1766f32a6403SWarner Losh 						*fr++ = *s++;
1767f32a6403SWarner Losh 					}
1768f32a6403SWarner Losh 				}
1769f32a6403SWarner Losh 				*fr++ = 0;
1770f32a6403SWarner Losh 			} else {	/* unquoted field */
1771f32a6403SWarner Losh 				while (*s != ',' && *s != '\0')
1772f32a6403SWarner Losh 					*fr++ = *s++;
1773f32a6403SWarner Losh 				*fr++ = 0;
1774f32a6403SWarner Losh 			}
1775f32a6403SWarner Losh 			snprintf(num, sizeof(num), "%d", n);
1776f32a6403SWarner Losh 			if (is_number(newt, &result))
1777f32a6403SWarner Losh 				setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1778f32a6403SWarner Losh 			else
1779f32a6403SWarner Losh 				setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1780f32a6403SWarner Losh 			if (*s++ == '\0')
1781f32a6403SWarner Losh 				break;
1782f32a6403SWarner Losh 		}
1783f32a6403SWarner Losh 		free(newt);
1784f32a6403SWarner Losh 
1785f32a6403SWarner Losh 	} else if (!CSV && sep == ' ') { /* usual case: split on white space */
17862a55deb1SDavid E. O'Brien 		for (n = 0; ; ) {
1787f39dd6a9SWarner Losh #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1788f39dd6a9SWarner Losh 			while (ISWS(*s))
17892a55deb1SDavid E. O'Brien 				s++;
1790f39dd6a9SWarner Losh 			if (*s == '\0')
17912a55deb1SDavid E. O'Brien 				break;
17922a55deb1SDavid E. O'Brien 			n++;
17932a55deb1SDavid E. O'Brien 			t = s;
17942a55deb1SDavid E. O'Brien 			do
17952a55deb1SDavid E. O'Brien 				s++;
1796f39dd6a9SWarner Losh 			while (*s != '\0' && !ISWS(*s));
17972a55deb1SDavid E. O'Brien 			temp = *s;
1798f39dd6a9SWarner Losh 			setptr(s, '\0');
1799f39dd6a9SWarner Losh 			snprintf(num, sizeof(num), "%d", n);
1800f39dd6a9SWarner Losh 			if (is_number(t, & result))
1801f39dd6a9SWarner Losh 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
18022a55deb1SDavid E. O'Brien 			else
18032a55deb1SDavid E. O'Brien 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1804f39dd6a9SWarner Losh 			setptr(s, temp);
1805f39dd6a9SWarner Losh 			if (*s != '\0')
18062a55deb1SDavid E. O'Brien 				s++;
18072a55deb1SDavid E. O'Brien 		}
1808f32a6403SWarner Losh 
18092a55deb1SDavid E. O'Brien 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1810f32a6403SWarner Losh 		for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1811f32a6403SWarner Losh 			char buf[10];
18122a55deb1SDavid E. O'Brien 			n++;
1813f39dd6a9SWarner Losh 			snprintf(num, sizeof(num), "%d", n);
1814f32a6403SWarner Losh 
1815f32a6403SWarner Losh 			for (j = 0; j < u8_nextlen(s); j++) {
1816f32a6403SWarner Losh 				buf[j] = s[j];
1817f32a6403SWarner Losh 			}
1818f32a6403SWarner Losh 			buf[j] = '\0';
1819f32a6403SWarner Losh 
18202a55deb1SDavid E. O'Brien 			if (isdigit((uschar)buf[0]))
18212a55deb1SDavid E. O'Brien 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
18222a55deb1SDavid E. O'Brien 			else
18232a55deb1SDavid E. O'Brien 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
18242a55deb1SDavid E. O'Brien 		}
1825f32a6403SWarner Losh 
1826f32a6403SWarner Losh 	} else if (*s != '\0') {  /* some random single character */
18272a55deb1SDavid E. O'Brien 		for (;;) {
18282a55deb1SDavid E. O'Brien 			n++;
18292a55deb1SDavid E. O'Brien 			t = s;
18301023317aSWarner Losh 			while (*s != sep && *s != '\0')
18312a55deb1SDavid E. O'Brien 				s++;
18322a55deb1SDavid E. O'Brien 			temp = *s;
1833f39dd6a9SWarner Losh 			setptr(s, '\0');
1834f39dd6a9SWarner Losh 			snprintf(num, sizeof(num), "%d", n);
1835f39dd6a9SWarner Losh 			if (is_number(t, & result))
1836f39dd6a9SWarner Losh 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
18372a55deb1SDavid E. O'Brien 			else
18382a55deb1SDavid E. O'Brien 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1839f39dd6a9SWarner Losh 			setptr(s, temp);
1840f39dd6a9SWarner Losh 			if (*s++ == '\0')
18412a55deb1SDavid E. O'Brien 				break;
18422a55deb1SDavid E. O'Brien 		}
18432a55deb1SDavid E. O'Brien 	}
18442a55deb1SDavid E. O'Brien 	tempfree(ap);
1845f39dd6a9SWarner Losh 	xfree(origs);
1846f39dd6a9SWarner Losh 	xfree(origfs);
18472a55deb1SDavid E. O'Brien 	x = gettemp();
18482a55deb1SDavid E. O'Brien 	x->tval = NUM;
18492a55deb1SDavid E. O'Brien 	x->fval = n;
18502a55deb1SDavid E. O'Brien 	return(x);
18512a55deb1SDavid E. O'Brien }
18522a55deb1SDavid E. O'Brien 
18532a55deb1SDavid E. O'Brien Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
18542a55deb1SDavid E. O'Brien {
18552a55deb1SDavid E. O'Brien 	Cell *x;
18562a55deb1SDavid E. O'Brien 
18572a55deb1SDavid E. O'Brien 	x = execute(a[0]);
18582a55deb1SDavid E. O'Brien 	if (istrue(x)) {
18592a55deb1SDavid E. O'Brien 		tempfree(x);
18602a55deb1SDavid E. O'Brien 		x = execute(a[1]);
18612a55deb1SDavid E. O'Brien 	} else {
18622a55deb1SDavid E. O'Brien 		tempfree(x);
18632a55deb1SDavid E. O'Brien 		x = execute(a[2]);
18642a55deb1SDavid E. O'Brien 	}
18652a55deb1SDavid E. O'Brien 	return(x);
18662a55deb1SDavid E. O'Brien }
18672a55deb1SDavid E. O'Brien 
18682a55deb1SDavid E. O'Brien Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
18692a55deb1SDavid E. O'Brien {
18702a55deb1SDavid E. O'Brien 	Cell *x;
18712a55deb1SDavid E. O'Brien 
18722a55deb1SDavid E. O'Brien 	x = execute(a[0]);
18732a55deb1SDavid E. O'Brien 	if (istrue(x)) {
18742a55deb1SDavid E. O'Brien 		tempfree(x);
18752a55deb1SDavid E. O'Brien 		x = execute(a[1]);
187610ce5b99SWarner Losh 	} else if (a[2] != NULL) {
18772a55deb1SDavid E. O'Brien 		tempfree(x);
18782a55deb1SDavid E. O'Brien 		x = execute(a[2]);
18792a55deb1SDavid E. O'Brien 	}
18802a55deb1SDavid E. O'Brien 	return(x);
18812a55deb1SDavid E. O'Brien }
18822a55deb1SDavid E. O'Brien 
18832a55deb1SDavid E. O'Brien Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
18842a55deb1SDavid E. O'Brien {
18852a55deb1SDavid E. O'Brien 	Cell *x;
18862a55deb1SDavid E. O'Brien 
18872a55deb1SDavid E. O'Brien 	for (;;) {
18882a55deb1SDavid E. O'Brien 		x = execute(a[0]);
18892a55deb1SDavid E. O'Brien 		if (!istrue(x))
18902a55deb1SDavid E. O'Brien 			return(x);
18912a55deb1SDavid E. O'Brien 		tempfree(x);
18922a55deb1SDavid E. O'Brien 		x = execute(a[1]);
18932a55deb1SDavid E. O'Brien 		if (isbreak(x)) {
18942a55deb1SDavid E. O'Brien 			x = True;
18952a55deb1SDavid E. O'Brien 			return(x);
18962a55deb1SDavid E. O'Brien 		}
18972a55deb1SDavid E. O'Brien 		if (isnext(x) || isexit(x) || isret(x))
18982a55deb1SDavid E. O'Brien 			return(x);
18992a55deb1SDavid E. O'Brien 		tempfree(x);
19002a55deb1SDavid E. O'Brien 	}
19012a55deb1SDavid E. O'Brien }
19022a55deb1SDavid E. O'Brien 
19032a55deb1SDavid E. O'Brien Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
19042a55deb1SDavid E. O'Brien {
19052a55deb1SDavid E. O'Brien 	Cell *x;
19062a55deb1SDavid E. O'Brien 
19072a55deb1SDavid E. O'Brien 	for (;;) {
19082a55deb1SDavid E. O'Brien 		x = execute(a[0]);
19092a55deb1SDavid E. O'Brien 		if (isbreak(x))
19102a55deb1SDavid E. O'Brien 			return True;
19112a55deb1SDavid E. O'Brien 		if (isnext(x) || isexit(x) || isret(x))
19122a55deb1SDavid E. O'Brien 			return(x);
19132a55deb1SDavid E. O'Brien 		tempfree(x);
19142a55deb1SDavid E. O'Brien 		x = execute(a[1]);
19152a55deb1SDavid E. O'Brien 		if (!istrue(x))
19162a55deb1SDavid E. O'Brien 			return(x);
19172a55deb1SDavid E. O'Brien 		tempfree(x);
19182a55deb1SDavid E. O'Brien 	}
19192a55deb1SDavid E. O'Brien }
19202a55deb1SDavid E. O'Brien 
19212a55deb1SDavid E. O'Brien Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
19222a55deb1SDavid E. O'Brien {
19232a55deb1SDavid E. O'Brien 	Cell *x;
19242a55deb1SDavid E. O'Brien 
19252a55deb1SDavid E. O'Brien 	x = execute(a[0]);
19262a55deb1SDavid E. O'Brien 	tempfree(x);
19272a55deb1SDavid E. O'Brien 	for (;;) {
192810ce5b99SWarner Losh 		if (a[1]!=NULL) {
19292a55deb1SDavid E. O'Brien 			x = execute(a[1]);
19302a55deb1SDavid E. O'Brien 			if (!istrue(x)) return(x);
19312a55deb1SDavid E. O'Brien 			else tempfree(x);
19322a55deb1SDavid E. O'Brien 		}
19332a55deb1SDavid E. O'Brien 		x = execute(a[3]);
19342a55deb1SDavid E. O'Brien 		if (isbreak(x))		/* turn off break */
19352a55deb1SDavid E. O'Brien 			return True;
19362a55deb1SDavid E. O'Brien 		if (isnext(x) || isexit(x) || isret(x))
19372a55deb1SDavid E. O'Brien 			return(x);
19382a55deb1SDavid E. O'Brien 		tempfree(x);
19392a55deb1SDavid E. O'Brien 		x = execute(a[2]);
19402a55deb1SDavid E. O'Brien 		tempfree(x);
19412a55deb1SDavid E. O'Brien 	}
19422a55deb1SDavid E. O'Brien }
19432a55deb1SDavid E. O'Brien 
19442a55deb1SDavid E. O'Brien Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
19452a55deb1SDavid E. O'Brien {
19462a55deb1SDavid E. O'Brien 	Cell *x, *vp, *arrayp, *cp, *ncp;
19472a55deb1SDavid E. O'Brien 	Array *tp;
19482a55deb1SDavid E. O'Brien 	int i;
19492a55deb1SDavid E. O'Brien 
19502a55deb1SDavid E. O'Brien 	vp = execute(a[0]);
19512a55deb1SDavid E. O'Brien 	arrayp = execute(a[1]);
19522a55deb1SDavid E. O'Brien 	if (!isarr(arrayp)) {
19532a55deb1SDavid E. O'Brien 		return True;
19542a55deb1SDavid E. O'Brien 	}
19552a55deb1SDavid E. O'Brien 	tp = (Array *) arrayp->sval;
19562a55deb1SDavid E. O'Brien 	tempfree(arrayp);
19572a55deb1SDavid E. O'Brien 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
19582a55deb1SDavid E. O'Brien 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
19592a55deb1SDavid E. O'Brien 			setsval(vp, cp->nval);
19602a55deb1SDavid E. O'Brien 			ncp = cp->cnext;
19612a55deb1SDavid E. O'Brien 			x = execute(a[2]);
19622a55deb1SDavid E. O'Brien 			if (isbreak(x)) {
19632a55deb1SDavid E. O'Brien 				tempfree(vp);
19642a55deb1SDavid E. O'Brien 				return True;
19652a55deb1SDavid E. O'Brien 			}
19662a55deb1SDavid E. O'Brien 			if (isnext(x) || isexit(x) || isret(x)) {
19672a55deb1SDavid E. O'Brien 				tempfree(vp);
19682a55deb1SDavid E. O'Brien 				return(x);
19692a55deb1SDavid E. O'Brien 			}
19702a55deb1SDavid E. O'Brien 			tempfree(x);
19712a55deb1SDavid E. O'Brien 		}
19722a55deb1SDavid E. O'Brien 	}
19732a55deb1SDavid E. O'Brien 	return True;
19742a55deb1SDavid E. O'Brien }
19752a55deb1SDavid E. O'Brien 
1976f39dd6a9SWarner Losh static char *nawk_convert(const char *s, int (*fun_c)(int),
1977f39dd6a9SWarner Losh     wint_t (*fun_wc)(wint_t))
1978f39dd6a9SWarner Losh {
1979f39dd6a9SWarner Losh 	char *buf      = NULL;
1980f39dd6a9SWarner Losh 	char *pbuf     = NULL;
1981f39dd6a9SWarner Losh 	const char *ps = NULL;
1982f39dd6a9SWarner Losh 	size_t n       = 0;
1983f39dd6a9SWarner Losh 	wchar_t wc;
1984f32a6403SWarner Losh 	const size_t sz = awk_mb_cur_max;
1985f32a6403SWarner Losh 	int unused;
1986f39dd6a9SWarner Losh 
1987f39dd6a9SWarner Losh 	if (sz == 1) {
1988f39dd6a9SWarner Losh 		buf = tostring(s);
1989f39dd6a9SWarner Losh 
1990f39dd6a9SWarner Losh 		for (pbuf = buf; *pbuf; pbuf++)
1991f39dd6a9SWarner Losh 			*pbuf = fun_c((uschar)*pbuf);
1992f39dd6a9SWarner Losh 
1993f39dd6a9SWarner Losh 		return buf;
1994f39dd6a9SWarner Losh 	} else {
1995f39dd6a9SWarner Losh 		/* upper/lower character may be shorter/longer */
1996f39dd6a9SWarner Losh 		buf = tostringN(s, strlen(s) * sz + 1);
1997f39dd6a9SWarner Losh 
1998f39dd6a9SWarner Losh 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1999f39dd6a9SWarner Losh 		/*
2000f39dd6a9SWarner Losh 		 * Reset internal state here too.
2001f39dd6a9SWarner Losh 		 * Assign result to avoid a compiler warning. (Casting to void
2002f39dd6a9SWarner Losh 		 * doesn't work.)
2003f39dd6a9SWarner Losh 		 * Increment said variable to avoid a different warning.
2004f39dd6a9SWarner Losh 		 */
2005f32a6403SWarner Losh 		unused = wctomb(NULL, L'\0');
2006f39dd6a9SWarner Losh 		unused++;
2007f39dd6a9SWarner Losh 
2008f39dd6a9SWarner Losh 		ps   = s;
2009f39dd6a9SWarner Losh 		pbuf = buf;
2010f39dd6a9SWarner Losh 		while (n = mbtowc(&wc, ps, sz),
2011f39dd6a9SWarner Losh 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
2012f39dd6a9SWarner Losh 		{
2013f39dd6a9SWarner Losh 			ps += n;
2014f39dd6a9SWarner Losh 
2015f39dd6a9SWarner Losh 			n = wctomb(pbuf, fun_wc(wc));
2016f39dd6a9SWarner Losh 			if (n == (size_t)-1)
2017f39dd6a9SWarner Losh 				FATAL("illegal wide character %s", s);
2018f39dd6a9SWarner Losh 
2019f39dd6a9SWarner Losh 			pbuf += n;
2020f39dd6a9SWarner Losh 		}
2021f39dd6a9SWarner Losh 
2022f39dd6a9SWarner Losh 		*pbuf = '\0';
2023f39dd6a9SWarner Losh 
2024f39dd6a9SWarner Losh 		if (n)
2025f39dd6a9SWarner Losh 			FATAL("illegal byte sequence %s", s);
2026f39dd6a9SWarner Losh 
2027f39dd6a9SWarner Losh 		return buf;
2028f39dd6a9SWarner Losh 	}
2029f39dd6a9SWarner Losh }
2030f39dd6a9SWarner Losh 
#ifdef __DJGPP__
/*
 * Local towupper()/towlower(), compiled only when __DJGPP__ is
 * defined.  Values in [0, 256) are mapped through the single-byte
 * toupper()/tolower(); anything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2048f39dd6a9SWarner Losh 
/*
 * nawk_toupper: upper-cased copy of s, produced by nawk_convert() with
 * toupper/towupper as the byte and wide-character mappers.
 */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2053f39dd6a9SWarner Losh 
/*
 * nawk_tolower: lower-cased copy of s, produced by nawk_convert() with
 * tolower/towlower as the byte and wide-character mappers.
 */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2058f39dd6a9SWarner Losh 
2059f32a6403SWarner Losh 
2060f32a6403SWarner Losh 
20612a55deb1SDavid E. O'Brien Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
20622a55deb1SDavid E. O'Brien {
20632a55deb1SDavid E. O'Brien 	Cell *x, *y;
206417853db4SWarner Losh 	Awkfloat u = 0;
2065eb690a05SWarner Losh 	int t, sz;
20661b11b783SRuslan Ermilov 	Awkfloat tmp;
2067eb690a05SWarner Losh 	char *buf, *fmt;
20682a55deb1SDavid E. O'Brien 	Node *nextarg;
20692a55deb1SDavid E. O'Brien 	FILE *fp;
2070b5253557SWarner Losh 	int status = 0;
2071eb690a05SWarner Losh 	time_t tv;
2072*8d457988SWarner Losh 	struct tm *tm, tmbuf;
2073f32a6403SWarner Losh 	int estatus = 0;
20742a55deb1SDavid E. O'Brien 
20752a55deb1SDavid E. O'Brien 	t = ptoi(a[0]);
20762a55deb1SDavid E. O'Brien 	x = execute(a[1]);
20772a55deb1SDavid E. O'Brien 	nextarg = a[1]->nnext;
20782a55deb1SDavid E. O'Brien 	switch (t) {
20792a55deb1SDavid E. O'Brien 	case FLENGTH:
2080007c6572SDag-Erling Smørgrav 		if (isarr(x))
2081007c6572SDag-Erling Smørgrav 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
2082007c6572SDag-Erling Smørgrav 		else
2083f32a6403SWarner Losh 			u = u8_strlen(getsval(x));
2084007c6572SDag-Erling Smørgrav 		break;
20852a55deb1SDavid E. O'Brien 	case FLOG:
2086f39dd6a9SWarner Losh 		errno = 0;
2087f39dd6a9SWarner Losh 		u = errcheck(log(getfval(x)), "log");
2088f39dd6a9SWarner Losh 		break;
20892a55deb1SDavid E. O'Brien 	case FINT:
20902a55deb1SDavid E. O'Brien 		modf(getfval(x), &u); break;
20912a55deb1SDavid E. O'Brien 	case FEXP:
2092f39dd6a9SWarner Losh 		errno = 0;
2093f39dd6a9SWarner Losh 		u = errcheck(exp(getfval(x)), "exp");
2094f39dd6a9SWarner Losh 		break;
20952a55deb1SDavid E. O'Brien 	case FSQRT:
2096f39dd6a9SWarner Losh 		errno = 0;
2097f39dd6a9SWarner Losh 		u = errcheck(sqrt(getfval(x)), "sqrt");
2098f39dd6a9SWarner Losh 		break;
20992a55deb1SDavid E. O'Brien 	case FSIN:
21002a55deb1SDavid E. O'Brien 		u = sin(getfval(x)); break;
21012a55deb1SDavid E. O'Brien 	case FCOS:
21022a55deb1SDavid E. O'Brien 		u = cos(getfval(x)); break;
21032a55deb1SDavid E. O'Brien 	case FATAN:
210410ce5b99SWarner Losh 		if (nextarg == NULL) {
21052a55deb1SDavid E. O'Brien 			WARNING("atan2 requires two arguments; returning 1.0");
21062a55deb1SDavid E. O'Brien 			u = 1.0;
21072a55deb1SDavid E. O'Brien 		} else {
21082a55deb1SDavid E. O'Brien 			y = execute(a[1]->nnext);
21092a55deb1SDavid E. O'Brien 			u = atan2(getfval(x), getfval(y));
21102a55deb1SDavid E. O'Brien 			tempfree(y);
21112a55deb1SDavid E. O'Brien 			nextarg = nextarg->nnext;
21122a55deb1SDavid E. O'Brien 		}
21132a55deb1SDavid E. O'Brien 		break;
2114eb690a05SWarner Losh 	case FCOMPL:
2115eb690a05SWarner Losh 		u = ~((int)getfval(x));
2116eb690a05SWarner Losh 		break;
2117eb690a05SWarner Losh 	case FAND:
2118eb690a05SWarner Losh 		if (nextarg == 0) {
2119eb690a05SWarner Losh 			WARNING("and requires two arguments; returning 0");
2120eb690a05SWarner Losh 			u = 0;
2121eb690a05SWarner Losh 			break;
2122eb690a05SWarner Losh 		}
2123eb690a05SWarner Losh 		y = execute(a[1]->nnext);
2124eb690a05SWarner Losh 		u = ((int)getfval(x)) & ((int)getfval(y));
2125eb690a05SWarner Losh 		tempfree(y);
2126eb690a05SWarner Losh 		nextarg = nextarg->nnext;
2127eb690a05SWarner Losh 		break;
2128eb690a05SWarner Losh 	case FFOR:
2129eb690a05SWarner Losh 		if (nextarg == 0) {
2130eb690a05SWarner Losh 			WARNING("or requires two arguments; returning 0");
2131eb690a05SWarner Losh 			u = 0;
2132eb690a05SWarner Losh 			break;
2133eb690a05SWarner Losh 		}
2134eb690a05SWarner Losh 		y = execute(a[1]->nnext);
2135eb690a05SWarner Losh 		u = ((int)getfval(x)) | ((int)getfval(y));
2136eb690a05SWarner Losh 		tempfree(y);
2137eb690a05SWarner Losh 		nextarg = nextarg->nnext;
2138eb690a05SWarner Losh 		break;
2139eb690a05SWarner Losh 	case FXOR:
2140eb690a05SWarner Losh 		if (nextarg == 0) {
2141eb690a05SWarner Losh 			WARNING("xor requires two arguments; returning 0");
2142eb690a05SWarner Losh 			u = 0;
2143eb690a05SWarner Losh 			break;
2144eb690a05SWarner Losh 		}
2145eb690a05SWarner Losh 		y = execute(a[1]->nnext);
2146eb690a05SWarner Losh 		u = ((int)getfval(x)) ^ ((int)getfval(y));
2147eb690a05SWarner Losh 		tempfree(y);
2148eb690a05SWarner Losh 		nextarg = nextarg->nnext;
2149eb690a05SWarner Losh 		break;
2150eb690a05SWarner Losh 	case FLSHIFT:
2151eb690a05SWarner Losh 		if (nextarg == 0) {
2152eb690a05SWarner Losh 			WARNING("lshift requires two arguments; returning 0");
2153eb690a05SWarner Losh 			u = 0;
2154eb690a05SWarner Losh 			break;
2155eb690a05SWarner Losh 		}
2156eb690a05SWarner Losh 		y = execute(a[1]->nnext);
2157eb690a05SWarner Losh 		u = ((int)getfval(x)) << ((int)getfval(y));
2158eb690a05SWarner Losh 		tempfree(y);
2159eb690a05SWarner Losh 		nextarg = nextarg->nnext;
2160eb690a05SWarner Losh 		break;
2161eb690a05SWarner Losh 	case FRSHIFT:
2162eb690a05SWarner Losh 		if (nextarg == 0) {
2163eb690a05SWarner Losh 			WARNING("rshift requires two arguments; returning 0");
2164eb690a05SWarner Losh 			u = 0;
2165eb690a05SWarner Losh 			break;
2166eb690a05SWarner Losh 		}
2167eb690a05SWarner Losh 		y = execute(a[1]->nnext);
2168eb690a05SWarner Losh 		u = ((int)getfval(x)) >> ((int)getfval(y));
2169eb690a05SWarner Losh 		tempfree(y);
2170eb690a05SWarner Losh 		nextarg = nextarg->nnext;
2171eb690a05SWarner Losh 		break;
21722a55deb1SDavid E. O'Brien 	case FSYSTEM:
21732a55deb1SDavid E. O'Brien 		fflush(stdout);		/* in case something is buffered already */
2174f32a6403SWarner Losh 		estatus = status = system(getsval(x));
2175b5253557SWarner Losh 		if (status != -1) {
2176b5253557SWarner Losh 			if (WIFEXITED(status)) {
2177f32a6403SWarner Losh 				estatus = WEXITSTATUS(status);
2178b5253557SWarner Losh 			} else if (WIFSIGNALED(status)) {
2179f32a6403SWarner Losh 				estatus = WTERMSIG(status) + 256;
2180b5253557SWarner Losh #ifdef WCOREDUMP
2181b5253557SWarner Losh 				if (WCOREDUMP(status))
2182f32a6403SWarner Losh 					estatus += 256;
2183b5253557SWarner Losh #endif
2184b5253557SWarner Losh 			} else	/* something else?!? */
2185f32a6403SWarner Losh 				estatus = 0;
2186b5253557SWarner Losh 		}
2187f32a6403SWarner Losh 		/* else estatus was set to -1 */
2188f32a6403SWarner Losh 		u = estatus;
21892a55deb1SDavid E. O'Brien 		break;
21902a55deb1SDavid E. O'Brien 	case FRAND:
2191a4b2ac79SPedro F. Giffuni 		/* random() returns numbers in [0..2^31-1]
2192a4b2ac79SPedro F. Giffuni 		 * in order to get a number in [0, 1), divide it by 2^31
2193a4b2ac79SPedro F. Giffuni 		 */
2194a4b2ac79SPedro F. Giffuni 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
21952a55deb1SDavid E. O'Brien 		break;
21962a55deb1SDavid E. O'Brien 	case FSRAND:
21972a55deb1SDavid E. O'Brien 		if (isrec(x))	/* no argument provided */
21982a55deb1SDavid E. O'Brien 			u = time((time_t *)0);
21992a55deb1SDavid E. O'Brien 		else
22002a55deb1SDavid E. O'Brien 			u = getfval(x);
22011b11b783SRuslan Ermilov 		tmp = u;
2202a4b2ac79SPedro F. Giffuni 		srandom((unsigned long) u);
22031b11b783SRuslan Ermilov 		u = srand_seed;
22041b11b783SRuslan Ermilov 		srand_seed = tmp;
22052a55deb1SDavid E. O'Brien 		break;
22062a55deb1SDavid E. O'Brien 	case FTOUPPER:
22072a55deb1SDavid E. O'Brien 	case FTOLOWER:
2208f39dd6a9SWarner Losh 		if (t == FTOUPPER)
2209f39dd6a9SWarner Losh 			buf = nawk_toupper(getsval(x));
2210f39dd6a9SWarner Losh 		else
2211f39dd6a9SWarner Losh 			buf = nawk_tolower(getsval(x));
22122a55deb1SDavid E. O'Brien 		tempfree(x);
22132a55deb1SDavid E. O'Brien 		x = gettemp();
22142a55deb1SDavid E. O'Brien 		setsval(x, buf);
22152a55deb1SDavid E. O'Brien 		free(buf);
22162a55deb1SDavid E. O'Brien 		return x;
22172a55deb1SDavid E. O'Brien 	case FFLUSH:
2218007c6572SDag-Erling Smørgrav 		if (isrec(x) || strlen(getsval(x)) == 0) {
2219007c6572SDag-Erling Smørgrav 			flush_all();	/* fflush() or fflush("") -> all */
2220007c6572SDag-Erling Smørgrav 			u = 0;
2221f39dd6a9SWarner Losh 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
22222a55deb1SDavid E. O'Brien 			u = EOF;
22232a55deb1SDavid E. O'Brien 		else
22242a55deb1SDavid E. O'Brien 			u = fflush(fp);
22252a55deb1SDavid E. O'Brien 		break;
2226*8d457988SWarner Losh 	case FMKTIME:
2227*8d457988SWarner Losh 		memset(&tmbuf, 0, sizeof(tmbuf));
2228*8d457988SWarner Losh 		tm = &tmbuf;
2229*8d457988SWarner Losh 		t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2230*8d457988SWarner Losh 		    &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2231*8d457988SWarner Losh 		    &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2232*8d457988SWarner Losh 		switch (t) {
2233*8d457988SWarner Losh 		case 6:
2234*8d457988SWarner Losh 			tm->tm_isdst = -1;	/* let mktime figure it out */
2235*8d457988SWarner Losh 			/* FALLTHROUGH */
2236*8d457988SWarner Losh 		case 7:
2237*8d457988SWarner Losh 			tm->tm_year -= 1900;
2238*8d457988SWarner Losh 			tm->tm_mon--;
2239*8d457988SWarner Losh 			u = mktime(tm);
2240*8d457988SWarner Losh 			break;
2241*8d457988SWarner Losh 		default:
2242*8d457988SWarner Losh 			u = -1;
2243*8d457988SWarner Losh 			break;
2244*8d457988SWarner Losh 		}
2245*8d457988SWarner Losh 		break;
2246eb690a05SWarner Losh 	case FSYSTIME:
2247eb690a05SWarner Losh 		u = time((time_t *) 0);
2248eb690a05SWarner Losh 		break;
2249eb690a05SWarner Losh 	case FSTRFTIME:
2250eb690a05SWarner Losh 		/* strftime([format [,timestamp]]) */
2251eb690a05SWarner Losh 		if (nextarg) {
2252eb690a05SWarner Losh 			y = execute(nextarg);
2253eb690a05SWarner Losh 			nextarg = nextarg->nnext;
2254eb690a05SWarner Losh 			tv = (time_t) getfval(y);
2255eb690a05SWarner Losh 			tempfree(y);
2256eb690a05SWarner Losh 		} else
2257eb690a05SWarner Losh 			tv = time((time_t *) 0);
2258eb690a05SWarner Losh 		tm = localtime(&tv);
2259eb690a05SWarner Losh 		if (tm == NULL)
2260eb690a05SWarner Losh 			FATAL("bad time %ld", (long)tv);
2261eb690a05SWarner Losh 
2262eb690a05SWarner Losh 		if (isrec(x)) {
2263eb690a05SWarner Losh 			/* format argument not provided, use default */
2264eb690a05SWarner Losh 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2265eb690a05SWarner Losh 		} else
2266eb690a05SWarner Losh 			fmt = tostring(getsval(x));
2267eb690a05SWarner Losh 
2268eb690a05SWarner Losh 		sz = 32;
2269eb690a05SWarner Losh 		buf = NULL;
2270eb690a05SWarner Losh 		do {
2271eb690a05SWarner Losh 			if ((buf = realloc(buf, (sz *= 2))) == NULL)
2272eb690a05SWarner Losh 				FATAL("out of memory in strftime");
2273eb690a05SWarner Losh 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2274eb690a05SWarner Losh 
2275eb690a05SWarner Losh 		y = gettemp();
2276eb690a05SWarner Losh 		setsval(y, buf);
2277eb690a05SWarner Losh 		free(fmt);
2278eb690a05SWarner Losh 		free(buf);
2279eb690a05SWarner Losh 
2280eb690a05SWarner Losh 		return y;
22812a55deb1SDavid E. O'Brien 	default:	/* can't happen */
22822a55deb1SDavid E. O'Brien 		FATAL("illegal function type %d", t);
22832a55deb1SDavid E. O'Brien 		break;
22842a55deb1SDavid E. O'Brien 	}
22852a55deb1SDavid E. O'Brien 	tempfree(x);
22862a55deb1SDavid E. O'Brien 	x = gettemp();
22872a55deb1SDavid E. O'Brien 	setfval(x, u);
228810ce5b99SWarner Losh 	if (nextarg != NULL) {
22892a55deb1SDavid E. O'Brien 		WARNING("warning: function has too many arguments");
2290f32a6403SWarner Losh 		for ( ; nextarg; nextarg = nextarg->nnext) {
2291f32a6403SWarner Losh 			y = execute(nextarg);
2292f32a6403SWarner Losh 			tempfree(y);
2293f32a6403SWarner Losh 		}
22942a55deb1SDavid E. O'Brien 	}
22952a55deb1SDavid E. O'Brien 	return(x);
22962a55deb1SDavid E. O'Brien }
22972a55deb1SDavid E. O'Brien 
22982a55deb1SDavid E. O'Brien Cell *printstat(Node **a, int n)	/* print a[0] */
22992a55deb1SDavid E. O'Brien {
23002a55deb1SDavid E. O'Brien 	Node *x;
23012a55deb1SDavid E. O'Brien 	Cell *y;
23022a55deb1SDavid E. O'Brien 	FILE *fp;
23032a55deb1SDavid E. O'Brien 
230410ce5b99SWarner Losh 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
23052a55deb1SDavid E. O'Brien 		fp = stdout;
23062a55deb1SDavid E. O'Brien 	else
23072a55deb1SDavid E. O'Brien 		fp = redirect(ptoi(a[1]), a[2]);
23082a55deb1SDavid E. O'Brien 	for (x = a[0]; x != NULL; x = x->nnext) {
23092a55deb1SDavid E. O'Brien 		y = execute(x);
2310813da98dSDavid E. O'Brien 		fputs(getpssval(y), fp);
23112a55deb1SDavid E. O'Brien 		tempfree(y);
23122a55deb1SDavid E. O'Brien 		if (x->nnext == NULL)
2313b5253557SWarner Losh 			fputs(getsval(orsloc), fp);
23142a55deb1SDavid E. O'Brien 		else
2315b5253557SWarner Losh 			fputs(getsval(ofsloc), fp);
23162a55deb1SDavid E. O'Brien 	}
231710ce5b99SWarner Losh 	if (a[1] != NULL)
23182a55deb1SDavid E. O'Brien 		fflush(fp);
23192a55deb1SDavid E. O'Brien 	if (ferror(fp))
23202a55deb1SDavid E. O'Brien 		FATAL("write error on %s", filename(fp));
23212a55deb1SDavid E. O'Brien 	return(True);
23222a55deb1SDavid E. O'Brien }
23232a55deb1SDavid E. O'Brien 
23242a55deb1SDavid E. O'Brien Cell *nullproc(Node **a, int n)
23252a55deb1SDavid E. O'Brien {
23262a55deb1SDavid E. O'Brien 	return 0;
23272a55deb1SDavid E. O'Brien }
23282a55deb1SDavid E. O'Brien 
23292a55deb1SDavid E. O'Brien 
23302a55deb1SDavid E. O'Brien FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
23312a55deb1SDavid E. O'Brien {
23322a55deb1SDavid E. O'Brien 	FILE *fp;
23332a55deb1SDavid E. O'Brien 	Cell *x;
23342a55deb1SDavid E. O'Brien 	char *fname;
23352a55deb1SDavid E. O'Brien 
23362a55deb1SDavid E. O'Brien 	x = execute(b);
23372a55deb1SDavid E. O'Brien 	fname = getsval(x);
2338f39dd6a9SWarner Losh 	fp = openfile(a, fname, NULL);
23392a55deb1SDavid E. O'Brien 	if (fp == NULL)
23402a55deb1SDavid E. O'Brien 		FATAL("can't open file %s", fname);
23412a55deb1SDavid E. O'Brien 	tempfree(x);
23422a55deb1SDavid E. O'Brien 	return fp;
23432a55deb1SDavid E. O'Brien }
23442a55deb1SDavid E. O'Brien 
/*
 * One slot of the open-stream table.  A slot whose fp is NULL is free
 * for reuse by openfile().
 */
struct files {
	FILE	*fp;		/* the open stream */
	const char	*fname;	/* name it was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table; allocated by stdinit(), grown by openfile() */

size_t nfiles;		/* number of slots currently in the table */
23522a55deb1SDavid E. O'Brien 
2353f39dd6a9SWarner Losh static void stdinit(void)	/* in case stdin, etc., are not constants */
23542a55deb1SDavid E. O'Brien {
2355d86a0988SRuslan Ermilov 	nfiles = FOPEN_MAX;
2356f39dd6a9SWarner Losh 	files = (struct files *) calloc(nfiles, sizeof(*files));
2357d86a0988SRuslan Ermilov 	if (files == NULL)
2358f39dd6a9SWarner Losh 		FATAL("can't allocate file memory for %zu files", nfiles);
23592a55deb1SDavid E. O'Brien         files[0].fp = stdin;
2360f32a6403SWarner Losh 	files[0].fname = tostring("/dev/stdin");
2361d86a0988SRuslan Ermilov 	files[0].mode = LT;
23622a55deb1SDavid E. O'Brien         files[1].fp = stdout;
2363f32a6403SWarner Losh 	files[1].fname = tostring("/dev/stdout");
2364d86a0988SRuslan Ermilov 	files[1].mode = GT;
23652a55deb1SDavid E. O'Brien         files[2].fp = stderr;
2366f32a6403SWarner Losh 	files[2].fname = tostring("/dev/stderr");
2367d86a0988SRuslan Ermilov 	files[2].mode = GT;
23682a55deb1SDavid E. O'Brien }
23692a55deb1SDavid E. O'Brien 
2370f39dd6a9SWarner Losh FILE *openfile(int a, const char *us, bool *pnewflag)
23712a55deb1SDavid E. O'Brien {
2372813da98dSDavid E. O'Brien 	const char *s = us;
2373f39dd6a9SWarner Losh 	size_t i;
2374f39dd6a9SWarner Losh 	int m;
237510ce5b99SWarner Losh 	FILE *fp = NULL;
23762a55deb1SDavid E. O'Brien 
23772a55deb1SDavid E. O'Brien 	if (*s == '\0')
23782a55deb1SDavid E. O'Brien 		FATAL("null file name in print or getline");
2379d86a0988SRuslan Ermilov 	for (i = 0; i < nfiles; i++)
2380f39dd6a9SWarner Losh 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2381f39dd6a9SWarner Losh 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2382f39dd6a9SWarner Losh 		     a == FFLUSH)) {
2383f39dd6a9SWarner Losh 			if (pnewflag)
2384f39dd6a9SWarner Losh 				*pnewflag = false;
23852a55deb1SDavid E. O'Brien 			return files[i].fp;
23862a55deb1SDavid E. O'Brien 		}
23872a55deb1SDavid E. O'Brien 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
23882a55deb1SDavid E. O'Brien 		return NULL;
23892a55deb1SDavid E. O'Brien 
2390d86a0988SRuslan Ermilov 	for (i = 0; i < nfiles; i++)
239110ce5b99SWarner Losh 		if (files[i].fp == NULL)
23922a55deb1SDavid E. O'Brien 			break;
2393d86a0988SRuslan Ermilov 	if (i >= nfiles) {
2394d86a0988SRuslan Ermilov 		struct files *nf;
2395f39dd6a9SWarner Losh 		size_t nnf = nfiles + FOPEN_MAX;
2396f39dd6a9SWarner Losh 		nf = (struct files *) realloc(files, nnf * sizeof(*nf));
2397d86a0988SRuslan Ermilov 		if (nf == NULL)
2398f39dd6a9SWarner Losh 			FATAL("cannot grow files for %s and %zu files", s, nnf);
2399d86a0988SRuslan Ermilov 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2400d86a0988SRuslan Ermilov 		nfiles = nnf;
2401d86a0988SRuslan Ermilov 		files = nf;
2402d86a0988SRuslan Ermilov 	}
24032a55deb1SDavid E. O'Brien 	fflush(stdout);	/* force a semblance of order */
24042a55deb1SDavid E. O'Brien 	m = a;
24052a55deb1SDavid E. O'Brien 	if (a == GT) {
24062a55deb1SDavid E. O'Brien 		fp = fopen(s, "w");
24072a55deb1SDavid E. O'Brien 	} else if (a == APPEND) {
24082a55deb1SDavid E. O'Brien 		fp = fopen(s, "a");
24092a55deb1SDavid E. O'Brien 		m = GT;	/* so can mix > and >> */
24102a55deb1SDavid E. O'Brien 	} else if (a == '|') {	/* output pipe */
24112a55deb1SDavid E. O'Brien 		fp = popen(s, "w");
24122a55deb1SDavid E. O'Brien 	} else if (a == LE) {	/* input pipe */
24132a55deb1SDavid E. O'Brien 		fp = popen(s, "r");
24142a55deb1SDavid E. O'Brien 	} else if (a == LT) {	/* getline <file */
24152a55deb1SDavid E. O'Brien 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
24162a55deb1SDavid E. O'Brien 	} else	/* can't happen */
24172a55deb1SDavid E. O'Brien 		FATAL("illegal redirection %d", a);
24182a55deb1SDavid E. O'Brien 	if (fp != NULL) {
24192a55deb1SDavid E. O'Brien 		files[i].fname = tostring(s);
24202a55deb1SDavid E. O'Brien 		files[i].fp = fp;
24212a55deb1SDavid E. O'Brien 		files[i].mode = m;
2422f39dd6a9SWarner Losh 		if (pnewflag)
2423f39dd6a9SWarner Losh 			*pnewflag = true;
2424f39dd6a9SWarner Losh 		if (fp != stdin && fp != stdout && fp != stderr)
2425f39dd6a9SWarner Losh 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
24262a55deb1SDavid E. O'Brien 	}
24272a55deb1SDavid E. O'Brien 	return fp;
24282a55deb1SDavid E. O'Brien }
24292a55deb1SDavid E. O'Brien 
2430813da98dSDavid E. O'Brien const char *filename(FILE *fp)
24312a55deb1SDavid E. O'Brien {
2432f39dd6a9SWarner Losh 	size_t i;
24332a55deb1SDavid E. O'Brien 
2434d86a0988SRuslan Ermilov 	for (i = 0; i < nfiles; i++)
24352a55deb1SDavid E. O'Brien 		if (fp == files[i].fp)
24362a55deb1SDavid E. O'Brien 			return files[i].fname;
24372a55deb1SDavid E. O'Brien 	return "???";
24382a55deb1SDavid E. O'Brien }
24392a55deb1SDavid E. O'Brien 
24402a55deb1SDavid E. O'Brien Cell *closefile(Node **a, int n)
24412a55deb1SDavid E. O'Brien {
24422a55deb1SDavid E. O'Brien  	Cell *x;
2443f39dd6a9SWarner Losh 	size_t i;
2444f39dd6a9SWarner Losh 	bool stat;
24452a55deb1SDavid E. O'Brien 
24462a55deb1SDavid E. O'Brien  	x = execute(a[0]);
24472a55deb1SDavid E. O'Brien  	getsval(x);
2448f39dd6a9SWarner Losh 	stat = true;
2449d86a0988SRuslan Ermilov  	for (i = 0; i < nfiles; i++) {
2450f39dd6a9SWarner Losh 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2451f39dd6a9SWarner Losh 			continue;
2452f32a6403SWarner Losh 		if (files[i].mode == GT || files[i].mode == '|')
2453f32a6403SWarner Losh 			fflush(files[i].fp);
2454f32a6403SWarner Losh 		if (ferror(files[i].fp)) {
2455f32a6403SWarner Losh 			if ((files[i].mode == GT && files[i].fp != stderr)
2456f32a6403SWarner Losh 			  || files[i].mode == '|')
2457f32a6403SWarner Losh 				FATAL("write error on %s", files[i].fname);
2458f32a6403SWarner Losh 			else
2459f32a6403SWarner Losh 				WARNING("i/o error occurred on %s", files[i].fname);
2460f32a6403SWarner Losh 		}
2461f39dd6a9SWarner Losh 		if (files[i].fp == stdin || files[i].fp == stdout ||
2462f39dd6a9SWarner Losh 		    files[i].fp == stderr)
2463f39dd6a9SWarner Losh 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2464f39dd6a9SWarner Losh 		else if (files[i].mode == '|' || files[i].mode == LE)
2465f39dd6a9SWarner Losh 			stat = pclose(files[i].fp) == -1;
24662a55deb1SDavid E. O'Brien 		else
2467f39dd6a9SWarner Losh 			stat = fclose(files[i].fp) == EOF;
2468f39dd6a9SWarner Losh 		if (stat)
2469f32a6403SWarner Losh 			WARNING("i/o error occurred closing %s", files[i].fname);
24702a55deb1SDavid E. O'Brien 		xfree(files[i].fname);
24712a55deb1SDavid E. O'Brien 		files[i].fname = NULL;	/* watch out for ref thru this */
24722a55deb1SDavid E. O'Brien 		files[i].fp = NULL;
2473f39dd6a9SWarner Losh 		break;
24742a55deb1SDavid E. O'Brien  	}
24752a55deb1SDavid E. O'Brien  	tempfree(x);
24762a55deb1SDavid E. O'Brien  	x = gettemp();
2477f39dd6a9SWarner Losh 	setfval(x, (Awkfloat) (stat ? -1 : 0));
24782a55deb1SDavid E. O'Brien  	return(x);
24792a55deb1SDavid E. O'Brien }
24802a55deb1SDavid E. O'Brien 
24812a55deb1SDavid E. O'Brien void closeall(void)
24822a55deb1SDavid E. O'Brien {
2483f39dd6a9SWarner Losh 	size_t i;
2484f39dd6a9SWarner Losh 	bool stat = false;
24852a55deb1SDavid E. O'Brien 
2486f39dd6a9SWarner Losh 	for (i = 0; i < nfiles; i++) {
2487f39dd6a9SWarner Losh 		if (! files[i].fp)
2488f39dd6a9SWarner Losh 			continue;
2489f32a6403SWarner Losh 		if (files[i].mode == GT || files[i].mode == '|')
2490f32a6403SWarner Losh 			fflush(files[i].fp);
2491f32a6403SWarner Losh 		if (ferror(files[i].fp)) {
2492f32a6403SWarner Losh 			if ((files[i].mode == GT && files[i].fp != stderr)
2493f32a6403SWarner Losh 			  || files[i].mode == '|')
2494f32a6403SWarner Losh 				FATAL("write error on %s", files[i].fname);
2495f32a6403SWarner Losh 			else
2496f32a6403SWarner Losh 				WARNING("i/o error occurred on %s", files[i].fname);
2497f32a6403SWarner Losh 		}
2498f32a6403SWarner Losh 		if (files[i].fp == stdin || files[i].fp == stdout ||
2499f32a6403SWarner Losh 		    files[i].fp == stderr)
2500f39dd6a9SWarner Losh 			continue;
25012a55deb1SDavid E. O'Brien 		if (files[i].mode == '|' || files[i].mode == LE)
2502f39dd6a9SWarner Losh 			stat = pclose(files[i].fp) == -1;
25032a55deb1SDavid E. O'Brien 		else
2504f39dd6a9SWarner Losh 			stat = fclose(files[i].fp) == EOF;
2505f39dd6a9SWarner Losh 		if (stat)
2506f32a6403SWarner Losh 			WARNING("i/o error occurred while closing %s", files[i].fname);
25072a55deb1SDavid E. O'Brien 	}
25082a55deb1SDavid E. O'Brien }
25092a55deb1SDavid E. O'Brien 
2510f39dd6a9SWarner Losh static void flush_all(void)
2511007c6572SDag-Erling Smørgrav {
2512f39dd6a9SWarner Losh 	size_t i;
2513007c6572SDag-Erling Smørgrav 
2514d86a0988SRuslan Ermilov 	for (i = 0; i < nfiles; i++)
2515007c6572SDag-Erling Smørgrav 		if (files[i].fp)
2516007c6572SDag-Erling Smørgrav 			fflush(files[i].fp);
2517007c6572SDag-Erling Smørgrav }
2518007c6572SDag-Erling Smørgrav 
2519f39dd6a9SWarner Losh void backsub(char **pb_ptr, const char **sptr_ptr);
25202a55deb1SDavid E. O'Brien 
2521f32a6403SWarner Losh Cell *dosub(Node **a, int subop)        /* sub and gsub */
25222a55deb1SDavid E. O'Brien {
25232a55deb1SDavid E. O'Brien 	fa *pfa;
25241023317aSWarner Losh 	int tempstat = 0;
2525f32a6403SWarner Losh 	char *repl;
2526f32a6403SWarner Losh 	Cell *x;
2527f32a6403SWarner Losh 
2528f32a6403SWarner Losh 	char *buf = NULL;
2529f32a6403SWarner Losh 	char *pb = NULL;
25302a55deb1SDavid E. O'Brien 	int bufsz = recsize;
25312a55deb1SDavid E. O'Brien 
2532f32a6403SWarner Losh 	const char *r, *s;
2533f32a6403SWarner Losh 	const char *start;
2534f32a6403SWarner Losh 	const char *noempty = NULL;      /* empty match disallowed here */
2535f32a6403SWarner Losh 	size_t m = 0;                    /* match count */
253617853db4SWarner Losh 	size_t whichm = 0;               /* which match to select, 0 = global */
2537f32a6403SWarner Losh 	int mtype;                       /* match type */
2538f32a6403SWarner Losh 
2539f32a6403SWarner Losh 	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
2540f32a6403SWarner Losh 		pfa = (fa *) a[1];
2541f32a6403SWarner Losh 	} else {
2542f32a6403SWarner Losh 		x = execute(a[1]);
2543f32a6403SWarner Losh 		pfa = makedfa(getsval(x), 1);
25442a55deb1SDavid E. O'Brien 		tempfree(x);
25452a55deb1SDavid E. O'Brien 	}
25462a55deb1SDavid E. O'Brien 
2547f32a6403SWarner Losh 	x = execute(a[2]);	/* replacement string */
2548f32a6403SWarner Losh 	repl = tostring(getsval(x));
2549f32a6403SWarner Losh 	tempfree(x);
25502a55deb1SDavid E. O'Brien 
2551f32a6403SWarner Losh 	switch (subop) {
2552f32a6403SWarner Losh 	case SUB:
2553f32a6403SWarner Losh 		whichm = 1;
2554f32a6403SWarner Losh 		x = execute(a[3]);    /* source string */
2555f32a6403SWarner Losh 		break;
2556f32a6403SWarner Losh 	case GSUB:
2557f32a6403SWarner Losh 		whichm = 0;
2558f32a6403SWarner Losh 		x = execute(a[3]);    /* source string */
2559f32a6403SWarner Losh 		break;
2560f32a6403SWarner Losh 	default:
2561f32a6403SWarner Losh 		FATAL("dosub: unrecognized subop: %d", subop);
25622a55deb1SDavid E. O'Brien 	}
2563f32a6403SWarner Losh 
2564f32a6403SWarner Losh 	start = getsval(x);
2565f32a6403SWarner Losh 	while (pmatch(pfa, start)) {
2566f32a6403SWarner Losh 		if (buf == NULL) {
2567f32a6403SWarner Losh 			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
2568f32a6403SWarner Losh 				FATAL("out of memory in dosub");
25692a55deb1SDavid E. O'Brien 			tempstat = pfa->initstat;
25702a55deb1SDavid E. O'Brien 			pfa->initstat = 2;
2571f32a6403SWarner Losh 		}
2572f32a6403SWarner Losh 
2573f32a6403SWarner Losh 		/* match types */
2574f32a6403SWarner Losh 		#define	MT_IGNORE  0  /* unselected or invalid */
2575f32a6403SWarner Losh 		#define MT_INSERT  1  /* selected, empty */
2576f32a6403SWarner Losh 		#define MT_REPLACE 2  /* selected, not empty */
2577f32a6403SWarner Losh 
2578f32a6403SWarner Losh 		/* an empty match just after replacement is invalid */
2579f32a6403SWarner Losh 
2580f32a6403SWarner Losh 		if (patbeg == noempty && patlen == 0) {
2581f32a6403SWarner Losh 			mtype = MT_IGNORE;    /* invalid, not counted */
2582f32a6403SWarner Losh 		} else if (whichm == ++m || whichm == 0) {
2583f32a6403SWarner Losh 			mtype = patlen ? MT_REPLACE : MT_INSERT;
2584f32a6403SWarner Losh 		} else {
2585f32a6403SWarner Losh 			mtype = MT_IGNORE;    /* unselected, but counted */
2586f32a6403SWarner Losh 		}
2587f32a6403SWarner Losh 
2588f32a6403SWarner Losh 		/* leading text: */
2589f32a6403SWarner Losh 		if (patbeg > start) {
2590f32a6403SWarner Losh 			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2591f32a6403SWarner Losh 				recsize, &pb, "dosub");
2592f32a6403SWarner Losh 			s = start;
2593f32a6403SWarner Losh 			while (s < patbeg)
2594f32a6403SWarner Losh 				*pb++ = *s++;
2595f32a6403SWarner Losh 		}
2596f32a6403SWarner Losh 
2597f32a6403SWarner Losh 		if (mtype == MT_IGNORE)
2598f32a6403SWarner Losh 			goto matching_text;  /* skip replacement text */
2599f32a6403SWarner Losh 
2600f32a6403SWarner Losh 		r = repl;
2601f32a6403SWarner Losh 		while (*r != 0) {
2602f32a6403SWarner Losh 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2603f32a6403SWarner Losh 			if (*r == '\\') {
2604f32a6403SWarner Losh 				backsub(&pb, &r);
2605f32a6403SWarner Losh 			} else if (*r == '&') {
2606f32a6403SWarner Losh 				r++;
2607f32a6403SWarner Losh 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2608f32a6403SWarner Losh 					&pb, "dosub");
2609f32a6403SWarner Losh 				for (s = patbeg; s < patbeg+patlen; )
2610f32a6403SWarner Losh 					*pb++ = *s++;
2611f32a6403SWarner Losh 			} else {
2612f32a6403SWarner Losh 				*pb++ = *r++;
26132a55deb1SDavid E. O'Brien 			}
26142a55deb1SDavid E. O'Brien 		}
2615f32a6403SWarner Losh 
2616f32a6403SWarner Losh matching_text:
2617f32a6403SWarner Losh 		if (mtype == MT_REPLACE || *patbeg == '\0')
2618f32a6403SWarner Losh 			goto next_search;  /* skip matching text */
2619f32a6403SWarner Losh 
2620f32a6403SWarner Losh 		if (patlen == 0)
2621f32a6403SWarner Losh 			patlen = u8_nextlen(patbeg);
2622f32a6403SWarner Losh 		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2623f32a6403SWarner Losh 		s = patbeg;
2624f32a6403SWarner Losh 		while (s < patbeg + patlen)
2625f32a6403SWarner Losh 			*pb++ = *s++;
2626f32a6403SWarner Losh 
2627f32a6403SWarner Losh next_search:
2628f32a6403SWarner Losh 		start = patbeg + patlen;
2629f32a6403SWarner Losh 		if (m == whichm || *patbeg == '\0')
2630f32a6403SWarner Losh 			break;
2631f32a6403SWarner Losh 		if (mtype == MT_REPLACE)
2632f32a6403SWarner Losh 			noempty = start;
2633f32a6403SWarner Losh 
2634f32a6403SWarner Losh 		#undef MT_IGNORE
2635f32a6403SWarner Losh 		#undef MT_INSERT
2636f32a6403SWarner Losh 		#undef MT_REPLACE
26372a55deb1SDavid E. O'Brien 	}
2638f32a6403SWarner Losh 
2639f32a6403SWarner Losh 	xfree(repl);
2640f32a6403SWarner Losh 
2641f32a6403SWarner Losh 	if (buf != NULL) {
26422a55deb1SDavid E. O'Brien 		pfa->initstat = tempstat;
2643f32a6403SWarner Losh 
2644f32a6403SWarner Losh 		/* trailing text */
2645f32a6403SWarner Losh 		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2646f32a6403SWarner Losh 		while ((*pb++ = *start++) != '\0')
2647f32a6403SWarner Losh 			;
2648f32a6403SWarner Losh 
2649f32a6403SWarner Losh 		setsval(x, buf);
2650f32a6403SWarner Losh 		free(buf);
26512a55deb1SDavid E. O'Brien 	}
2652f32a6403SWarner Losh 
26532a55deb1SDavid E. O'Brien 	tempfree(x);
26542a55deb1SDavid E. O'Brien 	x = gettemp();
26552a55deb1SDavid E. O'Brien 	x->tval = NUM;
2656f32a6403SWarner Losh 	x->fval = m;
2657f32a6403SWarner Losh 	return x;
26582a55deb1SDavid E. O'Brien }
26592a55deb1SDavid E. O'Brien 
2660eb690a05SWarner Losh Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2661eb690a05SWarner Losh 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2662eb690a05SWarner Losh {
2663eb690a05SWarner Losh 	Cell *x, *y, *res, *h;
2664eb690a05SWarner Losh 	char *rptr;
2665eb690a05SWarner Losh 	const char *sptr;
2666eb690a05SWarner Losh 	char *buf, *pb;
2667eb690a05SWarner Losh 	const char *t, *q;
2668eb690a05SWarner Losh 	fa *pfa;
2669eb690a05SWarner Losh 	int mflag, tempstat, num, whichm;
2670eb690a05SWarner Losh 	int bufsz = recsize;
2671eb690a05SWarner Losh 
2672eb690a05SWarner Losh 	if ((buf = malloc(bufsz)) == NULL)
2673eb690a05SWarner Losh 		FATAL("out of memory in gensub");
2674eb690a05SWarner Losh 	mflag = 0;	/* if mflag == 0, can replace empty string */
2675eb690a05SWarner Losh 	num = 0;
2676eb690a05SWarner Losh 	x = execute(a[4]);	/* source string */
2677eb690a05SWarner Losh 	t = getsval(x);
2678eb690a05SWarner Losh 	res = copycell(x);	/* target string - initially copy of source */
2679eb690a05SWarner Losh 	res->csub = CTEMP;	/* result values are temporary */
2680eb690a05SWarner Losh 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2681eb690a05SWarner Losh 		pfa = (fa *) a[1];	/* regular expression */
2682eb690a05SWarner Losh 	else {
2683eb690a05SWarner Losh 		y = execute(a[1]);
2684eb690a05SWarner Losh 		pfa = makedfa(getsval(y), 1);
2685eb690a05SWarner Losh 		tempfree(y);
2686eb690a05SWarner Losh 	}
2687eb690a05SWarner Losh 	y = execute(a[2]);	/* replacement string */
2688eb690a05SWarner Losh 	h = execute(a[3]);	/* which matches should be replaced */
2689eb690a05SWarner Losh 	sptr = getsval(h);
2690eb690a05SWarner Losh 	if (sptr[0] == 'g' || sptr[0] == 'G')
2691eb690a05SWarner Losh 		whichm = -1;
2692eb690a05SWarner Losh 	else {
2693eb690a05SWarner Losh 		/*
2694eb690a05SWarner Losh 		 * The specified number is index of replacement, starting
2695eb690a05SWarner Losh 		 * from 1. GNU awk treats index lower than 0 same as
2696eb690a05SWarner Losh 		 * 1, we do same for compatibility.
2697eb690a05SWarner Losh 		 */
2698eb690a05SWarner Losh 		whichm = (int) getfval(h) - 1;
2699eb690a05SWarner Losh 		if (whichm < 0)
2700eb690a05SWarner Losh 			whichm = 0;
2701eb690a05SWarner Losh 	}
2702eb690a05SWarner Losh 	tempfree(h);
2703eb690a05SWarner Losh 
2704eb690a05SWarner Losh 	if (pmatch(pfa, t)) {
2705eb690a05SWarner Losh 		char *sl;
2706eb690a05SWarner Losh 
2707eb690a05SWarner Losh 		tempstat = pfa->initstat;
2708eb690a05SWarner Losh 		pfa->initstat = 2;
2709eb690a05SWarner Losh 		pb = buf;
2710eb690a05SWarner Losh 		rptr = getsval(y);
2711eb690a05SWarner Losh 		/*
2712eb690a05SWarner Losh 		 * XXX if there are any backreferences in subst string,
2713eb690a05SWarner Losh 		 * complain now.
2714eb690a05SWarner Losh 		 */
2715eb690a05SWarner Losh 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2716eb690a05SWarner Losh 			if (strchr("0123456789", sl[1])) {
2717eb690a05SWarner Losh 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2718eb690a05SWarner Losh 			}
2719eb690a05SWarner Losh 		}
2720eb690a05SWarner Losh 
2721eb690a05SWarner Losh 		do {
2722eb690a05SWarner Losh 			if (whichm >= 0 && whichm != num) {
2723eb690a05SWarner Losh 				num++;
2724eb690a05SWarner Losh 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2725eb690a05SWarner Losh 
2726eb690a05SWarner Losh 				/* copy the part of string up to and including
2727eb690a05SWarner Losh 				 * match to output buffer */
2728eb690a05SWarner Losh 				while (t < patbeg + patlen)
2729eb690a05SWarner Losh 					*pb++ = *t++;
2730eb690a05SWarner Losh 				continue;
2731eb690a05SWarner Losh 			}
2732eb690a05SWarner Losh 
2733eb690a05SWarner Losh 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2734eb690a05SWarner Losh 				if (mflag == 0) {	/* can replace empty */
2735eb690a05SWarner Losh 					num++;
2736eb690a05SWarner Losh 					sptr = rptr;
2737eb690a05SWarner Losh 					while (*sptr != 0) {
2738eb690a05SWarner Losh 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2739eb690a05SWarner Losh 						if (*sptr == '\\') {
2740eb690a05SWarner Losh 							backsub(&pb, &sptr);
2741eb690a05SWarner Losh 						} else if (*sptr == '&') {
2742eb690a05SWarner Losh 							sptr++;
2743eb690a05SWarner Losh 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2744eb690a05SWarner Losh 							for (q = patbeg; q < patbeg+patlen; )
2745eb690a05SWarner Losh 								*pb++ = *q++;
2746eb690a05SWarner Losh 						} else
2747eb690a05SWarner Losh 							*pb++ = *sptr++;
2748eb690a05SWarner Losh 					}
2749eb690a05SWarner Losh 				}
2750eb690a05SWarner Losh 				if (*t == 0)	/* at end */
2751eb690a05SWarner Losh 					goto done;
2752eb690a05SWarner Losh 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2753eb690a05SWarner Losh 				*pb++ = *t++;
2754eb690a05SWarner Losh 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2755eb690a05SWarner Losh 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2756eb690a05SWarner Losh 				mflag = 0;
2757eb690a05SWarner Losh 			}
2758eb690a05SWarner Losh 			else {	/* matched nonempty string */
2759eb690a05SWarner Losh 				num++;
2760eb690a05SWarner Losh 				sptr = t;
2761eb690a05SWarner Losh 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2762eb690a05SWarner Losh 				while (sptr < patbeg)
2763eb690a05SWarner Losh 					*pb++ = *sptr++;
2764eb690a05SWarner Losh 				sptr = rptr;
2765eb690a05SWarner Losh 				while (*sptr != 0) {
2766eb690a05SWarner Losh 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2767eb690a05SWarner Losh 					if (*sptr == '\\') {
2768eb690a05SWarner Losh 						backsub(&pb, &sptr);
2769eb690a05SWarner Losh 					} else if (*sptr == '&') {
2770eb690a05SWarner Losh 						sptr++;
2771eb690a05SWarner Losh 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2772eb690a05SWarner Losh 						for (q = patbeg; q < patbeg+patlen; )
2773eb690a05SWarner Losh 							*pb++ = *q++;
2774eb690a05SWarner Losh 					} else
2775eb690a05SWarner Losh 						*pb++ = *sptr++;
2776eb690a05SWarner Losh 				}
2777eb690a05SWarner Losh 				t = patbeg + patlen;
2778eb690a05SWarner Losh 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2779eb690a05SWarner Losh 					goto done;
2780eb690a05SWarner Losh 				if (pb > buf + bufsz)
2781eb690a05SWarner Losh 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2782eb690a05SWarner Losh 				mflag = 1;
2783eb690a05SWarner Losh 			}
2784eb690a05SWarner Losh 		} while (pmatch(pfa,t));
2785eb690a05SWarner Losh 		sptr = t;
2786eb690a05SWarner Losh 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2787eb690a05SWarner Losh 		while ((*pb++ = *sptr++) != 0)
2788eb690a05SWarner Losh 			;
2789eb690a05SWarner Losh 	done:	if (pb > buf + bufsz)
2790eb690a05SWarner Losh 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2791eb690a05SWarner Losh 		*pb = '\0';
2792eb690a05SWarner Losh 		setsval(res, buf);
2793eb690a05SWarner Losh 		pfa->initstat = tempstat;
2794eb690a05SWarner Losh 	}
2795eb690a05SWarner Losh 	tempfree(x);
2796eb690a05SWarner Losh 	tempfree(y);
2797eb690a05SWarner Losh 	free(buf);
2798eb690a05SWarner Losh 	return(res);
2799eb690a05SWarner Losh }
2800eb690a05SWarner Losh 
/*
 * backsub - expand one backslash sequence of a replacement string.
 *
 * On entry (*sptr_ptr)[0] is a backslash.  The expansion is stored at
 * *pb_ptr; both pointers are advanced past what was written and what was
 * consumed.  At most two bytes are written and no bounds are checked
 * here, so the caller must have made room beforehand.
 *
 * Input (as seen in the string)      Written     Left for the caller
 *   backslash x3 then '&'            "\&"        nothing
 *   backslash x2 then '&'            "\"         the '&'
 *   backslash x2 then other x        "\"  (POSIXLY_CORRECT set)   x
 *                                    "\\" (otherwise)             x
 *   backslash then '&'               "&"         nothing
 *   backslash then other x           "\"         x
 *
 * POSIXLY_CORRECT is looked up in the environment once, on the first call.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{						/* sptr[0] == '\\' */
	static bool env_checked = false;
	static bool do_posix = false;
	char *out = *pb_ptr;
	const char *in = *sptr_ptr;

	if (!env_checked) {
		env_checked = true;
		do_posix = (getenv("POSIXLY_CORRECT") != NULL);
	}

	if (in[1] == '&') {			/* literal & */
		in++;
		*out++ = *in++;
	} else if (in[1] != '\\') {		/* literal \ */
		*out++ = *in++;
	} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
		*out++ = '\\';
		*out++ = '&';
		in += 4;
	} else if (in[2] == '&') {		/* \\& -> \ + matched */
		*out++ = '\\';
		in += 2;
	} else if (do_posix) {			/* \\x -> \x */
		in++;
		*out++ = *in++;
	} else {				/* \\x -> \\x */
		*out++ = *in++;
		*out++ = *in++;
	}

	*pb_ptr = out;
	*sptr_ptr = in;
}
2837f32a6403SWarner Losh 
/*
 * wide_char_to_byte_str - encode one Unicode code point as UTF-8.
 *
 * rune    code point to encode.
 * outlen  receives the number of encoded bytes (1..4), not counting the
 *         terminating NUL; left untouched when NULL is returned.
 *
 * Returns a pointer to a NUL-terminated static buffer that is overwritten
 * by the next call, or NULL when rune cannot be encoded: negative, above
 * 0x10FFFF, or a UTF-16 surrogate (0xD800..0xDFFF), which has no valid
 * UTF-8 form.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	size_t len = 0;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;
	if (rune >= 0xD800 && rune <= 0xDFFF)	/* surrogates are not characters */
		return NULL;

	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		/* 0xxxxxxx */
		buf[len++] = (char) rune;
	} else if (rune <= 0x000007FF) {
		/* 110xxxxx 10xxxxxx */
		buf[len++] = (char) (0xC0 | (rune >> 6));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else if (rune <= 0x0000FFFF) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xE0 | (rune >> 12));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else {
		/* 0x00010000 - 0x10FFFF */
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		buf[len++] = (char) (0xF0 | (rune >> 18));
		buf[len++] = (char) (0x80 | ((rune >> 12) & 0x3F));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	}

	buf[len] = '\0';
	*outlen = len;

	return buf;
}
2875