12a55deb1SDavid E. O'Brien /**************************************************************** 22a55deb1SDavid E. O'Brien Copyright (C) Lucent Technologies 1997 32a55deb1SDavid E. O'Brien All Rights Reserved 42a55deb1SDavid E. O'Brien 52a55deb1SDavid E. O'Brien Permission to use, copy, modify, and distribute this software and 62a55deb1SDavid E. O'Brien its documentation for any purpose and without fee is hereby 72a55deb1SDavid E. O'Brien granted, provided that the above copyright notice appear in all 82a55deb1SDavid E. O'Brien copies and that both that the copyright notice and this 92a55deb1SDavid E. O'Brien permission notice and warranty disclaimer appear in supporting 102a55deb1SDavid E. O'Brien documentation, and that the name Lucent Technologies or any of 112a55deb1SDavid E. O'Brien its entities not be used in advertising or publicity pertaining 122a55deb1SDavid E. O'Brien to distribution of the software without specific, written prior 132a55deb1SDavid E. O'Brien permission. 142a55deb1SDavid E. O'Brien 152a55deb1SDavid E. O'Brien LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 162a55deb1SDavid E. O'Brien INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 172a55deb1SDavid E. O'Brien IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 182a55deb1SDavid E. O'Brien SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 192a55deb1SDavid E. O'Brien WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 202a55deb1SDavid E. O'Brien IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 212a55deb1SDavid E. O'Brien ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 222a55deb1SDavid E. O'Brien THIS SOFTWARE. 232a55deb1SDavid E. O'Brien ****************************************************************/ 242a55deb1SDavid E. O'Brien 252a55deb1SDavid E. O'Brien #define DEBUG 262a55deb1SDavid E. O'Brien #include <stdio.h> 272a55deb1SDavid E. 
O'Brien #include <ctype.h> 28f39dd6a9SWarner Losh #include <errno.h> 29f39dd6a9SWarner Losh #include <wctype.h> 30f39dd6a9SWarner Losh #include <fcntl.h> 312a55deb1SDavid E. O'Brien #include <setjmp.h> 32c263f9bfSRuslan Ermilov #include <limits.h> 332a55deb1SDavid E. O'Brien #include <math.h> 342a55deb1SDavid E. O'Brien #include <string.h> 352a55deb1SDavid E. O'Brien #include <stdlib.h> 362a55deb1SDavid E. O'Brien #include <time.h> 37b5253557SWarner Losh #include <sys/types.h> 38b5253557SWarner Losh #include <sys/wait.h> 392a55deb1SDavid E. O'Brien #include "awk.h" 40f39dd6a9SWarner Losh #include "awkgram.tab.h" 412a55deb1SDavid E. O'Brien 42f32a6403SWarner Losh 43f39dd6a9SWarner Losh static void stdinit(void); 44f39dd6a9SWarner Losh static void flush_all(void); 45f32a6403SWarner Losh static char *wide_char_to_byte_str(int rune, size_t *outlen); 462a55deb1SDavid E. O'Brien 47f39dd6a9SWarner Losh #if 1 48f39dd6a9SWarner Losh #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49f39dd6a9SWarner Losh #else 502a55deb1SDavid E. O'Brien void tempfree(Cell *p) { 512a55deb1SDavid E. O'Brien if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 522a55deb1SDavid E. O'Brien WARNING("bad csub %d in Cell %d %s", 532a55deb1SDavid E. O'Brien p->csub, p->ctype, p->sval); 542a55deb1SDavid E. O'Brien } 552a55deb1SDavid E. O'Brien if (istemp(p)) 562a55deb1SDavid E. O'Brien tfree(p); 572a55deb1SDavid E. O'Brien } 58f39dd6a9SWarner Losh #endif 592a55deb1SDavid E. O'Brien 60addad6afSRong-En Fan /* do we really need these? 
*/ 61addad6afSRong-En Fan /* #ifdef _NFILE */ 62addad6afSRong-En Fan /* #ifndef FOPEN_MAX */ 63addad6afSRong-En Fan /* #define FOPEN_MAX _NFILE */ 64addad6afSRong-En Fan /* #endif */ 65addad6afSRong-En Fan /* #endif */ 66addad6afSRong-En Fan /* */ 67addad6afSRong-En Fan /* #ifndef FOPEN_MAX */ 68addad6afSRong-En Fan /* #define FOPEN_MAX 40 */ /* max number of open files */ 69addad6afSRong-En Fan /* #endif */ 70addad6afSRong-En Fan /* */ 71addad6afSRong-En Fan /* #ifndef RAND_MAX */ 72addad6afSRong-En Fan /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73addad6afSRong-En Fan /* #endif */ 742a55deb1SDavid E. O'Brien 752a55deb1SDavid E. O'Brien jmp_buf env; 762a55deb1SDavid E. O'Brien extern int pairstack[]; 771b11b783SRuslan Ermilov extern Awkfloat srand_seed; 782a55deb1SDavid E. O'Brien 792a55deb1SDavid E. O'Brien Node *winner = NULL; /* root of parse tree */ 802a55deb1SDavid E. O'Brien Cell *tmps; /* free temporary cells for execution */ 812a55deb1SDavid E. O'Brien 82f39dd6a9SWarner Losh static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 832a55deb1SDavid E. O'Brien Cell *True = &truecell; 84f39dd6a9SWarner Losh static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 852a55deb1SDavid E. O'Brien Cell *False = &falsecell; 86f39dd6a9SWarner Losh static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 872a55deb1SDavid E. O'Brien Cell *jbreak = &breakcell; 88f39dd6a9SWarner Losh static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 892a55deb1SDavid E. O'Brien Cell *jcont = &contcell; 90f39dd6a9SWarner Losh static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 912a55deb1SDavid E. O'Brien Cell *jnext = &nextcell; 92f39dd6a9SWarner Losh static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 932a55deb1SDavid E. O'Brien Cell *jnextfile = &nextfilecell; 94f39dd6a9SWarner Losh static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 952a55deb1SDavid E. 
O'Brien Cell *jexit = &exitcell; 96f39dd6a9SWarner Losh static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 972a55deb1SDavid E. O'Brien Cell *jret = &retcell; 98f39dd6a9SWarner Losh static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 992a55deb1SDavid E. O'Brien 1002a55deb1SDavid E. O'Brien Node *curnode = NULL; /* the node being executed, for debugging */ 1012a55deb1SDavid E. O'Brien 1022a55deb1SDavid E. O'Brien /* buffer memory management */ 1032a55deb1SDavid E. O'Brien int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104813da98dSDavid E. O'Brien const char *whatrtn) 1052a55deb1SDavid E. O'Brien /* pbuf: address of pointer to buffer being managed 1062a55deb1SDavid E. O'Brien * psiz: address of buffer size variable 1072a55deb1SDavid E. O'Brien * minlen: minimum length of buffer needed 1082a55deb1SDavid E. O'Brien * quantum: buffer size quantum 1092a55deb1SDavid E. O'Brien * pbptr: address of movable pointer into buffer, or 0 if none 1102a55deb1SDavid E. O'Brien * whatrtn: name of the calling routine if failure should cause fatal error 1112a55deb1SDavid E. O'Brien * 1122a55deb1SDavid E. O'Brien * return 0 for realloc failure, !=0 for success 1132a55deb1SDavid E. O'Brien */ 1142a55deb1SDavid E. O'Brien { 1152a55deb1SDavid E. O'Brien if (minlen > *psiz) { 1162a55deb1SDavid E. O'Brien char *tbuf; 1172a55deb1SDavid E. O'Brien int rminlen = quantum ? minlen % quantum : 0; 1182a55deb1SDavid E. O'Brien int boff = pbptr ? *pbptr - *pbuf : 0; 1192a55deb1SDavid E. O'Brien /* round up to next multiple of quantum */ 1202a55deb1SDavid E. O'Brien if (rminlen) 1212a55deb1SDavid E. O'Brien minlen += quantum - rminlen; 1222a55deb1SDavid E. O'Brien tbuf = (char *) realloc(*pbuf, minlen); 123f39dd6a9SWarner Losh DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 1242a55deb1SDavid E. O'Brien if (tbuf == NULL) { 1252a55deb1SDavid E. 
O'Brien if (whatrtn) 1262a55deb1SDavid E. O'Brien FATAL("out of memory in %s", whatrtn); 1272a55deb1SDavid E. O'Brien return 0; 1282a55deb1SDavid E. O'Brien } 1292a55deb1SDavid E. O'Brien *pbuf = tbuf; 1302a55deb1SDavid E. O'Brien *psiz = minlen; 1312a55deb1SDavid E. O'Brien if (pbptr) 1322a55deb1SDavid E. O'Brien *pbptr = tbuf + boff; 1332a55deb1SDavid E. O'Brien } 1342a55deb1SDavid E. O'Brien return 1; 1352a55deb1SDavid E. O'Brien } 1362a55deb1SDavid E. O'Brien 1372a55deb1SDavid E. O'Brien void run(Node *a) /* execution of parse tree starts here */ 1382a55deb1SDavid E. O'Brien { 1392a55deb1SDavid E. O'Brien 1402a55deb1SDavid E. O'Brien stdinit(); 1412a55deb1SDavid E. O'Brien execute(a); 1422a55deb1SDavid E. O'Brien closeall(); 1432a55deb1SDavid E. O'Brien } 1442a55deb1SDavid E. O'Brien 1452a55deb1SDavid E. O'Brien Cell *execute(Node *u) /* execute a node of the parse tree */ 1462a55deb1SDavid E. O'Brien { 1472a55deb1SDavid E. O'Brien Cell *(*proc)(Node **, int); 1482a55deb1SDavid E. O'Brien Cell *x; 1492a55deb1SDavid E. O'Brien Node *a; 1502a55deb1SDavid E. O'Brien 1512a55deb1SDavid E. O'Brien if (u == NULL) 1522a55deb1SDavid E. O'Brien return(True); 1532a55deb1SDavid E. O'Brien for (a = u; ; a = a->nnext) { 1542a55deb1SDavid E. O'Brien curnode = a; 1552a55deb1SDavid E. O'Brien if (isvalue(a)) { 1562a55deb1SDavid E. O'Brien x = (Cell *) (a->narg[0]); 1572a55deb1SDavid E. O'Brien if (isfld(x) && !donefld) 1582a55deb1SDavid E. O'Brien fldbld(); 1592a55deb1SDavid E. O'Brien else if (isrec(x) && !donerec) 1602a55deb1SDavid E. O'Brien recbld(); 1612a55deb1SDavid E. O'Brien return(x); 1622a55deb1SDavid E. O'Brien } 1632a55deb1SDavid E. O'Brien if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 1642a55deb1SDavid E. O'Brien FATAL("illegal statement"); 1652a55deb1SDavid E. O'Brien proc = proctab[a->nobj-FIRSTTOKEN]; 1662a55deb1SDavid E. O'Brien x = (*proc)(a->narg, a->nobj); 1672a55deb1SDavid E. O'Brien if (isfld(x) && !donefld) 1682a55deb1SDavid E. 
O'Brien fldbld(); 1692a55deb1SDavid E. O'Brien else if (isrec(x) && !donerec) 1702a55deb1SDavid E. O'Brien recbld(); 1712a55deb1SDavid E. O'Brien if (isexpr(a)) 1722a55deb1SDavid E. O'Brien return(x); 1732a55deb1SDavid E. O'Brien if (isjump(x)) 1742a55deb1SDavid E. O'Brien return(x); 1752a55deb1SDavid E. O'Brien if (a->nnext == NULL) 1762a55deb1SDavid E. O'Brien return(x); 1772a55deb1SDavid E. O'Brien tempfree(x); 1782a55deb1SDavid E. O'Brien } 1792a55deb1SDavid E. O'Brien } 1802a55deb1SDavid E. O'Brien 1812a55deb1SDavid E. O'Brien 1822a55deb1SDavid E. O'Brien Cell *program(Node **a, int n) /* execute an awk program */ 1832a55deb1SDavid E. O'Brien { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 1842a55deb1SDavid E. O'Brien Cell *x; 1852a55deb1SDavid E. O'Brien 1862a55deb1SDavid E. O'Brien if (setjmp(env) != 0) 1872a55deb1SDavid E. O'Brien goto ex; 1882a55deb1SDavid E. O'Brien if (a[0]) { /* BEGIN */ 1892a55deb1SDavid E. O'Brien x = execute(a[0]); 1902a55deb1SDavid E. O'Brien if (isexit(x)) 1912a55deb1SDavid E. O'Brien return(True); 1922a55deb1SDavid E. O'Brien if (isjump(x)) 1932a55deb1SDavid E. O'Brien FATAL("illegal break, continue, next or nextfile from BEGIN"); 1942a55deb1SDavid E. O'Brien tempfree(x); 1952a55deb1SDavid E. O'Brien } 1962a55deb1SDavid E. O'Brien if (a[1] || a[2]) 197f39dd6a9SWarner Losh while (getrec(&record, &recsize, true) > 0) { 1982a55deb1SDavid E. O'Brien x = execute(a[1]); 1992a55deb1SDavid E. O'Brien if (isexit(x)) 2002a55deb1SDavid E. O'Brien break; 2012a55deb1SDavid E. O'Brien tempfree(x); 2022a55deb1SDavid E. O'Brien } 2032a55deb1SDavid E. O'Brien ex: 2042a55deb1SDavid E. O'Brien if (setjmp(env) != 0) /* handles exit within END */ 2052a55deb1SDavid E. O'Brien goto ex1; 2062a55deb1SDavid E. O'Brien if (a[2]) { /* END */ 2072a55deb1SDavid E. O'Brien x = execute(a[2]); 2082a55deb1SDavid E. O'Brien if (isbreak(x) || isnext(x) || iscont(x)) 2092a55deb1SDavid E. 
O'Brien FATAL("illegal break, continue, next or nextfile from END"); 2102a55deb1SDavid E. O'Brien tempfree(x); 2112a55deb1SDavid E. O'Brien } 2122a55deb1SDavid E. O'Brien ex1: 2132a55deb1SDavid E. O'Brien return(True); 2142a55deb1SDavid E. O'Brien } 2152a55deb1SDavid E. O'Brien 2162a55deb1SDavid E. O'Brien struct Frame { /* stack frame for awk function calls */ 2172a55deb1SDavid E. O'Brien int nargs; /* number of arguments in this call */ 2182a55deb1SDavid E. O'Brien Cell *fcncell; /* pointer to Cell for function */ 2192a55deb1SDavid E. O'Brien Cell **args; /* pointer to array of arguments after execute */ 2202a55deb1SDavid E. O'Brien Cell *retval; /* return value */ 2212a55deb1SDavid E. O'Brien }; 2222a55deb1SDavid E. O'Brien 2232a55deb1SDavid E. O'Brien #define NARGS 50 /* max args in a call */ 2242a55deb1SDavid E. O'Brien 2252a55deb1SDavid E. O'Brien struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 2262a55deb1SDavid E. O'Brien int nframe = 0; /* number of frames allocated */ 227f39dd6a9SWarner Losh struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 2282a55deb1SDavid E. O'Brien 2292a55deb1SDavid E. O'Brien Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 2302a55deb1SDavid E. O'Brien { 231f39dd6a9SWarner Losh static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 2322a55deb1SDavid E. O'Brien int i, ncall, ndef; 23362ebc626SRuslan Ermilov int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 2342a55deb1SDavid E. O'Brien Node *x; 2352a55deb1SDavid E. O'Brien Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 2362a55deb1SDavid E. O'Brien Cell *y, *z, *fcn; 2372a55deb1SDavid E. O'Brien char *s; 2382a55deb1SDavid E. O'Brien 2392a55deb1SDavid E. O'Brien fcn = execute(a[0]); /* the function itself */ 2402a55deb1SDavid E. O'Brien s = fcn->nval; 2412a55deb1SDavid E. O'Brien if (!isfcn(fcn)) 2422a55deb1SDavid E. 
O'Brien FATAL("calling undefined function %s", s); 2432a55deb1SDavid E. O'Brien if (frame == NULL) { 244f39dd6a9SWarner Losh frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 2452a55deb1SDavid E. O'Brien if (frame == NULL) 2462a55deb1SDavid E. O'Brien FATAL("out of space for stack frames calling %s", s); 2472a55deb1SDavid E. O'Brien } 2482a55deb1SDavid E. O'Brien for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 2492a55deb1SDavid E. O'Brien ncall++; 2502a55deb1SDavid E. O'Brien ndef = (int) fcn->fval; /* args in defn */ 251f39dd6a9SWarner Losh DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 2522a55deb1SDavid E. O'Brien if (ncall > ndef) 2532a55deb1SDavid E. O'Brien WARNING("function %s called with %d args, uses only %d", 2542a55deb1SDavid E. O'Brien s, ncall, ndef); 2552a55deb1SDavid E. O'Brien if (ncall + ndef > NARGS) 2562a55deb1SDavid E. O'Brien FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 2572a55deb1SDavid E. O'Brien for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 258f39dd6a9SWarner Losh DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 2592a55deb1SDavid E. O'Brien y = execute(x); 2602a55deb1SDavid E. O'Brien oargs[i] = y; 261f39dd6a9SWarner Losh DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262f39dd6a9SWarner Losh i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 2632a55deb1SDavid E. O'Brien if (isfcn(y)) 2642a55deb1SDavid E. O'Brien FATAL("can't use function %s as argument in %s", y->nval, s); 2652a55deb1SDavid E. O'Brien if (isarr(y)) 2662a55deb1SDavid E. O'Brien args[i] = y; /* arrays by ref */ 2672a55deb1SDavid E. O'Brien else 2682a55deb1SDavid E. O'Brien args[i] = copycell(y); 2692a55deb1SDavid E. O'Brien tempfree(y); 2702a55deb1SDavid E. O'Brien } 2712a55deb1SDavid E. O'Brien for ( ; i < ndef; i++) { /* add null args for ones not provided */ 2722a55deb1SDavid E. 
O'Brien args[i] = gettemp(); 2732a55deb1SDavid E. O'Brien *args[i] = newcopycell; 2742a55deb1SDavid E. O'Brien } 275f39dd6a9SWarner Losh frp++; /* now ok to up frame */ 276f39dd6a9SWarner Losh if (frp >= frame + nframe) { 277f39dd6a9SWarner Losh int dfp = frp - frame; /* old index */ 278f39dd6a9SWarner Losh frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 2792a55deb1SDavid E. O'Brien if (frame == NULL) 2802a55deb1SDavid E. O'Brien FATAL("out of space for stack frames in %s", s); 281f39dd6a9SWarner Losh frp = frame + dfp; 2822a55deb1SDavid E. O'Brien } 283f39dd6a9SWarner Losh frp->fcncell = fcn; 284f39dd6a9SWarner Losh frp->args = args; 285f39dd6a9SWarner Losh frp->nargs = ndef; /* number defined with (excess are locals) */ 286f39dd6a9SWarner Losh frp->retval = gettemp(); 2872a55deb1SDavid E. O'Brien 288f39dd6a9SWarner Losh DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 2892a55deb1SDavid E. O'Brien y = execute((Node *)(fcn->sval)); /* execute body */ 290f39dd6a9SWarner Losh DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 2912a55deb1SDavid E. O'Brien 2922a55deb1SDavid E. O'Brien for (i = 0; i < ndef; i++) { 293f39dd6a9SWarner Losh Cell *t = frp->args[i]; 2942a55deb1SDavid E. O'Brien if (isarr(t)) { 2952a55deb1SDavid E. O'Brien if (t->csub == CCOPY) { 2962a55deb1SDavid E. O'Brien if (i >= ncall) { 2972a55deb1SDavid E. O'Brien freesymtab(t); 2982a55deb1SDavid E. O'Brien t->csub = CTEMP; 2992a55deb1SDavid E. O'Brien tempfree(t); 3002a55deb1SDavid E. O'Brien } else { 3012a55deb1SDavid E. O'Brien oargs[i]->tval = t->tval; 3022a55deb1SDavid E. O'Brien oargs[i]->tval &= ~(STR|NUM|DONTFREE); 3032a55deb1SDavid E. O'Brien oargs[i]->sval = t->sval; 3042a55deb1SDavid E. O'Brien tempfree(t); 3052a55deb1SDavid E. O'Brien } 3062a55deb1SDavid E. O'Brien } 3072a55deb1SDavid E. O'Brien } else if (t != y) { /* kludge to prevent freeing twice */ 3082a55deb1SDavid E. O'Brien t->csub = CTEMP; 3092a55deb1SDavid E. 
O'Brien tempfree(t); 31062ebc626SRuslan Ermilov } else if (t == y && t->csub == CCOPY) { 31162ebc626SRuslan Ermilov t->csub = CTEMP; 31262ebc626SRuslan Ermilov tempfree(t); 31362ebc626SRuslan Ermilov freed = 1; 3142a55deb1SDavid E. O'Brien } 3152a55deb1SDavid E. O'Brien } 3162a55deb1SDavid E. O'Brien tempfree(fcn); 3172a55deb1SDavid E. O'Brien if (isexit(y) || isnext(y)) 3182a55deb1SDavid E. O'Brien return y; 31962ebc626SRuslan Ermilov if (freed == 0) { 32062ebc626SRuslan Ermilov tempfree(y); /* don't free twice! */ 32162ebc626SRuslan Ermilov } 322f39dd6a9SWarner Losh z = frp->retval; /* return value */ 323f39dd6a9SWarner Losh DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324f39dd6a9SWarner Losh frp--; 3252a55deb1SDavid E. O'Brien return(z); 3262a55deb1SDavid E. O'Brien } 3272a55deb1SDavid E. O'Brien 3282a55deb1SDavid E. O'Brien Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 3292a55deb1SDavid E. O'Brien { 3302a55deb1SDavid E. O'Brien Cell *y; 3312a55deb1SDavid E. O'Brien 332b5253557SWarner Losh /* copy is not constant or field */ 333b5253557SWarner Losh 3342a55deb1SDavid E. O'Brien y = gettemp(); 335b5253557SWarner Losh y->tval = x->tval & ~(CON|FLD|REC); 3362a55deb1SDavid E. O'Brien y->csub = CCOPY; /* prevents freeing until call is over */ 3372a55deb1SDavid E. O'Brien y->nval = x->nval; /* BUG? */ 338b5253557SWarner Losh if (isstr(x) /* || x->ctype == OCELL */) { 3392a55deb1SDavid E. O'Brien y->sval = tostring(x->sval); 340b5253557SWarner Losh y->tval &= ~DONTFREE; 341b5253557SWarner Losh } else 342b5253557SWarner Losh y->tval |= DONTFREE; 3432a55deb1SDavid E. O'Brien y->fval = x->fval; 3442a55deb1SDavid E. O'Brien return y; 3452a55deb1SDavid E. O'Brien } 3462a55deb1SDavid E. O'Brien 3472a55deb1SDavid E. O'Brien Cell *arg(Node **a, int n) /* nth argument of a function */ 3482a55deb1SDavid E. O'Brien { 3492a55deb1SDavid E. O'Brien 3502a55deb1SDavid E. 
O'Brien n = ptoi(a[0]); /* argument number, counting from 0 */ 351f39dd6a9SWarner Losh DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352f39dd6a9SWarner Losh if (n+1 > frp->nargs) 3532a55deb1SDavid E. O'Brien FATAL("argument #%d of function %s was not supplied", 354f39dd6a9SWarner Losh n+1, frp->fcncell->nval); 355f39dd6a9SWarner Losh return frp->args[n]; 3562a55deb1SDavid E. O'Brien } 3572a55deb1SDavid E. O'Brien 3582a55deb1SDavid E. O'Brien Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 3592a55deb1SDavid E. O'Brien { 3602a55deb1SDavid E. O'Brien Cell *y; 3612a55deb1SDavid E. O'Brien 3622a55deb1SDavid E. O'Brien switch (n) { 3632a55deb1SDavid E. O'Brien case EXIT: 3642a55deb1SDavid E. O'Brien if (a[0] != NULL) { 3652a55deb1SDavid E. O'Brien y = execute(a[0]); 3662a55deb1SDavid E. O'Brien errorflag = (int) getfval(y); 3672a55deb1SDavid E. O'Brien tempfree(y); 3682a55deb1SDavid E. O'Brien } 3692a55deb1SDavid E. O'Brien longjmp(env, 1); 3702a55deb1SDavid E. O'Brien case RETURN: 3712a55deb1SDavid E. O'Brien if (a[0] != NULL) { 3722a55deb1SDavid E. O'Brien y = execute(a[0]); 3732a55deb1SDavid E. O'Brien if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374f39dd6a9SWarner Losh setsval(frp->retval, getsval(y)); 375f39dd6a9SWarner Losh frp->retval->fval = getfval(y); 376f39dd6a9SWarner Losh frp->retval->tval |= NUM; 3772a55deb1SDavid E. O'Brien } 3782a55deb1SDavid E. O'Brien else if (y->tval & STR) 379f39dd6a9SWarner Losh setsval(frp->retval, getsval(y)); 3802a55deb1SDavid E. O'Brien else if (y->tval & NUM) 381f39dd6a9SWarner Losh setfval(frp->retval, getfval(y)); 3822a55deb1SDavid E. O'Brien else /* can't happen */ 3832a55deb1SDavid E. O'Brien FATAL("bad type variable %d", y->tval); 3842a55deb1SDavid E. O'Brien tempfree(y); 3852a55deb1SDavid E. O'Brien } 3862a55deb1SDavid E. O'Brien return(jret); 3872a55deb1SDavid E. O'Brien case NEXT: 3882a55deb1SDavid E. O'Brien return(jnext); 3892a55deb1SDavid E. O'Brien case NEXTFILE: 3902a55deb1SDavid E. 
O'Brien nextfile(); 3912a55deb1SDavid E. O'Brien return(jnextfile); 3922a55deb1SDavid E. O'Brien case BREAK: 3932a55deb1SDavid E. O'Brien return(jbreak); 3942a55deb1SDavid E. O'Brien case CONTINUE: 3952a55deb1SDavid E. O'Brien return(jcont); 3962a55deb1SDavid E. O'Brien default: /* can't happen */ 3972a55deb1SDavid E. O'Brien FATAL("illegal jump type %d", n); 3982a55deb1SDavid E. O'Brien } 3992a55deb1SDavid E. O'Brien return 0; /* not reached */ 4002a55deb1SDavid E. O'Brien } 4012a55deb1SDavid E. O'Brien 40291217c1cSRuslan Ermilov Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 4032a55deb1SDavid E. O'Brien { /* a[0] is variable, a[1] is operator, a[2] is filename */ 4042a55deb1SDavid E. O'Brien Cell *r, *x; 4052a55deb1SDavid E. O'Brien extern Cell **fldtab; 4062a55deb1SDavid E. O'Brien FILE *fp; 4072a55deb1SDavid E. O'Brien char *buf; 4082a55deb1SDavid E. O'Brien int bufsize = recsize; 4092a55deb1SDavid E. O'Brien int mode; 410f39dd6a9SWarner Losh bool newflag; 411f39dd6a9SWarner Losh double result; 4122a55deb1SDavid E. O'Brien 4132a55deb1SDavid E. O'Brien if ((buf = (char *) malloc(bufsize)) == NULL) 4142a55deb1SDavid E. O'Brien FATAL("out of memory in getline"); 4152a55deb1SDavid E. O'Brien 4162a55deb1SDavid E. O'Brien fflush(stdout); /* in case someone is waiting for a prompt */ 4172a55deb1SDavid E. O'Brien r = gettemp(); 4182a55deb1SDavid E. O'Brien if (a[1] != NULL) { /* getline < file */ 4192a55deb1SDavid E. O'Brien x = execute(a[2]); /* filename */ 4202a55deb1SDavid E. O'Brien mode = ptoi(a[1]); 4212a55deb1SDavid E. O'Brien if (mode == '|') /* input pipe */ 4222a55deb1SDavid E. O'Brien mode = LE; /* arbitrary flag */ 423f39dd6a9SWarner Losh fp = openfile(mode, getsval(x), &newflag); 4242a55deb1SDavid E. O'Brien tempfree(x); 4252a55deb1SDavid E. O'Brien if (fp == NULL) 4262a55deb1SDavid E. O'Brien n = -1; 4272a55deb1SDavid E. O'Brien else 428f39dd6a9SWarner Losh n = readrec(&buf, &bufsize, fp, newflag); 4292a55deb1SDavid E. 
O'Brien if (n <= 0) { 4302a55deb1SDavid E. O'Brien ; 4312a55deb1SDavid E. O'Brien } else if (a[0] != NULL) { /* getline var <file */ 4322a55deb1SDavid E. O'Brien x = execute(a[0]); 4332a55deb1SDavid E. O'Brien setsval(x, buf); 434f39dd6a9SWarner Losh if (is_number(x->sval, & result)) { 435f39dd6a9SWarner Losh x->fval = result; 436b5253557SWarner Losh x->tval |= NUM; 437b5253557SWarner Losh } 4382a55deb1SDavid E. O'Brien tempfree(x); 4392a55deb1SDavid E. O'Brien } else { /* getline <file */ 4402a55deb1SDavid E. O'Brien setsval(fldtab[0], buf); 441f39dd6a9SWarner Losh if (is_number(fldtab[0]->sval, & result)) { 442f39dd6a9SWarner Losh fldtab[0]->fval = result; 4432a55deb1SDavid E. O'Brien fldtab[0]->tval |= NUM; 4442a55deb1SDavid E. O'Brien } 4452a55deb1SDavid E. O'Brien } 4462a55deb1SDavid E. O'Brien } else { /* bare getline; use current input */ 4472a55deb1SDavid E. O'Brien if (a[0] == NULL) /* getline */ 448f39dd6a9SWarner Losh n = getrec(&record, &recsize, true); 4492a55deb1SDavid E. O'Brien else { /* getline var */ 450f39dd6a9SWarner Losh n = getrec(&buf, &bufsize, false); 451f32a6403SWarner Losh if (n > 0) { 4522a55deb1SDavid E. O'Brien x = execute(a[0]); 4532a55deb1SDavid E. O'Brien setsval(x, buf); 454f39dd6a9SWarner Losh if (is_number(x->sval, & result)) { 455f39dd6a9SWarner Losh x->fval = result; 456b5253557SWarner Losh x->tval |= NUM; 457b5253557SWarner Losh } 4582a55deb1SDavid E. O'Brien tempfree(x); 4592a55deb1SDavid E. O'Brien } 4602a55deb1SDavid E. O'Brien } 461f32a6403SWarner Losh } 4622a55deb1SDavid E. O'Brien setfval(r, (Awkfloat) n); 4632a55deb1SDavid E. O'Brien free(buf); 4642a55deb1SDavid E. O'Brien return r; 4652a55deb1SDavid E. O'Brien } 4662a55deb1SDavid E. O'Brien 4672a55deb1SDavid E. O'Brien Cell *getnf(Node **a, int n) /* get NF */ 4682a55deb1SDavid E. O'Brien { 469f39dd6a9SWarner Losh if (!donefld) 4702a55deb1SDavid E. O'Brien fldbld(); 4712a55deb1SDavid E. O'Brien return (Cell *) a[0]; 4722a55deb1SDavid E. O'Brien } 4732a55deb1SDavid E. 
O'Brien 474f39dd6a9SWarner Losh static char * 475f39dd6a9SWarner Losh makearraystring(Node *p, const char *func) 4762a55deb1SDavid E. O'Brien { 4772a55deb1SDavid E. O'Brien char *buf; 4782a55deb1SDavid E. O'Brien int bufsz = recsize; 479f39dd6a9SWarner Losh size_t blen; 4802a55deb1SDavid E. O'Brien 481f39dd6a9SWarner Losh if ((buf = (char *) malloc(bufsz)) == NULL) { 482f39dd6a9SWarner Losh FATAL("%s: out of memory", func); 483f39dd6a9SWarner Losh } 484f39dd6a9SWarner Losh 485f39dd6a9SWarner Losh blen = 0; 486f39dd6a9SWarner Losh buf[blen] = '\0'; 487f39dd6a9SWarner Losh 488f39dd6a9SWarner Losh for (; p; p = p->nnext) { 489f39dd6a9SWarner Losh Cell *x = execute(p); /* expr */ 490f39dd6a9SWarner Losh char *s = getsval(x); 491f39dd6a9SWarner Losh size_t seplen = strlen(getsval(subseploc)); 492f39dd6a9SWarner Losh size_t nsub = p->nnext ? seplen : 0; 493f39dd6a9SWarner Losh size_t slen = strlen(s); 494f39dd6a9SWarner Losh size_t tlen = blen + slen + nsub; 495f39dd6a9SWarner Losh 496f39dd6a9SWarner Losh if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497f39dd6a9SWarner Losh FATAL("%s: out of memory %s[%s...]", 498f39dd6a9SWarner Losh func, x->nval, buf); 499f39dd6a9SWarner Losh } 500f39dd6a9SWarner Losh memcpy(buf + blen, s, slen); 501f39dd6a9SWarner Losh if (nsub) { 502f39dd6a9SWarner Losh memcpy(buf + blen + slen, *SUBSEP, nsub); 503f39dd6a9SWarner Losh } 504f39dd6a9SWarner Losh buf[tlen] = '\0'; 505f39dd6a9SWarner Losh blen = tlen; 506f39dd6a9SWarner Losh tempfree(x); 507f39dd6a9SWarner Losh } 508f39dd6a9SWarner Losh return buf; 509f39dd6a9SWarner Losh } 510f39dd6a9SWarner Losh 511f39dd6a9SWarner Losh Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512f39dd6a9SWarner Losh { 513f39dd6a9SWarner Losh Cell *x, *z; 514f39dd6a9SWarner Losh char *buf; 5152a55deb1SDavid E. O'Brien 5162a55deb1SDavid E. 
O'Brien x = execute(a[0]); /* Cell* for symbol table */ 517f39dd6a9SWarner Losh buf = makearraystring(a[1], __func__); 5182a55deb1SDavid E. O'Brien if (!isarr(x)) { 519f39dd6a9SWarner Losh DPRINTF("making %s into an array\n", NN(x->nval)); 5202a55deb1SDavid E. O'Brien if (freeable(x)) 5212a55deb1SDavid E. O'Brien xfree(x->sval); 5222a55deb1SDavid E. O'Brien x->tval &= ~(STR|NUM|DONTFREE); 5232a55deb1SDavid E. O'Brien x->tval |= ARR; 5242a55deb1SDavid E. O'Brien x->sval = (char *) makesymtab(NSYMTAB); 5252a55deb1SDavid E. O'Brien } 5262a55deb1SDavid E. O'Brien z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 5272a55deb1SDavid E. O'Brien z->ctype = OCELL; 5282a55deb1SDavid E. O'Brien z->csub = CVAR; 5292a55deb1SDavid E. O'Brien tempfree(x); 5302a55deb1SDavid E. O'Brien free(buf); 5312a55deb1SDavid E. O'Brien return(z); 5322a55deb1SDavid E. O'Brien } 5332a55deb1SDavid E. O'Brien 5342a55deb1SDavid E. O'Brien Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 5352a55deb1SDavid E. O'Brien { 536f39dd6a9SWarner Losh Cell *x; 5372a55deb1SDavid E. O'Brien 5382a55deb1SDavid E. O'Brien x = execute(a[0]); /* Cell* for symbol table */ 539f39dd6a9SWarner Losh if (x == symtabloc) { 540f39dd6a9SWarner Losh FATAL("cannot delete SYMTAB or its elements"); 541f39dd6a9SWarner Losh } 5422a55deb1SDavid E. O'Brien if (!isarr(x)) 5432a55deb1SDavid E. O'Brien return True; 54410ce5b99SWarner Losh if (a[1] == NULL) { /* delete the elements, not the table */ 5452a55deb1SDavid E. O'Brien freesymtab(x); 5462a55deb1SDavid E. O'Brien x->tval &= ~STR; 5472a55deb1SDavid E. O'Brien x->tval |= ARR; 5482a55deb1SDavid E. O'Brien x->sval = (char *) makesymtab(NSYMTAB); 5492a55deb1SDavid E. O'Brien } else { 550f39dd6a9SWarner Losh char *buf = makearraystring(a[1], __func__); 5512a55deb1SDavid E. O'Brien freeelem(x, buf); 5522a55deb1SDavid E. O'Brien free(buf); 5532a55deb1SDavid E. O'Brien } 5542a55deb1SDavid E. O'Brien tempfree(x); 5552a55deb1SDavid E. 
O'Brien return True; 5562a55deb1SDavid E. O'Brien } 5572a55deb1SDavid E. O'Brien 5582a55deb1SDavid E. O'Brien Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 5592a55deb1SDavid E. O'Brien { 560f39dd6a9SWarner Losh Cell *ap, *k; 5612a55deb1SDavid E. O'Brien char *buf; 5622a55deb1SDavid E. O'Brien 5632a55deb1SDavid E. O'Brien ap = execute(a[1]); /* array name */ 5642a55deb1SDavid E. O'Brien if (!isarr(ap)) { 565f39dd6a9SWarner Losh DPRINTF("making %s into an array\n", ap->nval); 5662a55deb1SDavid E. O'Brien if (freeable(ap)) 5672a55deb1SDavid E. O'Brien xfree(ap->sval); 5682a55deb1SDavid E. O'Brien ap->tval &= ~(STR|NUM|DONTFREE); 5692a55deb1SDavid E. O'Brien ap->tval |= ARR; 5702a55deb1SDavid E. O'Brien ap->sval = (char *) makesymtab(NSYMTAB); 5712a55deb1SDavid E. O'Brien } 572f39dd6a9SWarner Losh buf = makearraystring(a[0], __func__); 5732a55deb1SDavid E. O'Brien k = lookup(buf, (Array *) ap->sval); 5742a55deb1SDavid E. O'Brien tempfree(ap); 5752a55deb1SDavid E. O'Brien free(buf); 5762a55deb1SDavid E. O'Brien if (k == NULL) 5772a55deb1SDavid E. O'Brien return(False); 5782a55deb1SDavid E. O'Brien else 5792a55deb1SDavid E. O'Brien return(True); 5802a55deb1SDavid E. O'Brien } 5812a55deb1SDavid E. O'Brien 5822a55deb1SDavid E. O'Brien 583f32a6403SWarner Losh /* ======== utf-8 code ========== */ 584f32a6403SWarner Losh 585f32a6403SWarner Losh /* 586f32a6403SWarner Losh * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 587f32a6403SWarner Losh * or utf-8. u8_isutf tests whether a string starts with a valid 588f32a6403SWarner Losh * utf-8 sequence, and returns 0 if not (e.g., high bit set). 589f32a6403SWarner Losh * u8_nextlen returns length of next valid sequence, which is 590f32a6403SWarner Losh * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 591f32a6403SWarner Losh * u8_strlen returns length of string in valid utf-8 sequences 592f32a6403SWarner Losh * and/or high-bit bytes. 
Conversion functions go between byte 593f32a6403SWarner Losh * number and character number. 594f32a6403SWarner Losh * 595f32a6403SWarner Losh * In theory, this behaves the same as before for non-utf8 bytes. 596f32a6403SWarner Losh * 597f32a6403SWarner Losh * Limited checking! This is a potential security hole. 598f32a6403SWarner Losh */ 599f32a6403SWarner Losh 600f32a6403SWarner Losh /* is s the beginning of a valid utf-8 string? */ 601f32a6403SWarner Losh /* return length 1..4 if yes, 0 if no */ 602f32a6403SWarner Losh int u8_isutf(const char *s) 603f32a6403SWarner Losh { 604f32a6403SWarner Losh int n, ret; 605f32a6403SWarner Losh unsigned char c; 606f32a6403SWarner Losh 607f32a6403SWarner Losh c = s[0]; 608f32a6403SWarner Losh if (c < 128 || awk_mb_cur_max == 1) 609f32a6403SWarner Losh return 1; /* what if it's 0? */ 610f32a6403SWarner Losh 611f32a6403SWarner Losh n = strlen(s); 612f32a6403SWarner Losh if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 613f32a6403SWarner Losh ret = 2; /* 110xxxxx 10xxxxxx */ 614f32a6403SWarner Losh } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 615f32a6403SWarner Losh && (s[2] & 0xC0) == 0x80) { 616f32a6403SWarner Losh ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 617f32a6403SWarner Losh } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 618f32a6403SWarner Losh && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 619f32a6403SWarner Losh ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 620f32a6403SWarner Losh } else { 621f32a6403SWarner Losh ret = 0; 622f32a6403SWarner Losh } 623f32a6403SWarner Losh return ret; 624f32a6403SWarner Losh } 625f32a6403SWarner Losh 626f32a6403SWarner Losh /* Convert (prefix of) utf8 string to utf-32 rune. */ 627f32a6403SWarner Losh /* Sets *rune to the value, returns the length. */ 628f32a6403SWarner Losh /* No error checking: watch out. 
*/ 629f32a6403SWarner Losh int u8_rune(int *rune, const char *s) 630f32a6403SWarner Losh { 631f32a6403SWarner Losh int n, ret; 632f32a6403SWarner Losh unsigned char c; 633f32a6403SWarner Losh 634f32a6403SWarner Losh c = s[0]; 635f32a6403SWarner Losh if (c < 128 || awk_mb_cur_max == 1) { 636f32a6403SWarner Losh *rune = c; 637f32a6403SWarner Losh return 1; 638f32a6403SWarner Losh } 639f32a6403SWarner Losh 640f32a6403SWarner Losh n = strlen(s); 641f32a6403SWarner Losh if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 642f32a6403SWarner Losh *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 643f32a6403SWarner Losh ret = 2; 644f32a6403SWarner Losh } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 645f32a6403SWarner Losh && (s[2] & 0xC0) == 0x80) { 646f32a6403SWarner Losh *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 647f32a6403SWarner Losh /* 1110xxxx 10xxxxxx 10xxxxxx */ 648f32a6403SWarner Losh ret = 3; 649f32a6403SWarner Losh } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 650f32a6403SWarner Losh && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 651f32a6403SWarner Losh *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 652f32a6403SWarner Losh /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 653f32a6403SWarner Losh ret = 4; 654f32a6403SWarner Losh } else { 655f32a6403SWarner Losh *rune = c; 656f32a6403SWarner Losh ret = 1; 657f32a6403SWarner Losh } 658f32a6403SWarner Losh return ret; /* returns one byte if sequence doesn't look like utf */ 659f32a6403SWarner Losh } 660f32a6403SWarner Losh 661f32a6403SWarner Losh /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 662f32a6403SWarner Losh int u8_nextlen(const char *s) 663f32a6403SWarner Losh { 664f32a6403SWarner Losh int len; 665f32a6403SWarner Losh 666f32a6403SWarner Losh len = u8_isutf(s); 667f32a6403SWarner Losh if (len == 0) 668f32a6403SWarner Losh len = 1; 
669f32a6403SWarner Losh return len; 670f32a6403SWarner Losh } 671f32a6403SWarner Losh 672f32a6403SWarner Losh /* return number of utf characters or single non-utf bytes */ 673f32a6403SWarner Losh int u8_strlen(const char *s) 674f32a6403SWarner Losh { 675f32a6403SWarner Losh int i, len, n, totlen; 676f32a6403SWarner Losh unsigned char c; 677f32a6403SWarner Losh 678f32a6403SWarner Losh n = strlen(s); 679f32a6403SWarner Losh totlen = 0; 680f32a6403SWarner Losh for (i = 0; i < n; i += len) { 681f32a6403SWarner Losh c = s[i]; 682f32a6403SWarner Losh if (c < 128 || awk_mb_cur_max == 1) { 683f32a6403SWarner Losh len = 1; 684f32a6403SWarner Losh } else { 685f32a6403SWarner Losh len = u8_nextlen(&s[i]); 686f32a6403SWarner Losh } 687f32a6403SWarner Losh totlen++; 688f32a6403SWarner Losh if (i > n) 689f32a6403SWarner Losh FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 690f32a6403SWarner Losh } 691f32a6403SWarner Losh return totlen; 692f32a6403SWarner Losh } 693f32a6403SWarner Losh 694f32a6403SWarner Losh /* convert utf-8 char number in a string to its byte offset */ 695f32a6403SWarner Losh int u8_char2byte(const char *s, int charnum) 696f32a6403SWarner Losh { 697f32a6403SWarner Losh int n; 698f32a6403SWarner Losh int bytenum = 0; 699f32a6403SWarner Losh 700f32a6403SWarner Losh while (charnum > 0) { 701f32a6403SWarner Losh n = u8_nextlen(s); 702f32a6403SWarner Losh s += n; 703f32a6403SWarner Losh bytenum += n; 704f32a6403SWarner Losh charnum--; 705f32a6403SWarner Losh } 706f32a6403SWarner Losh return bytenum; 707f32a6403SWarner Losh } 708f32a6403SWarner Losh 709f32a6403SWarner Losh /* convert byte offset in s to utf-8 char number that starts there */ 710f32a6403SWarner Losh int u8_byte2char(const char *s, int bytenum) 711f32a6403SWarner Losh { 712f32a6403SWarner Losh int i, len, b; 713f32a6403SWarner Losh int charnum = 0; /* BUG: what origin? 
*/ 714f32a6403SWarner Losh /* should be 0 to match start==0 which means no match */ 715f32a6403SWarner Losh 716f32a6403SWarner Losh b = strlen(s); 717f32a6403SWarner Losh if (bytenum > b) { 718f32a6403SWarner Losh return -1; /* ??? */ 719f32a6403SWarner Losh } 720f32a6403SWarner Losh for (i = 0; i <= bytenum; i += len) { 721f32a6403SWarner Losh len = u8_nextlen(s+i); 722f32a6403SWarner Losh charnum++; 723f32a6403SWarner Losh } 724f32a6403SWarner Losh return charnum; 725f32a6403SWarner Losh } 726f32a6403SWarner Losh 72717853db4SWarner Losh /* runetochar() adapted from rune.c in the Plan 9 distribution */ 728f32a6403SWarner Losh 729f32a6403SWarner Losh enum 730f32a6403SWarner Losh { 731f32a6403SWarner Losh Runeerror = 128, /* from somewhere else */ 732f32a6403SWarner Losh Runemax = 0x10FFFF, 733f32a6403SWarner Losh 734f32a6403SWarner Losh Bit1 = 7, 735f32a6403SWarner Losh Bitx = 6, 736f32a6403SWarner Losh Bit2 = 5, 737f32a6403SWarner Losh Bit3 = 4, 738f32a6403SWarner Losh Bit4 = 3, 739f32a6403SWarner Losh Bit5 = 2, 740f32a6403SWarner Losh 741f32a6403SWarner Losh T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 742f32a6403SWarner Losh Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 743f32a6403SWarner Losh T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 744f32a6403SWarner Losh T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 745f32a6403SWarner Losh T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 746f32a6403SWarner Losh T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 747f32a6403SWarner Losh 748f32a6403SWarner Losh Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 749f32a6403SWarner Losh Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 750f32a6403SWarner Losh Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 751f32a6403SWarner Losh Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 752f32a6403SWarner Losh 753f32a6403SWarner Losh Maskx = (1<<Bitx)-1, /* 0011 1111 */ 754f32a6403SWarner Losh Testx = Maskx ^ 0xFF, /* 
1100 0000 */ 755f32a6403SWarner Losh 756f32a6403SWarner Losh }; 757f32a6403SWarner Losh 758f32a6403SWarner Losh int runetochar(char *str, int c) 759f32a6403SWarner Losh { 760f32a6403SWarner Losh /* one character sequence 00000-0007F => 00-7F */ 761f32a6403SWarner Losh if (c <= Rune1) { 762f32a6403SWarner Losh str[0] = c; 763f32a6403SWarner Losh return 1; 764f32a6403SWarner Losh } 765f32a6403SWarner Losh 766f32a6403SWarner Losh /* two character sequence 00080-007FF => T2 Tx */ 767f32a6403SWarner Losh if (c <= Rune2) { 768f32a6403SWarner Losh str[0] = T2 | (c >> 1*Bitx); 769f32a6403SWarner Losh str[1] = Tx | (c & Maskx); 770f32a6403SWarner Losh return 2; 771f32a6403SWarner Losh } 772f32a6403SWarner Losh 773f32a6403SWarner Losh /* three character sequence 00800-0FFFF => T3 Tx Tx */ 774f32a6403SWarner Losh if (c > Runemax) 775f32a6403SWarner Losh c = Runeerror; 776f32a6403SWarner Losh if (c <= Rune3) { 777f32a6403SWarner Losh str[0] = T3 | (c >> 2*Bitx); 778f32a6403SWarner Losh str[1] = Tx | ((c >> 1*Bitx) & Maskx); 779f32a6403SWarner Losh str[2] = Tx | (c & Maskx); 780f32a6403SWarner Losh return 3; 781f32a6403SWarner Losh } 782f32a6403SWarner Losh 783f32a6403SWarner Losh /* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */ 784f32a6403SWarner Losh str[0] = T4 | (c >> 3*Bitx); 785f32a6403SWarner Losh str[1] = Tx | ((c >> 2*Bitx) & Maskx); 786f32a6403SWarner Losh str[2] = Tx | ((c >> 1*Bitx) & Maskx); 787f32a6403SWarner Losh str[3] = Tx | (c & Maskx); 788f32a6403SWarner Losh return 4; 789f32a6403SWarner Losh } 790f32a6403SWarner Losh 791f32a6403SWarner Losh 792f32a6403SWarner Losh /* ========== end of utf8 code =========== */ 793f32a6403SWarner Losh 794f32a6403SWarner Losh 795f32a6403SWarner Losh 7962a55deb1SDavid E. O'Brien Cell *matchop(Node **a, int n) /* ~ and match() */ 7972a55deb1SDavid E. O'Brien { 798f32a6403SWarner Losh Cell *x, *y, *z; 7992a55deb1SDavid E. O'Brien char *s, *t; 8002a55deb1SDavid E. 
O'Brien int i; 801f32a6403SWarner Losh int cstart, cpatlen, len; 8022a55deb1SDavid E. O'Brien fa *pfa; 803813da98dSDavid E. O'Brien int (*mf)(fa *, const char *) = match, mode = 0; 8042a55deb1SDavid E. O'Brien 8052a55deb1SDavid E. O'Brien if (n == MATCHFCN) { 8062a55deb1SDavid E. O'Brien mf = pmatch; 8072a55deb1SDavid E. O'Brien mode = 1; 8082a55deb1SDavid E. O'Brien } 8092a55deb1SDavid E. O'Brien x = execute(a[1]); /* a[1] = target text */ 8102a55deb1SDavid E. O'Brien s = getsval(x); 81110ce5b99SWarner Losh if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 8122a55deb1SDavid E. O'Brien i = (*mf)((fa *) a[2], s); 8132a55deb1SDavid E. O'Brien else { 8142a55deb1SDavid E. O'Brien y = execute(a[2]); /* a[2] = regular expr */ 8152a55deb1SDavid E. O'Brien t = getsval(y); 8162a55deb1SDavid E. O'Brien pfa = makedfa(t, mode); 8172a55deb1SDavid E. O'Brien i = (*mf)(pfa, s); 8182a55deb1SDavid E. O'Brien tempfree(y); 8192a55deb1SDavid E. O'Brien } 820f32a6403SWarner Losh z = x; 8212a55deb1SDavid E. O'Brien if (n == MATCHFCN) { 822f32a6403SWarner Losh int start = patbeg - s + 1; /* origin 1 */ 823f32a6403SWarner Losh if (patlen < 0) { 824f32a6403SWarner Losh start = 0; /* not found */ 825f32a6403SWarner Losh } else { 826f32a6403SWarner Losh cstart = u8_byte2char(s, start-1); 827f32a6403SWarner Losh cpatlen = 0; 828f32a6403SWarner Losh for (i = 0; i < patlen; i += len) { 829f32a6403SWarner Losh len = u8_nextlen(patbeg+i); 830f32a6403SWarner Losh cpatlen++; 831f32a6403SWarner Losh } 832f32a6403SWarner Losh 833f32a6403SWarner Losh start = cstart; 834f32a6403SWarner Losh patlen = cpatlen; 835f32a6403SWarner Losh } 836f32a6403SWarner Losh 8372a55deb1SDavid E. O'Brien setfval(rstartloc, (Awkfloat) start); 8382a55deb1SDavid E. O'Brien setfval(rlengthloc, (Awkfloat) patlen); 8392a55deb1SDavid E. O'Brien x = gettemp(); 8402a55deb1SDavid E. O'Brien x->tval = NUM; 8412a55deb1SDavid E. O'Brien x->fval = start; 8422a55deb1SDavid E. 
O'Brien } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 843f32a6403SWarner Losh x = True; 8442a55deb1SDavid E. O'Brien else 845f32a6403SWarner Losh x = False; 846f32a6403SWarner Losh 847f32a6403SWarner Losh tempfree(z); 848f32a6403SWarner Losh return x; 8492a55deb1SDavid E. O'Brien } 8502a55deb1SDavid E. O'Brien 8512a55deb1SDavid E. O'Brien 8522a55deb1SDavid E. O'Brien Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 8532a55deb1SDavid E. O'Brien { 8542a55deb1SDavid E. O'Brien Cell *x, *y; 8552a55deb1SDavid E. O'Brien int i; 8562a55deb1SDavid E. O'Brien 8572a55deb1SDavid E. O'Brien x = execute(a[0]); 8582a55deb1SDavid E. O'Brien i = istrue(x); 8592a55deb1SDavid E. O'Brien tempfree(x); 8602a55deb1SDavid E. O'Brien switch (n) { 8612a55deb1SDavid E. O'Brien case BOR: 8622a55deb1SDavid E. O'Brien if (i) return(True); 8632a55deb1SDavid E. O'Brien y = execute(a[1]); 8642a55deb1SDavid E. O'Brien i = istrue(y); 8652a55deb1SDavid E. O'Brien tempfree(y); 8662a55deb1SDavid E. O'Brien if (i) return(True); 8672a55deb1SDavid E. O'Brien else return(False); 8682a55deb1SDavid E. O'Brien case AND: 8692a55deb1SDavid E. O'Brien if ( !i ) return(False); 8702a55deb1SDavid E. O'Brien y = execute(a[1]); 8712a55deb1SDavid E. O'Brien i = istrue(y); 8722a55deb1SDavid E. O'Brien tempfree(y); 8732a55deb1SDavid E. O'Brien if (i) return(True); 8742a55deb1SDavid E. O'Brien else return(False); 8752a55deb1SDavid E. O'Brien case NOT: 8762a55deb1SDavid E. O'Brien if (i) return(False); 8772a55deb1SDavid E. O'Brien else return(True); 8782a55deb1SDavid E. O'Brien default: /* can't happen */ 8792a55deb1SDavid E. O'Brien FATAL("unknown boolean operator %d", n); 8802a55deb1SDavid E. O'Brien } 8812a55deb1SDavid E. O'Brien return 0; /*NOTREACHED*/ 8822a55deb1SDavid E. O'Brien } 8832a55deb1SDavid E. O'Brien 8842a55deb1SDavid E. O'Brien Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 8852a55deb1SDavid E. O'Brien { 8862a55deb1SDavid E. O'Brien int i; 8872a55deb1SDavid E. 
O'Brien Cell *x, *y; 8882a55deb1SDavid E. O'Brien Awkfloat j; 889f32a6403SWarner Losh bool x_is_nan, y_is_nan; 8902a55deb1SDavid E. O'Brien 8912a55deb1SDavid E. O'Brien x = execute(a[0]); 8922a55deb1SDavid E. O'Brien y = execute(a[1]); 893f32a6403SWarner Losh x_is_nan = isnan(x->fval); 894f32a6403SWarner Losh y_is_nan = isnan(y->fval); 8952a55deb1SDavid E. O'Brien if (x->tval&NUM && y->tval&NUM) { 896f32a6403SWarner Losh if ((x_is_nan || y_is_nan) && n != NE) 897f32a6403SWarner Losh return(False); 8982a55deb1SDavid E. O'Brien j = x->fval - y->fval; 8992a55deb1SDavid E. O'Brien i = j<0? -1: (j>0? 1: 0); 9002a55deb1SDavid E. O'Brien } else { 901628bd30aSWarner Losh i = strcmp(getsval(x), getsval(y)); 9022a55deb1SDavid E. O'Brien } 9032a55deb1SDavid E. O'Brien tempfree(x); 9042a55deb1SDavid E. O'Brien tempfree(y); 9052a55deb1SDavid E. O'Brien switch (n) { 9062a55deb1SDavid E. O'Brien case LT: if (i<0) return(True); 9072a55deb1SDavid E. O'Brien else return(False); 9082a55deb1SDavid E. O'Brien case LE: if (i<=0) return(True); 9092a55deb1SDavid E. O'Brien else return(False); 910f32a6403SWarner Losh case NE: if (x_is_nan && y_is_nan) return(True); 911f32a6403SWarner Losh else if (i!=0) return(True); 9122a55deb1SDavid E. O'Brien else return(False); 9132a55deb1SDavid E. O'Brien case EQ: if (i == 0) return(True); 9142a55deb1SDavid E. O'Brien else return(False); 9152a55deb1SDavid E. O'Brien case GE: if (i>=0) return(True); 9162a55deb1SDavid E. O'Brien else return(False); 9172a55deb1SDavid E. O'Brien case GT: if (i>0) return(True); 9182a55deb1SDavid E. O'Brien else return(False); 9192a55deb1SDavid E. O'Brien default: /* can't happen */ 9202a55deb1SDavid E. O'Brien FATAL("unknown relational operator %d", n); 9212a55deb1SDavid E. O'Brien } 9222a55deb1SDavid E. O'Brien return 0; /*NOTREACHED*/ 9232a55deb1SDavid E. O'Brien } 9242a55deb1SDavid E. O'Brien 9252a55deb1SDavid E. O'Brien void tfree(Cell *a) /* free a tempcell */ 9262a55deb1SDavid E. O'Brien { 9272a55deb1SDavid E. 
O'Brien if (freeable(a)) { 928f39dd6a9SWarner Losh DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 9292a55deb1SDavid E. O'Brien xfree(a->sval); 9302a55deb1SDavid E. O'Brien } 9312a55deb1SDavid E. O'Brien if (a == tmps) 9322a55deb1SDavid E. O'Brien FATAL("tempcell list is curdled"); 9332a55deb1SDavid E. O'Brien a->cnext = tmps; 9342a55deb1SDavid E. O'Brien tmps = a; 9352a55deb1SDavid E. O'Brien } 9362a55deb1SDavid E. O'Brien 9372a55deb1SDavid E. O'Brien Cell *gettemp(void) /* get a tempcell */ 9382a55deb1SDavid E. O'Brien { int i; 9392a55deb1SDavid E. O'Brien Cell *x; 9402a55deb1SDavid E. O'Brien 9412a55deb1SDavid E. O'Brien if (!tmps) { 942f39dd6a9SWarner Losh tmps = (Cell *) calloc(100, sizeof(*tmps)); 9432a55deb1SDavid E. O'Brien if (!tmps) 9442a55deb1SDavid E. O'Brien FATAL("out of space for temporaries"); 9452a55deb1SDavid E. O'Brien for (i = 1; i < 100; i++) 9462a55deb1SDavid E. O'Brien tmps[i-1].cnext = &tmps[i]; 94710ce5b99SWarner Losh tmps[i-1].cnext = NULL; 9482a55deb1SDavid E. O'Brien } 9492a55deb1SDavid E. O'Brien x = tmps; 9502a55deb1SDavid E. O'Brien tmps = x->cnext; 9512a55deb1SDavid E. O'Brien *x = tempcell; 9522a55deb1SDavid E. O'Brien return(x); 9532a55deb1SDavid E. O'Brien } 9542a55deb1SDavid E. O'Brien 9552a55deb1SDavid E. O'Brien Cell *indirect(Node **a, int n) /* $( a[0] ) */ 9562a55deb1SDavid E. O'Brien { 957c263f9bfSRuslan Ermilov Awkfloat val; 9582a55deb1SDavid E. O'Brien Cell *x; 9592a55deb1SDavid E. O'Brien int m; 9602a55deb1SDavid E. O'Brien char *s; 9612a55deb1SDavid E. O'Brien 9622a55deb1SDavid E. O'Brien x = execute(a[0]); 963c263f9bfSRuslan Ermilov val = getfval(x); /* freebsd: defend against super large field numbers */ 964c263f9bfSRuslan Ermilov if ((Awkfloat)INT_MAX < val) 965c263f9bfSRuslan Ermilov FATAL("trying to access out of range field %s", x->nval); 966c263f9bfSRuslan Ermilov m = (int) val; 967f39dd6a9SWarner Losh if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 9682a55deb1SDavid E. 
O'Brien FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 9692a55deb1SDavid E. O'Brien /* BUG: can x->nval ever be null??? */ 9702a55deb1SDavid E. O'Brien tempfree(x); 9712a55deb1SDavid E. O'Brien x = fieldadr(m); 9722a55deb1SDavid E. O'Brien x->ctype = OCELL; /* BUG? why are these needed? */ 9732a55deb1SDavid E. O'Brien x->csub = CFLD; 9742a55deb1SDavid E. O'Brien return(x); 9752a55deb1SDavid E. O'Brien } 9762a55deb1SDavid E. O'Brien 9772a55deb1SDavid E. O'Brien Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 9782a55deb1SDavid E. O'Brien { 9792a55deb1SDavid E. O'Brien int k, m, n; 980f32a6403SWarner Losh int mb, nb; 9812a55deb1SDavid E. O'Brien char *s; 9822a55deb1SDavid E. O'Brien int temp; 98310ce5b99SWarner Losh Cell *x, *y, *z = NULL; 9842a55deb1SDavid E. O'Brien 9852a55deb1SDavid E. O'Brien x = execute(a[0]); 9862a55deb1SDavid E. O'Brien y = execute(a[1]); 98710ce5b99SWarner Losh if (a[2] != NULL) 9882a55deb1SDavid E. O'Brien z = execute(a[2]); 9892a55deb1SDavid E. O'Brien s = getsval(x); 990f32a6403SWarner Losh k = u8_strlen(s) + 1; 9912a55deb1SDavid E. O'Brien if (k <= 1) { 9922a55deb1SDavid E. O'Brien tempfree(x); 9932a55deb1SDavid E. O'Brien tempfree(y); 99410ce5b99SWarner Losh if (a[2] != NULL) { 9952a55deb1SDavid E. O'Brien tempfree(z); 9962a55deb1SDavid E. O'Brien } 9972a55deb1SDavid E. O'Brien x = gettemp(); 9982a55deb1SDavid E. O'Brien setsval(x, ""); 9992a55deb1SDavid E. O'Brien return(x); 10002a55deb1SDavid E. O'Brien } 10012a55deb1SDavid E. O'Brien m = (int) getfval(y); 10022a55deb1SDavid E. O'Brien if (m <= 0) 10032a55deb1SDavid E. O'Brien m = 1; 10042a55deb1SDavid E. O'Brien else if (m > k) 10052a55deb1SDavid E. O'Brien m = k; 10062a55deb1SDavid E. O'Brien tempfree(y); 100710ce5b99SWarner Losh if (a[2] != NULL) { 10082a55deb1SDavid E. O'Brien n = (int) getfval(z); 10092a55deb1SDavid E. O'Brien tempfree(z); 10102a55deb1SDavid E. O'Brien } else 10112a55deb1SDavid E. O'Brien n = k - 1; 10122a55deb1SDavid E. 
O'Brien if (n < 0) 10132a55deb1SDavid E. O'Brien n = 0; 10142a55deb1SDavid E. O'Brien else if (n > k - m) 10152a55deb1SDavid E. O'Brien n = k - m; 1016f32a6403SWarner Losh /* m is start, n is length from there */ 1017f39dd6a9SWarner Losh DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 10182a55deb1SDavid E. O'Brien y = gettemp(); 1019f32a6403SWarner Losh mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1020f32a6403SWarner Losh nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1021f32a6403SWarner Losh 1022f32a6403SWarner Losh temp = s[nb]; /* with thanks to John Linderman */ 1023f32a6403SWarner Losh s[nb] = '\0'; 1024f32a6403SWarner Losh setsval(y, s + mb); 1025f32a6403SWarner Losh s[nb] = temp; 10262a55deb1SDavid E. O'Brien tempfree(x); 10272a55deb1SDavid E. O'Brien return(y); 10282a55deb1SDavid E. O'Brien } 10292a55deb1SDavid E. O'Brien 10302a55deb1SDavid E. O'Brien Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 10312a55deb1SDavid E. O'Brien { 10322a55deb1SDavid E. O'Brien Cell *x, *y, *z; 10332a55deb1SDavid E. O'Brien char *s1, *s2, *p1, *p2, *q; 10342a55deb1SDavid E. O'Brien Awkfloat v = 0.0; 10352a55deb1SDavid E. O'Brien 10362a55deb1SDavid E. O'Brien x = execute(a[0]); 10372a55deb1SDavid E. O'Brien s1 = getsval(x); 10382a55deb1SDavid E. O'Brien y = execute(a[1]); 10392a55deb1SDavid E. O'Brien s2 = getsval(y); 10402a55deb1SDavid E. O'Brien 10412a55deb1SDavid E. O'Brien z = gettemp(); 10422a55deb1SDavid E. O'Brien for (p1 = s1; *p1 != '\0'; p1++) { 10432a55deb1SDavid E. O'Brien for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1044f39dd6a9SWarner Losh continue; 10452a55deb1SDavid E. 
O'Brien if (*p2 == '\0') { 1046f32a6403SWarner Losh /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1047f32a6403SWarner Losh 1048f32a6403SWarner Losh /* should be a function: used in match() as well */ 1049f32a6403SWarner Losh int i, len; 1050f32a6403SWarner Losh v = 0; 1051f32a6403SWarner Losh for (i = 0; i < p1-s1+1; i += len) { 1052f32a6403SWarner Losh len = u8_nextlen(s1+i); 1053f32a6403SWarner Losh v++; 1054f32a6403SWarner Losh } 10552a55deb1SDavid E. O'Brien break; 10562a55deb1SDavid E. O'Brien } 10572a55deb1SDavid E. O'Brien } 10582a55deb1SDavid E. O'Brien tempfree(x); 10592a55deb1SDavid E. O'Brien tempfree(y); 10602a55deb1SDavid E. O'Brien setfval(z, v); 10612a55deb1SDavid E. O'Brien return(z); 10622a55deb1SDavid E. O'Brien } 10632a55deb1SDavid E. O'Brien 1064f32a6403SWarner Losh int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1065f32a6403SWarner Losh { 1066f32a6403SWarner Losh int n; 1067f32a6403SWarner Losh 1068f32a6403SWarner Losh for (n = 0; *s != 0; s += n) { 1069f32a6403SWarner Losh n = u8_nextlen(s); 1070f32a6403SWarner Losh if (n > 1) 1071f32a6403SWarner Losh return 1; 1072f32a6403SWarner Losh } 1073f32a6403SWarner Losh return 0; 1074f32a6403SWarner Losh } 1075f32a6403SWarner Losh 10762a55deb1SDavid E. O'Brien #define MAXNUMSIZE 50 10772a55deb1SDavid E. O'Brien 1078813da98dSDavid E. O'Brien int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 10792a55deb1SDavid E. O'Brien { 10802a55deb1SDavid E. O'Brien char *fmt; 1081813da98dSDavid E. O'Brien char *p, *t; 1082813da98dSDavid E. O'Brien const char *os; 10832a55deb1SDavid E. O'Brien Cell *x; 10842a55deb1SDavid E. O'Brien int flag = 0, n; 10852a55deb1SDavid E. O'Brien int fmtwd; /* format width */ 10862a55deb1SDavid E. O'Brien int fmtsz = recsize; 10872a55deb1SDavid E. O'Brien char *buf = *pbuf; 10882a55deb1SDavid E. 
O'Brien int bufsize = *pbufsize; 1089f39dd6a9SWarner Losh #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1090f39dd6a9SWarner Losh #define BUFSZ(a) (bufsize - ((a) - buf)) 10912a55deb1SDavid E. O'Brien 1092f39dd6a9SWarner Losh static bool first = true; 1093f39dd6a9SWarner Losh static bool have_a_format = false; 1094b5253557SWarner Losh 1095b5253557SWarner Losh if (first) { 1096f39dd6a9SWarner Losh char xbuf[100]; 1097b5253557SWarner Losh 1098f39dd6a9SWarner Losh snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1099f39dd6a9SWarner Losh have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1100f39dd6a9SWarner Losh first = false; 1101b5253557SWarner Losh } 1102b5253557SWarner Losh 11032a55deb1SDavid E. O'Brien os = s; 11042a55deb1SDavid E. O'Brien p = buf; 11052a55deb1SDavid E. O'Brien if ((fmt = (char *) malloc(fmtsz)) == NULL) 11062a55deb1SDavid E. O'Brien FATAL("out of memory in format()"); 11072a55deb1SDavid E. O'Brien while (*s) { 1108addad6afSRong-En Fan adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 11092a55deb1SDavid E. O'Brien if (*s != '%') { 11102a55deb1SDavid E. O'Brien *p++ = *s++; 11112a55deb1SDavid E. O'Brien continue; 11122a55deb1SDavid E. O'Brien } 11132a55deb1SDavid E. O'Brien if (*(s+1) == '%') { 11142a55deb1SDavid E. O'Brien *p++ = '%'; 11152a55deb1SDavid E. O'Brien s += 2; 11162a55deb1SDavid E. O'Brien continue; 11172a55deb1SDavid E. O'Brien } 11182a55deb1SDavid E. O'Brien fmtwd = atoi(s+1); 11192a55deb1SDavid E. O'Brien if (fmtwd < 0) 11202a55deb1SDavid E. O'Brien fmtwd = -fmtwd; 1121addad6afSRong-En Fan adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 11222a55deb1SDavid E. O'Brien for (t = fmt; (*t++ = *s) != '\0'; s++) { 1123addad6afSRong-En Fan if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 11242a55deb1SDavid E. O'Brien FATAL("format item %.30s... 
ran format() out of memory", os); 1125f39dd6a9SWarner Losh /* Ignore size specifiers */ 1126f39dd6a9SWarner Losh if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1127f39dd6a9SWarner Losh t--; 1128f39dd6a9SWarner Losh continue; 1129f39dd6a9SWarner Losh } 1130f39dd6a9SWarner Losh if (isalpha((uschar)*s)) 1131f39dd6a9SWarner Losh break; 1132b5253557SWarner Losh if (*s == '$') { 1133b5253557SWarner Losh FATAL("'$' not permitted in awk formats"); 1134b5253557SWarner Losh } 11352a55deb1SDavid E. O'Brien if (*s == '*') { 1136b5253557SWarner Losh if (a == NULL) { 1137b5253557SWarner Losh FATAL("not enough args in printf(%s)", os); 1138b5253557SWarner Losh } 11392a55deb1SDavid E. O'Brien x = execute(a); 11402a55deb1SDavid E. O'Brien a = a->nnext; 1141f39dd6a9SWarner Losh snprintf(t - 1, FMTSZ(t - 1), 1142f39dd6a9SWarner Losh "%d", fmtwd=(int) getfval(x)); 11432a55deb1SDavid E. O'Brien if (fmtwd < 0) 11442a55deb1SDavid E. O'Brien fmtwd = -fmtwd; 11452a55deb1SDavid E. O'Brien adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 11462a55deb1SDavid E. O'Brien t = fmt + strlen(fmt); 11472a55deb1SDavid E. O'Brien tempfree(x); 11482a55deb1SDavid E. O'Brien } 11492a55deb1SDavid E. O'Brien } 11502a55deb1SDavid E. O'Brien *t = '\0'; 11512a55deb1SDavid E. O'Brien if (fmtwd < 0) 11522a55deb1SDavid E. O'Brien fmtwd = -fmtwd; 1153addad6afSRong-En Fan adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 11542a55deb1SDavid E. O'Brien switch (*s) { 1155b5253557SWarner Losh case 'a': case 'A': 1156b5253557SWarner Losh if (have_a_format) 1157b5253557SWarner Losh flag = *s; 1158b5253557SWarner Losh else 1159b5253557SWarner Losh flag = 'f'; 1160b5253557SWarner Losh break; 11612a55deb1SDavid E. O'Brien case 'f': case 'e': case 'g': case 'E': case 'G': 1162813da98dSDavid E. O'Brien flag = 'f'; 11632a55deb1SDavid E. 
O'Brien break; 1164f39dd6a9SWarner Losh case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1165f39dd6a9SWarner Losh flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 1166f39dd6a9SWarner Losh *(t-1) = 'j'; 1167f39dd6a9SWarner Losh *t = *s; 11682a55deb1SDavid E. O'Brien *++t = '\0'; 11692a55deb1SDavid E. O'Brien break; 11702a55deb1SDavid E. O'Brien case 's': 1171813da98dSDavid E. O'Brien flag = 's'; 11722a55deb1SDavid E. O'Brien break; 11732a55deb1SDavid E. O'Brien case 'c': 1174813da98dSDavid E. O'Brien flag = 'c'; 11752a55deb1SDavid E. O'Brien break; 11762a55deb1SDavid E. O'Brien default: 11772a55deb1SDavid E. O'Brien WARNING("weird printf conversion %s", fmt); 1178813da98dSDavid E. O'Brien flag = '?'; 11792a55deb1SDavid E. O'Brien break; 11802a55deb1SDavid E. O'Brien } 11812a55deb1SDavid E. O'Brien if (a == NULL) 11822a55deb1SDavid E. O'Brien FATAL("not enough args in printf(%s)", os); 11832a55deb1SDavid E. O'Brien x = execute(a); 11842a55deb1SDavid E. O'Brien a = a->nnext; 11852a55deb1SDavid E. O'Brien n = MAXNUMSIZE; 11862a55deb1SDavid E. O'Brien if (fmtwd > n) 11872a55deb1SDavid E. O'Brien n = fmtwd; 1188addad6afSRong-En Fan adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 11892a55deb1SDavid E. O'Brien switch (flag) { 1190f32a6403SWarner Losh case '?': 1191f32a6403SWarner Losh snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 11922a55deb1SDavid E. O'Brien t = getsval(x); 11932a55deb1SDavid E. O'Brien n = strlen(t); 11942a55deb1SDavid E. O'Brien if (fmtwd > n) 11952a55deb1SDavid E. O'Brien n = fmtwd; 1196addad6afSRong-En Fan adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 11972a55deb1SDavid E. O'Brien p += strlen(p); 1198f39dd6a9SWarner Losh snprintf(p, BUFSZ(p), "%s", t); 11992a55deb1SDavid E. 
O'Brien break; 1200b5253557SWarner Losh case 'a': 1201b5253557SWarner Losh case 'A': 1202f39dd6a9SWarner Losh case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1203f39dd6a9SWarner Losh case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1204f39dd6a9SWarner Losh case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1205f32a6403SWarner Losh 1206f32a6403SWarner Losh case 's': { 12072a55deb1SDavid E. O'Brien t = getsval(x); 12082a55deb1SDavid E. O'Brien n = strlen(t); 1209f32a6403SWarner Losh /* if simple format or no utf-8 in the string, sprintf works */ 1210f32a6403SWarner Losh if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 12112a55deb1SDavid E. O'Brien if (fmtwd > n) 12122a55deb1SDavid E. O'Brien n = fmtwd; 1213addad6afSRong-En Fan if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1214f32a6403SWarner Losh FATAL("huge string/format (%d chars) in printf %.30s..." \ 1215f32a6403SWarner Losh " ran format() out of memory", n, t); 1216f39dd6a9SWarner Losh snprintf(p, BUFSZ(p), fmt, t); 12172a55deb1SDavid E. 
O'Brien break; 1218f32a6403SWarner Losh } 1219f32a6403SWarner Losh 1220f32a6403SWarner Losh /* get here if string has utf-8 chars and fmt is not plain %s */ 1221f32a6403SWarner Losh /* "%-w.ps", where -, w and .p are all optional */ 1222f32a6403SWarner Losh /* '0' before the w is a flag character */ 1223f32a6403SWarner Losh /* fmt points at % */ 1224f32a6403SWarner Losh int ljust = 0, wid = 0, prec = n, pad = 0; 1225f32a6403SWarner Losh char *f = fmt+1; 1226f32a6403SWarner Losh if (f[0] == '-') { 1227f32a6403SWarner Losh ljust = 1; 1228f32a6403SWarner Losh f++; 1229f32a6403SWarner Losh } 1230f32a6403SWarner Losh // flags '0' and '+' are recognized but skipped 1231f32a6403SWarner Losh if (f[0] == '0') { 1232f32a6403SWarner Losh f++; 1233f32a6403SWarner Losh if (f[0] == '+') 1234f32a6403SWarner Losh f++; 1235f32a6403SWarner Losh } 1236f32a6403SWarner Losh if (f[0] == '+') { 1237f32a6403SWarner Losh f++; 1238f32a6403SWarner Losh if (f[0] == '0') 1239f32a6403SWarner Losh f++; 1240f32a6403SWarner Losh } 1241f32a6403SWarner Losh if (isdigit(f[0])) { /* there is a wid */ 1242f32a6403SWarner Losh wid = strtol(f, &f, 10); 1243f32a6403SWarner Losh } 1244f32a6403SWarner Losh if (f[0] == '.') { /* there is a .prec */ 1245f32a6403SWarner Losh prec = strtol(++f, &f, 10); 1246f32a6403SWarner Losh } 1247f32a6403SWarner Losh if (prec > u8_strlen(t)) 1248f32a6403SWarner Losh prec = u8_strlen(t); 1249f32a6403SWarner Losh pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1250f32a6403SWarner Losh int i, k, n; 1251f32a6403SWarner Losh 1252f32a6403SWarner Losh if (ljust) { // print prec chars from t, then pad blanks 1253f32a6403SWarner Losh n = u8_char2byte(t, prec); 1254f32a6403SWarner Losh for (k = 0; k < n; k++) { 1255f32a6403SWarner Losh //putchar(t[k]); 1256f32a6403SWarner Losh *p++ = t[k]; 1257f32a6403SWarner Losh } 1258f32a6403SWarner Losh for (i = 0; i < pad; i++) { 1259f32a6403SWarner Losh //printf(" "); 1260f32a6403SWarner Losh *p++ = ' '; 1261f32a6403SWarner Losh } 1262f32a6403SWarner Losh } else { // print pad blanks, then prec chars from t 1263f32a6403SWarner Losh for (i = 0; i < pad; i++) { 1264f32a6403SWarner Losh //printf(" "); 1265f32a6403SWarner Losh *p++ = ' '; 1266f32a6403SWarner Losh } 1267f32a6403SWarner Losh n = u8_char2byte(t, prec); 1268f32a6403SWarner Losh for (k = 0; k < n; k++) { 1269f32a6403SWarner Losh //putchar(t[k]); 1270f32a6403SWarner Losh *p++ = t[k]; 1271f32a6403SWarner Losh } 1272f32a6403SWarner Losh } 1273f32a6403SWarner Losh *p = 0; 1274f32a6403SWarner Losh break; 1275f32a6403SWarner Losh } 1276f32a6403SWarner Losh 1277f32a6403SWarner Losh case 'c': { 1278f32a6403SWarner Losh /* 1279f32a6403SWarner Losh * If a numeric value is given, awk should just turn 1280f32a6403SWarner Losh * it into a character and print it: 1281f32a6403SWarner Losh * BEGIN { printf("%c\n", 65) } 1282f32a6403SWarner Losh * prints "A". 1283f32a6403SWarner Losh * 1284f32a6403SWarner Losh * But what if the numeric value is > 128 and 1285f32a6403SWarner Losh * represents a valid Unicode code point?!? We do 1286f32a6403SWarner Losh * our best to convert it back into UTF-8. If we 1287f32a6403SWarner Losh * can't, we output the encoding of the Unicode 1288f32a6403SWarner Losh * "invalid character", 0xFFFD. 1289f32a6403SWarner Losh */ 12902a55deb1SDavid E. 
O'Brien if (isnum(x)) { 1291f32a6403SWarner Losh int charval = (int) getfval(x); 1292f32a6403SWarner Losh 1293f32a6403SWarner Losh if (charval != 0) { 1294f32a6403SWarner Losh if (charval < 128 || awk_mb_cur_max == 1) 1295f32a6403SWarner Losh snprintf(p, BUFSZ(p), fmt, charval); 1296813da98dSDavid E. O'Brien else { 1297f32a6403SWarner Losh // possible unicode character 1298f32a6403SWarner Losh size_t count; 1299f32a6403SWarner Losh char *bs = wide_char_to_byte_str(charval, &count); 1300f32a6403SWarner Losh 1301f32a6403SWarner Losh if (bs == NULL) { // invalid character 1302f32a6403SWarner Losh // use unicode invalid character, 0xFFFD 1303f32a6403SWarner Losh static char invalid_char[] = "\357\277\275"; 1304f32a6403SWarner Losh bs = invalid_char; 1305f32a6403SWarner Losh count = 3; 1306f32a6403SWarner Losh } 1307f32a6403SWarner Losh t = bs; 1308f32a6403SWarner Losh n = count; 1309f32a6403SWarner Losh goto format_percent_c; 1310f32a6403SWarner Losh } 1311f32a6403SWarner Losh } else { 1312813da98dSDavid E. O'Brien *p++ = '\0'; /* explicit null byte */ 1313813da98dSDavid E. O'Brien *p = '\0'; /* next output will start here */ 1314813da98dSDavid E. O'Brien } 1315f32a6403SWarner Losh break; 1316f32a6403SWarner Losh } 1317f32a6403SWarner Losh t = getsval(x); 1318f32a6403SWarner Losh n = u8_nextlen(t); 1319f32a6403SWarner Losh format_percent_c: 1320f32a6403SWarner Losh if (n < 2) { /* not utf8 */ 1321f39dd6a9SWarner Losh snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 13222a55deb1SDavid E. 
O'Brien break; 1323f32a6403SWarner Losh } 1324f32a6403SWarner Losh 1325f32a6403SWarner Losh // utf8 character, almost same song and dance as for %s 1326f32a6403SWarner Losh int ljust = 0, wid = 0, prec = n, pad = 0; 1327f32a6403SWarner Losh char *f = fmt+1; 1328f32a6403SWarner Losh if (f[0] == '-') { 1329f32a6403SWarner Losh ljust = 1; 1330f32a6403SWarner Losh f++; 1331f32a6403SWarner Losh } 1332f32a6403SWarner Losh // flags '0' and '+' are recognized but skipped 1333f32a6403SWarner Losh if (f[0] == '0') { 1334f32a6403SWarner Losh f++; 1335f32a6403SWarner Losh if (f[0] == '+') 1336f32a6403SWarner Losh f++; 1337f32a6403SWarner Losh } 1338f32a6403SWarner Losh if (f[0] == '+') { 1339f32a6403SWarner Losh f++; 1340f32a6403SWarner Losh if (f[0] == '0') 1341f32a6403SWarner Losh f++; 1342f32a6403SWarner Losh } 1343f32a6403SWarner Losh if (isdigit(f[0])) { /* there is a wid */ 1344f32a6403SWarner Losh wid = strtol(f, &f, 10); 1345f32a6403SWarner Losh } 1346f32a6403SWarner Losh if (f[0] == '.') { /* there is a .prec */ 1347f32a6403SWarner Losh prec = strtol(++f, &f, 10); 1348f32a6403SWarner Losh } 1349f32a6403SWarner Losh if (prec > 1) // %c --> only one character 1350f32a6403SWarner Losh prec = 1; 1351f32a6403SWarner Losh pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1352f32a6403SWarner Losh int i; 1353f32a6403SWarner Losh 1354f32a6403SWarner Losh if (ljust) { // print one char from t, then pad blanks 1355f32a6403SWarner Losh for (i = 0; i < n; i++) 1356f32a6403SWarner Losh *p++ = t[i]; 1357f32a6403SWarner Losh for (i = 0; i < pad; i++) { 1358f32a6403SWarner Losh //printf(" "); 1359f32a6403SWarner Losh *p++ = ' '; 1360f32a6403SWarner Losh } 1361f32a6403SWarner Losh } else { // print pad blanks, then prec chars from t 1362f32a6403SWarner Losh for (i = 0; i < pad; i++) { 1363f32a6403SWarner Losh //printf(" "); 1364f32a6403SWarner Losh *p++ = ' '; 1365f32a6403SWarner Losh } 1366f32a6403SWarner Losh for (i = 0; i < n; i++) 1367f32a6403SWarner Losh *p++ = t[i]; 1368f32a6403SWarner Losh } 1369f32a6403SWarner Losh *p = 0; 1370f32a6403SWarner Losh break; 1371f32a6403SWarner Losh } 1372813da98dSDavid E. O'Brien default: 1373813da98dSDavid E. O'Brien FATAL("can't happen: bad conversion %c in format()", flag); 13742a55deb1SDavid E. O'Brien } 1375f32a6403SWarner Losh 13762a55deb1SDavid E. O'Brien tempfree(x); 13772a55deb1SDavid E. O'Brien p += strlen(p); 13782a55deb1SDavid E. O'Brien s++; 13792a55deb1SDavid E. O'Brien } 13802a55deb1SDavid E. O'Brien *p = '\0'; 13812a55deb1SDavid E. O'Brien free(fmt); 1382f32a6403SWarner Losh for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1383f32a6403SWarner Losh x = execute(a); 1384f32a6403SWarner Losh tempfree(x); 1385f32a6403SWarner Losh } 13862a55deb1SDavid E. O'Brien *pbuf = buf; 13872a55deb1SDavid E. O'Brien *pbufsize = bufsize; 13882a55deb1SDavid E. O'Brien return p - buf; 13892a55deb1SDavid E. O'Brien } 13902a55deb1SDavid E. O'Brien 13912a55deb1SDavid E. O'Brien Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 13922a55deb1SDavid E. O'Brien { 13932a55deb1SDavid E. O'Brien Cell *x; 13942a55deb1SDavid E. O'Brien Node *y; 13952a55deb1SDavid E. O'Brien char *buf; 13962a55deb1SDavid E. O'Brien int bufsz=3*recsize; 13972a55deb1SDavid E. 
O'Brien 13982a55deb1SDavid E. O'Brien if ((buf = (char *) malloc(bufsz)) == NULL) 13992a55deb1SDavid E. O'Brien FATAL("out of memory in awksprintf"); 14002a55deb1SDavid E. O'Brien y = a[0]->nnext; 14012a55deb1SDavid E. O'Brien x = execute(a[0]); 14022a55deb1SDavid E. O'Brien if (format(&buf, &bufsz, getsval(x), y) == -1) 14032a55deb1SDavid E. O'Brien FATAL("sprintf string %.30s... too long. can't happen.", buf); 14042a55deb1SDavid E. O'Brien tempfree(x); 14052a55deb1SDavid E. O'Brien x = gettemp(); 14062a55deb1SDavid E. O'Brien x->sval = buf; 14072a55deb1SDavid E. O'Brien x->tval = STR; 14082a55deb1SDavid E. O'Brien return(x); 14092a55deb1SDavid E. O'Brien } 14102a55deb1SDavid E. O'Brien 14112a55deb1SDavid E. O'Brien Cell *awkprintf(Node **a, int n) /* printf */ 14122a55deb1SDavid E. O'Brien { /* a[0] is list of args, starting with format string */ 14132a55deb1SDavid E. O'Brien /* a[1] is redirection operator, a[2] is redirection file */ 14142a55deb1SDavid E. O'Brien FILE *fp; 14152a55deb1SDavid E. O'Brien Cell *x; 14162a55deb1SDavid E. O'Brien Node *y; 14172a55deb1SDavid E. O'Brien char *buf; 14182a55deb1SDavid E. O'Brien int len; 14192a55deb1SDavid E. O'Brien int bufsz=3*recsize; 14202a55deb1SDavid E. O'Brien 14212a55deb1SDavid E. O'Brien if ((buf = (char *) malloc(bufsz)) == NULL) 14222a55deb1SDavid E. O'Brien FATAL("out of memory in awkprintf"); 14232a55deb1SDavid E. O'Brien y = a[0]->nnext; 14242a55deb1SDavid E. O'Brien x = execute(a[0]); 14252a55deb1SDavid E. O'Brien if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 14262a55deb1SDavid E. O'Brien FATAL("printf string %.30s... too long. can't happen.", buf); 14272a55deb1SDavid E. O'Brien tempfree(x); 14282a55deb1SDavid E. O'Brien if (a[1] == NULL) { 14292a55deb1SDavid E. O'Brien /* fputs(buf, stdout); */ 14302a55deb1SDavid E. O'Brien fwrite(buf, len, 1, stdout); 14312a55deb1SDavid E. O'Brien if (ferror(stdout)) 14322a55deb1SDavid E. O'Brien FATAL("write error on stdout"); 14332a55deb1SDavid E. 
O'Brien } else { 14342a55deb1SDavid E. O'Brien fp = redirect(ptoi(a[1]), a[2]); 14352a55deb1SDavid E. O'Brien /* fputs(buf, fp); */ 14362a55deb1SDavid E. O'Brien fwrite(buf, len, 1, fp); 14372a55deb1SDavid E. O'Brien fflush(fp); 14382a55deb1SDavid E. O'Brien if (ferror(fp)) 14392a55deb1SDavid E. O'Brien FATAL("write error on %s", filename(fp)); 14402a55deb1SDavid E. O'Brien } 14412a55deb1SDavid E. O'Brien free(buf); 14422a55deb1SDavid E. O'Brien return(True); 14432a55deb1SDavid E. O'Brien } 14442a55deb1SDavid E. O'Brien 14452a55deb1SDavid E. O'Brien Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 14462a55deb1SDavid E. O'Brien { 14472a55deb1SDavid E. O'Brien Awkfloat i, j = 0; 14482a55deb1SDavid E. O'Brien double v; 14492a55deb1SDavid E. O'Brien Cell *x, *y, *z; 14502a55deb1SDavid E. O'Brien 14512a55deb1SDavid E. O'Brien x = execute(a[0]); 14522a55deb1SDavid E. O'Brien i = getfval(x); 14532a55deb1SDavid E. O'Brien tempfree(x); 1454b5253557SWarner Losh if (n != UMINUS && n != UPLUS) { 14552a55deb1SDavid E. O'Brien y = execute(a[1]); 14562a55deb1SDavid E. O'Brien j = getfval(y); 14572a55deb1SDavid E. O'Brien tempfree(y); 14582a55deb1SDavid E. O'Brien } 14592a55deb1SDavid E. O'Brien z = gettemp(); 14602a55deb1SDavid E. O'Brien switch (n) { 14612a55deb1SDavid E. O'Brien case ADD: 14622a55deb1SDavid E. O'Brien i += j; 14632a55deb1SDavid E. O'Brien break; 14642a55deb1SDavid E. O'Brien case MINUS: 14652a55deb1SDavid E. O'Brien i -= j; 14662a55deb1SDavid E. O'Brien break; 14672a55deb1SDavid E. O'Brien case MULT: 14682a55deb1SDavid E. O'Brien i *= j; 14692a55deb1SDavid E. O'Brien break; 14702a55deb1SDavid E. O'Brien case DIVIDE: 14712a55deb1SDavid E. O'Brien if (j == 0) 14722a55deb1SDavid E. O'Brien FATAL("division by zero"); 14732a55deb1SDavid E. O'Brien i /= j; 14742a55deb1SDavid E. O'Brien break; 14752a55deb1SDavid E. O'Brien case MOD: 14762a55deb1SDavid E. O'Brien if (j == 0) 14772a55deb1SDavid E. 
O'Brien FATAL("division by zero in mod"); 14782a55deb1SDavid E. O'Brien modf(i/j, &v); 14792a55deb1SDavid E. O'Brien i = i - j * v; 14802a55deb1SDavid E. O'Brien break; 14812a55deb1SDavid E. O'Brien case UMINUS: 14822a55deb1SDavid E. O'Brien i = -i; 14832a55deb1SDavid E. O'Brien break; 1484b5253557SWarner Losh case UPLUS: /* handled by getfval(), above */ 1485b5253557SWarner Losh break; 14862a55deb1SDavid E. O'Brien case POWER: 14872a55deb1SDavid E. O'Brien if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 14882a55deb1SDavid E. O'Brien i = ipow(i, (int) j); 1489f39dd6a9SWarner Losh else { 1490f39dd6a9SWarner Losh errno = 0; 14912a55deb1SDavid E. O'Brien i = errcheck(pow(i, j), "pow"); 1492f39dd6a9SWarner Losh } 14932a55deb1SDavid E. O'Brien break; 14942a55deb1SDavid E. O'Brien default: /* can't happen */ 14952a55deb1SDavid E. O'Brien FATAL("illegal arithmetic operator %d", n); 14962a55deb1SDavid E. O'Brien } 14972a55deb1SDavid E. O'Brien setfval(z, i); 14982a55deb1SDavid E. O'Brien return(z); 14992a55deb1SDavid E. O'Brien } 15002a55deb1SDavid E. O'Brien 15012a55deb1SDavid E. O'Brien double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 15022a55deb1SDavid E. O'Brien { 15032a55deb1SDavid E. O'Brien double v; 15042a55deb1SDavid E. O'Brien 15052a55deb1SDavid E. O'Brien if (n <= 0) 15062a55deb1SDavid E. O'Brien return 1; 15072a55deb1SDavid E. O'Brien v = ipow(x, n/2); 15082a55deb1SDavid E. O'Brien if (n % 2 == 0) 15092a55deb1SDavid E. O'Brien return v * v; 15102a55deb1SDavid E. O'Brien else 15112a55deb1SDavid E. O'Brien return x * v * v; 15122a55deb1SDavid E. O'Brien } 15132a55deb1SDavid E. O'Brien 15142a55deb1SDavid E. O'Brien Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 15152a55deb1SDavid E. O'Brien { 15162a55deb1SDavid E. O'Brien Cell *x, *z; 15172a55deb1SDavid E. O'Brien int k; 15182a55deb1SDavid E. O'Brien Awkfloat xf; 15192a55deb1SDavid E. O'Brien 15202a55deb1SDavid E. O'Brien x = execute(a[0]); 15212a55deb1SDavid E. 
O'Brien xf = getfval(x); 15222a55deb1SDavid E. O'Brien k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 15232a55deb1SDavid E. O'Brien if (n == PREINCR || n == PREDECR) { 15242a55deb1SDavid E. O'Brien setfval(x, xf + k); 15252a55deb1SDavid E. O'Brien return(x); 15262a55deb1SDavid E. O'Brien } 15272a55deb1SDavid E. O'Brien z = gettemp(); 15282a55deb1SDavid E. O'Brien setfval(z, xf); 15292a55deb1SDavid E. O'Brien setfval(x, xf + k); 15302a55deb1SDavid E. O'Brien tempfree(x); 15312a55deb1SDavid E. O'Brien return(z); 15322a55deb1SDavid E. O'Brien } 15332a55deb1SDavid E. O'Brien 15342a55deb1SDavid E. O'Brien Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 15352a55deb1SDavid E. O'Brien { /* this is subtle; don't muck with it. */ 15362a55deb1SDavid E. O'Brien Cell *x, *y; 15372a55deb1SDavid E. O'Brien Awkfloat xf, yf; 15382a55deb1SDavid E. O'Brien double v; 15392a55deb1SDavid E. O'Brien 15402a55deb1SDavid E. O'Brien y = execute(a[1]); 15412a55deb1SDavid E. O'Brien x = execute(a[0]); 15422a55deb1SDavid E. O'Brien if (n == ASSIGN) { /* ordinary assignment */ 1543b5253557SWarner Losh if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1544b5253557SWarner Losh ; /* self-assignment: leave alone unless it's a field or NF */ 15452a55deb1SDavid E. O'Brien else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1546f32a6403SWarner Losh yf = getfval(y); 15472a55deb1SDavid E. O'Brien setsval(x, getsval(y)); 1548f32a6403SWarner Losh x->fval = yf; 15492a55deb1SDavid E. O'Brien x->tval |= NUM; 15502a55deb1SDavid E. O'Brien } 15512a55deb1SDavid E. O'Brien else if (isstr(y)) 15522a55deb1SDavid E. O'Brien setsval(x, getsval(y)); 15532a55deb1SDavid E. O'Brien else if (isnum(y)) 15542a55deb1SDavid E. O'Brien setfval(x, getfval(y)); 15552a55deb1SDavid E. O'Brien else 15562a55deb1SDavid E. O'Brien funnyvar(y, "read value of"); 15572a55deb1SDavid E. O'Brien tempfree(y); 15582a55deb1SDavid E. O'Brien return(x); 15592a55deb1SDavid E. O'Brien } 15602a55deb1SDavid E. 
O'Brien xf = getfval(x); 15612a55deb1SDavid E. O'Brien yf = getfval(y); 15622a55deb1SDavid E. O'Brien switch (n) { 15632a55deb1SDavid E. O'Brien case ADDEQ: 15642a55deb1SDavid E. O'Brien xf += yf; 15652a55deb1SDavid E. O'Brien break; 15662a55deb1SDavid E. O'Brien case SUBEQ: 15672a55deb1SDavid E. O'Brien xf -= yf; 15682a55deb1SDavid E. O'Brien break; 15692a55deb1SDavid E. O'Brien case MULTEQ: 15702a55deb1SDavid E. O'Brien xf *= yf; 15712a55deb1SDavid E. O'Brien break; 15722a55deb1SDavid E. O'Brien case DIVEQ: 15732a55deb1SDavid E. O'Brien if (yf == 0) 15742a55deb1SDavid E. O'Brien FATAL("division by zero in /="); 15752a55deb1SDavid E. O'Brien xf /= yf; 15762a55deb1SDavid E. O'Brien break; 15772a55deb1SDavid E. O'Brien case MODEQ: 15782a55deb1SDavid E. O'Brien if (yf == 0) 15792a55deb1SDavid E. O'Brien FATAL("division by zero in %%="); 15802a55deb1SDavid E. O'Brien modf(xf/yf, &v); 15812a55deb1SDavid E. O'Brien xf = xf - yf * v; 15822a55deb1SDavid E. O'Brien break; 15832a55deb1SDavid E. O'Brien case POWEQ: 15842a55deb1SDavid E. O'Brien if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 15852a55deb1SDavid E. O'Brien xf = ipow(xf, (int) yf); 1586f39dd6a9SWarner Losh else { 1587f39dd6a9SWarner Losh errno = 0; 15882a55deb1SDavid E. O'Brien xf = errcheck(pow(xf, yf), "pow"); 1589f39dd6a9SWarner Losh } 15902a55deb1SDavid E. O'Brien break; 15912a55deb1SDavid E. O'Brien default: 15922a55deb1SDavid E. O'Brien FATAL("illegal assignment operator %d", n); 15932a55deb1SDavid E. O'Brien break; 15942a55deb1SDavid E. O'Brien } 15952a55deb1SDavid E. O'Brien tempfree(y); 15962a55deb1SDavid E. O'Brien setfval(x, xf); 15972a55deb1SDavid E. O'Brien return(x); 15982a55deb1SDavid E. O'Brien } 15992a55deb1SDavid E. O'Brien 16002a55deb1SDavid E. O'Brien Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 16012a55deb1SDavid E. O'Brien { 16022a55deb1SDavid E. O'Brien Cell *x, *y, *z; 16032a55deb1SDavid E. 
O'Brien int n1, n2; 1604b5253557SWarner Losh char *s = NULL; 1605b5253557SWarner Losh int ssz = 0; 16062a55deb1SDavid E. O'Brien 16072a55deb1SDavid E. O'Brien x = execute(a[0]); 1608b5253557SWarner Losh n1 = strlen(getsval(x)); 1609f32a6403SWarner Losh adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1610f39dd6a9SWarner Losh memcpy(s, x->sval, n1); 1611b5253557SWarner Losh 1612f32a6403SWarner Losh tempfree(x); 1613f32a6403SWarner Losh 16142a55deb1SDavid E. O'Brien y = execute(a[1]); 1615b5253557SWarner Losh n2 = strlen(getsval(y)); 1616b5253557SWarner Losh adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1617f39dd6a9SWarner Losh memcpy(s + n1, y->sval, n2); 1618f39dd6a9SWarner Losh s[n1 + n2] = '\0'; 1619b5253557SWarner Losh 16202a55deb1SDavid E. O'Brien tempfree(y); 1621b5253557SWarner Losh 16222a55deb1SDavid E. O'Brien z = gettemp(); 16232a55deb1SDavid E. O'Brien z->sval = s; 16242a55deb1SDavid E. O'Brien z->tval = STR; 1625b5253557SWarner Losh 16262a55deb1SDavid E. O'Brien return(z); 16272a55deb1SDavid E. O'Brien } 16282a55deb1SDavid E. O'Brien 16292a55deb1SDavid E. O'Brien Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 16302a55deb1SDavid E. O'Brien { 16312a55deb1SDavid E. O'Brien Cell *x; 16322a55deb1SDavid E. O'Brien 163310ce5b99SWarner Losh if (a[0] == NULL) 16342a55deb1SDavid E. O'Brien x = execute(a[1]); 16352a55deb1SDavid E. O'Brien else { 16362a55deb1SDavid E. O'Brien x = execute(a[0]); 16372a55deb1SDavid E. O'Brien if (istrue(x)) { 16382a55deb1SDavid E. O'Brien tempfree(x); 16392a55deb1SDavid E. O'Brien x = execute(a[1]); 16402a55deb1SDavid E. O'Brien } 16412a55deb1SDavid E. O'Brien } 16422a55deb1SDavid E. O'Brien return x; 16432a55deb1SDavid E. O'Brien } 16442a55deb1SDavid E. O'Brien 16452a55deb1SDavid E. O'Brien Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 16462a55deb1SDavid E. O'Brien { 16472a55deb1SDavid E. O'Brien Cell *x; 16482a55deb1SDavid E. O'Brien int pair; 16492a55deb1SDavid E. O'Brien 16502a55deb1SDavid E. 
O'Brien pair = ptoi(a[3]); 16512a55deb1SDavid E. O'Brien if (pairstack[pair] == 0) { 16522a55deb1SDavid E. O'Brien x = execute(a[0]); 16532a55deb1SDavid E. O'Brien if (istrue(x)) 16542a55deb1SDavid E. O'Brien pairstack[pair] = 1; 16552a55deb1SDavid E. O'Brien tempfree(x); 16562a55deb1SDavid E. O'Brien } 16572a55deb1SDavid E. O'Brien if (pairstack[pair] == 1) { 16582a55deb1SDavid E. O'Brien x = execute(a[1]); 16592a55deb1SDavid E. O'Brien if (istrue(x)) 16602a55deb1SDavid E. O'Brien pairstack[pair] = 0; 16612a55deb1SDavid E. O'Brien tempfree(x); 16622a55deb1SDavid E. O'Brien x = execute(a[2]); 16632a55deb1SDavid E. O'Brien return(x); 16642a55deb1SDavid E. O'Brien } 16652a55deb1SDavid E. O'Brien return(False); 16662a55deb1SDavid E. O'Brien } 16672a55deb1SDavid E. O'Brien 16682a55deb1SDavid E. O'Brien Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 16692a55deb1SDavid E. O'Brien { 167010ce5b99SWarner Losh Cell *x = NULL, *y, *ap; 1671f39dd6a9SWarner Losh const char *s, *origs, *t; 1672f39dd6a9SWarner Losh const char *fs = NULL; 1673f39dd6a9SWarner Losh char *origfs = NULL; 16742a55deb1SDavid E. O'Brien int sep; 1675f39dd6a9SWarner Losh char temp, num[50]; 16762a55deb1SDavid E. O'Brien int n, tempstat, arg3type; 1677f32a6403SWarner Losh int j; 1678f39dd6a9SWarner Losh double result; 16792a55deb1SDavid E. O'Brien 16802a55deb1SDavid E. O'Brien y = execute(a[0]); /* source string */ 16810840e960SXin LI origs = s = strdup(getsval(y)); 1682f32a6403SWarner Losh tempfree(y); 16832a55deb1SDavid E. O'Brien arg3type = ptoi(a[3]); 1684f32a6403SWarner Losh if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1685d9e8cf28SWarner Losh fs = getsval(fsloc); 1686f32a6403SWarner Losh } else if (arg3type == STRING) { /* split(str,arr,"string") */ 16872a55deb1SDavid E. 
O'Brien x = execute(a[2]); 1688f39dd6a9SWarner Losh fs = origfs = strdup(getsval(x)); 1689b5253557SWarner Losh tempfree(x); 1690f32a6403SWarner Losh } else if (arg3type == REGEXPR) { 16912a55deb1SDavid E. O'Brien fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1692f32a6403SWarner Losh } else { 16932a55deb1SDavid E. O'Brien FATAL("illegal type of split"); 1694f32a6403SWarner Losh } 16952a55deb1SDavid E. O'Brien sep = *fs; 16962a55deb1SDavid E. O'Brien ap = execute(a[1]); /* array name */ 1697f32a6403SWarner Losh /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 16982a55deb1SDavid E. O'Brien freesymtab(ap); 1699f39dd6a9SWarner Losh DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 17002a55deb1SDavid E. O'Brien ap->tval &= ~STR; 17012a55deb1SDavid E. O'Brien ap->tval |= ARR; 17022a55deb1SDavid E. O'Brien ap->sval = (char *) makesymtab(NSYMTAB); 17032a55deb1SDavid E. O'Brien 17042a55deb1SDavid E. O'Brien n = 0; 1705d86a0988SRuslan Ermilov if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1706d86a0988SRuslan Ermilov /* split(s, a, //); have to arrange that it looks like empty sep */ 1707d86a0988SRuslan Ermilov arg3type = 0; 1708d86a0988SRuslan Ermilov fs = ""; 1709d86a0988SRuslan Ermilov sep = 0; 1710d86a0988SRuslan Ermilov } 171188b8d487SRuslan Ermilov if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 17122a55deb1SDavid E. O'Brien fa *pfa; 17132a55deb1SDavid E. O'Brien if (arg3type == REGEXPR) { /* it's ready already */ 17142a55deb1SDavid E. O'Brien pfa = (fa *) a[2]; 17152a55deb1SDavid E. O'Brien } else { 17162a55deb1SDavid E. O'Brien pfa = makedfa(fs, 1); 17172a55deb1SDavid E. O'Brien } 17182a55deb1SDavid E. O'Brien if (nematch(pfa,s)) { 17192a55deb1SDavid E. O'Brien tempstat = pfa->initstat; 17202a55deb1SDavid E. O'Brien pfa->initstat = 2; 17212a55deb1SDavid E. O'Brien do { 17222a55deb1SDavid E. O'Brien n++; 1723f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 17242a55deb1SDavid E. 
O'Brien temp = *patbeg; 1725f39dd6a9SWarner Losh setptr(patbeg, '\0'); 1726f39dd6a9SWarner Losh if (is_number(s, & result)) 1727f39dd6a9SWarner Losh setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 17282a55deb1SDavid E. O'Brien else 17292a55deb1SDavid E. O'Brien setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1730f39dd6a9SWarner Losh setptr(patbeg, temp); 17312a55deb1SDavid E. O'Brien s = patbeg + patlen; 1732f39dd6a9SWarner Losh if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 17332a55deb1SDavid E. O'Brien n++; 1734f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 17352a55deb1SDavid E. O'Brien setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 17362a55deb1SDavid E. O'Brien pfa->initstat = tempstat; 17372a55deb1SDavid E. O'Brien goto spdone; 17382a55deb1SDavid E. O'Brien } 17392a55deb1SDavid E. O'Brien } while (nematch(pfa,s)); 1740c263f9bfSRuslan Ermilov pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1741c263f9bfSRuslan Ermilov /* cf gsub and refldbld */ 17422a55deb1SDavid E. O'Brien } 17432a55deb1SDavid E. O'Brien n++; 1744f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 1745f39dd6a9SWarner Losh if (is_number(s, & result)) 1746f39dd6a9SWarner Losh setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 17472a55deb1SDavid E. O'Brien else 17482a55deb1SDavid E. O'Brien setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 17492a55deb1SDavid E. O'Brien spdone: 17502a55deb1SDavid E. O'Brien pfa = NULL; 1751f32a6403SWarner Losh 1752f32a6403SWarner Losh } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1753f32a6403SWarner Losh char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1754f32a6403SWarner Losh for (;;) { 1755f32a6403SWarner Losh char *fr = newt; 1756f32a6403SWarner Losh n++; 1757f32a6403SWarner Losh if (*s == '"' ) { /* start of "..." 
*/ 1758f32a6403SWarner Losh for (s++ ; *s != '\0'; ) { 1759f32a6403SWarner Losh if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1760f32a6403SWarner Losh s += 2; /* doubled quote */ 1761f32a6403SWarner Losh *fr++ = '"'; 1762f32a6403SWarner Losh } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1763f32a6403SWarner Losh s++; /* skip over closing quote */ 1764f32a6403SWarner Losh break; 1765f32a6403SWarner Losh } else { 1766f32a6403SWarner Losh *fr++ = *s++; 1767f32a6403SWarner Losh } 1768f32a6403SWarner Losh } 1769f32a6403SWarner Losh *fr++ = 0; 1770f32a6403SWarner Losh } else { /* unquoted field */ 1771f32a6403SWarner Losh while (*s != ',' && *s != '\0') 1772f32a6403SWarner Losh *fr++ = *s++; 1773f32a6403SWarner Losh *fr++ = 0; 1774f32a6403SWarner Losh } 1775f32a6403SWarner Losh snprintf(num, sizeof(num), "%d", n); 1776f32a6403SWarner Losh if (is_number(newt, &result)) 1777f32a6403SWarner Losh setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1778f32a6403SWarner Losh else 1779f32a6403SWarner Losh setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1780f32a6403SWarner Losh if (*s++ == '\0') 1781f32a6403SWarner Losh break; 1782f32a6403SWarner Losh } 1783f32a6403SWarner Losh free(newt); 1784f32a6403SWarner Losh 1785f32a6403SWarner Losh } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 17862a55deb1SDavid E. O'Brien for (n = 0; ; ) { 1787f39dd6a9SWarner Losh #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1788f39dd6a9SWarner Losh while (ISWS(*s)) 17892a55deb1SDavid E. O'Brien s++; 1790f39dd6a9SWarner Losh if (*s == '\0') 17912a55deb1SDavid E. O'Brien break; 17922a55deb1SDavid E. O'Brien n++; 17932a55deb1SDavid E. O'Brien t = s; 17942a55deb1SDavid E. O'Brien do 17952a55deb1SDavid E. O'Brien s++; 1796f39dd6a9SWarner Losh while (*s != '\0' && !ISWS(*s)); 17972a55deb1SDavid E. 
O'Brien temp = *s; 1798f39dd6a9SWarner Losh setptr(s, '\0'); 1799f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 1800f39dd6a9SWarner Losh if (is_number(t, & result)) 1801f39dd6a9SWarner Losh setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 18022a55deb1SDavid E. O'Brien else 18032a55deb1SDavid E. O'Brien setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1804f39dd6a9SWarner Losh setptr(s, temp); 1805f39dd6a9SWarner Losh if (*s != '\0') 18062a55deb1SDavid E. O'Brien s++; 18072a55deb1SDavid E. O'Brien } 1808f32a6403SWarner Losh 18092a55deb1SDavid E. O'Brien } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1810f32a6403SWarner Losh for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1811f32a6403SWarner Losh char buf[10]; 18122a55deb1SDavid E. O'Brien n++; 1813f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 1814f32a6403SWarner Losh 1815f32a6403SWarner Losh for (j = 0; j < u8_nextlen(s); j++) { 1816f32a6403SWarner Losh buf[j] = s[j]; 1817f32a6403SWarner Losh } 1818f32a6403SWarner Losh buf[j] = '\0'; 1819f32a6403SWarner Losh 18202a55deb1SDavid E. O'Brien if (isdigit((uschar)buf[0])) 18212a55deb1SDavid E. O'Brien setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 18222a55deb1SDavid E. O'Brien else 18232a55deb1SDavid E. O'Brien setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 18242a55deb1SDavid E. O'Brien } 1825f32a6403SWarner Losh 1826f32a6403SWarner Losh } else if (*s != '\0') { /* some random single character */ 18272a55deb1SDavid E. O'Brien for (;;) { 18282a55deb1SDavid E. O'Brien n++; 18292a55deb1SDavid E. O'Brien t = s; 18301023317aSWarner Losh while (*s != sep && *s != '\0') 18312a55deb1SDavid E. O'Brien s++; 18322a55deb1SDavid E. O'Brien temp = *s; 1833f39dd6a9SWarner Losh setptr(s, '\0'); 1834f39dd6a9SWarner Losh snprintf(num, sizeof(num), "%d", n); 1835f39dd6a9SWarner Losh if (is_number(t, & result)) 1836f39dd6a9SWarner Losh setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 18372a55deb1SDavid E. 
O'Brien else 18382a55deb1SDavid E. O'Brien setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1839f39dd6a9SWarner Losh setptr(s, temp); 1840f39dd6a9SWarner Losh if (*s++ == '\0') 18412a55deb1SDavid E. O'Brien break; 18422a55deb1SDavid E. O'Brien } 18432a55deb1SDavid E. O'Brien } 18442a55deb1SDavid E. O'Brien tempfree(ap); 1845f39dd6a9SWarner Losh xfree(origs); 1846f39dd6a9SWarner Losh xfree(origfs); 18472a55deb1SDavid E. O'Brien x = gettemp(); 18482a55deb1SDavid E. O'Brien x->tval = NUM; 18492a55deb1SDavid E. O'Brien x->fval = n; 18502a55deb1SDavid E. O'Brien return(x); 18512a55deb1SDavid E. O'Brien } 18522a55deb1SDavid E. O'Brien 18532a55deb1SDavid E. O'Brien Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 18542a55deb1SDavid E. O'Brien { 18552a55deb1SDavid E. O'Brien Cell *x; 18562a55deb1SDavid E. O'Brien 18572a55deb1SDavid E. O'Brien x = execute(a[0]); 18582a55deb1SDavid E. O'Brien if (istrue(x)) { 18592a55deb1SDavid E. O'Brien tempfree(x); 18602a55deb1SDavid E. O'Brien x = execute(a[1]); 18612a55deb1SDavid E. O'Brien } else { 18622a55deb1SDavid E. O'Brien tempfree(x); 18632a55deb1SDavid E. O'Brien x = execute(a[2]); 18642a55deb1SDavid E. O'Brien } 18652a55deb1SDavid E. O'Brien return(x); 18662a55deb1SDavid E. O'Brien } 18672a55deb1SDavid E. O'Brien 18682a55deb1SDavid E. O'Brien Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 18692a55deb1SDavid E. O'Brien { 18702a55deb1SDavid E. O'Brien Cell *x; 18712a55deb1SDavid E. O'Brien 18722a55deb1SDavid E. O'Brien x = execute(a[0]); 18732a55deb1SDavid E. O'Brien if (istrue(x)) { 18742a55deb1SDavid E. O'Brien tempfree(x); 18752a55deb1SDavid E. O'Brien x = execute(a[1]); 187610ce5b99SWarner Losh } else if (a[2] != NULL) { 18772a55deb1SDavid E. O'Brien tempfree(x); 18782a55deb1SDavid E. O'Brien x = execute(a[2]); 18792a55deb1SDavid E. O'Brien } 18802a55deb1SDavid E. O'Brien return(x); 18812a55deb1SDavid E. O'Brien } 18822a55deb1SDavid E. O'Brien 18832a55deb1SDavid E. 
O'Brien Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 18842a55deb1SDavid E. O'Brien { 18852a55deb1SDavid E. O'Brien Cell *x; 18862a55deb1SDavid E. O'Brien 18872a55deb1SDavid E. O'Brien for (;;) { 18882a55deb1SDavid E. O'Brien x = execute(a[0]); 18892a55deb1SDavid E. O'Brien if (!istrue(x)) 18902a55deb1SDavid E. O'Brien return(x); 18912a55deb1SDavid E. O'Brien tempfree(x); 18922a55deb1SDavid E. O'Brien x = execute(a[1]); 18932a55deb1SDavid E. O'Brien if (isbreak(x)) { 18942a55deb1SDavid E. O'Brien x = True; 18952a55deb1SDavid E. O'Brien return(x); 18962a55deb1SDavid E. O'Brien } 18972a55deb1SDavid E. O'Brien if (isnext(x) || isexit(x) || isret(x)) 18982a55deb1SDavid E. O'Brien return(x); 18992a55deb1SDavid E. O'Brien tempfree(x); 19002a55deb1SDavid E. O'Brien } 19012a55deb1SDavid E. O'Brien } 19022a55deb1SDavid E. O'Brien 19032a55deb1SDavid E. O'Brien Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 19042a55deb1SDavid E. O'Brien { 19052a55deb1SDavid E. O'Brien Cell *x; 19062a55deb1SDavid E. O'Brien 19072a55deb1SDavid E. O'Brien for (;;) { 19082a55deb1SDavid E. O'Brien x = execute(a[0]); 19092a55deb1SDavid E. O'Brien if (isbreak(x)) 19102a55deb1SDavid E. O'Brien return True; 19112a55deb1SDavid E. O'Brien if (isnext(x) || isexit(x) || isret(x)) 19122a55deb1SDavid E. O'Brien return(x); 19132a55deb1SDavid E. O'Brien tempfree(x); 19142a55deb1SDavid E. O'Brien x = execute(a[1]); 19152a55deb1SDavid E. O'Brien if (!istrue(x)) 19162a55deb1SDavid E. O'Brien return(x); 19172a55deb1SDavid E. O'Brien tempfree(x); 19182a55deb1SDavid E. O'Brien } 19192a55deb1SDavid E. O'Brien } 19202a55deb1SDavid E. O'Brien 19212a55deb1SDavid E. O'Brien Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 19222a55deb1SDavid E. O'Brien { 19232a55deb1SDavid E. O'Brien Cell *x; 19242a55deb1SDavid E. O'Brien 19252a55deb1SDavid E. O'Brien x = execute(a[0]); 19262a55deb1SDavid E. O'Brien tempfree(x); 19272a55deb1SDavid E. 
O'Brien for (;;) { 192810ce5b99SWarner Losh if (a[1]!=NULL) { 19292a55deb1SDavid E. O'Brien x = execute(a[1]); 19302a55deb1SDavid E. O'Brien if (!istrue(x)) return(x); 19312a55deb1SDavid E. O'Brien else tempfree(x); 19322a55deb1SDavid E. O'Brien } 19332a55deb1SDavid E. O'Brien x = execute(a[3]); 19342a55deb1SDavid E. O'Brien if (isbreak(x)) /* turn off break */ 19352a55deb1SDavid E. O'Brien return True; 19362a55deb1SDavid E. O'Brien if (isnext(x) || isexit(x) || isret(x)) 19372a55deb1SDavid E. O'Brien return(x); 19382a55deb1SDavid E. O'Brien tempfree(x); 19392a55deb1SDavid E. O'Brien x = execute(a[2]); 19402a55deb1SDavid E. O'Brien tempfree(x); 19412a55deb1SDavid E. O'Brien } 19422a55deb1SDavid E. O'Brien } 19432a55deb1SDavid E. O'Brien 19442a55deb1SDavid E. O'Brien Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 19452a55deb1SDavid E. O'Brien { 19462a55deb1SDavid E. O'Brien Cell *x, *vp, *arrayp, *cp, *ncp; 19472a55deb1SDavid E. O'Brien Array *tp; 19482a55deb1SDavid E. O'Brien int i; 19492a55deb1SDavid E. O'Brien 19502a55deb1SDavid E. O'Brien vp = execute(a[0]); 19512a55deb1SDavid E. O'Brien arrayp = execute(a[1]); 19522a55deb1SDavid E. O'Brien if (!isarr(arrayp)) { 19532a55deb1SDavid E. O'Brien return True; 19542a55deb1SDavid E. O'Brien } 19552a55deb1SDavid E. O'Brien tp = (Array *) arrayp->sval; 19562a55deb1SDavid E. O'Brien tempfree(arrayp); 19572a55deb1SDavid E. O'Brien for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 19582a55deb1SDavid E. O'Brien for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 19592a55deb1SDavid E. O'Brien setsval(vp, cp->nval); 19602a55deb1SDavid E. O'Brien ncp = cp->cnext; 19612a55deb1SDavid E. O'Brien x = execute(a[2]); 19622a55deb1SDavid E. O'Brien if (isbreak(x)) { 19632a55deb1SDavid E. O'Brien tempfree(vp); 19642a55deb1SDavid E. O'Brien return True; 19652a55deb1SDavid E. O'Brien } 19662a55deb1SDavid E. O'Brien if (isnext(x) || isexit(x) || isret(x)) { 19672a55deb1SDavid E. 
O'Brien tempfree(vp); 19682a55deb1SDavid E. O'Brien return(x); 19692a55deb1SDavid E. O'Brien } 19702a55deb1SDavid E. O'Brien tempfree(x); 19712a55deb1SDavid E. O'Brien } 19722a55deb1SDavid E. O'Brien } 19732a55deb1SDavid E. O'Brien return True; 19742a55deb1SDavid E. O'Brien } 19752a55deb1SDavid E. O'Brien 1976f39dd6a9SWarner Losh static char *nawk_convert(const char *s, int (*fun_c)(int), 1977f39dd6a9SWarner Losh wint_t (*fun_wc)(wint_t)) 1978f39dd6a9SWarner Losh { 1979f39dd6a9SWarner Losh char *buf = NULL; 1980f39dd6a9SWarner Losh char *pbuf = NULL; 1981f39dd6a9SWarner Losh const char *ps = NULL; 1982f39dd6a9SWarner Losh size_t n = 0; 1983f39dd6a9SWarner Losh wchar_t wc; 1984f32a6403SWarner Losh const size_t sz = awk_mb_cur_max; 1985f32a6403SWarner Losh int unused; 1986f39dd6a9SWarner Losh 1987f39dd6a9SWarner Losh if (sz == 1) { 1988f39dd6a9SWarner Losh buf = tostring(s); 1989f39dd6a9SWarner Losh 1990f39dd6a9SWarner Losh for (pbuf = buf; *pbuf; pbuf++) 1991f39dd6a9SWarner Losh *pbuf = fun_c((uschar)*pbuf); 1992f39dd6a9SWarner Losh 1993f39dd6a9SWarner Losh return buf; 1994f39dd6a9SWarner Losh } else { 1995f39dd6a9SWarner Losh /* upper/lower character may be shorter/longer */ 1996f39dd6a9SWarner Losh buf = tostringN(s, strlen(s) * sz + 1); 1997f39dd6a9SWarner Losh 1998f39dd6a9SWarner Losh (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1999f39dd6a9SWarner Losh /* 2000f39dd6a9SWarner Losh * Reset internal state here too. 2001f39dd6a9SWarner Losh * Assign result to avoid a compiler warning. (Casting to void 2002f39dd6a9SWarner Losh * doesn't work.) 2003f39dd6a9SWarner Losh * Increment said variable to avoid a different warning. 
2004f39dd6a9SWarner Losh */ 2005f32a6403SWarner Losh unused = wctomb(NULL, L'\0'); 2006f39dd6a9SWarner Losh unused++; 2007f39dd6a9SWarner Losh 2008f39dd6a9SWarner Losh ps = s; 2009f39dd6a9SWarner Losh pbuf = buf; 2010f39dd6a9SWarner Losh while (n = mbtowc(&wc, ps, sz), 2011f39dd6a9SWarner Losh n > 0 && n != (size_t)-1 && n != (size_t)-2) 2012f39dd6a9SWarner Losh { 2013f39dd6a9SWarner Losh ps += n; 2014f39dd6a9SWarner Losh 2015f39dd6a9SWarner Losh n = wctomb(pbuf, fun_wc(wc)); 2016f39dd6a9SWarner Losh if (n == (size_t)-1) 2017f39dd6a9SWarner Losh FATAL("illegal wide character %s", s); 2018f39dd6a9SWarner Losh 2019f39dd6a9SWarner Losh pbuf += n; 2020f39dd6a9SWarner Losh } 2021f39dd6a9SWarner Losh 2022f39dd6a9SWarner Losh *pbuf = '\0'; 2023f39dd6a9SWarner Losh 2024f39dd6a9SWarner Losh if (n) 2025f39dd6a9SWarner Losh FATAL("illegal byte sequence %s", s); 2026f39dd6a9SWarner Losh 2027f39dd6a9SWarner Losh return buf; 2028f39dd6a9SWarner Losh } 2029f39dd6a9SWarner Losh } 2030f39dd6a9SWarner Losh 2031f39dd6a9SWarner Losh #ifdef __DJGPP__ 2032f39dd6a9SWarner Losh static wint_t towupper(wint_t wc) 2033f39dd6a9SWarner Losh { 2034f39dd6a9SWarner Losh if (wc >= 0 && wc < 256) 2035f39dd6a9SWarner Losh return toupper(wc & 0xFF); 2036f39dd6a9SWarner Losh 2037f39dd6a9SWarner Losh return wc; 2038f39dd6a9SWarner Losh } 2039f39dd6a9SWarner Losh 2040f39dd6a9SWarner Losh static wint_t towlower(wint_t wc) 2041f39dd6a9SWarner Losh { 2042f39dd6a9SWarner Losh if (wc >= 0 && wc < 256) 2043f39dd6a9SWarner Losh return tolower(wc & 0xFF); 2044f39dd6a9SWarner Losh 2045f39dd6a9SWarner Losh return wc; 2046f39dd6a9SWarner Losh } 2047f39dd6a9SWarner Losh #endif 2048f39dd6a9SWarner Losh 2049f39dd6a9SWarner Losh static char *nawk_toupper(const char *s) 2050f39dd6a9SWarner Losh { 2051f39dd6a9SWarner Losh return nawk_convert(s, toupper, towupper); 2052f39dd6a9SWarner Losh } 2053f39dd6a9SWarner Losh 2054f39dd6a9SWarner Losh static char *nawk_tolower(const char *s) 2055f39dd6a9SWarner Losh { 
2056f39dd6a9SWarner Losh return nawk_convert(s, tolower, towlower); 2057f39dd6a9SWarner Losh } 2058f39dd6a9SWarner Losh 2059f32a6403SWarner Losh 2060f32a6403SWarner Losh 20612a55deb1SDavid E. O'Brien Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 20622a55deb1SDavid E. O'Brien { 20632a55deb1SDavid E. O'Brien Cell *x, *y; 206417853db4SWarner Losh Awkfloat u = 0; 2065eb690a05SWarner Losh int t, sz; 20661b11b783SRuslan Ermilov Awkfloat tmp; 2067eb690a05SWarner Losh char *buf, *fmt; 20682a55deb1SDavid E. O'Brien Node *nextarg; 20692a55deb1SDavid E. O'Brien FILE *fp; 2070b5253557SWarner Losh int status = 0; 2071eb690a05SWarner Losh time_t tv; 2072*8d457988SWarner Losh struct tm *tm, tmbuf; 2073f32a6403SWarner Losh int estatus = 0; 20742a55deb1SDavid E. O'Brien 20752a55deb1SDavid E. O'Brien t = ptoi(a[0]); 20762a55deb1SDavid E. O'Brien x = execute(a[1]); 20772a55deb1SDavid E. O'Brien nextarg = a[1]->nnext; 20782a55deb1SDavid E. O'Brien switch (t) { 20792a55deb1SDavid E. O'Brien case FLENGTH: 2080007c6572SDag-Erling Smørgrav if (isarr(x)) 2081007c6572SDag-Erling Smørgrav u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 2082007c6572SDag-Erling Smørgrav else 2083f32a6403SWarner Losh u = u8_strlen(getsval(x)); 2084007c6572SDag-Erling Smørgrav break; 20852a55deb1SDavid E. O'Brien case FLOG: 2086f39dd6a9SWarner Losh errno = 0; 2087f39dd6a9SWarner Losh u = errcheck(log(getfval(x)), "log"); 2088f39dd6a9SWarner Losh break; 20892a55deb1SDavid E. O'Brien case FINT: 20902a55deb1SDavid E. O'Brien modf(getfval(x), &u); break; 20912a55deb1SDavid E. O'Brien case FEXP: 2092f39dd6a9SWarner Losh errno = 0; 2093f39dd6a9SWarner Losh u = errcheck(exp(getfval(x)), "exp"); 2094f39dd6a9SWarner Losh break; 20952a55deb1SDavid E. O'Brien case FSQRT: 2096f39dd6a9SWarner Losh errno = 0; 2097f39dd6a9SWarner Losh u = errcheck(sqrt(getfval(x)), "sqrt"); 2098f39dd6a9SWarner Losh break; 20992a55deb1SDavid E. O'Brien case FSIN: 21002a55deb1SDavid E. 
O'Brien u = sin(getfval(x)); break; 21012a55deb1SDavid E. O'Brien case FCOS: 21022a55deb1SDavid E. O'Brien u = cos(getfval(x)); break; 21032a55deb1SDavid E. O'Brien case FATAN: 210410ce5b99SWarner Losh if (nextarg == NULL) { 21052a55deb1SDavid E. O'Brien WARNING("atan2 requires two arguments; returning 1.0"); 21062a55deb1SDavid E. O'Brien u = 1.0; 21072a55deb1SDavid E. O'Brien } else { 21082a55deb1SDavid E. O'Brien y = execute(a[1]->nnext); 21092a55deb1SDavid E. O'Brien u = atan2(getfval(x), getfval(y)); 21102a55deb1SDavid E. O'Brien tempfree(y); 21112a55deb1SDavid E. O'Brien nextarg = nextarg->nnext; 21122a55deb1SDavid E. O'Brien } 21132a55deb1SDavid E. O'Brien break; 2114eb690a05SWarner Losh case FCOMPL: 2115eb690a05SWarner Losh u = ~((int)getfval(x)); 2116eb690a05SWarner Losh break; 2117eb690a05SWarner Losh case FAND: 2118eb690a05SWarner Losh if (nextarg == 0) { 2119eb690a05SWarner Losh WARNING("and requires two arguments; returning 0"); 2120eb690a05SWarner Losh u = 0; 2121eb690a05SWarner Losh break; 2122eb690a05SWarner Losh } 2123eb690a05SWarner Losh y = execute(a[1]->nnext); 2124eb690a05SWarner Losh u = ((int)getfval(x)) & ((int)getfval(y)); 2125eb690a05SWarner Losh tempfree(y); 2126eb690a05SWarner Losh nextarg = nextarg->nnext; 2127eb690a05SWarner Losh break; 2128eb690a05SWarner Losh case FFOR: 2129eb690a05SWarner Losh if (nextarg == 0) { 2130eb690a05SWarner Losh WARNING("or requires two arguments; returning 0"); 2131eb690a05SWarner Losh u = 0; 2132eb690a05SWarner Losh break; 2133eb690a05SWarner Losh } 2134eb690a05SWarner Losh y = execute(a[1]->nnext); 2135eb690a05SWarner Losh u = ((int)getfval(x)) | ((int)getfval(y)); 2136eb690a05SWarner Losh tempfree(y); 2137eb690a05SWarner Losh nextarg = nextarg->nnext; 2138eb690a05SWarner Losh break; 2139eb690a05SWarner Losh case FXOR: 2140eb690a05SWarner Losh if (nextarg == 0) { 2141eb690a05SWarner Losh WARNING("xor requires two arguments; returning 0"); 2142eb690a05SWarner Losh u = 0; 2143eb690a05SWarner Losh break; 
2144eb690a05SWarner Losh } 2145eb690a05SWarner Losh y = execute(a[1]->nnext); 2146eb690a05SWarner Losh u = ((int)getfval(x)) ^ ((int)getfval(y)); 2147eb690a05SWarner Losh tempfree(y); 2148eb690a05SWarner Losh nextarg = nextarg->nnext; 2149eb690a05SWarner Losh break; 2150eb690a05SWarner Losh case FLSHIFT: 2151eb690a05SWarner Losh if (nextarg == 0) { 2152eb690a05SWarner Losh WARNING("lshift requires two arguments; returning 0"); 2153eb690a05SWarner Losh u = 0; 2154eb690a05SWarner Losh break; 2155eb690a05SWarner Losh } 2156eb690a05SWarner Losh y = execute(a[1]->nnext); 2157eb690a05SWarner Losh u = ((int)getfval(x)) << ((int)getfval(y)); 2158eb690a05SWarner Losh tempfree(y); 2159eb690a05SWarner Losh nextarg = nextarg->nnext; 2160eb690a05SWarner Losh break; 2161eb690a05SWarner Losh case FRSHIFT: 2162eb690a05SWarner Losh if (nextarg == 0) { 2163eb690a05SWarner Losh WARNING("rshift requires two arguments; returning 0"); 2164eb690a05SWarner Losh u = 0; 2165eb690a05SWarner Losh break; 2166eb690a05SWarner Losh } 2167eb690a05SWarner Losh y = execute(a[1]->nnext); 2168eb690a05SWarner Losh u = ((int)getfval(x)) >> ((int)getfval(y)); 2169eb690a05SWarner Losh tempfree(y); 2170eb690a05SWarner Losh nextarg = nextarg->nnext; 2171eb690a05SWarner Losh break; 21722a55deb1SDavid E. O'Brien case FSYSTEM: 21732a55deb1SDavid E. O'Brien fflush(stdout); /* in case something is buffered already */ 2174f32a6403SWarner Losh estatus = status = system(getsval(x)); 2175b5253557SWarner Losh if (status != -1) { 2176b5253557SWarner Losh if (WIFEXITED(status)) { 2177f32a6403SWarner Losh estatus = WEXITSTATUS(status); 2178b5253557SWarner Losh } else if (WIFSIGNALED(status)) { 2179f32a6403SWarner Losh estatus = WTERMSIG(status) + 256; 2180b5253557SWarner Losh #ifdef WCOREDUMP 2181b5253557SWarner Losh if (WCOREDUMP(status)) 2182f32a6403SWarner Losh estatus += 256; 2183b5253557SWarner Losh #endif 2184b5253557SWarner Losh } else /* something else?!? 
*/ 2185f32a6403SWarner Losh estatus = 0; 2186b5253557SWarner Losh } 2187f32a6403SWarner Losh /* else estatus was set to -1 */ 2188f32a6403SWarner Losh u = estatus; 21892a55deb1SDavid E. O'Brien break; 21902a55deb1SDavid E. O'Brien case FRAND: 2191a4b2ac79SPedro F. Giffuni /* random() returns numbers in [0..2^31-1] 2192a4b2ac79SPedro F. Giffuni * in order to get a number in [0, 1), divide it by 2^31 2193a4b2ac79SPedro F. Giffuni */ 2194a4b2ac79SPedro F. Giffuni u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 21952a55deb1SDavid E. O'Brien break; 21962a55deb1SDavid E. O'Brien case FSRAND: 21972a55deb1SDavid E. O'Brien if (isrec(x)) /* no argument provided */ 21982a55deb1SDavid E. O'Brien u = time((time_t *)0); 21992a55deb1SDavid E. O'Brien else 22002a55deb1SDavid E. O'Brien u = getfval(x); 22011b11b783SRuslan Ermilov tmp = u; 2202a4b2ac79SPedro F. Giffuni srandom((unsigned long) u); 22031b11b783SRuslan Ermilov u = srand_seed; 22041b11b783SRuslan Ermilov srand_seed = tmp; 22052a55deb1SDavid E. O'Brien break; 22062a55deb1SDavid E. O'Brien case FTOUPPER: 22072a55deb1SDavid E. O'Brien case FTOLOWER: 2208f39dd6a9SWarner Losh if (t == FTOUPPER) 2209f39dd6a9SWarner Losh buf = nawk_toupper(getsval(x)); 2210f39dd6a9SWarner Losh else 2211f39dd6a9SWarner Losh buf = nawk_tolower(getsval(x)); 22122a55deb1SDavid E. O'Brien tempfree(x); 22132a55deb1SDavid E. O'Brien x = gettemp(); 22142a55deb1SDavid E. O'Brien setsval(x, buf); 22152a55deb1SDavid E. O'Brien free(buf); 22162a55deb1SDavid E. O'Brien return x; 22172a55deb1SDavid E. O'Brien case FFLUSH: 2218007c6572SDag-Erling Smørgrav if (isrec(x) || strlen(getsval(x)) == 0) { 2219007c6572SDag-Erling Smørgrav flush_all(); /* fflush() or fflush("") -> all */ 2220007c6572SDag-Erling Smørgrav u = 0; 2221f39dd6a9SWarner Losh } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 22222a55deb1SDavid E. O'Brien u = EOF; 22232a55deb1SDavid E. O'Brien else 22242a55deb1SDavid E. O'Brien u = fflush(fp); 22252a55deb1SDavid E. 
O'Brien break; 2226*8d457988SWarner Losh case FMKTIME: 2227*8d457988SWarner Losh memset(&tmbuf, 0, sizeof(tmbuf)); 2228*8d457988SWarner Losh tm = &tmbuf; 2229*8d457988SWarner Losh t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2230*8d457988SWarner Losh &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2231*8d457988SWarner Losh &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2232*8d457988SWarner Losh switch (t) { 2233*8d457988SWarner Losh case 6: 2234*8d457988SWarner Losh tm->tm_isdst = -1; /* let mktime figure it out */ 2235*8d457988SWarner Losh /* FALLTHROUGH */ 2236*8d457988SWarner Losh case 7: 2237*8d457988SWarner Losh tm->tm_year -= 1900; 2238*8d457988SWarner Losh tm->tm_mon--; 2239*8d457988SWarner Losh u = mktime(tm); 2240*8d457988SWarner Losh break; 2241*8d457988SWarner Losh default: 2242*8d457988SWarner Losh u = -1; 2243*8d457988SWarner Losh break; 2244*8d457988SWarner Losh } 2245*8d457988SWarner Losh break; 2246eb690a05SWarner Losh case FSYSTIME: 2247eb690a05SWarner Losh u = time((time_t *) 0); 2248eb690a05SWarner Losh break; 2249eb690a05SWarner Losh case FSTRFTIME: 2250eb690a05SWarner Losh /* strftime([format [,timestamp]]) */ 2251eb690a05SWarner Losh if (nextarg) { 2252eb690a05SWarner Losh y = execute(nextarg); 2253eb690a05SWarner Losh nextarg = nextarg->nnext; 2254eb690a05SWarner Losh tv = (time_t) getfval(y); 2255eb690a05SWarner Losh tempfree(y); 2256eb690a05SWarner Losh } else 2257eb690a05SWarner Losh tv = time((time_t *) 0); 2258eb690a05SWarner Losh tm = localtime(&tv); 2259eb690a05SWarner Losh if (tm == NULL) 2260eb690a05SWarner Losh FATAL("bad time %ld", (long)tv); 2261eb690a05SWarner Losh 2262eb690a05SWarner Losh if (isrec(x)) { 2263eb690a05SWarner Losh /* format argument not provided, use default */ 2264eb690a05SWarner Losh fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2265eb690a05SWarner Losh } else 2266eb690a05SWarner Losh fmt = tostring(getsval(x)); 2267eb690a05SWarner Losh 2268eb690a05SWarner Losh sz = 32; 2269eb690a05SWarner Losh buf = NULL; 
2270eb690a05SWarner Losh do { 2271eb690a05SWarner Losh if ((buf = realloc(buf, (sz *= 2))) == NULL) 2272eb690a05SWarner Losh FATAL("out of memory in strftime"); 2273eb690a05SWarner Losh } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2274eb690a05SWarner Losh 2275eb690a05SWarner Losh y = gettemp(); 2276eb690a05SWarner Losh setsval(y, buf); 2277eb690a05SWarner Losh free(fmt); 2278eb690a05SWarner Losh free(buf); 2279eb690a05SWarner Losh 2280eb690a05SWarner Losh return y; 22812a55deb1SDavid E. O'Brien default: /* can't happen */ 22822a55deb1SDavid E. O'Brien FATAL("illegal function type %d", t); 22832a55deb1SDavid E. O'Brien break; 22842a55deb1SDavid E. O'Brien } 22852a55deb1SDavid E. O'Brien tempfree(x); 22862a55deb1SDavid E. O'Brien x = gettemp(); 22872a55deb1SDavid E. O'Brien setfval(x, u); 228810ce5b99SWarner Losh if (nextarg != NULL) { 22892a55deb1SDavid E. O'Brien WARNING("warning: function has too many arguments"); 2290f32a6403SWarner Losh for ( ; nextarg; nextarg = nextarg->nnext) { 2291f32a6403SWarner Losh y = execute(nextarg); 2292f32a6403SWarner Losh tempfree(y); 2293f32a6403SWarner Losh } 22942a55deb1SDavid E. O'Brien } 22952a55deb1SDavid E. O'Brien return(x); 22962a55deb1SDavid E. O'Brien } 22972a55deb1SDavid E. O'Brien 22982a55deb1SDavid E. O'Brien Cell *printstat(Node **a, int n) /* print a[0] */ 22992a55deb1SDavid E. O'Brien { 23002a55deb1SDavid E. O'Brien Node *x; 23012a55deb1SDavid E. O'Brien Cell *y; 23022a55deb1SDavid E. O'Brien FILE *fp; 23032a55deb1SDavid E. O'Brien 230410ce5b99SWarner Losh if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 23052a55deb1SDavid E. O'Brien fp = stdout; 23062a55deb1SDavid E. O'Brien else 23072a55deb1SDavid E. O'Brien fp = redirect(ptoi(a[1]), a[2]); 23082a55deb1SDavid E. O'Brien for (x = a[0]; x != NULL; x = x->nnext) { 23092a55deb1SDavid E. O'Brien y = execute(x); 2310813da98dSDavid E. O'Brien fputs(getpssval(y), fp); 23112a55deb1SDavid E. O'Brien tempfree(y); 23122a55deb1SDavid E. 
O'Brien if (x->nnext == NULL) 2313b5253557SWarner Losh fputs(getsval(orsloc), fp); 23142a55deb1SDavid E. O'Brien else 2315b5253557SWarner Losh fputs(getsval(ofsloc), fp); 23162a55deb1SDavid E. O'Brien } 231710ce5b99SWarner Losh if (a[1] != NULL) 23182a55deb1SDavid E. O'Brien fflush(fp); 23192a55deb1SDavid E. O'Brien if (ferror(fp)) 23202a55deb1SDavid E. O'Brien FATAL("write error on %s", filename(fp)); 23212a55deb1SDavid E. O'Brien return(True); 23222a55deb1SDavid E. O'Brien } 23232a55deb1SDavid E. O'Brien 23242a55deb1SDavid E. O'Brien Cell *nullproc(Node **a, int n) 23252a55deb1SDavid E. O'Brien { 23262a55deb1SDavid E. O'Brien return 0; 23272a55deb1SDavid E. O'Brien } 23282a55deb1SDavid E. O'Brien 23292a55deb1SDavid E. O'Brien 23302a55deb1SDavid E. O'Brien FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 23312a55deb1SDavid E. O'Brien { 23322a55deb1SDavid E. O'Brien FILE *fp; 23332a55deb1SDavid E. O'Brien Cell *x; 23342a55deb1SDavid E. O'Brien char *fname; 23352a55deb1SDavid E. O'Brien 23362a55deb1SDavid E. O'Brien x = execute(b); 23372a55deb1SDavid E. O'Brien fname = getsval(x); 2338f39dd6a9SWarner Losh fp = openfile(a, fname, NULL); 23392a55deb1SDavid E. O'Brien if (fp == NULL) 23402a55deb1SDavid E. O'Brien FATAL("can't open file %s", fname); 23412a55deb1SDavid E. O'Brien tempfree(x); 23422a55deb1SDavid E. O'Brien return fp; 23432a55deb1SDavid E. O'Brien } 23442a55deb1SDavid E. O'Brien 23452a55deb1SDavid E. O'Brien struct files { 23462a55deb1SDavid E. O'Brien FILE *fp; 2347813da98dSDavid E. O'Brien const char *fname; 23482a55deb1SDavid E. O'Brien int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2349d86a0988SRuslan Ermilov } *files; 2350d86a0988SRuslan Ermilov 2351f39dd6a9SWarner Losh size_t nfiles; 23522a55deb1SDavid E. O'Brien 2353f39dd6a9SWarner Losh static void stdinit(void) /* in case stdin, etc., are not constants */ 23542a55deb1SDavid E. 
O'Brien { 2355d86a0988SRuslan Ermilov nfiles = FOPEN_MAX; 2356f39dd6a9SWarner Losh files = (struct files *) calloc(nfiles, sizeof(*files)); 2357d86a0988SRuslan Ermilov if (files == NULL) 2358f39dd6a9SWarner Losh FATAL("can't allocate file memory for %zu files", nfiles); 23592a55deb1SDavid E. O'Brien files[0].fp = stdin; 2360f32a6403SWarner Losh files[0].fname = tostring("/dev/stdin"); 2361d86a0988SRuslan Ermilov files[0].mode = LT; 23622a55deb1SDavid E. O'Brien files[1].fp = stdout; 2363f32a6403SWarner Losh files[1].fname = tostring("/dev/stdout"); 2364d86a0988SRuslan Ermilov files[1].mode = GT; 23652a55deb1SDavid E. O'Brien files[2].fp = stderr; 2366f32a6403SWarner Losh files[2].fname = tostring("/dev/stderr"); 2367d86a0988SRuslan Ermilov files[2].mode = GT; 23682a55deb1SDavid E. O'Brien } 23692a55deb1SDavid E. O'Brien 2370f39dd6a9SWarner Losh FILE *openfile(int a, const char *us, bool *pnewflag) 23712a55deb1SDavid E. O'Brien { 2372813da98dSDavid E. O'Brien const char *s = us; 2373f39dd6a9SWarner Losh size_t i; 2374f39dd6a9SWarner Losh int m; 237510ce5b99SWarner Losh FILE *fp = NULL; 23762a55deb1SDavid E. O'Brien 23772a55deb1SDavid E. O'Brien if (*s == '\0') 23782a55deb1SDavid E. O'Brien FATAL("null file name in print or getline"); 2379d86a0988SRuslan Ermilov for (i = 0; i < nfiles; i++) 2380f39dd6a9SWarner Losh if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2381f39dd6a9SWarner Losh (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2382f39dd6a9SWarner Losh a == FFLUSH)) { 2383f39dd6a9SWarner Losh if (pnewflag) 2384f39dd6a9SWarner Losh *pnewflag = false; 23852a55deb1SDavid E. O'Brien return files[i].fp; 23862a55deb1SDavid E. O'Brien } 23872a55deb1SDavid E. O'Brien if (a == FFLUSH) /* didn't find it, so don't create it! */ 23882a55deb1SDavid E. O'Brien return NULL; 23892a55deb1SDavid E. O'Brien 2390d86a0988SRuslan Ermilov for (i = 0; i < nfiles; i++) 239110ce5b99SWarner Losh if (files[i].fp == NULL) 23922a55deb1SDavid E. 
O'Brien break; 2393d86a0988SRuslan Ermilov if (i >= nfiles) { 2394d86a0988SRuslan Ermilov struct files *nf; 2395f39dd6a9SWarner Losh size_t nnf = nfiles + FOPEN_MAX; 2396f39dd6a9SWarner Losh nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2397d86a0988SRuslan Ermilov if (nf == NULL) 2398f39dd6a9SWarner Losh FATAL("cannot grow files for %s and %zu files", s, nnf); 2399d86a0988SRuslan Ermilov memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2400d86a0988SRuslan Ermilov nfiles = nnf; 2401d86a0988SRuslan Ermilov files = nf; 2402d86a0988SRuslan Ermilov } 24032a55deb1SDavid E. O'Brien fflush(stdout); /* force a semblance of order */ 24042a55deb1SDavid E. O'Brien m = a; 24052a55deb1SDavid E. O'Brien if (a == GT) { 24062a55deb1SDavid E. O'Brien fp = fopen(s, "w"); 24072a55deb1SDavid E. O'Brien } else if (a == APPEND) { 24082a55deb1SDavid E. O'Brien fp = fopen(s, "a"); 24092a55deb1SDavid E. O'Brien m = GT; /* so can mix > and >> */ 24102a55deb1SDavid E. O'Brien } else if (a == '|') { /* output pipe */ 24112a55deb1SDavid E. O'Brien fp = popen(s, "w"); 24122a55deb1SDavid E. O'Brien } else if (a == LE) { /* input pipe */ 24132a55deb1SDavid E. O'Brien fp = popen(s, "r"); 24142a55deb1SDavid E. O'Brien } else if (a == LT) { /* getline <file */ 24152a55deb1SDavid E. O'Brien fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 24162a55deb1SDavid E. O'Brien } else /* can't happen */ 24172a55deb1SDavid E. O'Brien FATAL("illegal redirection %d", a); 24182a55deb1SDavid E. O'Brien if (fp != NULL) { 24192a55deb1SDavid E. O'Brien files[i].fname = tostring(s); 24202a55deb1SDavid E. O'Brien files[i].fp = fp; 24212a55deb1SDavid E. O'Brien files[i].mode = m; 2422f39dd6a9SWarner Losh if (pnewflag) 2423f39dd6a9SWarner Losh *pnewflag = true; 2424f39dd6a9SWarner Losh if (fp != stdin && fp != stdout && fp != stderr) 2425f39dd6a9SWarner Losh (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 24262a55deb1SDavid E. O'Brien } 24272a55deb1SDavid E. 
O'Brien return fp; 24282a55deb1SDavid E. O'Brien } 24292a55deb1SDavid E. O'Brien 2430813da98dSDavid E. O'Brien const char *filename(FILE *fp) 24312a55deb1SDavid E. O'Brien { 2432f39dd6a9SWarner Losh size_t i; 24332a55deb1SDavid E. O'Brien 2434d86a0988SRuslan Ermilov for (i = 0; i < nfiles; i++) 24352a55deb1SDavid E. O'Brien if (fp == files[i].fp) 24362a55deb1SDavid E. O'Brien return files[i].fname; 24372a55deb1SDavid E. O'Brien return "???"; 24382a55deb1SDavid E. O'Brien } 24392a55deb1SDavid E. O'Brien 24402a55deb1SDavid E. O'Brien Cell *closefile(Node **a, int n) 24412a55deb1SDavid E. O'Brien { 24422a55deb1SDavid E. O'Brien Cell *x; 2443f39dd6a9SWarner Losh size_t i; 2444f39dd6a9SWarner Losh bool stat; 24452a55deb1SDavid E. O'Brien 24462a55deb1SDavid E. O'Brien x = execute(a[0]); 24472a55deb1SDavid E. O'Brien getsval(x); 2448f39dd6a9SWarner Losh stat = true; 2449d86a0988SRuslan Ermilov for (i = 0; i < nfiles; i++) { 2450f39dd6a9SWarner Losh if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2451f39dd6a9SWarner Losh continue; 2452f32a6403SWarner Losh if (files[i].mode == GT || files[i].mode == '|') 2453f32a6403SWarner Losh fflush(files[i].fp); 2454f32a6403SWarner Losh if (ferror(files[i].fp)) { 2455f32a6403SWarner Losh if ((files[i].mode == GT && files[i].fp != stderr) 2456f32a6403SWarner Losh || files[i].mode == '|') 2457f32a6403SWarner Losh FATAL("write error on %s", files[i].fname); 2458f32a6403SWarner Losh else 2459f32a6403SWarner Losh WARNING("i/o error occurred on %s", files[i].fname); 2460f32a6403SWarner Losh } 2461f39dd6a9SWarner Losh if (files[i].fp == stdin || files[i].fp == stdout || 2462f39dd6a9SWarner Losh files[i].fp == stderr) 2463f39dd6a9SWarner Losh stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2464f39dd6a9SWarner Losh else if (files[i].mode == '|' || files[i].mode == LE) 2465f39dd6a9SWarner Losh stat = pclose(files[i].fp) == -1; 24662a55deb1SDavid E. 
O'Brien else 2467f39dd6a9SWarner Losh stat = fclose(files[i].fp) == EOF; 2468f39dd6a9SWarner Losh if (stat) 2469f32a6403SWarner Losh WARNING("i/o error occurred closing %s", files[i].fname); 24702a55deb1SDavid E. O'Brien xfree(files[i].fname); 24712a55deb1SDavid E. O'Brien files[i].fname = NULL; /* watch out for ref thru this */ 24722a55deb1SDavid E. O'Brien files[i].fp = NULL; 2473f39dd6a9SWarner Losh break; 24742a55deb1SDavid E. O'Brien } 24752a55deb1SDavid E. O'Brien tempfree(x); 24762a55deb1SDavid E. O'Brien x = gettemp(); 2477f39dd6a9SWarner Losh setfval(x, (Awkfloat) (stat ? -1 : 0)); 24782a55deb1SDavid E. O'Brien return(x); 24792a55deb1SDavid E. O'Brien } 24802a55deb1SDavid E. O'Brien 24812a55deb1SDavid E. O'Brien void closeall(void) 24822a55deb1SDavid E. O'Brien { 2483f39dd6a9SWarner Losh size_t i; 2484f39dd6a9SWarner Losh bool stat = false; 24852a55deb1SDavid E. O'Brien 2486f39dd6a9SWarner Losh for (i = 0; i < nfiles; i++) { 2487f39dd6a9SWarner Losh if (! files[i].fp) 2488f39dd6a9SWarner Losh continue; 2489f32a6403SWarner Losh if (files[i].mode == GT || files[i].mode == '|') 2490f32a6403SWarner Losh fflush(files[i].fp); 2491f32a6403SWarner Losh if (ferror(files[i].fp)) { 2492f32a6403SWarner Losh if ((files[i].mode == GT && files[i].fp != stderr) 2493f32a6403SWarner Losh || files[i].mode == '|') 2494f32a6403SWarner Losh FATAL("write error on %s", files[i].fname); 2495f32a6403SWarner Losh else 2496f32a6403SWarner Losh WARNING("i/o error occurred on %s", files[i].fname); 2497f32a6403SWarner Losh } 2498f32a6403SWarner Losh if (files[i].fp == stdin || files[i].fp == stdout || 2499f32a6403SWarner Losh files[i].fp == stderr) 2500f39dd6a9SWarner Losh continue; 25012a55deb1SDavid E. O'Brien if (files[i].mode == '|' || files[i].mode == LE) 2502f39dd6a9SWarner Losh stat = pclose(files[i].fp) == -1; 25032a55deb1SDavid E. 
/* flush_all - call fflush() on every stream currently held in the file table. */
static void flush_all(void)
{
	size_t i;

	for (i = 0; i < nfiles; i++)
		if (files[i].fp)
			fflush(files[i].fp);
}

/* defined elsewhere; called below when the replacement text contains a backslash */
void backsub(char **pb_ptr, const char **sptr_ptr);

/*
 * dosub - implement sub() and gsub().
 *
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is evaluated and compiled with makedfa().  a[2] is the
 * replacement text and a[3] the string operated on.  subop selects SUB
 * (only the first match is replaced) or GSUB (every match is replaced).
 *
 * The output is assembled in a malloc'ed buffer that is only allocated
 * once the first match is found; when nothing matches, the target is left
 * untouched.  The result cell holds m, the number of matches counted.
 *
 * NOTE(review): patbeg and patlen are globals not declared in this block;
 * they are presumably set by pmatch() to the start and length of the
 * current match -- confirm against pmatch().
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;
	char *repl;
	Cell *x;

	char *buf = NULL;
	char *pb = NULL;
	int bufsz = recsize;

	const char *r, *s;
	const char *start;
	const char *noempty = NULL;      /* empty match disallowed here */
	size_t m = 0;                    /* match count */
	size_t whichm = 0;               /* which match to select, 0 = global */
	int mtype;                       /* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	/* keep a private copy of the replacement so its cell can be released */
	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);    /* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);    /* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		if (buf == NULL) {
			/* first match: allocate the output buffer lazily */
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			/*
			 * initstat is saved here and restored after the loop.
			 * NOTE(review): the meaning of the value 2 is not
			 * visible in this block -- confirm against the fa
			 * implementation.
			 */
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		/*
		 * Note that ++m is evaluated for every match that is not
		 * rejected by the first test, whether or not it is selected.
		 */
		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;    /* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;    /* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;   /* skip replacement text */

		/* emit the replacement: '\\' goes through backsub(), '&' copies the match */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		/* a replaced match is dropped; there is nothing to copy at end of string */
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;     /* skip matching text */

		/*
		 * Copy the matched text through unchanged.  For an empty
		 * match, patlen is first set from u8_nextlen(patbeg) so that
		 * the search position below still advances.
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop once the selected match is handled or the input is exhausted */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {
		/* at least one match was found: finish the output and store it */
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);
		free(buf);
	}

	/* the result is the match count, returned as a numeric cell */
	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}
O'Brien 2660eb690a05SWarner Losh Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2661eb690a05SWarner Losh /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2662eb690a05SWarner Losh { 2663eb690a05SWarner Losh Cell *x, *y, *res, *h; 2664eb690a05SWarner Losh char *rptr; 2665eb690a05SWarner Losh const char *sptr; 2666eb690a05SWarner Losh char *buf, *pb; 2667eb690a05SWarner Losh const char *t, *q; 2668eb690a05SWarner Losh fa *pfa; 2669eb690a05SWarner Losh int mflag, tempstat, num, whichm; 2670eb690a05SWarner Losh int bufsz = recsize; 2671eb690a05SWarner Losh 2672eb690a05SWarner Losh if ((buf = malloc(bufsz)) == NULL) 2673eb690a05SWarner Losh FATAL("out of memory in gensub"); 2674eb690a05SWarner Losh mflag = 0; /* if mflag == 0, can replace empty string */ 2675eb690a05SWarner Losh num = 0; 2676eb690a05SWarner Losh x = execute(a[4]); /* source string */ 2677eb690a05SWarner Losh t = getsval(x); 2678eb690a05SWarner Losh res = copycell(x); /* target string - initially copy of source */ 2679eb690a05SWarner Losh res->csub = CTEMP; /* result values are temporary */ 2680eb690a05SWarner Losh if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2681eb690a05SWarner Losh pfa = (fa *) a[1]; /* regular expression */ 2682eb690a05SWarner Losh else { 2683eb690a05SWarner Losh y = execute(a[1]); 2684eb690a05SWarner Losh pfa = makedfa(getsval(y), 1); 2685eb690a05SWarner Losh tempfree(y); 2686eb690a05SWarner Losh } 2687eb690a05SWarner Losh y = execute(a[2]); /* replacement string */ 2688eb690a05SWarner Losh h = execute(a[3]); /* which matches should be replaced */ 2689eb690a05SWarner Losh sptr = getsval(h); 2690eb690a05SWarner Losh if (sptr[0] == 'g' || sptr[0] == 'G') 2691eb690a05SWarner Losh whichm = -1; 2692eb690a05SWarner Losh else { 2693eb690a05SWarner Losh /* 2694eb690a05SWarner Losh * The specified number is index of replacement, starting 2695eb690a05SWarner Losh * from 1. 
GNU awk treats index lower than 0 same as 2696eb690a05SWarner Losh * 1, we do same for compatibility. 2697eb690a05SWarner Losh */ 2698eb690a05SWarner Losh whichm = (int) getfval(h) - 1; 2699eb690a05SWarner Losh if (whichm < 0) 2700eb690a05SWarner Losh whichm = 0; 2701eb690a05SWarner Losh } 2702eb690a05SWarner Losh tempfree(h); 2703eb690a05SWarner Losh 2704eb690a05SWarner Losh if (pmatch(pfa, t)) { 2705eb690a05SWarner Losh char *sl; 2706eb690a05SWarner Losh 2707eb690a05SWarner Losh tempstat = pfa->initstat; 2708eb690a05SWarner Losh pfa->initstat = 2; 2709eb690a05SWarner Losh pb = buf; 2710eb690a05SWarner Losh rptr = getsval(y); 2711eb690a05SWarner Losh /* 2712eb690a05SWarner Losh * XXX if there are any backreferences in subst string, 2713eb690a05SWarner Losh * complain now. 2714eb690a05SWarner Losh */ 2715eb690a05SWarner Losh for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2716eb690a05SWarner Losh if (strchr("0123456789", sl[1])) { 2717eb690a05SWarner Losh FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2718eb690a05SWarner Losh } 2719eb690a05SWarner Losh } 2720eb690a05SWarner Losh 2721eb690a05SWarner Losh do { 2722eb690a05SWarner Losh if (whichm >= 0 && whichm != num) { 2723eb690a05SWarner Losh num++; 2724eb690a05SWarner Losh adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2725eb690a05SWarner Losh 2726eb690a05SWarner Losh /* copy the part of string up to and including 2727eb690a05SWarner Losh * match to output buffer */ 2728eb690a05SWarner Losh while (t < patbeg + patlen) 2729eb690a05SWarner Losh *pb++ = *t++; 2730eb690a05SWarner Losh continue; 2731eb690a05SWarner Losh } 2732eb690a05SWarner Losh 2733eb690a05SWarner Losh if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2734eb690a05SWarner Losh if (mflag == 0) { /* can replace empty */ 2735eb690a05SWarner Losh num++; 2736eb690a05SWarner Losh sptr = rptr; 2737eb690a05SWarner Losh while (*sptr != 0) { 2738eb690a05SWarner Losh adjbuf(&buf, 
&bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2739eb690a05SWarner Losh if (*sptr == '\\') { 2740eb690a05SWarner Losh backsub(&pb, &sptr); 2741eb690a05SWarner Losh } else if (*sptr == '&') { 2742eb690a05SWarner Losh sptr++; 2743eb690a05SWarner Losh adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2744eb690a05SWarner Losh for (q = patbeg; q < patbeg+patlen; ) 2745eb690a05SWarner Losh *pb++ = *q++; 2746eb690a05SWarner Losh } else 2747eb690a05SWarner Losh *pb++ = *sptr++; 2748eb690a05SWarner Losh } 2749eb690a05SWarner Losh } 2750eb690a05SWarner Losh if (*t == 0) /* at end */ 2751eb690a05SWarner Losh goto done; 2752eb690a05SWarner Losh adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2753eb690a05SWarner Losh *pb++ = *t++; 2754eb690a05SWarner Losh if (pb > buf + bufsz) /* BUG: not sure of this test */ 2755eb690a05SWarner Losh FATAL("gensub result0 %.30s too big; can't happen", buf); 2756eb690a05SWarner Losh mflag = 0; 2757eb690a05SWarner Losh } 2758eb690a05SWarner Losh else { /* matched nonempty string */ 2759eb690a05SWarner Losh num++; 2760eb690a05SWarner Losh sptr = t; 2761eb690a05SWarner Losh adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2762eb690a05SWarner Losh while (sptr < patbeg) 2763eb690a05SWarner Losh *pb++ = *sptr++; 2764eb690a05SWarner Losh sptr = rptr; 2765eb690a05SWarner Losh while (*sptr != 0) { 2766eb690a05SWarner Losh adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2767eb690a05SWarner Losh if (*sptr == '\\') { 2768eb690a05SWarner Losh backsub(&pb, &sptr); 2769eb690a05SWarner Losh } else if (*sptr == '&') { 2770eb690a05SWarner Losh sptr++; 2771eb690a05SWarner Losh adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2772eb690a05SWarner Losh for (q = patbeg; q < patbeg+patlen; ) 2773eb690a05SWarner Losh *pb++ = *q++; 2774eb690a05SWarner Losh } else 2775eb690a05SWarner Losh *pb++ = *sptr++; 2776eb690a05SWarner Losh } 2777eb690a05SWarner Losh t = patbeg + patlen; 2778eb690a05SWarner Losh if (patlen 
== 0 || *t == 0 || *(t-1) == 0) 2779eb690a05SWarner Losh goto done; 2780eb690a05SWarner Losh if (pb > buf + bufsz) 2781eb690a05SWarner Losh FATAL("gensub result1 %.30s too big; can't happen", buf); 2782eb690a05SWarner Losh mflag = 1; 2783eb690a05SWarner Losh } 2784eb690a05SWarner Losh } while (pmatch(pfa,t)); 2785eb690a05SWarner Losh sptr = t; 2786eb690a05SWarner Losh adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2787eb690a05SWarner Losh while ((*pb++ = *sptr++) != 0) 2788eb690a05SWarner Losh ; 2789eb690a05SWarner Losh done: if (pb > buf + bufsz) 2790eb690a05SWarner Losh FATAL("gensub result2 %.30s too big; can't happen", buf); 2791eb690a05SWarner Losh *pb = '\0'; 2792eb690a05SWarner Losh setsval(res, buf); 2793eb690a05SWarner Losh pfa->initstat = tempstat; 2794eb690a05SWarner Losh } 2795eb690a05SWarner Losh tempfree(x); 2796eb690a05SWarner Losh tempfree(y); 2797eb690a05SWarner Losh free(buf); 2798eb690a05SWarner Losh return(res); 2799eb690a05SWarner Losh } 2800eb690a05SWarner Losh 2801f39dd6a9SWarner Losh void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 28022a55deb1SDavid E. O'Brien { /* sptr[0] == '\\' */ 2803f39dd6a9SWarner Losh char *pb = *pb_ptr; 2804f39dd6a9SWarner Losh const char *sptr = *sptr_ptr; 2805f39dd6a9SWarner Losh static bool first = true; 2806f39dd6a9SWarner Losh static bool do_posix = false; 2807f39dd6a9SWarner Losh 2808f39dd6a9SWarner Losh if (first) { 2809f39dd6a9SWarner Losh first = false; 2810f39dd6a9SWarner Losh do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2811f39dd6a9SWarner Losh } 28122a55deb1SDavid E. O'Brien 28132a55deb1SDavid E. O'Brien if (sptr[1] == '\\') { 28142a55deb1SDavid E. O'Brien if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 28152a55deb1SDavid E. O'Brien *pb++ = '\\'; 28162a55deb1SDavid E. O'Brien *pb++ = '&'; 28172a55deb1SDavid E. O'Brien sptr += 4; 28182a55deb1SDavid E. O'Brien } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 28192a55deb1SDavid E. 
O'Brien *pb++ = '\\'; 28202a55deb1SDavid E. O'Brien sptr += 2; 2821f39dd6a9SWarner Losh } else if (do_posix) { /* \\x -> \x */ 2822f39dd6a9SWarner Losh sptr++; 2823f39dd6a9SWarner Losh *pb++ = *sptr++; 28242a55deb1SDavid E. O'Brien } else { /* \\x -> \\x */ 28252a55deb1SDavid E. O'Brien *pb++ = *sptr++; 28262a55deb1SDavid E. O'Brien *pb++ = *sptr++; 28272a55deb1SDavid E. O'Brien } 28282a55deb1SDavid E. O'Brien } else if (sptr[1] == '&') { /* literal & */ 28292a55deb1SDavid E. O'Brien sptr++; 28302a55deb1SDavid E. O'Brien *pb++ = *sptr++; 28312a55deb1SDavid E. O'Brien } else /* literal \ */ 28322a55deb1SDavid E. O'Brien *pb++ = *sptr++; 28332a55deb1SDavid E. O'Brien 28342a55deb1SDavid E. O'Brien *pb_ptr = pb; 28352a55deb1SDavid E. O'Brien *sptr_ptr = sptr; 28362a55deb1SDavid E. O'Brien } 2837f32a6403SWarner Losh 2838f32a6403SWarner Losh static char *wide_char_to_byte_str(int rune, size_t *outlen) 2839f32a6403SWarner Losh { 2840f32a6403SWarner Losh static char buf[5]; 2841f32a6403SWarner Losh int len; 2842f32a6403SWarner Losh 2843f32a6403SWarner Losh if (rune < 0 || rune > 0x10FFFF) 2844f32a6403SWarner Losh return NULL; 2845f32a6403SWarner Losh 2846f32a6403SWarner Losh memset(buf, 0, sizeof(buf)); 2847f32a6403SWarner Losh 2848f32a6403SWarner Losh len = 0; 2849f32a6403SWarner Losh if (rune <= 0x0000007F) { 2850f32a6403SWarner Losh buf[len++] = rune; 2851f32a6403SWarner Losh } else if (rune <= 0x000007FF) { 2852f32a6403SWarner Losh // 110xxxxx 10xxxxxx 2853f32a6403SWarner Losh buf[len++] = 0xC0 | (rune >> 6); 2854f32a6403SWarner Losh buf[len++] = 0x80 | (rune & 0x3F); 2855f32a6403SWarner Losh } else if (rune <= 0x0000FFFF) { 2856f32a6403SWarner Losh // 1110xxxx 10xxxxxx 10xxxxxx 2857f32a6403SWarner Losh buf[len++] = 0xE0 | (rune >> 12); 2858f32a6403SWarner Losh buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2859f32a6403SWarner Losh buf[len++] = 0x80 | (rune & 0x3F); 2860f32a6403SWarner Losh 2861f32a6403SWarner Losh } else { 2862f32a6403SWarner Losh // 0x00010000 - 0x10FFFF 
2863f32a6403SWarner Losh // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2864f32a6403SWarner Losh buf[len++] = 0xF0 | (rune >> 18); 2865f32a6403SWarner Losh buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2866f32a6403SWarner Losh buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2867f32a6403SWarner Losh buf[len++] = 0x80 | (rune & 0x3F); 2868f32a6403SWarner Losh } 2869f32a6403SWarner Losh 2870f32a6403SWarner Losh *outlen = len; 2871f32a6403SWarner Losh buf[len++] = '\0'; 2872f32a6403SWarner Losh 2873f32a6403SWarner Losh return buf; 2874f32a6403SWarner Losh } 2875