/* $OpenBSD: run.c,v 1.72 2022/06/03 19:40:56 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <ctype.h> 29 #include <errno.h> 30 #include <wchar.h> 31 #include <wctype.h> 32 #include <fcntl.h> 33 #include <setjmp.h> 34 #include <limits.h> 35 #include <math.h> 36 #include <string.h> 37 #include <stdlib.h> 38 #include <time.h> 39 #include <sys/types.h> 40 #include <sys/wait.h> 41 #include "awk.h" 42 #include "awkgram.tab.h" 43 44 static void stdinit(void); 45 static void flush_all(void); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell 
*jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? 
*pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 
205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x 
= x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! 
*/ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 if (n > 0) { 452 x = execute(a[0]); 453 setsval(x, buf); 454 if (is_number(x->sval, & result)) { 455 x->fval = result; 456 x->tval |= NUM; 457 } 458 tempfree(x); 459 } 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 
484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) 
{ 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 Cell *matchop(Node **a, int n) /* ~ and match() */ 584 { 585 Cell *x, *y; 586 char *s, *t; 587 int i; 588 fa *pfa; 589 int (*mf)(fa *, const char *) = match, mode = 0; 590 591 if (n == MATCHFCN) { 592 mf = pmatch; 593 mode = 1; 594 } 595 x = execute(a[1]); /* a[1] = target text */ 596 s = getsval(x); 597 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 598 i = (*mf)((fa *) a[2], s); 599 else { 600 y = execute(a[2]); /* a[2] = regular expr */ 601 t = getsval(y); 602 pfa = makedfa(t, mode); 603 i = (*mf)(pfa, s); 604 tempfree(y); 605 } 606 tempfree(x); 607 if (n == MATCHFCN) { 608 int start = patbeg - s + 1; 609 if (patlen < 0) 610 start = 0; 611 setfval(rstartloc, (Awkfloat) start); 612 setfval(rlengthloc, (Awkfloat) patlen); 613 x = gettemp(); 614 x->tval = NUM; 615 x->fval = start; 616 return x; 617 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 618 return(True); 619 else 620 return(False); 621 } 622 623 624 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 625 { 626 Cell *x, *y; 627 int i; 628 629 x = execute(a[0]); 630 i = istrue(x); 631 tempfree(x); 632 switch (n) { 633 case BOR: 634 if (i) return(True); 635 y = execute(a[1]); 636 i = istrue(y); 637 tempfree(y); 638 if (i) return(True); 639 else return(False); 640 case AND: 641 if ( !i ) return(False); 642 y = execute(a[1]); 643 i = istrue(y); 644 tempfree(y); 645 if (i) return(True); 646 else return(False); 647 case NOT: 648 if (i) return(False); 649 else return(True); 650 default: /* can't happen */ 651 FATAL("unknown boolean operator %d", n); 652 } 653 return 0; 
/*NOTREACHED*/ 654 } 655 656 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 657 { 658 int i; 659 Cell *x, *y; 660 Awkfloat j; 661 662 x = execute(a[0]); 663 y = execute(a[1]); 664 if (x->tval&NUM && y->tval&NUM) { 665 j = x->fval - y->fval; 666 i = j<0? -1: (j>0? 1: 0); 667 } else { 668 i = strcmp(getsval(x), getsval(y)); 669 } 670 tempfree(x); 671 tempfree(y); 672 switch (n) { 673 case LT: if (i<0) return(True); 674 else return(False); 675 case LE: if (i<=0) return(True); 676 else return(False); 677 case NE: if (i!=0) return(True); 678 else return(False); 679 case EQ: if (i == 0) return(True); 680 else return(False); 681 case GE: if (i>=0) return(True); 682 else return(False); 683 case GT: if (i>0) return(True); 684 else return(False); 685 default: /* can't happen */ 686 FATAL("unknown relational operator %d", n); 687 } 688 return 0; /*NOTREACHED*/ 689 } 690 691 void tfree(Cell *a) /* free a tempcell */ 692 { 693 if (freeable(a)) { 694 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 695 xfree(a->sval); 696 } 697 if (a == tmps) 698 FATAL("tempcell list is curdled"); 699 a->cnext = tmps; 700 tmps = a; 701 } 702 703 Cell *gettemp(void) /* get a tempcell */ 704 { int i; 705 Cell *x; 706 707 if (!tmps) { 708 tmps = (Cell *) calloc(100, sizeof(*tmps)); 709 if (!tmps) 710 FATAL("out of space for temporaries"); 711 for (i = 1; i < 100; i++) 712 tmps[i-1].cnext = &tmps[i]; 713 tmps[i-1].cnext = NULL; 714 } 715 x = tmps; 716 tmps = x->cnext; 717 *x = tempcell; 718 return(x); 719 } 720 721 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 722 { 723 Awkfloat val; 724 Cell *x; 725 int m; 726 char *s; 727 728 x = execute(a[0]); 729 val = getfval(x); /* freebsd: defend against super large field numbers */ 730 if ((Awkfloat)INT_MAX < val) 731 FATAL("trying to access out of range field %s", x->nval); 732 m = (int) val; 733 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! 
*/ 734 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 735 /* BUG: can x->nval ever be null??? */ 736 tempfree(x); 737 x = fieldadr(m); 738 x->ctype = OCELL; /* BUG? why are these needed? */ 739 x->csub = CFLD; 740 return(x); 741 } 742 743 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 744 { 745 int k, m, n; 746 char *s; 747 int temp; 748 Cell *x, *y, *z = NULL; 749 750 x = execute(a[0]); 751 y = execute(a[1]); 752 if (a[2] != NULL) 753 z = execute(a[2]); 754 s = getsval(x); 755 k = strlen(s) + 1; 756 if (k <= 1) { 757 tempfree(x); 758 tempfree(y); 759 if (a[2] != NULL) { 760 tempfree(z); 761 } 762 x = gettemp(); 763 setsval(x, ""); 764 return(x); 765 } 766 m = (int) getfval(y); 767 if (m <= 0) 768 m = 1; 769 else if (m > k) 770 m = k; 771 tempfree(y); 772 if (a[2] != NULL) { 773 n = (int) getfval(z); 774 tempfree(z); 775 } else 776 n = k - 1; 777 if (n < 0) 778 n = 0; 779 else if (n > k - m) 780 n = k - m; 781 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 782 y = gettemp(); 783 temp = s[n+m-1]; /* with thanks to John Linderman */ 784 s[n+m-1] = '\0'; 785 setsval(y, s + m - 1); 786 s[n+m-1] = temp; 787 tempfree(x); 788 return(y); 789 } 790 791 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 792 { 793 Cell *x, *y, *z; 794 char *s1, *s2, *p1, *p2, *q; 795 Awkfloat v = 0.0; 796 797 x = execute(a[0]); 798 s1 = getsval(x); 799 y = execute(a[1]); 800 s2 = getsval(y); 801 802 z = gettemp(); 803 for (p1 = s1; *p1 != '\0'; p1++) { 804 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 805 continue; 806 if (*p2 == '\0') { 807 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 808 break; 809 } 810 } 811 tempfree(x); 812 tempfree(y); 813 setfval(z, v); 814 return(z); 815 } 816 817 #define MAXNUMSIZE 50 818 819 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 820 { 821 char *fmt; 822 char *p, *t; 823 const char *os; 824 Cell *x; 825 int flag = 0, n; 826 int fmtwd; /* format width */ 827 int fmtsz = 
recsize; 828 char *buf = *pbuf; 829 int bufsize = *pbufsize; 830 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 831 #define BUFSZ(a) (bufsize - ((a) - buf)) 832 833 static bool first = true; 834 static bool have_a_format = false; 835 836 if (first) { 837 char xbuf[100]; 838 839 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 840 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 841 first = false; 842 } 843 844 os = s; 845 p = buf; 846 if ((fmt = (char *) malloc(fmtsz)) == NULL) 847 FATAL("out of memory in format()"); 848 while (*s) { 849 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 850 if (*s != '%') { 851 *p++ = *s++; 852 continue; 853 } 854 if (*(s+1) == '%') { 855 *p++ = '%'; 856 s += 2; 857 continue; 858 } 859 /* have to be real careful in case this is a huge number, eg, %100000d */ 860 fmtwd = atoi(s+1); 861 if (fmtwd < 0) 862 fmtwd = -fmtwd; 863 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 864 for (t = fmt; (*t++ = *s) != '\0'; s++) { 865 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 866 FATAL("format item %.30s... 
ran format() out of memory", os); 867 /* Ignore size specifiers */ 868 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 869 t--; 870 continue; 871 } 872 if (isalpha((uschar)*s)) 873 break; 874 if (*s == '$') { 875 FATAL("'$' not permitted in awk formats"); 876 } 877 if (*s == '*') { 878 if (a == NULL) { 879 FATAL("not enough args in printf(%s)", os); 880 } 881 x = execute(a); 882 a = a->nnext; 883 snprintf(t - 1, FMTSZ(t - 1), 884 "%d", fmtwd=(int) getfval(x)); 885 if (fmtwd < 0) 886 fmtwd = -fmtwd; 887 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 888 t = fmt + strlen(fmt); 889 tempfree(x); 890 } 891 } 892 *t = '\0'; 893 if (fmtwd < 0) 894 fmtwd = -fmtwd; 895 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 896 switch (*s) { 897 case 'a': case 'A': 898 if (have_a_format) 899 flag = *s; 900 else 901 flag = 'f'; 902 break; 903 case 'f': case 'e': case 'g': case 'E': case 'G': 904 flag = 'f'; 905 break; 906 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 907 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 908 *(t-1) = 'j'; 909 *t = *s; 910 *++t = '\0'; 911 break; 912 case 's': 913 flag = 's'; 914 break; 915 case 'c': 916 flag = 'c'; 917 break; 918 default: 919 WARNING("weird printf conversion %s", fmt); 920 flag = '?'; 921 break; 922 } 923 if (a == NULL) 924 FATAL("not enough args in printf(%s)", os); 925 x = execute(a); 926 a = a->nnext; 927 n = MAXNUMSIZE; 928 if (fmtwd > n) 929 n = fmtwd; 930 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 931 switch (flag) { 932 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 933 t = getsval(x); 934 n = strlen(t); 935 if (fmtwd > n) 936 n = fmtwd; 937 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 938 p += strlen(p); 939 snprintf(p, BUFSZ(p), "%s", t); 940 break; 941 case 'a': 942 case 'A': 943 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 944 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 945 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 946 case 's': 947 t = getsval(x); 948 n = strlen(t); 949 if (fmtwd > n) 950 n = fmtwd; 951 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 952 FATAL("huge string/format (%d chars) in printf %.30s... 
ran format() out of memory", n, t); 953 snprintf(p, BUFSZ(p), fmt, t); 954 break; 955 case 'c': 956 if (isnum(x)) { 957 if ((int)getfval(x)) 958 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 959 else { 960 *p++ = '\0'; /* explicit null byte */ 961 *p = '\0'; /* next output will start here */ 962 } 963 } else 964 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 965 break; 966 default: 967 FATAL("can't happen: bad conversion %c in format()", flag); 968 } 969 tempfree(x); 970 p += strlen(p); 971 s++; 972 } 973 *p = '\0'; 974 free(fmt); 975 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 976 execute(a); 977 *pbuf = buf; 978 *pbufsize = bufsize; 979 return p - buf; 980 } 981 982 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 983 { 984 Cell *x; 985 Node *y; 986 char *buf; 987 int bufsz=3*recsize; 988 989 if ((buf = (char *) malloc(bufsz)) == NULL) 990 FATAL("out of memory in awksprintf"); 991 y = a[0]->nnext; 992 x = execute(a[0]); 993 if (format(&buf, &bufsz, getsval(x), y) == -1) 994 FATAL("sprintf string %.30s... too long. can't happen.", buf); 995 tempfree(x); 996 x = gettemp(); 997 x->sval = buf; 998 x->tval = STR; 999 return(x); 1000 } 1001 1002 Cell *awkprintf(Node **a, int n) /* printf */ 1003 { /* a[0] is list of args, starting with format string */ 1004 /* a[1] is redirection operator, a[2] is redirection file */ 1005 FILE *fp; 1006 Cell *x; 1007 Node *y; 1008 char *buf; 1009 int len; 1010 int bufsz=3*recsize; 1011 1012 if ((buf = (char *) malloc(bufsz)) == NULL) 1013 FATAL("out of memory in awkprintf"); 1014 y = a[0]->nnext; 1015 x = execute(a[0]); 1016 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1017 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1018 tempfree(x); 1019 if (a[1] == NULL) { 1020 /* fputs(buf, stdout); */ 1021 fwrite(buf, len, 1, stdout); 1022 if (ferror(stdout)) 1023 FATAL("write error on stdout"); 1024 } else { 1025 fp = redirect(ptoi(a[1]), a[2]); 1026 /* fputs(buf, fp); */ 1027 fwrite(buf, len, 1, fp); 1028 fflush(fp); 1029 if (ferror(fp)) 1030 FATAL("write error on %s", filename(fp)); 1031 } 1032 free(buf); 1033 return(True); 1034 } 1035 1036 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1037 { 1038 Awkfloat i, j = 0; 1039 double v; 1040 Cell *x, *y, *z; 1041 1042 x = execute(a[0]); 1043 i = getfval(x); 1044 tempfree(x); 1045 if (n != UMINUS && n != UPLUS) { 1046 y = execute(a[1]); 1047 j = getfval(y); 1048 tempfree(y); 1049 } 1050 z = gettemp(); 1051 switch (n) { 1052 case ADD: 1053 i += j; 1054 break; 1055 case MINUS: 1056 i -= j; 1057 break; 1058 case MULT: 1059 i *= j; 1060 break; 1061 case DIVIDE: 1062 if (j == 0) 1063 FATAL("division by zero"); 1064 i /= j; 1065 break; 1066 case MOD: 1067 if (j == 0) 1068 FATAL("division by zero in mod"); 1069 modf(i/j, &v); 1070 i = i - j * v; 1071 break; 1072 case UMINUS: 1073 i = -i; 1074 break; 1075 case UPLUS: /* handled by getfval(), above */ 1076 break; 1077 case POWER: 1078 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1079 i = ipow(i, (int) j); 1080 else { 1081 errno = 0; 1082 i = errcheck(pow(i, j), "pow"); 1083 } 1084 break; 1085 default: /* can't happen */ 1086 FATAL("illegal arithmetic operator %d", n); 1087 } 1088 setfval(z, i); 1089 return(z); 1090 } 1091 1092 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1093 { 1094 double v; 1095 1096 if (n <= 0) 1097 return 1; 1098 v = ipow(x, n/2); 1099 if (n % 2 == 0) 1100 return v * v; 1101 else 1102 return x * v * v; 1103 } 1104 1105 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1106 { 1107 Cell *x, *z; 1108 int k; 1109 Awkfloat xf; 1110 1111 x = execute(a[0]); 1112 xf = getfval(x); 1113 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1114 if (n == PREINCR || n == PREDECR) { 1115 setfval(x, xf + k); 1116 return(x); 1117 } 1118 z = gettemp(); 1119 setfval(z, xf); 1120 setfval(x, xf + k); 1121 tempfree(x); 1122 return(z); 1123 } 1124 1125 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1126 { /* this is subtle; don't muck with it. */ 1127 Cell *x, *y; 1128 Awkfloat xf, yf; 1129 double v; 1130 1131 y = execute(a[1]); 1132 x = execute(a[0]); 1133 if (n == ASSIGN) { /* ordinary assignment */ 1134 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1135 ; /* self-assignment: leave alone unless it's a field or NF */ 1136 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1137 setsval(x, getsval(y)); 1138 x->fval = getfval(y); 1139 x->tval |= NUM; 1140 } 1141 else if (isstr(y)) 1142 setsval(x, getsval(y)); 1143 else if (isnum(y)) 1144 setfval(x, getfval(y)); 1145 else 1146 funnyvar(y, "read value of"); 1147 tempfree(y); 1148 return(x); 1149 } 1150 xf = getfval(x); 1151 yf = getfval(y); 1152 switch (n) { 1153 case ADDEQ: 1154 xf += yf; 1155 break; 1156 case SUBEQ: 1157 xf -= yf; 1158 break; 1159 case MULTEQ: 1160 xf *= yf; 1161 break; 1162 case DIVEQ: 1163 if (yf == 0) 1164 FATAL("division by zero in /="); 1165 xf /= yf; 1166 break; 1167 case MODEQ: 1168 if (yf == 0) 1169 FATAL("division by zero in %%="); 1170 modf(xf/yf, &v); 1171 xf = xf - yf * v; 1172 break; 1173 case POWEQ: 1174 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1175 xf = ipow(xf, (int) yf); 1176 else { 1177 errno = 0; 1178 xf = errcheck(pow(xf, yf), "pow"); 1179 } 1180 break; 1181 default: 1182 FATAL("illegal assignment operator %d", n); 1183 break; 1184 } 1185 tempfree(y); 1186 setfval(x, xf); 1187 return(x); 1188 } 1189 1190 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1191 { 1192 Cell *x, *y, *z; 1193 int n1, n2; 1194 char *s = NULL; 1195 int 
ssz = 0; 1196 1197 x = execute(a[0]); 1198 n1 = strlen(getsval(x)); 1199 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1200 memcpy(s, x->sval, n1); 1201 1202 y = execute(a[1]); 1203 n2 = strlen(getsval(y)); 1204 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1205 memcpy(s + n1, y->sval, n2); 1206 s[n1 + n2] = '\0'; 1207 1208 tempfree(x); 1209 tempfree(y); 1210 1211 z = gettemp(); 1212 z->sval = s; 1213 z->tval = STR; 1214 1215 return(z); 1216 } 1217 1218 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1219 { 1220 Cell *x; 1221 1222 if (a[0] == NULL) 1223 x = execute(a[1]); 1224 else { 1225 x = execute(a[0]); 1226 if (istrue(x)) { 1227 tempfree(x); 1228 x = execute(a[1]); 1229 } 1230 } 1231 return x; 1232 } 1233 1234 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1235 { 1236 Cell *x; 1237 int pair; 1238 1239 pair = ptoi(a[3]); 1240 if (pairstack[pair] == 0) { 1241 x = execute(a[0]); 1242 if (istrue(x)) 1243 pairstack[pair] = 1; 1244 tempfree(x); 1245 } 1246 if (pairstack[pair] == 1) { 1247 x = execute(a[1]); 1248 if (istrue(x)) 1249 pairstack[pair] = 0; 1250 tempfree(x); 1251 x = execute(a[2]); 1252 return(x); 1253 } 1254 return(False); 1255 } 1256 1257 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1258 { 1259 Cell *x = NULL, *y, *ap; 1260 const char *s, *origs, *t; 1261 const char *fs = NULL; 1262 char *origfs = NULL; 1263 int sep; 1264 char temp, num[50]; 1265 int n, tempstat, arg3type; 1266 double result; 1267 1268 y = execute(a[0]); /* source string */ 1269 origs = s = strdup(getsval(y)); 1270 if (s == NULL) 1271 FATAL("out of space in split"); 1272 arg3type = ptoi(a[3]); 1273 if (a[2] == NULL) /* fs string */ 1274 fs = getsval(fsloc); 1275 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1276 x = execute(a[2]); 1277 fs = origfs = strdup(getsval(x)); 1278 if (fs == NULL) 1279 FATAL("out of space in split"); 1280 tempfree(x); 1281 } else if (arg3type == REGEXPR) 1282 fs = "(regexpr)"; /* split(str,arr,/regexpr/) 
*/ 1283 else 1284 FATAL("illegal type of split"); 1285 sep = *fs; 1286 ap = execute(a[1]); /* array name */ 1287 freesymtab(ap); 1288 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1289 ap->tval &= ~STR; 1290 ap->tval |= ARR; 1291 ap->sval = (char *) makesymtab(NSYMTAB); 1292 1293 n = 0; 1294 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1295 /* split(s, a, //); have to arrange that it looks like empty sep */ 1296 arg3type = 0; 1297 fs = ""; 1298 sep = 0; 1299 } 1300 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1301 fa *pfa; 1302 if (arg3type == REGEXPR) { /* it's ready already */ 1303 pfa = (fa *) a[2]; 1304 } else { 1305 pfa = makedfa(fs, 1); 1306 } 1307 if (nematch(pfa,s)) { 1308 tempstat = pfa->initstat; 1309 pfa->initstat = 2; 1310 do { 1311 n++; 1312 snprintf(num, sizeof(num), "%d", n); 1313 temp = *patbeg; 1314 setptr(patbeg, '\0'); 1315 if (is_number(s, & result)) 1316 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1317 else 1318 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1319 setptr(patbeg, temp); 1320 s = patbeg + patlen; 1321 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1322 n++; 1323 snprintf(num, sizeof(num), "%d", n); 1324 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1325 pfa->initstat = tempstat; 1326 goto spdone; 1327 } 1328 } while (nematch(pfa,s)); 1329 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1330 /* cf gsub and refldbld */ 1331 } 1332 n++; 1333 snprintf(num, sizeof(num), "%d", n); 1334 if (is_number(s, & result)) 1335 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1336 else 1337 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1338 spdone: 1339 pfa = NULL; 1340 } else if (sep == ' ') { 1341 for (n = 0; ; ) { 1342 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1343 while (ISWS(*s)) 1344 s++; 1345 if (*s == '\0') 1346 break; 1347 n++; 1348 t = s; 1349 do 1350 s++; 1351 while (*s != '\0' && !ISWS(*s)); 1352 temp = 
*s; 1353 setptr(s, '\0'); 1354 snprintf(num, sizeof(num), "%d", n); 1355 if (is_number(t, & result)) 1356 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1357 else 1358 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1359 setptr(s, temp); 1360 if (*s != '\0') 1361 s++; 1362 } 1363 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1364 for (n = 0; *s != '\0'; s++) { 1365 char buf[2]; 1366 n++; 1367 snprintf(num, sizeof(num), "%d", n); 1368 buf[0] = *s; 1369 buf[1] = '\0'; 1370 if (isdigit((uschar)buf[0])) 1371 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1372 else 1373 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1374 } 1375 } else if (*s != '\0') { 1376 for (;;) { 1377 n++; 1378 t = s; 1379 while (*s != sep && *s != '\n' && *s != '\0') 1380 s++; 1381 temp = *s; 1382 setptr(s, '\0'); 1383 snprintf(num, sizeof(num), "%d", n); 1384 if (is_number(t, & result)) 1385 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1386 else 1387 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1388 setptr(s, temp); 1389 if (*s++ == '\0') 1390 break; 1391 } 1392 } 1393 tempfree(ap); 1394 tempfree(y); 1395 xfree(origs); 1396 xfree(origfs); 1397 x = gettemp(); 1398 x->tval = NUM; 1399 x->fval = n; 1400 return(x); 1401 } 1402 1403 Cell *condexpr(Node **a, int n) /* a[0] ? 
a[1] : a[2] */ 1404 { 1405 Cell *x; 1406 1407 x = execute(a[0]); 1408 if (istrue(x)) { 1409 tempfree(x); 1410 x = execute(a[1]); 1411 } else { 1412 tempfree(x); 1413 x = execute(a[2]); 1414 } 1415 return(x); 1416 } 1417 1418 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1419 { 1420 Cell *x; 1421 1422 x = execute(a[0]); 1423 if (istrue(x)) { 1424 tempfree(x); 1425 x = execute(a[1]); 1426 } else if (a[2] != NULL) { 1427 tempfree(x); 1428 x = execute(a[2]); 1429 } 1430 return(x); 1431 } 1432 1433 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1434 { 1435 Cell *x; 1436 1437 for (;;) { 1438 x = execute(a[0]); 1439 if (!istrue(x)) 1440 return(x); 1441 tempfree(x); 1442 x = execute(a[1]); 1443 if (isbreak(x)) { 1444 x = True; 1445 return(x); 1446 } 1447 if (isnext(x) || isexit(x) || isret(x)) 1448 return(x); 1449 tempfree(x); 1450 } 1451 } 1452 1453 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1454 { 1455 Cell *x; 1456 1457 for (;;) { 1458 x = execute(a[0]); 1459 if (isbreak(x)) 1460 return True; 1461 if (isnext(x) || isexit(x) || isret(x)) 1462 return(x); 1463 tempfree(x); 1464 x = execute(a[1]); 1465 if (!istrue(x)) 1466 return(x); 1467 tempfree(x); 1468 } 1469 } 1470 1471 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1472 { 1473 Cell *x; 1474 1475 x = execute(a[0]); 1476 tempfree(x); 1477 for (;;) { 1478 if (a[1]!=NULL) { 1479 x = execute(a[1]); 1480 if (!istrue(x)) return(x); 1481 else tempfree(x); 1482 } 1483 x = execute(a[3]); 1484 if (isbreak(x)) /* turn off break */ 1485 return True; 1486 if (isnext(x) || isexit(x) || isret(x)) 1487 return(x); 1488 tempfree(x); 1489 x = execute(a[2]); 1490 tempfree(x); 1491 } 1492 } 1493 1494 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1495 { 1496 Cell *x, *vp, *arrayp, *cp, *ncp; 1497 Array *tp; 1498 int i; 1499 1500 vp = execute(a[0]); 1501 arrayp = execute(a[1]); 1502 if (!isarr(arrayp)) { 1503 return True; 1504 } 1505 tp = (Array *) arrayp->sval; 1506 
tempfree(arrayp); 1507 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1508 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1509 setsval(vp, cp->nval); 1510 ncp = cp->cnext; 1511 x = execute(a[2]); 1512 if (isbreak(x)) { 1513 tempfree(vp); 1514 return True; 1515 } 1516 if (isnext(x) || isexit(x) || isret(x)) { 1517 tempfree(vp); 1518 return(x); 1519 } 1520 tempfree(x); 1521 } 1522 } 1523 return True; 1524 } 1525 1526 static char *nawk_convert(const char *s, int (*fun_c)(int), 1527 wint_t (*fun_wc)(wint_t)) 1528 { 1529 char *buf = NULL; 1530 char *pbuf = NULL; 1531 const char *ps = NULL; 1532 size_t n = 0; 1533 wchar_t wc; 1534 size_t sz = MB_CUR_MAX; 1535 1536 if (sz == 1) { 1537 buf = tostring(s); 1538 1539 for (pbuf = buf; *pbuf; pbuf++) 1540 *pbuf = fun_c((uschar)*pbuf); 1541 1542 return buf; 1543 } else { 1544 /* upper/lower character may be shorter/longer */ 1545 buf = tostringN(s, strlen(s) * sz + 1); 1546 1547 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1548 /* 1549 * Reset internal state here too. 1550 * Assign result to avoid a compiler warning. (Casting to void 1551 * doesn't work.) 1552 * Increment said variable to avoid a different warning. 
1553 */ 1554 int unused = wctomb(NULL, L'\0'); 1555 unused++; 1556 1557 ps = s; 1558 pbuf = buf; 1559 while (n = mbtowc(&wc, ps, sz), 1560 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1561 { 1562 ps += n; 1563 1564 n = wctomb(pbuf, fun_wc(wc)); 1565 if (n == (size_t)-1) 1566 FATAL("illegal wide character %s", s); 1567 1568 pbuf += n; 1569 } 1570 1571 *pbuf = '\0'; 1572 1573 if (n) 1574 FATAL("illegal byte sequence %s", s); 1575 1576 return buf; 1577 } 1578 } 1579 1580 #ifdef __DJGPP__ 1581 static wint_t towupper(wint_t wc) 1582 { 1583 if (wc >= 0 && wc < 256) 1584 return toupper(wc & 0xFF); 1585 1586 return wc; 1587 } 1588 1589 static wint_t towlower(wint_t wc) 1590 { 1591 if (wc >= 0 && wc < 256) 1592 return tolower(wc & 0xFF); 1593 1594 return wc; 1595 } 1596 #endif 1597 1598 static char *nawk_toupper(const char *s) 1599 { 1600 return nawk_convert(s, toupper, towupper); 1601 } 1602 1603 static char *nawk_tolower(const char *s) 1604 { 1605 return nawk_convert(s, tolower, towlower); 1606 } 1607 1608 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1609 { 1610 Cell *x, *y; 1611 Awkfloat u; 1612 int t, sz; 1613 Awkfloat tmp; 1614 char *buf, *fmt; 1615 Node *nextarg; 1616 FILE *fp; 1617 int status = 0; 1618 time_t tv; 1619 struct tm *tm, tmbuf; 1620 1621 t = ptoi(a[0]); 1622 x = execute(a[1]); 1623 nextarg = a[1]->nnext; 1624 switch (t) { 1625 case FLENGTH: 1626 if (isarr(x)) 1627 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 1628 else 1629 u = strlen(getsval(x)); 1630 break; 1631 case FLOG: 1632 errno = 0; 1633 u = errcheck(log(getfval(x)), "log"); 1634 break; 1635 case FINT: 1636 modf(getfval(x), &u); break; 1637 case FEXP: 1638 errno = 0; 1639 u = errcheck(exp(getfval(x)), "exp"); 1640 break; 1641 case FSQRT: 1642 errno = 0; 1643 u = errcheck(sqrt(getfval(x)), "sqrt"); 1644 break; 1645 case FSIN: 1646 u = sin(getfval(x)); break; 1647 case FCOS: 1648 u = cos(getfval(x)); break; 1649 case FATAN: 1650 if (nextarg == NULL) { 1651 WARNING("atan2 requires two arguments; returning 1.0"); 1652 u = 1.0; 1653 } else { 1654 y = execute(a[1]->nnext); 1655 u = atan2(getfval(x), getfval(y)); 1656 tempfree(y); 1657 nextarg = nextarg->nnext; 1658 } 1659 break; 1660 case FCOMPL: 1661 u = ~((int)getfval(x)); 1662 break; 1663 case FAND: 1664 if (nextarg == 0) { 1665 WARNING("and requires two arguments; returning 0"); 1666 u = 0; 1667 break; 1668 } 1669 y = execute(a[1]->nnext); 1670 u = ((int)getfval(x)) & ((int)getfval(y)); 1671 tempfree(y); 1672 nextarg = nextarg->nnext; 1673 break; 1674 case FFOR: 1675 if (nextarg == 0) { 1676 WARNING("or requires two arguments; returning 0"); 1677 u = 0; 1678 break; 1679 } 1680 y = execute(a[1]->nnext); 1681 u = ((int)getfval(x)) | ((int)getfval(y)); 1682 tempfree(y); 1683 nextarg = nextarg->nnext; 1684 break; 1685 case FXOR: 1686 if (nextarg == 0) { 1687 WARNING("xor requires two arguments; returning 0"); 1688 u = 0; 1689 break; 1690 } 1691 y = execute(a[1]->nnext); 1692 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1693 tempfree(y); 1694 nextarg = nextarg->nnext; 1695 break; 1696 case FLSHIFT: 1697 if (nextarg == 0) { 1698 WARNING("lshift requires two arguments; returning 0"); 1699 u = 0; 1700 break; 1701 } 1702 y = execute(a[1]->nnext); 1703 u = ((int)getfval(x)) << ((int)getfval(y)); 1704 tempfree(y); 1705 nextarg = nextarg->nnext; 1706 break; 1707 case FRSHIFT: 1708 if (nextarg == 0) { 1709 WARNING("rshift requires two arguments; returning 
0"); 1710 u = 0; 1711 break; 1712 } 1713 y = execute(a[1]->nnext); 1714 u = ((int)getfval(x)) >> ((int)getfval(y)); 1715 tempfree(y); 1716 nextarg = nextarg->nnext; 1717 break; 1718 case FSYSTEM: 1719 fflush(stdout); /* in case something is buffered already */ 1720 status = system(getsval(x)); 1721 u = status; 1722 if (status != -1) { 1723 if (WIFEXITED(status)) { 1724 u = WEXITSTATUS(status); 1725 } else if (WIFSIGNALED(status)) { 1726 u = WTERMSIG(status) + 256; 1727 #ifdef WCOREDUMP 1728 if (WCOREDUMP(status)) 1729 u += 256; 1730 #endif 1731 } else /* something else?!? */ 1732 u = 0; 1733 } 1734 break; 1735 case FRAND: 1736 /* random() returns numbers in [0..2^31-1] 1737 * in order to get a number in [0, 1), divide it by 2^31 1738 */ 1739 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1740 break; 1741 case FSRAND: 1742 if (isrec(x)) { /* no argument provided */ 1743 u = time(NULL); 1744 tmp = u; 1745 srandom((unsigned int) u); 1746 } else { 1747 u = getfval(x); 1748 tmp = u; 1749 srandom_deterministic((unsigned int) u); 1750 } 1751 u = srand_seed; 1752 srand_seed = tmp; 1753 break; 1754 case FTOUPPER: 1755 case FTOLOWER: 1756 if (t == FTOUPPER) 1757 buf = nawk_toupper(getsval(x)); 1758 else 1759 buf = nawk_tolower(getsval(x)); 1760 tempfree(x); 1761 x = gettemp(); 1762 setsval(x, buf); 1763 free(buf); 1764 return x; 1765 case FFLUSH: 1766 if (isrec(x) || strlen(getsval(x)) == 0) { 1767 flush_all(); /* fflush() or fflush("") -> all */ 1768 u = 0; 1769 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1770 u = EOF; 1771 else 1772 u = fflush(fp); 1773 break; 1774 case FMKTIME: 1775 memset(&tmbuf, 0, sizeof(tmbuf)); 1776 tm = &tmbuf; 1777 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 1778 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 1779 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 1780 switch (t) { 1781 case 6: 1782 tm->tm_isdst = -1; /* let mktime figure it out */ 1783 /* FALLTHROUGH */ 1784 case 7: 1785 tm->tm_year -= 1900; 1786 
tm->tm_mon--; 1787 u = mktime(tm); 1788 break; 1789 default: 1790 u = -1; 1791 break; 1792 } 1793 break; 1794 case FSYSTIME: 1795 u = time((time_t *) 0); 1796 break; 1797 case FSTRFTIME: 1798 /* strftime([format [,timestamp]]) */ 1799 if (nextarg) { 1800 y = execute(nextarg); 1801 nextarg = nextarg->nnext; 1802 tv = (time_t) getfval(y); 1803 tempfree(y); 1804 } else 1805 tv = time((time_t *) 0); 1806 tm = localtime(&tv); 1807 if (tm == NULL) 1808 FATAL("bad time %ld", (long)tv); 1809 1810 if (isrec(x)) { 1811 /* format argument not provided, use default */ 1812 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1813 } else 1814 fmt = tostring(getsval(x)); 1815 1816 sz = 32; 1817 buf = NULL; 1818 do { 1819 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL) 1820 FATAL("out of memory in strftime"); 1821 sz *= 2; 1822 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1823 1824 y = gettemp(); 1825 setsval(y, buf); 1826 free(fmt); 1827 free(buf); 1828 1829 return y; 1830 default: /* can't happen */ 1831 FATAL("illegal function type %d", t); 1832 break; 1833 } 1834 tempfree(x); 1835 x = gettemp(); 1836 setfval(x, u); 1837 if (nextarg != NULL) { 1838 WARNING("warning: function has too many arguments"); 1839 for ( ; nextarg; nextarg = nextarg->nnext) 1840 execute(nextarg); 1841 } 1842 return(x); 1843 } 1844 1845 Cell *printstat(Node **a, int n) /* print a[0] */ 1846 { 1847 Node *x; 1848 Cell *y; 1849 FILE *fp; 1850 1851 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1852 fp = stdout; 1853 else 1854 fp = redirect(ptoi(a[1]), a[2]); 1855 for (x = a[0]; x != NULL; x = x->nnext) { 1856 y = execute(x); 1857 fputs(getpssval(y), fp); 1858 tempfree(y); 1859 if (x->nnext == NULL) 1860 fputs(getsval(orsloc), fp); 1861 else 1862 fputs(getsval(ofsloc), fp); 1863 } 1864 if (a[1] != NULL) 1865 fflush(fp); 1866 if (ferror(fp)) 1867 FATAL("write error on %s", filename(fp)); 1868 return(True); 1869 } 1870 1871 Cell *nullproc(Node **a, int n) 1872 { 1873 return 0; 
1874 } 1875 1876 1877 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1878 { 1879 FILE *fp; 1880 Cell *x; 1881 char *fname; 1882 1883 x = execute(b); 1884 fname = getsval(x); 1885 fp = openfile(a, fname, NULL); 1886 if (fp == NULL) 1887 FATAL("can't open file %s", fname); 1888 tempfree(x); 1889 return fp; 1890 } 1891 1892 struct files { 1893 FILE *fp; 1894 const char *fname; 1895 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1896 } *files; 1897 1898 size_t nfiles; 1899 1900 static void stdinit(void) /* in case stdin, etc., are not constants */ 1901 { 1902 nfiles = FOPEN_MAX; 1903 files = (struct files *) calloc(nfiles, sizeof(*files)); 1904 if (files == NULL) 1905 FATAL("can't allocate file memory for %zu files", nfiles); 1906 files[0].fp = stdin; 1907 files[0].fname = tostring("/dev/stdin"); 1908 files[0].mode = LT; 1909 files[1].fp = stdout; 1910 files[1].fname = tostring("/dev/stdout"); 1911 files[1].mode = GT; 1912 files[2].fp = stderr; 1913 files[2].fname = tostring("/dev/stderr"); 1914 files[2].mode = GT; 1915 } 1916 1917 FILE *openfile(int a, const char *us, bool *pnewflag) 1918 { 1919 const char *s = us; 1920 size_t i; 1921 int m; 1922 FILE *fp = NULL; 1923 1924 if (*s == '\0') 1925 FATAL("null file name in print or getline"); 1926 for (i = 0; i < nfiles; i++) 1927 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1928 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1929 a == FFLUSH)) { 1930 if (pnewflag) 1931 *pnewflag = false; 1932 return files[i].fp; 1933 } 1934 if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ 1935 return NULL; 1936 1937 for (i = 0; i < nfiles; i++) 1938 if (files[i].fp == NULL) 1939 break; 1940 if (i >= nfiles) { 1941 struct files *nf; 1942 size_t nnf = nfiles + FOPEN_MAX; 1943 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); 1944 if (nf == NULL) 1945 FATAL("cannot grow files for %s and %zu files", s, nnf); 1946 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1947 nfiles = nnf; 1948 files = nf; 1949 } 1950 fflush(stdout); /* force a semblance of order */ 1951 m = a; 1952 if (a == GT) { 1953 fp = fopen(s, "w"); 1954 } else if (a == APPEND) { 1955 fp = fopen(s, "a"); 1956 m = GT; /* so can mix > and >> */ 1957 } else if (a == '|') { /* output pipe */ 1958 fp = popen(s, "w"); 1959 } else if (a == LE) { /* input pipe */ 1960 fp = popen(s, "r"); 1961 } else if (a == LT) { /* getline <file */ 1962 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1963 } else /* can't happen */ 1964 FATAL("illegal redirection %d", a); 1965 if (fp != NULL) { 1966 files[i].fname = tostring(s); 1967 files[i].fp = fp; 1968 files[i].mode = m; 1969 if (pnewflag) 1970 *pnewflag = true; 1971 if (fp != stdin && fp != stdout && fp != stderr) 1972 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1973 } 1974 return fp; 1975 } 1976 1977 const char *filename(FILE *fp) 1978 { 1979 size_t i; 1980 1981 for (i = 0; i < nfiles; i++) 1982 if (fp == files[i].fp) 1983 return files[i].fname; 1984 return "???"; 1985 } 1986 1987 Cell *closefile(Node **a, int n) 1988 { 1989 Cell *x; 1990 size_t i; 1991 bool stat; 1992 1993 x = execute(a[0]); 1994 getsval(x); 1995 stat = true; 1996 for (i = 0; i < nfiles; i++) { 1997 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1998 continue; 1999 if (files[i].mode == GT || files[i].mode == '|') 2000 fflush(files[i].fp); 2001 if (ferror(files[i].fp)) { 2002 if ((files[i].mode == GT && files[i].fp != stderr) 2003 || files[i].mode == '|') 2004 FATAL("write error on %s", files[i].fname); 2005 else 2006 WARNING("i/o error 
occurred on %s", files[i].fname); 2007 } 2008 if (files[i].fp == stdin || files[i].fp == stdout || 2009 files[i].fp == stderr) 2010 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2011 else if (files[i].mode == '|' || files[i].mode == LE) 2012 stat = pclose(files[i].fp) == -1; 2013 else 2014 stat = fclose(files[i].fp) == EOF; 2015 if (stat) 2016 WARNING("i/o error occurred closing %s", files[i].fname); 2017 xfree(files[i].fname); 2018 files[i].fname = NULL; /* watch out for ref thru this */ 2019 files[i].fp = NULL; 2020 break; 2021 } 2022 tempfree(x); 2023 x = gettemp(); 2024 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2025 return(x); 2026 } 2027 2028 void closeall(void) 2029 { 2030 size_t i; 2031 bool stat = false; 2032 2033 for (i = 0; i < nfiles; i++) { 2034 if (! files[i].fp) 2035 continue; 2036 if (files[i].mode == GT || files[i].mode == '|') 2037 fflush(files[i].fp); 2038 if (ferror(files[i].fp)) { 2039 if ((files[i].mode == GT && files[i].fp != stderr) 2040 || files[i].mode == '|') 2041 FATAL("write error on %s", files[i].fname); 2042 else 2043 WARNING("i/o error occurred on %s", files[i].fname); 2044 } 2045 if (files[i].fp == stdin || files[i].fp == stdout || 2046 files[i].fp == stderr) 2047 continue; 2048 if (files[i].mode == '|' || files[i].mode == LE) 2049 stat = pclose(files[i].fp) == -1; 2050 else 2051 stat = fclose(files[i].fp) == EOF; 2052 if (stat) 2053 WARNING("i/o error occurred while closing %s", files[i].fname); 2054 } 2055 } 2056 2057 static void flush_all(void) 2058 { 2059 size_t i; 2060 2061 for (i = 0; i < nfiles; i++) 2062 if (files[i].fp) 2063 fflush(files[i].fp); 2064 } 2065 2066 void backsub(char **pb_ptr, const char **sptr_ptr); 2067 2068 Cell *sub(Node **a, int nnn) /* substitute command */ 2069 { 2070 const char *sptr, *q; 2071 Cell *x, *y, *result; 2072 char *t, *buf, *pb; 2073 fa *pfa; 2074 int bufsz = recsize; 2075 2076 if ((buf = (char *) malloc(bufsz)) == NULL) 2077 FATAL("out of memory in sub"); 2078 x = execute(a[3]); /* 
target string */ 2079 t = getsval(x); 2080 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2081 pfa = (fa *) a[1]; /* regular expression */ 2082 else { 2083 y = execute(a[1]); 2084 pfa = makedfa(getsval(y), 1); 2085 tempfree(y); 2086 } 2087 y = execute(a[2]); /* replacement string */ 2088 result = False; 2089 if (pmatch(pfa, t)) { 2090 sptr = t; 2091 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2092 pb = buf; 2093 while (sptr < patbeg) 2094 *pb++ = *sptr++; 2095 sptr = getsval(y); 2096 while (*sptr != '\0') { 2097 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2098 if (*sptr == '\\') { 2099 backsub(&pb, &sptr); 2100 } else if (*sptr == '&') { 2101 sptr++; 2102 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2103 for (q = patbeg; q < patbeg+patlen; ) 2104 *pb++ = *q++; 2105 } else 2106 *pb++ = *sptr++; 2107 } 2108 *pb = '\0'; 2109 if (pb > buf + bufsz) 2110 FATAL("sub result1 %.30s too big; can't happen", buf); 2111 sptr = patbeg + patlen; 2112 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2113 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2114 while ((*pb++ = *sptr++) != '\0') 2115 continue; 2116 } 2117 if (pb > buf + bufsz) 2118 FATAL("sub result2 %.30s too big; can't happen", buf); 2119 setsval(x, buf); /* BUG: should be able to avoid copy */ 2120 result = True; 2121 } 2122 tempfree(x); 2123 tempfree(y); 2124 free(buf); 2125 return result; 2126 } 2127 2128 Cell *gsub(Node **a, int nnn) /* global substitute */ 2129 { 2130 Cell *x, *y; 2131 char *rptr, *pb; 2132 const char *q, *t, *sptr; 2133 char *buf; 2134 fa *pfa; 2135 int mflag, tempstat, num; 2136 int bufsz = recsize; 2137 2138 if ((buf = (char *) malloc(bufsz)) == NULL) 2139 FATAL("out of memory in gsub"); 2140 mflag = 0; /* if mflag == 0, can replace empty string */ 2141 num = 0; 2142 x = execute(a[3]); /* target string */ 2143 t = getsval(x); 2144 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2145 pfa = (fa *) a[1]; /* 
regular expression */ 2146 else { 2147 y = execute(a[1]); 2148 pfa = makedfa(getsval(y), 1); 2149 tempfree(y); 2150 } 2151 y = execute(a[2]); /* replacement string */ 2152 if (pmatch(pfa, t)) { 2153 tempstat = pfa->initstat; 2154 pfa->initstat = 2; 2155 pb = buf; 2156 rptr = getsval(y); 2157 do { 2158 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2159 if (mflag == 0) { /* can replace empty */ 2160 num++; 2161 sptr = rptr; 2162 while (*sptr != '\0') { 2163 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2164 if (*sptr == '\\') { 2165 backsub(&pb, &sptr); 2166 } else if (*sptr == '&') { 2167 sptr++; 2168 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2169 for (q = patbeg; q < patbeg+patlen; ) 2170 *pb++ = *q++; 2171 } else 2172 *pb++ = *sptr++; 2173 } 2174 } 2175 if (*t == '\0') /* at end */ 2176 goto done; 2177 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2178 *pb++ = *t++; 2179 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2180 FATAL("gsub result0 %.30s too big; can't happen", buf); 2181 mflag = 0; 2182 } 2183 else { /* matched nonempty string */ 2184 num++; 2185 sptr = t; 2186 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2187 while (sptr < patbeg) 2188 *pb++ = *sptr++; 2189 sptr = rptr; 2190 while (*sptr != '\0') { 2191 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2192 if (*sptr == '\\') { 2193 backsub(&pb, &sptr); 2194 } else if (*sptr == '&') { 2195 sptr++; 2196 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2197 for (q = patbeg; q < patbeg+patlen; ) 2198 *pb++ = *q++; 2199 } else 2200 *pb++ = *sptr++; 2201 } 2202 t = patbeg + patlen; 2203 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2204 goto done; 2205 if (pb > buf + bufsz) 2206 FATAL("gsub result1 %.30s too big; can't happen", buf); 2207 mflag = 1; 2208 } 2209 } while (pmatch(pfa,t)); 2210 sptr = t; 2211 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2212 while ((*pb++ = *sptr++) != '\0') 
2213 continue; 2214 done: if (pb < buf + bufsz) 2215 *pb = '\0'; 2216 else if (*(pb-1) != '\0') 2217 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2218 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2219 pfa->initstat = tempstat; 2220 } 2221 tempfree(x); 2222 tempfree(y); 2223 x = gettemp(); 2224 x->tval = NUM; 2225 x->fval = num; 2226 free(buf); 2227 return(x); 2228 } 2229 2230 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2231 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2232 { 2233 Cell *x, *y, *res, *h; 2234 char *rptr; 2235 const char *sptr; 2236 char *buf, *pb; 2237 const char *t, *q; 2238 fa *pfa; 2239 int mflag, tempstat, num, whichm; 2240 int bufsz = recsize; 2241 2242 if ((buf = malloc(bufsz)) == NULL) 2243 FATAL("out of memory in gensub"); 2244 mflag = 0; /* if mflag == 0, can replace empty string */ 2245 num = 0; 2246 x = execute(a[4]); /* source string */ 2247 t = getsval(x); 2248 res = copycell(x); /* target string - initially copy of source */ 2249 res->csub = CTEMP; /* result values are temporary */ 2250 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2251 pfa = (fa *) a[1]; /* regular expression */ 2252 else { 2253 y = execute(a[1]); 2254 pfa = makedfa(getsval(y), 1); 2255 tempfree(y); 2256 } 2257 y = execute(a[2]); /* replacement string */ 2258 h = execute(a[3]); /* which matches should be replaced */ 2259 sptr = getsval(h); 2260 if (sptr[0] == 'g' || sptr[0] == 'G') 2261 whichm = -1; 2262 else { 2263 /* 2264 * The specified number is index of replacement, starting 2265 * from 1. GNU awk treats index lower than 0 same as 2266 * 1, we do same for compatibility. 
2267 */ 2268 whichm = (int) getfval(h) - 1; 2269 if (whichm < 0) 2270 whichm = 0; 2271 } 2272 tempfree(h); 2273 2274 if (pmatch(pfa, t)) { 2275 char *sl; 2276 2277 tempstat = pfa->initstat; 2278 pfa->initstat = 2; 2279 pb = buf; 2280 rptr = getsval(y); 2281 /* 2282 * XXX if there are any backreferences in subst string, 2283 * complain now. 2284 */ 2285 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2286 if (strchr("0123456789", sl[1])) { 2287 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2288 } 2289 } 2290 2291 do { 2292 if (whichm >= 0 && whichm != num) { 2293 num++; 2294 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2295 2296 /* copy the part of string up to and including 2297 * match to output buffer */ 2298 while (t < patbeg + patlen) 2299 *pb++ = *t++; 2300 continue; 2301 } 2302 2303 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2304 if (mflag == 0) { /* can replace empty */ 2305 num++; 2306 sptr = rptr; 2307 while (*sptr != 0) { 2308 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2309 if (*sptr == '\\') { 2310 backsub(&pb, &sptr); 2311 } else if (*sptr == '&') { 2312 sptr++; 2313 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2314 for (q = patbeg; q < patbeg+patlen; ) 2315 *pb++ = *q++; 2316 } else 2317 *pb++ = *sptr++; 2318 } 2319 } 2320 if (*t == 0) /* at end */ 2321 goto done; 2322 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2323 *pb++ = *t++; 2324 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2325 FATAL("gensub result0 %.30s too big; can't happen", buf); 2326 mflag = 0; 2327 } 2328 else { /* matched nonempty string */ 2329 num++; 2330 sptr = t; 2331 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2332 while (sptr < patbeg) 2333 *pb++ = *sptr++; 2334 sptr = rptr; 2335 while (*sptr != 0) { 2336 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2337 if (*sptr == '\\') { 2338 backsub(&pb, &sptr); 2339 
} else if (*sptr == '&') { 2340 sptr++; 2341 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2342 for (q = patbeg; q < patbeg+patlen; ) 2343 *pb++ = *q++; 2344 } else 2345 *pb++ = *sptr++; 2346 } 2347 t = patbeg + patlen; 2348 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2349 goto done; 2350 if (pb > buf + bufsz) 2351 FATAL("gensub result1 %.30s too big; can't happen", buf); 2352 mflag = 1; 2353 } 2354 } while (pmatch(pfa,t)); 2355 sptr = t; 2356 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2357 while ((*pb++ = *sptr++) != 0) 2358 ; 2359 done: if (pb > buf + bufsz) 2360 FATAL("gensub result2 %.30s too big; can't happen", buf); 2361 *pb = '\0'; 2362 setsval(res, buf); 2363 pfa->initstat = tempstat; 2364 } 2365 tempfree(x); 2366 tempfree(y); 2367 free(buf); 2368 return(res); 2369 } 2370 2371 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2372 { /* sptr[0] == '\\' */ 2373 char *pb = *pb_ptr; 2374 const char *sptr = *sptr_ptr; 2375 2376 if (sptr[1] == '\\') { 2377 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2378 *pb++ = '\\'; 2379 *pb++ = '&'; 2380 sptr += 4; 2381 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2382 *pb++ = '\\'; 2383 sptr += 2; 2384 } else if (do_posix) { /* \\x -> \x */ 2385 sptr++; 2386 *pb++ = *sptr++; 2387 } else { /* \\x -> \\x */ 2388 *pb++ = *sptr++; 2389 *pb++ = *sptr++; 2390 } 2391 } else if (sptr[1] == '&') { /* literal & */ 2392 sptr++; 2393 *pb++ = *sptr++; 2394 } else /* literal \ */ 2395 *pb++ = *sptr++; 2396 2397 *pb_ptr = pb; 2398 *sptr_ptr = sptr; 2399 } 2400