1 /* $OpenBSD: run.c,v 1.74 2022/09/21 01:42:59 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 
****************************************************************/

#define DEBUG
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <wchar.h>
#include <wctype.h>
#include <fcntl.h>
#include <setjmp.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "awk.h"
#include "awkgram.tab.h"

static void stdinit(void);
static void flush_all(void);

/*
 * tempfree(x) hands x to tfree() only when istemp(x) is true.
 * The macro form is the one compiled (#if 1); it mentions x twice, so
 * the argument should be free of side effects.  The function form under
 * #else additionally warns when an OCELL has a csub outside CUNK..CFREE.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif

/* do we really need these? */
/* #ifdef _NFILE */
/* #ifndef FOPEN_MAX */
/* #define FOPEN_MAX _NFILE */
/* #endif */
/* #endif */
/* */
/* #ifndef FOPEN_MAX */
/* #define FOPEN_MAX 40 */	/* max number of open files */
/* #endif */
/* */
/* #ifndef RAND_MAX */
/* #define RAND_MAX 32767 */	/* all that ansi guarantees */
/* #endif */

jmp_buf env;		/* setjmp() target in program(); jump() longjmp()s here on EXIT */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Shared, statically allocated result cells.  The functions in this file
 * return pointers to these (True/False for boolean results, j* for jump
 * statements) instead of allocating a temporary.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell
*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template that gettemp() copies into every temporary cell it hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */

/* buffer memory management */
/*
 * adjbuf: make sure the managed buffer holds at least minlen bytes.
 * Nothing happens when minlen already fits in *psiz; otherwise the
 * request is rounded up to a multiple of quantum (when quantum is
 * non-zero) and the buffer is grown with realloc().
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
/* pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for realloc failure, !=0 for success
 */
{
	if (minlen > *psiz) {
		char *tbuf;
		/* remainder of minlen over quantum; 0 disables rounding */
		int rminlen = quantum ? minlen % quantum : 0;
		/* offset of the movable pointer, remembered across realloc() */
		int boff = pbptr ?
*pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 
goto ex1;
	if (a[2]) {		/* END */
		x = execute(a[2]);
		if (isbreak(x) || isnext(x) || iscont(x))
			FATAL("illegal break, continue, next or nextfile from END");
		tempfree(x);
	}
  ex1:
	return(True);
}

struct Frame {	/* stack frame for awk function calls */
	int nargs;		/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */

/*
 * call: invoke a user-defined awk function.
 * a[0] evaluates to the function's Cell and a[1] is the list of actual
 * arguments.  The frame stack is allocated on first use, 100 frames at
 * a time.  The combined count of actual and declared arguments is
 * limited to NARGS because args[] and oargs[] are fixed-size arrays.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	/* template for parameters the caller did not supply */
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	if (frame == NULL) {
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x
= x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for the array write-back below */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);	/* everything else is passed as a CCOPY temporary */
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		/* frame stack is full: grow it by 100 and re-derive frp from its index */
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	/* release the parameter cells now that the body has run */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				if (i >= ncall) {
					/* local that became an array inside the function */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* supplied scalar that became an array: copy type and table back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice!
*/ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 if (n > 0) { 452 x = execute(a[0]); 453 setsval(x, buf); 454 if (is_number(x->sval, & result)) { 455 x->fval = result; 456 x->tval |= NUM; 457 } 458 tempfree(x); 459 } 460 } 461 } 462 setfval(r, (Awkfloat) n); 463 free(buf); 464 return r; 465 } 466 467 Cell *getnf(Node **a, int n) /* get NF */ 468 { 469 if (!donefld) 470 fldbld(); 471 return (Cell *) a[0]; 472 } 473 474 static char * 475 makearraystring(Node *p, const char *func) 476 { 477 char *buf; 478 int bufsz = recsize; 479 size_t blen; 480 481 if ((buf = (char *) malloc(bufsz)) == NULL) { 482 FATAL("%s: out of memory", func); 483 } 
484 485 blen = 0; 486 buf[blen] = '\0'; 487 488 for (; p; p = p->nnext) { 489 Cell *x = execute(p); /* expr */ 490 char *s = getsval(x); 491 size_t seplen = strlen(getsval(subseploc)); 492 size_t nsub = p->nnext ? seplen : 0; 493 size_t slen = strlen(s); 494 size_t tlen = blen + slen + nsub; 495 496 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 497 FATAL("%s: out of memory %s[%s...]", 498 func, x->nval, buf); 499 } 500 memcpy(buf + blen, s, slen); 501 if (nsub) { 502 memcpy(buf + blen + slen, *SUBSEP, nsub); 503 } 504 buf[tlen] = '\0'; 505 blen = tlen; 506 tempfree(x); 507 } 508 return buf; 509 } 510 511 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 512 { 513 Cell *x, *z; 514 char *buf; 515 516 x = execute(a[0]); /* Cell* for symbol table */ 517 buf = makearraystring(a[1], __func__); 518 if (!isarr(x)) { 519 DPRINTF("making %s into an array\n", NN(x->nval)); 520 if (freeable(x)) 521 xfree(x->sval); 522 x->tval &= ~(STR|NUM|DONTFREE); 523 x->tval |= ARR; 524 x->sval = (char *) makesymtab(NSYMTAB); 525 } 526 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 527 z->ctype = OCELL; 528 z->csub = CVAR; 529 tempfree(x); 530 free(buf); 531 return(z); 532 } 533 534 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 535 { 536 Cell *x; 537 538 x = execute(a[0]); /* Cell* for symbol table */ 539 if (x == symtabloc) { 540 FATAL("cannot delete SYMTAB or its elements"); 541 } 542 if (!isarr(x)) 543 return True; 544 if (a[1] == NULL) { /* delete the elements, not the table */ 545 freesymtab(x); 546 x->tval &= ~STR; 547 x->tval |= ARR; 548 x->sval = (char *) makesymtab(NSYMTAB); 549 } else { 550 char *buf = makearraystring(a[1], __func__); 551 freeelem(x, buf); 552 free(buf); 553 } 554 tempfree(x); 555 return True; 556 } 557 558 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 559 { 560 Cell *ap, *k; 561 char *buf; 562 563 ap = execute(a[1]); /* array name */ 564 if (!isarr(ap)) 
{ 565 DPRINTF("making %s into an array\n", ap->nval); 566 if (freeable(ap)) 567 xfree(ap->sval); 568 ap->tval &= ~(STR|NUM|DONTFREE); 569 ap->tval |= ARR; 570 ap->sval = (char *) makesymtab(NSYMTAB); 571 } 572 buf = makearraystring(a[0], __func__); 573 k = lookup(buf, (Array *) ap->sval); 574 tempfree(ap); 575 free(buf); 576 if (k == NULL) 577 return(False); 578 else 579 return(True); 580 } 581 582 583 Cell *matchop(Node **a, int n) /* ~ and match() */ 584 { 585 Cell *x, *y; 586 char *s, *t; 587 int i; 588 fa *pfa; 589 int (*mf)(fa *, const char *) = match, mode = 0; 590 591 if (n == MATCHFCN) { 592 mf = pmatch; 593 mode = 1; 594 } 595 x = execute(a[1]); /* a[1] = target text */ 596 s = getsval(x); 597 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 598 i = (*mf)((fa *) a[2], s); 599 else { 600 y = execute(a[2]); /* a[2] = regular expr */ 601 t = getsval(y); 602 pfa = makedfa(t, mode); 603 i = (*mf)(pfa, s); 604 tempfree(y); 605 } 606 tempfree(x); 607 if (n == MATCHFCN) { 608 int start = patbeg - s + 1; 609 if (patlen < 0) 610 start = 0; 611 setfval(rstartloc, (Awkfloat) start); 612 setfval(rlengthloc, (Awkfloat) patlen); 613 x = gettemp(); 614 x->tval = NUM; 615 x->fval = start; 616 return x; 617 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 618 return(True); 619 else 620 return(False); 621 } 622 623 624 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 625 { 626 Cell *x, *y; 627 int i; 628 629 x = execute(a[0]); 630 i = istrue(x); 631 tempfree(x); 632 switch (n) { 633 case BOR: 634 if (i) return(True); 635 y = execute(a[1]); 636 i = istrue(y); 637 tempfree(y); 638 if (i) return(True); 639 else return(False); 640 case AND: 641 if ( !i ) return(False); 642 y = execute(a[1]); 643 i = istrue(y); 644 tempfree(y); 645 if (i) return(True); 646 else return(False); 647 case NOT: 648 if (i) return(False); 649 else return(True); 650 default: /* can't happen */ 651 FATAL("unknown boolean operator %d", n); 652 } 653 return 0; 
/*NOTREACHED*/ 654 } 655 656 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 657 { 658 int i; 659 Cell *x, *y; 660 Awkfloat j; 661 662 x = execute(a[0]); 663 y = execute(a[1]); 664 if (x->tval&NUM && y->tval&NUM) { 665 j = x->fval - y->fval; 666 i = j<0? -1: (j>0? 1: 0); 667 } else { 668 i = strcmp(getsval(x), getsval(y)); 669 } 670 tempfree(x); 671 tempfree(y); 672 switch (n) { 673 case LT: if (i<0) return(True); 674 else return(False); 675 case LE: if (i<=0) return(True); 676 else return(False); 677 case NE: if (i!=0) return(True); 678 else return(False); 679 case EQ: if (i == 0) return(True); 680 else return(False); 681 case GE: if (i>=0) return(True); 682 else return(False); 683 case GT: if (i>0) return(True); 684 else return(False); 685 default: /* can't happen */ 686 FATAL("unknown relational operator %d", n); 687 } 688 return 0; /*NOTREACHED*/ 689 } 690 691 void tfree(Cell *a) /* free a tempcell */ 692 { 693 if (freeable(a)) { 694 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 695 xfree(a->sval); 696 } 697 if (a == tmps) 698 FATAL("tempcell list is curdled"); 699 a->cnext = tmps; 700 tmps = a; 701 } 702 703 Cell *gettemp(void) /* get a tempcell */ 704 { int i; 705 Cell *x; 706 707 if (!tmps) { 708 tmps = (Cell *) calloc(100, sizeof(*tmps)); 709 if (!tmps) 710 FATAL("out of space for temporaries"); 711 for (i = 1; i < 100; i++) 712 tmps[i-1].cnext = &tmps[i]; 713 tmps[i-1].cnext = NULL; 714 } 715 x = tmps; 716 tmps = x->cnext; 717 *x = tempcell; 718 return(x); 719 } 720 721 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 722 { 723 Awkfloat val; 724 Cell *x; 725 int m; 726 char *s; 727 728 x = execute(a[0]); 729 val = getfval(x); /* freebsd: defend against super large field numbers */ 730 if ((Awkfloat)INT_MAX < val) 731 FATAL("trying to access out of range field %s", x->nval); 732 m = (int) val; 733 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! 
*/ 734 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 735 /* BUG: can x->nval ever be null??? */ 736 tempfree(x); 737 x = fieldadr(m); 738 x->ctype = OCELL; /* BUG? why are these needed? */ 739 x->csub = CFLD; 740 return(x); 741 } 742 743 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 744 { 745 int k, m, n; 746 char *s; 747 int temp; 748 Cell *x, *y, *z = NULL; 749 750 x = execute(a[0]); 751 y = execute(a[1]); 752 if (a[2] != NULL) 753 z = execute(a[2]); 754 s = getsval(x); 755 k = strlen(s) + 1; 756 if (k <= 1) { 757 tempfree(x); 758 tempfree(y); 759 if (a[2] != NULL) { 760 tempfree(z); 761 } 762 x = gettemp(); 763 setsval(x, ""); 764 return(x); 765 } 766 m = (int) getfval(y); 767 if (m <= 0) 768 m = 1; 769 else if (m > k) 770 m = k; 771 tempfree(y); 772 if (a[2] != NULL) { 773 n = (int) getfval(z); 774 tempfree(z); 775 } else 776 n = k - 1; 777 if (n < 0) 778 n = 0; 779 else if (n > k - m) 780 n = k - m; 781 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 782 y = gettemp(); 783 temp = s[n+m-1]; /* with thanks to John Linderman */ 784 s[n+m-1] = '\0'; 785 setsval(y, s + m - 1); 786 s[n+m-1] = temp; 787 tempfree(x); 788 return(y); 789 } 790 791 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 792 { 793 Cell *x, *y, *z; 794 char *s1, *s2, *p1, *p2, *q; 795 Awkfloat v = 0.0; 796 797 x = execute(a[0]); 798 s1 = getsval(x); 799 y = execute(a[1]); 800 s2 = getsval(y); 801 802 z = gettemp(); 803 for (p1 = s1; *p1 != '\0'; p1++) { 804 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 805 continue; 806 if (*p2 == '\0') { 807 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 808 break; 809 } 810 } 811 tempfree(x); 812 tempfree(y); 813 setfval(z, v); 814 return(z); 815 } 816 817 #define MAXNUMSIZE 50 818 819 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 820 { 821 char *fmt; 822 char *p, *t; 823 const char *os; 824 Cell *x; 825 int flag = 0, n; 826 int fmtwd; /* format width */ 827 int fmtsz = 
recsize;
	char *buf = *pbuf;
	int bufsize = *pbufsize;
	/* space left in fmt / buf counting from pointer a */
#define FMTSZ(a)   (fmtsz - ((a) - fmt))
#define BUFSZ(a)   (bufsize - ((a) - buf))

	static bool first = true;
	static bool have_a_format = false;

	if (first) {
		/* probe once whether the C library's snprintf understands %a */
		char xbuf[100];

		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
		first = false;
	}

	os = s;		/* original format string, kept for diagnostics */
	p = buf;	/* p is the write position in the output buffer */
	if ((fmt = (char *) malloc(fmtsz)) == NULL)
		FATAL("out of memory in format()");
	while (*s) {
		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
		if (*s != '%') {
			/* ordinary character: copy through */
			*p++ = *s++;
			continue;
		}
		if (*(s+1) == '%') {
			/* "%%" produces a single '%' */
			*p++ = '%';
			s += 2;
			continue;
		}
		/* have to be real careful in case this is a huge number, eg, %100000d */
		fmtwd = atoi(s+1);
		if (fmtwd < 0)
			fmtwd = -fmtwd;
		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
		/* copy one conversion specification from s into fmt */
		for (t = fmt; (*t++ = *s) != '\0'; s++) {
			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
				FATAL("format item %.30s... ran format() out of memory", os);
			/* Ignore size specifiers */
			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
				t--;
				continue;
			}
			if (isalpha((uschar)*s))
				break;	/* reached the conversion letter */
			if (*s == '$') {
				FATAL("'$' not permitted in awk formats");
			}
			if (*s == '*') {
				/* width or precision comes from the next argument */
				if (a == NULL) {
					FATAL("not enough args in printf(%s)", os);
				}
				x = execute(a);
				a = a->nnext;
				/* overwrite the '*' just copied with the number */
				snprintf(t - 1, FMTSZ(t - 1),
				    "%d", fmtwd=(int) getfval(x));
				if (fmtwd < 0)
					fmtwd = -fmtwd;
				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
				t = fmt + strlen(fmt);
				tempfree(x);
			}
		}
		*t = '\0';
		if (fmtwd < 0)
			fmtwd = -fmtwd;
		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
		/* classify the conversion; flag selects the argument type used below */
		switch (*s) {
		case 'a': case 'A':
			if (have_a_format)
				flag = *s;
			else
				flag = 'f';
			break;
		case 'f': case 'e': case 'g': case 'E': case 'G':
			flag = 'f';
			break;
		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
			flag = (*s == 'd' || *s == 'i') ?
'd' : 'u'; 908 *(t-1) = 'j'; 909 *t = *s; 910 *++t = '\0'; 911 break; 912 case 's': 913 flag = 's'; 914 break; 915 case 'c': 916 flag = 'c'; 917 break; 918 default: 919 WARNING("weird printf conversion %s", fmt); 920 flag = '?'; 921 break; 922 } 923 if (a == NULL) 924 FATAL("not enough args in printf(%s)", os); 925 x = execute(a); 926 a = a->nnext; 927 n = MAXNUMSIZE; 928 if (fmtwd > n) 929 n = fmtwd; 930 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 931 switch (flag) { 932 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 933 t = getsval(x); 934 n = strlen(t); 935 if (fmtwd > n) 936 n = fmtwd; 937 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 938 p += strlen(p); 939 snprintf(p, BUFSZ(p), "%s", t); 940 break; 941 case 'a': 942 case 'A': 943 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 944 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 945 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 946 case 's': 947 t = getsval(x); 948 n = strlen(t); 949 if (fmtwd > n) 950 n = fmtwd; 951 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 952 FATAL("huge string/format (%d chars) in printf %.30s... 
ran format() out of memory", n, t); 953 snprintf(p, BUFSZ(p), fmt, t); 954 break; 955 case 'c': 956 if (isnum(x)) { 957 if ((int)getfval(x)) 958 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 959 else { 960 *p++ = '\0'; /* explicit null byte */ 961 *p = '\0'; /* next output will start here */ 962 } 963 } else 964 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 965 break; 966 default: 967 FATAL("can't happen: bad conversion %c in format()", flag); 968 } 969 tempfree(x); 970 p += strlen(p); 971 s++; 972 } 973 *p = '\0'; 974 free(fmt); 975 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 976 x = execute(a); 977 tempfree(x); 978 } 979 *pbuf = buf; 980 *pbufsize = bufsize; 981 return p - buf; 982 } 983 984 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 985 { 986 Cell *x; 987 Node *y; 988 char *buf; 989 int bufsz=3*recsize; 990 991 if ((buf = (char *) malloc(bufsz)) == NULL) 992 FATAL("out of memory in awksprintf"); 993 y = a[0]->nnext; 994 x = execute(a[0]); 995 if (format(&buf, &bufsz, getsval(x), y) == -1) 996 FATAL("sprintf string %.30s... too long. can't happen.", buf); 997 tempfree(x); 998 x = gettemp(); 999 x->sval = buf; 1000 x->tval = STR; 1001 return(x); 1002 } 1003 1004 Cell *awkprintf(Node **a, int n) /* printf */ 1005 { /* a[0] is list of args, starting with format string */ 1006 /* a[1] is redirection operator, a[2] is redirection file */ 1007 FILE *fp; 1008 Cell *x; 1009 Node *y; 1010 char *buf; 1011 int len; 1012 int bufsz=3*recsize; 1013 1014 if ((buf = (char *) malloc(bufsz)) == NULL) 1015 FATAL("out of memory in awkprintf"); 1016 y = a[0]->nnext; 1017 x = execute(a[0]); 1018 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1019 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1020 tempfree(x); 1021 if (a[1] == NULL) { 1022 /* fputs(buf, stdout); */ 1023 fwrite(buf, len, 1, stdout); 1024 if (ferror(stdout)) 1025 FATAL("write error on stdout"); 1026 } else { 1027 fp = redirect(ptoi(a[1]), a[2]); 1028 /* fputs(buf, fp); */ 1029 fwrite(buf, len, 1, fp); 1030 fflush(fp); 1031 if (ferror(fp)) 1032 FATAL("write error on %s", filename(fp)); 1033 } 1034 free(buf); 1035 return(True); 1036 } 1037 1038 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1039 { 1040 Awkfloat i, j = 0; 1041 double v; 1042 Cell *x, *y, *z; 1043 1044 x = execute(a[0]); 1045 i = getfval(x); 1046 tempfree(x); 1047 if (n != UMINUS && n != UPLUS) { 1048 y = execute(a[1]); 1049 j = getfval(y); 1050 tempfree(y); 1051 } 1052 z = gettemp(); 1053 switch (n) { 1054 case ADD: 1055 i += j; 1056 break; 1057 case MINUS: 1058 i -= j; 1059 break; 1060 case MULT: 1061 i *= j; 1062 break; 1063 case DIVIDE: 1064 if (j == 0) 1065 FATAL("division by zero"); 1066 i /= j; 1067 break; 1068 case MOD: 1069 if (j == 0) 1070 FATAL("division by zero in mod"); 1071 modf(i/j, &v); 1072 i = i - j * v; 1073 break; 1074 case UMINUS: 1075 i = -i; 1076 break; 1077 case UPLUS: /* handled by getfval(), above */ 1078 break; 1079 case POWER: 1080 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1081 i = ipow(i, (int) j); 1082 else { 1083 errno = 0; 1084 i = errcheck(pow(i, j), "pow"); 1085 } 1086 break; 1087 default: /* can't happen */ 1088 FATAL("illegal arithmetic operator %d", n); 1089 } 1090 setfval(z, i); 1091 return(z); 1092 } 1093 1094 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1095 { 1096 double v; 1097 1098 if (n <= 0) 1099 return 1; 1100 v = ipow(x, n/2); 1101 if (n % 2 == 0) 1102 return v * v; 1103 else 1104 return x * v * v; 1105 } 1106 1107 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1108 { 1109 Cell *x, *z; 1110 int k; 1111 Awkfloat xf; 1112 1113 x = execute(a[0]); 1114 xf = getfval(x); 1115 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1116 if (n == PREINCR || n == PREDECR) { 1117 setfval(x, xf + k); 1118 return(x); 1119 } 1120 z = gettemp(); 1121 setfval(z, xf); 1122 setfval(x, xf + k); 1123 tempfree(x); 1124 return(z); 1125 } 1126 1127 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1128 { /* this is subtle; don't muck with it. */ 1129 Cell *x, *y; 1130 Awkfloat xf, yf; 1131 double v; 1132 1133 y = execute(a[1]); 1134 x = execute(a[0]); 1135 if (n == ASSIGN) { /* ordinary assignment */ 1136 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1137 ; /* self-assignment: leave alone unless it's a field or NF */ 1138 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1139 setsval(x, getsval(y)); 1140 x->fval = getfval(y); 1141 x->tval |= NUM; 1142 } 1143 else if (isstr(y)) 1144 setsval(x, getsval(y)); 1145 else if (isnum(y)) 1146 setfval(x, getfval(y)); 1147 else 1148 funnyvar(y, "read value of"); 1149 tempfree(y); 1150 return(x); 1151 } 1152 xf = getfval(x); 1153 yf = getfval(y); 1154 switch (n) { 1155 case ADDEQ: 1156 xf += yf; 1157 break; 1158 case SUBEQ: 1159 xf -= yf; 1160 break; 1161 case MULTEQ: 1162 xf *= yf; 1163 break; 1164 case DIVEQ: 1165 if (yf == 0) 1166 FATAL("division by zero in /="); 1167 xf /= yf; 1168 break; 1169 case MODEQ: 1170 if (yf == 0) 1171 FATAL("division by zero in %%="); 1172 modf(xf/yf, &v); 1173 xf = xf - yf * v; 1174 break; 1175 case POWEQ: 1176 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1177 xf = ipow(xf, (int) yf); 1178 else { 1179 errno = 0; 1180 xf = errcheck(pow(xf, yf), "pow"); 1181 } 1182 break; 1183 default: 1184 FATAL("illegal assignment operator %d", n); 1185 break; 1186 } 1187 tempfree(y); 1188 setfval(x, xf); 1189 return(x); 1190 } 1191 1192 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1193 { 1194 Cell *x, *y, *z; 1195 int n1, n2; 1196 char *s = NULL; 1197 int 
ssz = 0; 1198 1199 x = execute(a[0]); 1200 n1 = strlen(getsval(x)); 1201 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1202 memcpy(s, x->sval, n1); 1203 1204 tempfree(x); 1205 1206 y = execute(a[1]); 1207 n2 = strlen(getsval(y)); 1208 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1209 memcpy(s + n1, y->sval, n2); 1210 s[n1 + n2] = '\0'; 1211 1212 tempfree(y); 1213 1214 z = gettemp(); 1215 z->sval = s; 1216 z->tval = STR; 1217 1218 return(z); 1219 } 1220 1221 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1222 { 1223 Cell *x; 1224 1225 if (a[0] == NULL) 1226 x = execute(a[1]); 1227 else { 1228 x = execute(a[0]); 1229 if (istrue(x)) { 1230 tempfree(x); 1231 x = execute(a[1]); 1232 } 1233 } 1234 return x; 1235 } 1236 1237 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1238 { 1239 Cell *x; 1240 int pair; 1241 1242 pair = ptoi(a[3]); 1243 if (pairstack[pair] == 0) { 1244 x = execute(a[0]); 1245 if (istrue(x)) 1246 pairstack[pair] = 1; 1247 tempfree(x); 1248 } 1249 if (pairstack[pair] == 1) { 1250 x = execute(a[1]); 1251 if (istrue(x)) 1252 pairstack[pair] = 0; 1253 tempfree(x); 1254 x = execute(a[2]); 1255 return(x); 1256 } 1257 return(False); 1258 } 1259 1260 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1261 { 1262 Cell *x = NULL, *y, *ap; 1263 const char *s, *origs, *t; 1264 const char *fs = NULL; 1265 char *origfs = NULL; 1266 int sep; 1267 char temp, num[50]; 1268 int n, tempstat, arg3type; 1269 double result; 1270 1271 y = execute(a[0]); /* source string */ 1272 origs = s = strdup(getsval(y)); 1273 if (s == NULL) 1274 FATAL("out of space in split"); 1275 tempfree(y); 1276 arg3type = ptoi(a[3]); 1277 if (a[2] == NULL) /* fs string */ 1278 fs = getsval(fsloc); 1279 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1280 x = execute(a[2]); 1281 fs = origfs = strdup(getsval(x)); 1282 if (fs == NULL) 1283 FATAL("out of space in split"); 1284 tempfree(x); 1285 } else if (arg3type == REGEXPR) 1286 fs = "(regexpr)"; 
/* split(str,arr,/regexpr/) */ 1287 else 1288 FATAL("illegal type of split"); 1289 sep = *fs; 1290 ap = execute(a[1]); /* array name */ 1291 freesymtab(ap); 1292 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1293 ap->tval &= ~STR; 1294 ap->tval |= ARR; 1295 ap->sval = (char *) makesymtab(NSYMTAB); 1296 1297 n = 0; 1298 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1299 /* split(s, a, //); have to arrange that it looks like empty sep */ 1300 arg3type = 0; 1301 fs = ""; 1302 sep = 0; 1303 } 1304 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1305 fa *pfa; 1306 if (arg3type == REGEXPR) { /* it's ready already */ 1307 pfa = (fa *) a[2]; 1308 } else { 1309 pfa = makedfa(fs, 1); 1310 } 1311 if (nematch(pfa,s)) { 1312 tempstat = pfa->initstat; 1313 pfa->initstat = 2; 1314 do { 1315 n++; 1316 snprintf(num, sizeof(num), "%d", n); 1317 temp = *patbeg; 1318 setptr(patbeg, '\0'); 1319 if (is_number(s, & result)) 1320 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1321 else 1322 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1323 setptr(patbeg, temp); 1324 s = patbeg + patlen; 1325 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1326 n++; 1327 snprintf(num, sizeof(num), "%d", n); 1328 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1329 pfa->initstat = tempstat; 1330 goto spdone; 1331 } 1332 } while (nematch(pfa,s)); 1333 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1334 /* cf gsub and refldbld */ 1335 } 1336 n++; 1337 snprintf(num, sizeof(num), "%d", n); 1338 if (is_number(s, & result)) 1339 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1340 else 1341 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1342 spdone: 1343 pfa = NULL; 1344 } else if (sep == ' ') { 1345 for (n = 0; ; ) { 1346 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1347 while (ISWS(*s)) 1348 s++; 1349 if (*s == '\0') 1350 break; 1351 n++; 1352 t = s; 1353 do 1354 s++; 1355 while (*s != '\0' 
&& !ISWS(*s)); 1356 temp = *s; 1357 setptr(s, '\0'); 1358 snprintf(num, sizeof(num), "%d", n); 1359 if (is_number(t, & result)) 1360 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1361 else 1362 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1363 setptr(s, temp); 1364 if (*s != '\0') 1365 s++; 1366 } 1367 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1368 for (n = 0; *s != '\0'; s++) { 1369 char buf[2]; 1370 n++; 1371 snprintf(num, sizeof(num), "%d", n); 1372 buf[0] = *s; 1373 buf[1] = '\0'; 1374 if (isdigit((uschar)buf[0])) 1375 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1376 else 1377 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1378 } 1379 } else if (*s != '\0') { 1380 for (;;) { 1381 n++; 1382 t = s; 1383 while (*s != sep && *s != '\n' && *s != '\0') 1384 s++; 1385 temp = *s; 1386 setptr(s, '\0'); 1387 snprintf(num, sizeof(num), "%d", n); 1388 if (is_number(t, & result)) 1389 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1390 else 1391 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1392 setptr(s, temp); 1393 if (*s++ == '\0') 1394 break; 1395 } 1396 } 1397 tempfree(ap); 1398 xfree(origs); 1399 xfree(origfs); 1400 x = gettemp(); 1401 x->tval = NUM; 1402 x->fval = n; 1403 return(x); 1404 } 1405 1406 Cell *condexpr(Node **a, int n) /* a[0] ? 
a[1] : a[2] */ 1407 { 1408 Cell *x; 1409 1410 x = execute(a[0]); 1411 if (istrue(x)) { 1412 tempfree(x); 1413 x = execute(a[1]); 1414 } else { 1415 tempfree(x); 1416 x = execute(a[2]); 1417 } 1418 return(x); 1419 } 1420 1421 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1422 { 1423 Cell *x; 1424 1425 x = execute(a[0]); 1426 if (istrue(x)) { 1427 tempfree(x); 1428 x = execute(a[1]); 1429 } else if (a[2] != NULL) { 1430 tempfree(x); 1431 x = execute(a[2]); 1432 } 1433 return(x); 1434 } 1435 1436 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1437 { 1438 Cell *x; 1439 1440 for (;;) { 1441 x = execute(a[0]); 1442 if (!istrue(x)) 1443 return(x); 1444 tempfree(x); 1445 x = execute(a[1]); 1446 if (isbreak(x)) { 1447 x = True; 1448 return(x); 1449 } 1450 if (isnext(x) || isexit(x) || isret(x)) 1451 return(x); 1452 tempfree(x); 1453 } 1454 } 1455 1456 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1457 { 1458 Cell *x; 1459 1460 for (;;) { 1461 x = execute(a[0]); 1462 if (isbreak(x)) 1463 return True; 1464 if (isnext(x) || isexit(x) || isret(x)) 1465 return(x); 1466 tempfree(x); 1467 x = execute(a[1]); 1468 if (!istrue(x)) 1469 return(x); 1470 tempfree(x); 1471 } 1472 } 1473 1474 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1475 { 1476 Cell *x; 1477 1478 x = execute(a[0]); 1479 tempfree(x); 1480 for (;;) { 1481 if (a[1]!=NULL) { 1482 x = execute(a[1]); 1483 if (!istrue(x)) return(x); 1484 else tempfree(x); 1485 } 1486 x = execute(a[3]); 1487 if (isbreak(x)) /* turn off break */ 1488 return True; 1489 if (isnext(x) || isexit(x) || isret(x)) 1490 return(x); 1491 tempfree(x); 1492 x = execute(a[2]); 1493 tempfree(x); 1494 } 1495 } 1496 1497 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1498 { 1499 Cell *x, *vp, *arrayp, *cp, *ncp; 1500 Array *tp; 1501 int i; 1502 1503 vp = execute(a[0]); 1504 arrayp = execute(a[1]); 1505 if (!isarr(arrayp)) { 1506 return True; 1507 } 1508 tp = (Array *) arrayp->sval; 1509 
tempfree(arrayp); 1510 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1511 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1512 setsval(vp, cp->nval); 1513 ncp = cp->cnext; 1514 x = execute(a[2]); 1515 if (isbreak(x)) { 1516 tempfree(vp); 1517 return True; 1518 } 1519 if (isnext(x) || isexit(x) || isret(x)) { 1520 tempfree(vp); 1521 return(x); 1522 } 1523 tempfree(x); 1524 } 1525 } 1526 return True; 1527 } 1528 1529 static char *nawk_convert(const char *s, int (*fun_c)(int), 1530 wint_t (*fun_wc)(wint_t)) 1531 { 1532 char *buf = NULL; 1533 char *pbuf = NULL; 1534 const char *ps = NULL; 1535 size_t n = 0; 1536 wchar_t wc; 1537 size_t sz = MB_CUR_MAX; 1538 1539 if (sz == 1) { 1540 buf = tostring(s); 1541 1542 for (pbuf = buf; *pbuf; pbuf++) 1543 *pbuf = fun_c((uschar)*pbuf); 1544 1545 return buf; 1546 } else { 1547 /* upper/lower character may be shorter/longer */ 1548 buf = tostringN(s, strlen(s) * sz + 1); 1549 1550 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1551 /* 1552 * Reset internal state here too. 1553 * Assign result to avoid a compiler warning. (Casting to void 1554 * doesn't work.) 1555 * Increment said variable to avoid a different warning. 
1556 */ 1557 int unused = wctomb(NULL, L'\0'); 1558 unused++; 1559 1560 ps = s; 1561 pbuf = buf; 1562 while (n = mbtowc(&wc, ps, sz), 1563 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1564 { 1565 ps += n; 1566 1567 n = wctomb(pbuf, fun_wc(wc)); 1568 if (n == (size_t)-1) 1569 FATAL("illegal wide character %s", s); 1570 1571 pbuf += n; 1572 } 1573 1574 *pbuf = '\0'; 1575 1576 if (n) 1577 FATAL("illegal byte sequence %s", s); 1578 1579 return buf; 1580 } 1581 } 1582 1583 #ifdef __DJGPP__ 1584 static wint_t towupper(wint_t wc) 1585 { 1586 if (wc >= 0 && wc < 256) 1587 return toupper(wc & 0xFF); 1588 1589 return wc; 1590 } 1591 1592 static wint_t towlower(wint_t wc) 1593 { 1594 if (wc >= 0 && wc < 256) 1595 return tolower(wc & 0xFF); 1596 1597 return wc; 1598 } 1599 #endif 1600 1601 static char *nawk_toupper(const char *s) 1602 { 1603 return nawk_convert(s, toupper, towupper); 1604 } 1605 1606 static char *nawk_tolower(const char *s) 1607 { 1608 return nawk_convert(s, tolower, towlower); 1609 } 1610 1611 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1612 { 1613 Cell *x, *y; 1614 Awkfloat u; 1615 int t, sz; 1616 Awkfloat tmp; 1617 char *buf, *fmt; 1618 Node *nextarg; 1619 FILE *fp; 1620 int status = 0; 1621 time_t tv; 1622 struct tm *tm, tmbuf; 1623 1624 t = ptoi(a[0]); 1625 x = execute(a[1]); 1626 nextarg = a[1]->nnext; 1627 switch (t) { 1628 case FLENGTH: 1629 if (isarr(x)) 1630 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 1631 else 1632 u = strlen(getsval(x)); 1633 break; 1634 case FLOG: 1635 errno = 0; 1636 u = errcheck(log(getfval(x)), "log"); 1637 break; 1638 case FINT: 1639 modf(getfval(x), &u); break; 1640 case FEXP: 1641 errno = 0; 1642 u = errcheck(exp(getfval(x)), "exp"); 1643 break; 1644 case FSQRT: 1645 errno = 0; 1646 u = errcheck(sqrt(getfval(x)), "sqrt"); 1647 break; 1648 case FSIN: 1649 u = sin(getfval(x)); break; 1650 case FCOS: 1651 u = cos(getfval(x)); break; 1652 case FATAN: 1653 if (nextarg == NULL) { 1654 WARNING("atan2 requires two arguments; returning 1.0"); 1655 u = 1.0; 1656 } else { 1657 y = execute(a[1]->nnext); 1658 u = atan2(getfval(x), getfval(y)); 1659 tempfree(y); 1660 nextarg = nextarg->nnext; 1661 } 1662 break; 1663 case FCOMPL: 1664 u = ~((int)getfval(x)); 1665 break; 1666 case FAND: 1667 if (nextarg == 0) { 1668 WARNING("and requires two arguments; returning 0"); 1669 u = 0; 1670 break; 1671 } 1672 y = execute(a[1]->nnext); 1673 u = ((int)getfval(x)) & ((int)getfval(y)); 1674 tempfree(y); 1675 nextarg = nextarg->nnext; 1676 break; 1677 case FFOR: 1678 if (nextarg == 0) { 1679 WARNING("or requires two arguments; returning 0"); 1680 u = 0; 1681 break; 1682 } 1683 y = execute(a[1]->nnext); 1684 u = ((int)getfval(x)) | ((int)getfval(y)); 1685 tempfree(y); 1686 nextarg = nextarg->nnext; 1687 break; 1688 case FXOR: 1689 if (nextarg == 0) { 1690 WARNING("xor requires two arguments; returning 0"); 1691 u = 0; 1692 break; 1693 } 1694 y = execute(a[1]->nnext); 1695 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1696 tempfree(y); 1697 nextarg = nextarg->nnext; 1698 break; 1699 case FLSHIFT: 1700 if (nextarg == 0) { 1701 WARNING("lshift requires two arguments; returning 0"); 1702 u = 0; 1703 break; 1704 } 1705 y = execute(a[1]->nnext); 1706 u = ((int)getfval(x)) << ((int)getfval(y)); 1707 tempfree(y); 1708 nextarg = nextarg->nnext; 1709 break; 1710 case FRSHIFT: 1711 if (nextarg == 0) { 1712 WARNING("rshift requires two arguments; returning 
0"); 1713 u = 0; 1714 break; 1715 } 1716 y = execute(a[1]->nnext); 1717 u = ((int)getfval(x)) >> ((int)getfval(y)); 1718 tempfree(y); 1719 nextarg = nextarg->nnext; 1720 break; 1721 case FSYSTEM: 1722 fflush(stdout); /* in case something is buffered already */ 1723 status = system(getsval(x)); 1724 u = status; 1725 if (status != -1) { 1726 if (WIFEXITED(status)) { 1727 u = WEXITSTATUS(status); 1728 } else if (WIFSIGNALED(status)) { 1729 u = WTERMSIG(status) + 256; 1730 #ifdef WCOREDUMP 1731 if (WCOREDUMP(status)) 1732 u += 256; 1733 #endif 1734 } else /* something else?!? */ 1735 u = 0; 1736 } 1737 break; 1738 case FRAND: 1739 /* random() returns numbers in [0..2^31-1] 1740 * in order to get a number in [0, 1), divide it by 2^31 1741 */ 1742 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1743 break; 1744 case FSRAND: 1745 if (isrec(x)) { /* no argument provided */ 1746 u = time(NULL); 1747 tmp = u; 1748 srandom((unsigned int) u); 1749 } else { 1750 u = getfval(x); 1751 tmp = u; 1752 srandom_deterministic((unsigned int) u); 1753 } 1754 u = srand_seed; 1755 srand_seed = tmp; 1756 break; 1757 case FTOUPPER: 1758 case FTOLOWER: 1759 if (t == FTOUPPER) 1760 buf = nawk_toupper(getsval(x)); 1761 else 1762 buf = nawk_tolower(getsval(x)); 1763 tempfree(x); 1764 x = gettemp(); 1765 setsval(x, buf); 1766 free(buf); 1767 return x; 1768 case FFLUSH: 1769 if (isrec(x) || strlen(getsval(x)) == 0) { 1770 flush_all(); /* fflush() or fflush("") -> all */ 1771 u = 0; 1772 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1773 u = EOF; 1774 else 1775 u = fflush(fp); 1776 break; 1777 case FMKTIME: 1778 memset(&tmbuf, 0, sizeof(tmbuf)); 1779 tm = &tmbuf; 1780 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 1781 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 1782 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 1783 switch (t) { 1784 case 6: 1785 tm->tm_isdst = -1; /* let mktime figure it out */ 1786 /* FALLTHROUGH */ 1787 case 7: 1788 tm->tm_year -= 1900; 1789 
tm->tm_mon--; 1790 u = mktime(tm); 1791 break; 1792 default: 1793 u = -1; 1794 break; 1795 } 1796 break; 1797 case FSYSTIME: 1798 u = time((time_t *) 0); 1799 break; 1800 case FSTRFTIME: 1801 /* strftime([format [,timestamp]]) */ 1802 if (nextarg) { 1803 y = execute(nextarg); 1804 nextarg = nextarg->nnext; 1805 tv = (time_t) getfval(y); 1806 tempfree(y); 1807 } else 1808 tv = time((time_t *) 0); 1809 tm = localtime(&tv); 1810 if (tm == NULL) 1811 FATAL("bad time %ld", (long)tv); 1812 1813 if (isrec(x)) { 1814 /* format argument not provided, use default */ 1815 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1816 } else 1817 fmt = tostring(getsval(x)); 1818 1819 sz = 32; 1820 buf = NULL; 1821 do { 1822 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL) 1823 FATAL("out of memory in strftime"); 1824 sz *= 2; 1825 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1826 1827 y = gettemp(); 1828 setsval(y, buf); 1829 free(fmt); 1830 free(buf); 1831 1832 return y; 1833 default: /* can't happen */ 1834 FATAL("illegal function type %d", t); 1835 break; 1836 } 1837 tempfree(x); 1838 x = gettemp(); 1839 setfval(x, u); 1840 if (nextarg != NULL) { 1841 WARNING("warning: function has too many arguments"); 1842 for ( ; nextarg; nextarg = nextarg->nnext) { 1843 y = execute(nextarg); 1844 tempfree(y); 1845 } 1846 } 1847 return(x); 1848 } 1849 1850 Cell *printstat(Node **a, int n) /* print a[0] */ 1851 { 1852 Node *x; 1853 Cell *y; 1854 FILE *fp; 1855 1856 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1857 fp = stdout; 1858 else 1859 fp = redirect(ptoi(a[1]), a[2]); 1860 for (x = a[0]; x != NULL; x = x->nnext) { 1861 y = execute(x); 1862 fputs(getpssval(y), fp); 1863 tempfree(y); 1864 if (x->nnext == NULL) 1865 fputs(getsval(orsloc), fp); 1866 else 1867 fputs(getsval(ofsloc), fp); 1868 } 1869 if (a[1] != NULL) 1870 fflush(fp); 1871 if (ferror(fp)) 1872 FATAL("write error on %s", filename(fp)); 1873 return(True); 1874 } 1875 1876 Cell *nullproc(Node 
**a, int n) 1877 { 1878 return 0; 1879 } 1880 1881 1882 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1883 { 1884 FILE *fp; 1885 Cell *x; 1886 char *fname; 1887 1888 x = execute(b); 1889 fname = getsval(x); 1890 fp = openfile(a, fname, NULL); 1891 if (fp == NULL) 1892 FATAL("can't open file %s", fname); 1893 tempfree(x); 1894 return fp; 1895 } 1896 1897 struct files { 1898 FILE *fp; 1899 const char *fname; 1900 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1901 } *files; 1902 1903 size_t nfiles; 1904 1905 static void stdinit(void) /* in case stdin, etc., are not constants */ 1906 { 1907 nfiles = FOPEN_MAX; 1908 files = (struct files *) calloc(nfiles, sizeof(*files)); 1909 if (files == NULL) 1910 FATAL("can't allocate file memory for %zu files", nfiles); 1911 files[0].fp = stdin; 1912 files[0].fname = tostring("/dev/stdin"); 1913 files[0].mode = LT; 1914 files[1].fp = stdout; 1915 files[1].fname = tostring("/dev/stdout"); 1916 files[1].mode = GT; 1917 files[2].fp = stderr; 1918 files[2].fname = tostring("/dev/stderr"); 1919 files[2].mode = GT; 1920 } 1921 1922 FILE *openfile(int a, const char *us, bool *pnewflag) 1923 { 1924 const char *s = us; 1925 size_t i; 1926 int m; 1927 FILE *fp = NULL; 1928 1929 if (*s == '\0') 1930 FATAL("null file name in print or getline"); 1931 for (i = 0; i < nfiles; i++) 1932 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1933 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1934 a == FFLUSH)) { 1935 if (pnewflag) 1936 *pnewflag = false; 1937 return files[i].fp; 1938 } 1939 if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ 1940 return NULL; 1941 1942 for (i = 0; i < nfiles; i++) 1943 if (files[i].fp == NULL) 1944 break; 1945 if (i >= nfiles) { 1946 struct files *nf; 1947 size_t nnf = nfiles + FOPEN_MAX; 1948 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); 1949 if (nf == NULL) 1950 FATAL("cannot grow files for %s and %zu files", s, nnf); 1951 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1952 nfiles = nnf; 1953 files = nf; 1954 } 1955 fflush(stdout); /* force a semblance of order */ 1956 m = a; 1957 if (a == GT) { 1958 fp = fopen(s, "w"); 1959 } else if (a == APPEND) { 1960 fp = fopen(s, "a"); 1961 m = GT; /* so can mix > and >> */ 1962 } else if (a == '|') { /* output pipe */ 1963 fp = popen(s, "w"); 1964 } else if (a == LE) { /* input pipe */ 1965 fp = popen(s, "r"); 1966 } else if (a == LT) { /* getline <file */ 1967 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1968 } else /* can't happen */ 1969 FATAL("illegal redirection %d", a); 1970 if (fp != NULL) { 1971 files[i].fname = tostring(s); 1972 files[i].fp = fp; 1973 files[i].mode = m; 1974 if (pnewflag) 1975 *pnewflag = true; 1976 if (fp != stdin && fp != stdout && fp != stderr) 1977 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1978 } 1979 return fp; 1980 } 1981 1982 const char *filename(FILE *fp) 1983 { 1984 size_t i; 1985 1986 for (i = 0; i < nfiles; i++) 1987 if (fp == files[i].fp) 1988 return files[i].fname; 1989 return "???"; 1990 } 1991 1992 Cell *closefile(Node **a, int n) 1993 { 1994 Cell *x; 1995 size_t i; 1996 bool stat; 1997 1998 x = execute(a[0]); 1999 getsval(x); 2000 stat = true; 2001 for (i = 0; i < nfiles; i++) { 2002 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2003 continue; 2004 if (files[i].mode == GT || files[i].mode == '|') 2005 fflush(files[i].fp); 2006 if (ferror(files[i].fp)) { 2007 if ((files[i].mode == GT && files[i].fp != stderr) 2008 || files[i].mode == '|') 2009 FATAL("write error on %s", files[i].fname); 2010 else 2011 WARNING("i/o error 
occurred on %s", files[i].fname); 2012 } 2013 if (files[i].fp == stdin || files[i].fp == stdout || 2014 files[i].fp == stderr) 2015 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2016 else if (files[i].mode == '|' || files[i].mode == LE) 2017 stat = pclose(files[i].fp) == -1; 2018 else 2019 stat = fclose(files[i].fp) == EOF; 2020 if (stat) 2021 WARNING("i/o error occurred closing %s", files[i].fname); 2022 xfree(files[i].fname); 2023 files[i].fname = NULL; /* watch out for ref thru this */ 2024 files[i].fp = NULL; 2025 break; 2026 } 2027 tempfree(x); 2028 x = gettemp(); 2029 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2030 return(x); 2031 } 2032 2033 void closeall(void) 2034 { 2035 size_t i; 2036 bool stat = false; 2037 2038 for (i = 0; i < nfiles; i++) { 2039 if (! files[i].fp) 2040 continue; 2041 if (files[i].mode == GT || files[i].mode == '|') 2042 fflush(files[i].fp); 2043 if (ferror(files[i].fp)) { 2044 if ((files[i].mode == GT && files[i].fp != stderr) 2045 || files[i].mode == '|') 2046 FATAL("write error on %s", files[i].fname); 2047 else 2048 WARNING("i/o error occurred on %s", files[i].fname); 2049 } 2050 if (files[i].fp == stdin || files[i].fp == stdout || 2051 files[i].fp == stderr) 2052 continue; 2053 if (files[i].mode == '|' || files[i].mode == LE) 2054 stat = pclose(files[i].fp) == -1; 2055 else 2056 stat = fclose(files[i].fp) == EOF; 2057 if (stat) 2058 WARNING("i/o error occurred while closing %s", files[i].fname); 2059 } 2060 } 2061 2062 static void flush_all(void) 2063 { 2064 size_t i; 2065 2066 for (i = 0; i < nfiles; i++) 2067 if (files[i].fp) 2068 fflush(files[i].fp); 2069 } 2070 2071 void backsub(char **pb_ptr, const char **sptr_ptr); 2072 2073 Cell *sub(Node **a, int nnn) /* substitute command */ 2074 { 2075 const char *sptr, *q; 2076 Cell *x, *y, *result; 2077 char *t, *buf, *pb; 2078 fa *pfa; 2079 int bufsz = recsize; 2080 2081 if ((buf = (char *) malloc(bufsz)) == NULL) 2082 FATAL("out of memory in sub"); 2083 x = execute(a[3]); /* 
target string */ 2084 t = getsval(x); 2085 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2086 pfa = (fa *) a[1]; /* regular expression */ 2087 else { 2088 y = execute(a[1]); 2089 pfa = makedfa(getsval(y), 1); 2090 tempfree(y); 2091 } 2092 y = execute(a[2]); /* replacement string */ 2093 result = False; 2094 if (pmatch(pfa, t)) { 2095 sptr = t; 2096 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2097 pb = buf; 2098 while (sptr < patbeg) 2099 *pb++ = *sptr++; 2100 sptr = getsval(y); 2101 while (*sptr != '\0') { 2102 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2103 if (*sptr == '\\') { 2104 backsub(&pb, &sptr); 2105 } else if (*sptr == '&') { 2106 sptr++; 2107 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2108 for (q = patbeg; q < patbeg+patlen; ) 2109 *pb++ = *q++; 2110 } else 2111 *pb++ = *sptr++; 2112 } 2113 *pb = '\0'; 2114 if (pb > buf + bufsz) 2115 FATAL("sub result1 %.30s too big; can't happen", buf); 2116 sptr = patbeg + patlen; 2117 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2118 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2119 while ((*pb++ = *sptr++) != '\0') 2120 continue; 2121 } 2122 if (pb > buf + bufsz) 2123 FATAL("sub result2 %.30s too big; can't happen", buf); 2124 setsval(x, buf); /* BUG: should be able to avoid copy */ 2125 result = True; 2126 } 2127 tempfree(x); 2128 tempfree(y); 2129 free(buf); 2130 return result; 2131 } 2132 2133 Cell *gsub(Node **a, int nnn) /* global substitute */ 2134 { 2135 Cell *x, *y; 2136 char *rptr, *pb; 2137 const char *q, *t, *sptr; 2138 char *buf; 2139 fa *pfa; 2140 int mflag, tempstat, num; 2141 int bufsz = recsize; 2142 2143 if ((buf = (char *) malloc(bufsz)) == NULL) 2144 FATAL("out of memory in gsub"); 2145 mflag = 0; /* if mflag == 0, can replace empty string */ 2146 num = 0; 2147 x = execute(a[3]); /* target string */ 2148 t = getsval(x); 2149 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2150 pfa = (fa *) a[1]; /* 
regular expression */ 2151 else { 2152 y = execute(a[1]); 2153 pfa = makedfa(getsval(y), 1); 2154 tempfree(y); 2155 } 2156 y = execute(a[2]); /* replacement string */ 2157 if (pmatch(pfa, t)) { 2158 tempstat = pfa->initstat; 2159 pfa->initstat = 2; 2160 pb = buf; 2161 rptr = getsval(y); 2162 do { 2163 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2164 if (mflag == 0) { /* can replace empty */ 2165 num++; 2166 sptr = rptr; 2167 while (*sptr != '\0') { 2168 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2169 if (*sptr == '\\') { 2170 backsub(&pb, &sptr); 2171 } else if (*sptr == '&') { 2172 sptr++; 2173 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2174 for (q = patbeg; q < patbeg+patlen; ) 2175 *pb++ = *q++; 2176 } else 2177 *pb++ = *sptr++; 2178 } 2179 } 2180 if (*t == '\0') /* at end */ 2181 goto done; 2182 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2183 *pb++ = *t++; 2184 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2185 FATAL("gsub result0 %.30s too big; can't happen", buf); 2186 mflag = 0; 2187 } 2188 else { /* matched nonempty string */ 2189 num++; 2190 sptr = t; 2191 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2192 while (sptr < patbeg) 2193 *pb++ = *sptr++; 2194 sptr = rptr; 2195 while (*sptr != '\0') { 2196 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2197 if (*sptr == '\\') { 2198 backsub(&pb, &sptr); 2199 } else if (*sptr == '&') { 2200 sptr++; 2201 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2202 for (q = patbeg; q < patbeg+patlen; ) 2203 *pb++ = *q++; 2204 } else 2205 *pb++ = *sptr++; 2206 } 2207 t = patbeg + patlen; 2208 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2209 goto done; 2210 if (pb > buf + bufsz) 2211 FATAL("gsub result1 %.30s too big; can't happen", buf); 2212 mflag = 1; 2213 } 2214 } while (pmatch(pfa,t)); 2215 sptr = t; 2216 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2217 while ((*pb++ = *sptr++) != '\0') 
2218 continue; 2219 done: if (pb < buf + bufsz) 2220 *pb = '\0'; 2221 else if (*(pb-1) != '\0') 2222 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2223 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2224 pfa->initstat = tempstat; 2225 } 2226 tempfree(x); 2227 tempfree(y); 2228 x = gettemp(); 2229 x->tval = NUM; 2230 x->fval = num; 2231 free(buf); 2232 return(x); 2233 } 2234 2235 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2236 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2237 { 2238 Cell *x, *y, *res, *h; 2239 char *rptr; 2240 const char *sptr; 2241 char *buf, *pb; 2242 const char *t, *q; 2243 fa *pfa; 2244 int mflag, tempstat, num, whichm; 2245 int bufsz = recsize; 2246 2247 if ((buf = malloc(bufsz)) == NULL) 2248 FATAL("out of memory in gensub"); 2249 mflag = 0; /* if mflag == 0, can replace empty string */ 2250 num = 0; 2251 x = execute(a[4]); /* source string */ 2252 t = getsval(x); 2253 res = copycell(x); /* target string - initially copy of source */ 2254 res->csub = CTEMP; /* result values are temporary */ 2255 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2256 pfa = (fa *) a[1]; /* regular expression */ 2257 else { 2258 y = execute(a[1]); 2259 pfa = makedfa(getsval(y), 1); 2260 tempfree(y); 2261 } 2262 y = execute(a[2]); /* replacement string */ 2263 h = execute(a[3]); /* which matches should be replaced */ 2264 sptr = getsval(h); 2265 if (sptr[0] == 'g' || sptr[0] == 'G') 2266 whichm = -1; 2267 else { 2268 /* 2269 * The specified number is index of replacement, starting 2270 * from 1. GNU awk treats index lower than 0 same as 2271 * 1, we do same for compatibility. 
2272 */ 2273 whichm = (int) getfval(h) - 1; 2274 if (whichm < 0) 2275 whichm = 0; 2276 } 2277 tempfree(h); 2278 2279 if (pmatch(pfa, t)) { 2280 char *sl; 2281 2282 tempstat = pfa->initstat; 2283 pfa->initstat = 2; 2284 pb = buf; 2285 rptr = getsval(y); 2286 /* 2287 * XXX if there are any backreferences in subst string, 2288 * complain now. 2289 */ 2290 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2291 if (strchr("0123456789", sl[1])) { 2292 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2293 } 2294 } 2295 2296 do { 2297 if (whichm >= 0 && whichm != num) { 2298 num++; 2299 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2300 2301 /* copy the part of string up to and including 2302 * match to output buffer */ 2303 while (t < patbeg + patlen) 2304 *pb++ = *t++; 2305 continue; 2306 } 2307 2308 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2309 if (mflag == 0) { /* can replace empty */ 2310 num++; 2311 sptr = rptr; 2312 while (*sptr != 0) { 2313 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2314 if (*sptr == '\\') { 2315 backsub(&pb, &sptr); 2316 } else if (*sptr == '&') { 2317 sptr++; 2318 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2319 for (q = patbeg; q < patbeg+patlen; ) 2320 *pb++ = *q++; 2321 } else 2322 *pb++ = *sptr++; 2323 } 2324 } 2325 if (*t == 0) /* at end */ 2326 goto done; 2327 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2328 *pb++ = *t++; 2329 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2330 FATAL("gensub result0 %.30s too big; can't happen", buf); 2331 mflag = 0; 2332 } 2333 else { /* matched nonempty string */ 2334 num++; 2335 sptr = t; 2336 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2337 while (sptr < patbeg) 2338 *pb++ = *sptr++; 2339 sptr = rptr; 2340 while (*sptr != 0) { 2341 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2342 if (*sptr == '\\') { 2343 backsub(&pb, &sptr); 2344 
} else if (*sptr == '&') { 2345 sptr++; 2346 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2347 for (q = patbeg; q < patbeg+patlen; ) 2348 *pb++ = *q++; 2349 } else 2350 *pb++ = *sptr++; 2351 } 2352 t = patbeg + patlen; 2353 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2354 goto done; 2355 if (pb > buf + bufsz) 2356 FATAL("gensub result1 %.30s too big; can't happen", buf); 2357 mflag = 1; 2358 } 2359 } while (pmatch(pfa,t)); 2360 sptr = t; 2361 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2362 while ((*pb++ = *sptr++) != 0) 2363 ; 2364 done: if (pb > buf + bufsz) 2365 FATAL("gensub result2 %.30s too big; can't happen", buf); 2366 *pb = '\0'; 2367 setsval(res, buf); 2368 pfa->initstat = tempstat; 2369 } 2370 tempfree(x); 2371 tempfree(y); 2372 free(buf); 2373 return(res); 2374 } 2375 2376 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2377 { /* sptr[0] == '\\' */ 2378 char *pb = *pb_ptr; 2379 const char *sptr = *sptr_ptr; 2380 2381 if (sptr[1] == '\\') { 2382 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2383 *pb++ = '\\'; 2384 *pb++ = '&'; 2385 sptr += 4; 2386 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2387 *pb++ = '\\'; 2388 sptr += 2; 2389 } else if (do_posix) { /* \\x -> \x */ 2390 sptr++; 2391 *pb++ = *sptr++; 2392 } else { /* \\x -> \\x */ 2393 *pb++ = *sptr++; 2394 *pb++ = *sptr++; 2395 } 2396 } else if (sptr[1] == '&') { /* literal & */ 2397 sptr++; 2398 *pb++ = *sptr++; 2399 } else /* literal \ */ 2400 *pb++ = *sptr++; 2401 2402 *pb_ptr = pb; 2403 *sptr_ptr = sptr; 2404 } 2405