1 /* $OpenBSD: run.c,v 1.68 2020/08/28 16:29:16 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <ctype.h> 29 #include <errno.h> 30 #include <wchar.h> 31 #include <wctype.h> 32 #include <fcntl.h> 33 #include <setjmp.h> 34 #include <limits.h> 35 #include <math.h> 36 #include <string.h> 37 #include <stdlib.h> 38 #include <time.h> 39 #include <sys/types.h> 40 #include <sys/wait.h> 41 #include "awk.h" 42 #include "awkgram.tab.h" 43 44 static void stdinit(void); 45 static void flush_all(void); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? *pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = reallocarray(frame, (nframe += 100), sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! */ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 412 if ((buf = malloc(bufsize)) == NULL) 413 FATAL("out of memory in getline"); 414 415 fflush(stdout); /* in case someone is waiting for a prompt */ 416 r = gettemp(); 417 if (a[1] != NULL) { /* getline < file */ 418 x = execute(a[2]); /* filename */ 419 mode = ptoi(a[1]); 420 if (mode == '|') /* input pipe */ 421 mode = LE; /* arbitrary flag */ 422 fp = openfile(mode, getsval(x), &newflag); 423 tempfree(x); 424 if (fp == NULL) 425 n = -1; 426 else 427 n = readrec(&buf, &bufsize, fp, newflag); 428 if (n <= 0) { 429 ; 430 } else if (a[0] != NULL) { /* getline var <file */ 431 x = execute(a[0]); 432 setsval(x, buf); 433 if (is_number(x->sval)) { 434 x->fval = atof(x->sval); 435 x->tval |= NUM; 436 } 437 tempfree(x); 438 } else { /* getline <file */ 439 setsval(fldtab[0], buf); 440 if (is_number(fldtab[0]->sval)) { 441 fldtab[0]->fval = atof(fldtab[0]->sval); 442 fldtab[0]->tval |= NUM; 443 } 444 } 445 } else { /* bare getline; use current input */ 446 if (a[0] == NULL) /* getline */ 447 n = getrec(&record, &recsize, true); 448 else { /* getline var */ 449 n = getrec(&buf, &bufsize, false); 450 x = execute(a[0]); 451 setsval(x, buf); 452 if (is_number(x->sval)) { 453 x->fval = atof(x->sval); 454 x->tval |= NUM; 455 } 456 tempfree(x); 457 } 458 } 459 setfval(r, (Awkfloat) n); 460 free(buf); 461 return r; 462 } 463 464 Cell *getnf(Node **a, int n) /* get NF */ 465 { 466 if (!donefld) 467 fldbld(); 468 return (Cell *) a[0]; 469 } 470 471 static char * 472 makearraystring(Node *p, const char *func) 473 { 474 char *buf; 475 int bufsz = recsize; 476 size_t blen; 477 478 if ((buf = malloc(bufsz)) == NULL) { 479 FATAL("%s: out of memory", func); 480 } 481 482 blen = 0; 483 buf[blen] = '\0'; 484 485 for (; p; p = p->nnext) { 486 Cell *x = execute(p); /* expr */ 487 char *s = getsval(x); 488 size_t seplen = strlen(getsval(subseploc)); 489 size_t nsub = p->nnext ? seplen : 0; 490 size_t slen = strlen(s); 491 size_t tlen = blen + slen + nsub; 492 493 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 494 FATAL("%s: out of memory %s[%s...]", 495 func, x->nval, buf); 496 } 497 memcpy(buf + blen, s, slen); 498 if (nsub) { 499 memcpy(buf + blen + slen, *SUBSEP, nsub); 500 } 501 buf[tlen] = '\0'; 502 blen = tlen; 503 tempfree(x); 504 } 505 return buf; 506 } 507 508 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 509 { 510 Cell *x, *z; 511 char *buf; 512 513 x = execute(a[0]); /* Cell* for symbol table */ 514 buf = makearraystring(a[1], __func__); 515 if (!isarr(x)) { 516 DPRINTF("making %s into an array\n", NN(x->nval)); 517 if (freeable(x)) 518 xfree(x->sval); 519 x->tval &= ~(STR|NUM|DONTFREE); 520 x->tval |= ARR; 521 x->sval = (char *) makesymtab(NSYMTAB); 522 } 523 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 524 z->ctype = OCELL; 525 z->csub = CVAR; 526 tempfree(x); 527 free(buf); 528 return(z); 529 } 530 531 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 532 { 533 Cell *x; 534 535 x = execute(a[0]); /* Cell* for symbol table */ 536 if (x == symtabloc) { 537 FATAL("cannot delete SYMTAB or its elements"); 538 } 539 if (!isarr(x)) 540 return True; 541 if (a[1] == NULL) { /* delete the elements, not the table */ 542 freesymtab(x); 543 x->tval &= ~STR; 544 x->tval |= ARR; 545 x->sval = (char *) makesymtab(NSYMTAB); 546 } else { 547 char *buf = makearraystring(a[1], __func__); 548 freeelem(x, buf); 549 free(buf); 550 } 551 tempfree(x); 552 return True; 553 } 554 555 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 556 { 557 Cell *ap, *k; 558 char *buf; 559 560 ap = execute(a[1]); /* array name */ 561 if (!isarr(ap)) { 562 DPRINTF("making %s into an array\n", ap->nval); 563 if (freeable(ap)) 564 xfree(ap->sval); 565 ap->tval &= ~(STR|NUM|DONTFREE); 566 ap->tval |= ARR; 567 ap->sval = (char *) makesymtab(NSYMTAB); 568 } 569 buf = makearraystring(a[0], __func__); 570 k = lookup(buf, (Array *) ap->sval); 571 tempfree(ap); 572 free(buf); 573 if (k == NULL) 574 return(False); 575 else 576 return(True); 577 } 578 579 580 Cell *matchop(Node **a, int n) /* ~ and match() */ 581 { 582 Cell *x, *y; 583 char *s, *t; 584 int i; 585 fa *pfa; 586 int (*mf)(fa *, const char *) = match, mode = 0; 587 588 if (n == MATCHFCN) { 589 mf = pmatch; 590 mode = 1; 591 } 592 x = execute(a[1]); /* a[1] = target text */ 593 s = getsval(x); 594 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 595 i = (*mf)((fa *) a[2], s); 596 else { 597 y = execute(a[2]); /* a[2] = regular expr */ 598 t = getsval(y); 599 pfa = makedfa(t, mode); 600 i = (*mf)(pfa, s); 601 tempfree(y); 602 } 603 tempfree(x); 604 if (n == MATCHFCN) { 605 int start = patbeg - s + 1; 606 if (patlen < 0) 607 start = 0; 608 setfval(rstartloc, (Awkfloat) start); 609 setfval(rlengthloc, (Awkfloat) patlen); 610 x = gettemp(); 611 x->tval = NUM; 612 x->fval = start; 613 return x; 614 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 615 return(True); 616 else 617 return(False); 618 } 619 620 621 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 622 { 623 Cell *x, *y; 624 int i; 625 626 x = execute(a[0]); 627 i = istrue(x); 628 tempfree(x); 629 switch (n) { 630 case BOR: 631 if (i) return(True); 632 y = execute(a[1]); 633 i = istrue(y); 634 tempfree(y); 635 if (i) return(True); 636 else return(False); 637 case AND: 638 if ( !i ) return(False); 639 y = execute(a[1]); 640 i = istrue(y); 641 tempfree(y); 642 if (i) return(True); 643 else return(False); 644 case NOT: 645 if (i) return(False); 646 else return(True); 647 default: /* can't happen */ 648 FATAL("unknown boolean operator %d", n); 649 } 650 return 0; /*NOTREACHED*/ 651 } 652 653 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 654 { 655 int i; 656 Cell *x, *y; 657 Awkfloat j; 658 659 x = execute(a[0]); 660 y = execute(a[1]); 661 if (x->tval&NUM && y->tval&NUM) { 662 j = x->fval - y->fval; 663 i = j<0? -1: (j>0? 1: 0); 664 } else { 665 i = strcmp(getsval(x), getsval(y)); 666 } 667 tempfree(x); 668 tempfree(y); 669 switch (n) { 670 case LT: if (i<0) return(True); 671 else return(False); 672 case LE: if (i<=0) return(True); 673 else return(False); 674 case NE: if (i!=0) return(True); 675 else return(False); 676 case EQ: if (i == 0) return(True); 677 else return(False); 678 case GE: if (i>=0) return(True); 679 else return(False); 680 case GT: if (i>0) return(True); 681 else return(False); 682 default: /* can't happen */ 683 FATAL("unknown relational operator %d", n); 684 } 685 return 0; /*NOTREACHED*/ 686 } 687 688 void tfree(Cell *a) /* free a tempcell */ 689 { 690 if (freeable(a)) { 691 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 692 xfree(a->sval); 693 } 694 if (a == tmps) 695 FATAL("tempcell list is curdled"); 696 a->cnext = tmps; 697 tmps = a; 698 } 699 700 Cell *gettemp(void) /* get a tempcell */ 701 { int i; 702 Cell *x; 703 704 if (!tmps) { 705 tmps = calloc(100, sizeof(*tmps)); 706 if (!tmps) 707 FATAL("out of space for temporaries"); 708 for (i = 1; i < 100; i++) 709 tmps[i-1].cnext = &tmps[i]; 710 tmps[i-1].cnext = NULL; 711 } 712 x = tmps; 713 tmps = x->cnext; 714 *x = tempcell; 715 return(x); 716 } 717 718 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 719 { 720 Awkfloat val; 721 Cell *x; 722 int m; 723 char *s; 724 725 x = execute(a[0]); 726 val = getfval(x); /* freebsd: defend against super large field numbers */ 727 if ((Awkfloat)INT_MAX < val) 728 FATAL("trying to access out of range field %s", x->nval); 729 m = (int) val; 730 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ 731 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 732 /* BUG: can x->nval ever be null??? */ 733 tempfree(x); 734 x = fieldadr(m); 735 x->ctype = OCELL; /* BUG? why are these needed? */ 736 x->csub = CFLD; 737 return(x); 738 } 739 740 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 741 { 742 int k, m, n; 743 char *s; 744 int temp; 745 Cell *x, *y, *z = NULL; 746 747 x = execute(a[0]); 748 y = execute(a[1]); 749 if (a[2] != NULL) 750 z = execute(a[2]); 751 s = getsval(x); 752 k = strlen(s) + 1; 753 if (k <= 1) { 754 tempfree(x); 755 tempfree(y); 756 if (a[2] != NULL) { 757 tempfree(z); 758 } 759 x = gettemp(); 760 setsval(x, ""); 761 return(x); 762 } 763 m = (int) getfval(y); 764 if (m <= 0) 765 m = 1; 766 else if (m > k) 767 m = k; 768 tempfree(y); 769 if (a[2] != NULL) { 770 n = (int) getfval(z); 771 tempfree(z); 772 } else 773 n = k - 1; 774 if (n < 0) 775 n = 0; 776 else if (n > k - m) 777 n = k - m; 778 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 779 y = gettemp(); 780 temp = s[n+m-1]; /* with thanks to John Linderman */ 781 s[n+m-1] = '\0'; 782 setsval(y, s + m - 1); 783 s[n+m-1] = temp; 784 tempfree(x); 785 return(y); 786 } 787 788 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 789 { 790 Cell *x, *y, *z; 791 char *s1, *s2, *p1, *p2, *q; 792 Awkfloat v = 0.0; 793 794 x = execute(a[0]); 795 s1 = getsval(x); 796 y = execute(a[1]); 797 s2 = getsval(y); 798 799 z = gettemp(); 800 for (p1 = s1; *p1 != '\0'; p1++) { 801 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 802 continue; 803 if (*p2 == '\0') { 804 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 805 break; 806 } 807 } 808 tempfree(x); 809 tempfree(y); 810 setfval(z, v); 811 return(z); 812 } 813 814 #define MAXNUMSIZE 50 815 816 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 817 { 818 char *fmt; 819 char *p, *t; 820 const char *os; 821 Cell *x; 822 int flag = 0, n; 823 int fmtwd; /* format width */ 824 int fmtsz = recsize; 825 char *buf = *pbuf; 826 int bufsize = *pbufsize; 827 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 828 #define BUFSZ(a) (bufsize - ((a) - buf)) 829 830 static bool first = true; 831 static bool have_a_format = false; 832 833 if (first) { 834 char xbuf[100]; 835 836 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 837 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 838 first = false; 839 } 840 841 os = s; 842 p = buf; 843 if ((fmt = malloc(fmtsz)) == NULL) 844 FATAL("out of memory in format()"); 845 while (*s) { 846 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 847 if (*s != '%') { 848 *p++ = *s++; 849 continue; 850 } 851 if (*(s+1) == '%') { 852 *p++ = '%'; 853 s += 2; 854 continue; 855 } 856 /* have to be real careful in case this is a huge number, eg, %100000d */ 857 fmtwd = atoi(s+1); 858 if (fmtwd < 0) 859 fmtwd = -fmtwd; 860 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 861 for (t = fmt; (*t++ = *s) != '\0'; s++) { 862 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 863 FATAL("format item %.30s... ran format() out of memory", os); 864 /* Ignore size specifiers */ 865 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 866 t--; 867 continue; 868 } 869 if (isalpha((uschar)*s)) 870 break; 871 if (*s == '$') { 872 FATAL("'$' not permitted in awk formats"); 873 } 874 if (*s == '*') { 875 if (a == NULL) { 876 FATAL("not enough args in printf(%s)", os); 877 } 878 x = execute(a); 879 a = a->nnext; 880 snprintf(t - 1, FMTSZ(t - 1), 881 "%d", fmtwd=(int) getfval(x)); 882 if (fmtwd < 0) 883 fmtwd = -fmtwd; 884 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 885 t = fmt + strlen(fmt); 886 tempfree(x); 887 } 888 } 889 *t = '\0'; 890 if (fmtwd < 0) 891 fmtwd = -fmtwd; 892 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 893 switch (*s) { 894 case 'a': case 'A': 895 if (have_a_format) 896 flag = *s; 897 else 898 flag = 'f'; 899 break; 900 case 'f': case 'e': case 'g': case 'E': case 'G': 901 flag = 'f'; 902 break; 903 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 904 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 905 *(t-1) = 'j'; 906 *t = *s; 907 *++t = '\0'; 908 break; 909 case 's': 910 flag = 's'; 911 break; 912 case 'c': 913 flag = 'c'; 914 break; 915 default: 916 WARNING("weird printf conversion %s", fmt); 917 flag = '?'; 918 break; 919 } 920 if (a == NULL) 921 FATAL("not enough args in printf(%s)", os); 922 x = execute(a); 923 a = a->nnext; 924 n = MAXNUMSIZE; 925 if (fmtwd > n) 926 n = fmtwd; 927 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 928 switch (flag) { 929 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 930 t = getsval(x); 931 n = strlen(t); 932 if (fmtwd > n) 933 n = fmtwd; 934 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 935 p += strlen(p); 936 snprintf(p, BUFSZ(p), "%s", t); 937 break; 938 case 'a': 939 case 'A': 940 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 941 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 942 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 943 case 's': 944 t = getsval(x); 945 n = strlen(t); 946 if (fmtwd > n) 947 n = fmtwd; 948 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 949 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 950 snprintf(p, BUFSZ(p), fmt, t); 951 break; 952 case 'c': 953 if (isnum(x)) { 954 if ((int)getfval(x)) 955 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 956 else { 957 *p++ = '\0'; /* explicit null byte */ 958 *p = '\0'; /* next output will start here */ 959 } 960 } else 961 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 962 break; 963 default: 964 FATAL("can't happen: bad conversion %c in format()", flag); 965 } 966 tempfree(x); 967 p += strlen(p); 968 s++; 969 } 970 *p = '\0'; 971 free(fmt); 972 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 973 execute(a); 974 *pbuf = buf; 975 *pbufsize = bufsize; 976 return p - buf; 977 } 978 979 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 980 { 981 Cell *x; 982 Node *y; 983 char *buf; 984 int bufsz=3*recsize; 985 986 if ((buf = malloc(bufsz)) == NULL) 987 FATAL("out of memory in awksprintf"); 988 y = a[0]->nnext; 989 x = execute(a[0]); 990 if (format(&buf, &bufsz, getsval(x), y) == -1) 991 FATAL("sprintf string %.30s... too long. can't happen.", buf); 992 tempfree(x); 993 x = gettemp(); 994 x->sval = buf; 995 x->tval = STR; 996 return(x); 997 } 998 999 Cell *awkprintf(Node **a, int n) /* printf */ 1000 { /* a[0] is list of args, starting with format string */ 1001 /* a[1] is redirection operator, a[2] is redirection file */ 1002 FILE *fp; 1003 Cell *x; 1004 Node *y; 1005 char *buf; 1006 int len; 1007 int bufsz=3*recsize; 1008 1009 if ((buf = malloc(bufsz)) == NULL) 1010 FATAL("out of memory in awkprintf"); 1011 y = a[0]->nnext; 1012 x = execute(a[0]); 1013 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1014 FATAL("printf string %.30s... too long. can't happen.", buf); 1015 tempfree(x); 1016 if (a[1] == NULL) { 1017 /* fputs(buf, stdout); */ 1018 fwrite(buf, len, 1, stdout); 1019 if (ferror(stdout)) 1020 FATAL("write error on stdout"); 1021 } else { 1022 fp = redirect(ptoi(a[1]), a[2]); 1023 /* fputs(buf, fp); */ 1024 fwrite(buf, len, 1, fp); 1025 fflush(fp); 1026 if (ferror(fp)) 1027 FATAL("write error on %s", filename(fp)); 1028 } 1029 free(buf); 1030 return(True); 1031 } 1032 1033 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1034 { 1035 Awkfloat i, j = 0; 1036 double v; 1037 Cell *x, *y, *z; 1038 1039 x = execute(a[0]); 1040 i = getfval(x); 1041 tempfree(x); 1042 if (n != UMINUS && n != UPLUS) { 1043 y = execute(a[1]); 1044 j = getfval(y); 1045 tempfree(y); 1046 } 1047 z = gettemp(); 1048 switch (n) { 1049 case ADD: 1050 i += j; 1051 break; 1052 case MINUS: 1053 i -= j; 1054 break; 1055 case MULT: 1056 i *= j; 1057 break; 1058 case DIVIDE: 1059 if (j == 0) 1060 FATAL("division by zero"); 1061 i /= j; 1062 break; 1063 case MOD: 1064 if (j == 0) 1065 FATAL("division by zero in mod"); 1066 modf(i/j, &v); 1067 i = i - j * v; 1068 break; 1069 case UMINUS: 1070 i = -i; 1071 break; 1072 case UPLUS: /* handled by getfval(), above */ 1073 break; 1074 case POWER: 1075 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1076 i = ipow(i, (int) j); 1077 else { 1078 errno = 0; 1079 i = errcheck(pow(i, j), "pow"); 1080 } 1081 break; 1082 default: /* can't happen */ 1083 FATAL("illegal arithmetic operator %d", n); 1084 } 1085 setfval(z, i); 1086 return(z); 1087 } 1088 1089 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1090 { 1091 double v; 1092 1093 if (n <= 0) 1094 return 1; 1095 v = ipow(x, n/2); 1096 if (n % 2 == 0) 1097 return v * v; 1098 else 1099 return x * v * v; 1100 } 1101 1102 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1103 { 1104 Cell *x, *z; 1105 int k; 1106 Awkfloat xf; 1107 1108 x = execute(a[0]); 1109 xf = getfval(x); 1110 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1111 if (n == PREINCR || n == PREDECR) { 1112 setfval(x, xf + k); 1113 return(x); 1114 } 1115 z = gettemp(); 1116 setfval(z, xf); 1117 setfval(x, xf + k); 1118 tempfree(x); 1119 return(z); 1120 } 1121 1122 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1123 { /* this is subtle; don't muck with it. */ 1124 Cell *x, *y; 1125 Awkfloat xf, yf; 1126 double v; 1127 1128 y = execute(a[1]); 1129 x = execute(a[0]); 1130 if (n == ASSIGN) { /* ordinary assignment */ 1131 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1132 ; /* self-assignment: leave alone unless it's a field or NF */ 1133 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1134 setsval(x, getsval(y)); 1135 x->fval = getfval(y); 1136 x->tval |= NUM; 1137 } 1138 else if (isstr(y)) 1139 setsval(x, getsval(y)); 1140 else if (isnum(y)) 1141 setfval(x, getfval(y)); 1142 else 1143 funnyvar(y, "read value of"); 1144 tempfree(y); 1145 return(x); 1146 } 1147 xf = getfval(x); 1148 yf = getfval(y); 1149 switch (n) { 1150 case ADDEQ: 1151 xf += yf; 1152 break; 1153 case SUBEQ: 1154 xf -= yf; 1155 break; 1156 case MULTEQ: 1157 xf *= yf; 1158 break; 1159 case DIVEQ: 1160 if (yf == 0) 1161 FATAL("division by zero in /="); 1162 xf /= yf; 1163 break; 1164 case MODEQ: 1165 if (yf == 0) 1166 FATAL("division by zero in %%="); 1167 modf(xf/yf, &v); 1168 xf = xf - yf * v; 1169 break; 1170 case POWEQ: 1171 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1172 xf = ipow(xf, (int) yf); 1173 else { 1174 errno = 0; 1175 xf = errcheck(pow(xf, yf), "pow"); 1176 } 1177 break; 1178 default: 1179 FATAL("illegal assignment operator %d", n); 1180 break; 1181 } 1182 tempfree(y); 1183 setfval(x, xf); 1184 return(x); 1185 } 1186 1187 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1188 { 1189 Cell *x, *y, *z; 1190 int n1, n2; 1191 char *s = NULL; 1192 int ssz = 0; 1193 1194 x = execute(a[0]); 1195 n1 = strlen(getsval(x)); 1196 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1197 memcpy(s, x->sval, n1); 1198 1199 y = execute(a[1]); 1200 n2 = strlen(getsval(y)); 1201 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1202 memcpy(s + n1, y->sval, n2); 1203 s[n1 + n2] = '\0'; 1204 1205 tempfree(x); 1206 tempfree(y); 1207 1208 z = gettemp(); 1209 z->sval = s; 1210 z->tval = STR; 1211 1212 return(z); 1213 } 1214 1215 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1216 { 1217 Cell *x; 1218 1219 if (a[0] == NULL) 1220 x = execute(a[1]); 1221 else { 1222 x = execute(a[0]); 1223 if (istrue(x)) { 1224 tempfree(x); 1225 x = execute(a[1]); 1226 } 1227 } 1228 return x; 1229 } 1230 1231 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1232 { 1233 Cell *x; 1234 int pair; 1235 1236 pair = ptoi(a[3]); 1237 if (pairstack[pair] == 0) { 1238 x = execute(a[0]); 1239 if (istrue(x)) 1240 pairstack[pair] = 1; 1241 tempfree(x); 1242 } 1243 if (pairstack[pair] == 1) { 1244 x = execute(a[1]); 1245 if (istrue(x)) 1246 pairstack[pair] = 0; 1247 tempfree(x); 1248 x = execute(a[2]); 1249 return(x); 1250 } 1251 return(False); 1252 } 1253 1254 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1255 { 1256 Cell *x = NULL, *y, *ap; 1257 const char *s, *origs, *t; 1258 const char *fs = NULL; 1259 char *origfs = NULL; 1260 int sep; 1261 char temp, num[50]; 1262 int n, tempstat, arg3type; 1263 1264 y = execute(a[0]); /* source string */ 1265 origs = s = strdup(getsval(y)); 1266 if (s == NULL) 1267 FATAL("out of space in split"); 1268 arg3type = ptoi(a[3]); 1269 if (a[2] == NULL) /* fs string */ 1270 fs = getsval(fsloc); 1271 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1272 x = execute(a[2]); 1273 fs = origfs = strdup(getsval(x)); 1274 if (fs == NULL) 1275 FATAL("out of space in split"); 1276 tempfree(x); 1277 } else if (arg3type == REGEXPR) 1278 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1279 else 1280 FATAL("illegal type of split"); 1281 sep = *fs; 1282 ap = execute(a[1]); /* array name */ 1283 freesymtab(ap); 1284 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1285 ap->tval &= ~STR; 1286 ap->tval |= ARR; 1287 ap->sval = (char *) makesymtab(NSYMTAB); 1288 1289 n = 0; 1290 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1291 /* split(s, a, //); have to arrange that it looks like empty sep */ 1292 arg3type = 0; 1293 fs = ""; 1294 sep = 0; 1295 } 1296 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1297 fa *pfa; 1298 if (arg3type == REGEXPR) { /* it's ready already */ 1299 pfa = (fa *) a[2]; 1300 } else { 1301 pfa = makedfa(fs, 1); 1302 } 1303 if (nematch(pfa,s)) { 1304 tempstat = pfa->initstat; 1305 pfa->initstat = 2; 1306 do { 1307 n++; 1308 snprintf(num, sizeof(num), "%d", n); 1309 temp = *patbeg; 1310 setptr(patbeg, '\0'); 1311 if (is_number(s)) 1312 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1313 else 1314 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1315 setptr(patbeg, temp); 1316 s = patbeg + patlen; 1317 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1318 n++; 1319 snprintf(num, sizeof(num), "%d", n); 1320 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1321 pfa->initstat = tempstat; 1322 goto spdone; 1323 } 1324 } while (nematch(pfa,s)); 1325 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1326 /* cf gsub and refldbld */ 1327 } 1328 n++; 1329 snprintf(num, sizeof(num), "%d", n); 1330 if (is_number(s)) 1331 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1332 else 1333 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1334 spdone: 1335 pfa = NULL; 1336 } else if (sep == ' ') { 1337 for (n = 0; ; ) { 1338 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1339 while (ISWS(*s)) 1340 s++; 1341 if (*s == '\0') 1342 break; 1343 n++; 1344 t = s; 1345 do 1346 s++; 1347 while (*s != '\0' && !ISWS(*s)); 1348 temp = *s; 1349 setptr(s, '\0'); 1350 snprintf(num, sizeof(num), "%d", n); 1351 if (is_number(t)) 1352 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1353 else 1354 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1355 setptr(s, temp); 1356 if (*s != '\0') 1357 s++; 1358 } 1359 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1360 for (n = 0; *s != '\0'; s++) { 1361 char buf[2]; 1362 n++; 1363 snprintf(num, sizeof(num), "%d", n); 1364 buf[0] = *s; 1365 buf[1] = '\0'; 1366 if (isdigit((uschar)buf[0])) 1367 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1368 else 1369 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1370 } 1371 } else if (*s != '\0') { 1372 for (;;) { 1373 n++; 1374 t = s; 1375 while (*s != sep && *s != '\n' && *s != '\0') 1376 s++; 1377 temp = *s; 1378 setptr(s, '\0'); 1379 snprintf(num, sizeof(num), "%d", n); 1380 if (is_number(t)) 1381 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1382 else 1383 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1384 setptr(s, temp); 1385 if (*s++ == '\0') 1386 break; 1387 } 1388 } 1389 tempfree(ap); 1390 tempfree(y); 1391 xfree(origs); 1392 xfree(origfs); 1393 x = gettemp(); 1394 x->tval = NUM; 1395 x->fval = n; 1396 return(x); 1397 } 1398 1399 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1400 { 1401 Cell *x; 1402 1403 x = execute(a[0]); 1404 if (istrue(x)) { 1405 tempfree(x); 1406 x = execute(a[1]); 1407 } else { 1408 tempfree(x); 1409 x = execute(a[2]); 1410 } 1411 return(x); 1412 } 1413 1414 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1415 { 1416 Cell *x; 1417 1418 x = execute(a[0]); 1419 if (istrue(x)) { 1420 tempfree(x); 1421 x = execute(a[1]); 1422 } else if (a[2] != NULL) { 1423 tempfree(x); 1424 x = execute(a[2]); 1425 } 1426 return(x); 1427 } 1428 1429 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1430 { 1431 Cell *x; 1432 1433 for (;;) { 1434 x = execute(a[0]); 1435 if (!istrue(x)) 1436 return(x); 1437 tempfree(x); 1438 x = execute(a[1]); 1439 if (isbreak(x)) { 1440 x = True; 1441 return(x); 1442 } 1443 if (isnext(x) || isexit(x) || isret(x)) 1444 return(x); 1445 tempfree(x); 1446 } 1447 } 1448 1449 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1450 { 1451 Cell *x; 1452 1453 for (;;) { 1454 x = execute(a[0]); 1455 if (isbreak(x)) 1456 return True; 1457 if (isnext(x) || isexit(x) || isret(x)) 1458 return(x); 1459 tempfree(x); 1460 x = execute(a[1]); 1461 if (!istrue(x)) 1462 return(x); 1463 tempfree(x); 1464 } 1465 } 1466 1467 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1468 { 1469 Cell *x; 1470 1471 x = execute(a[0]); 1472 tempfree(x); 1473 for (;;) { 1474 if (a[1]!=NULL) { 1475 x = execute(a[1]); 1476 if (!istrue(x)) return(x); 1477 else tempfree(x); 1478 } 1479 x = execute(a[3]); 1480 if (isbreak(x)) /* turn off break */ 1481 return True; 1482 if (isnext(x) || isexit(x) || isret(x)) 1483 return(x); 1484 tempfree(x); 1485 x = execute(a[2]); 1486 tempfree(x); 1487 } 1488 } 1489 1490 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1491 { 1492 Cell *x, *vp, *arrayp, *cp, *ncp; 1493 Array *tp; 1494 int i; 1495 1496 vp = execute(a[0]); 1497 arrayp = execute(a[1]); 1498 if (!isarr(arrayp)) { 1499 return True; 1500 } 1501 tp = (Array *) arrayp->sval; 1502 tempfree(arrayp); 1503 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1504 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1505 setsval(vp, cp->nval); 1506 ncp = cp->cnext; 1507 x = execute(a[2]); 1508 if (isbreak(x)) { 1509 tempfree(vp); 1510 return True; 1511 } 1512 if (isnext(x) || isexit(x) || isret(x)) { 1513 tempfree(vp); 1514 return(x); 1515 } 1516 tempfree(x); 1517 } 1518 } 1519 return True; 1520 } 1521 1522 static char *nawk_convert(const char *s, int (*fun_c)(int), 1523 wint_t (*fun_wc)(wint_t)) 1524 { 1525 char *buf = NULL; 1526 char *pbuf = NULL; 1527 const char *ps = NULL; 1528 size_t n = 0; 1529 wchar_t wc; 1530 size_t sz = MB_CUR_MAX; 1531 1532 if (sz == 1) { 1533 buf = tostring(s); 1534 1535 for (pbuf = buf; *pbuf; pbuf++) 1536 *pbuf = fun_c((uschar)*pbuf); 1537 1538 return buf; 1539 } else { 1540 /* upper/lower character may be shorter/longer */ 1541 buf = tostringN(s, strlen(s) * sz + 1); 1542 1543 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1544 /* 1545 * Reset internal state here too. 1546 * Assign result to avoid a compiler warning. (Casting to void 1547 * doesn't work.) 1548 * Increment said variable to avoid a different warning. 1549 */ 1550 int unused = wctomb(NULL, L'\0'); 1551 unused++; 1552 1553 ps = s; 1554 pbuf = buf; 1555 while (n = mbtowc(&wc, ps, sz), 1556 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1557 { 1558 ps += n; 1559 1560 n = wctomb(pbuf, fun_wc(wc)); 1561 if (n == (size_t)-1) 1562 FATAL("illegal wide character %s", s); 1563 1564 pbuf += n; 1565 } 1566 1567 *pbuf = '\0'; 1568 1569 if (n) 1570 FATAL("illegal byte sequence %s", s); 1571 1572 return buf; 1573 } 1574 } 1575 1576 static char *nawk_toupper(const char *s) 1577 { 1578 return nawk_convert(s, toupper, towupper); 1579 } 1580 1581 static char *nawk_tolower(const char *s) 1582 { 1583 return nawk_convert(s, tolower, towlower); 1584 } 1585 1586 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1587 { 1588 Cell *x, *y; 1589 Awkfloat u; 1590 int t, sz; 1591 Awkfloat tmp; 1592 char *buf, *fmt; 1593 Node *nextarg; 1594 FILE *fp; 1595 int status = 0; 1596 time_t tv; 1597 struct tm *tm, tmbuf; 1598 1599 t = ptoi(a[0]); 1600 x = execute(a[1]); 1601 nextarg = a[1]->nnext; 1602 switch (t) { 1603 case FLENGTH: 1604 if (isarr(x)) 1605 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1606 else 1607 u = strlen(getsval(x)); 1608 break; 1609 case FLOG: 1610 errno = 0; 1611 u = errcheck(log(getfval(x)), "log"); 1612 break; 1613 case FINT: 1614 modf(getfval(x), &u); break; 1615 case FEXP: 1616 errno = 0; 1617 u = errcheck(exp(getfval(x)), "exp"); 1618 break; 1619 case FSQRT: 1620 errno = 0; 1621 u = errcheck(sqrt(getfval(x)), "sqrt"); 1622 break; 1623 case FSIN: 1624 u = sin(getfval(x)); break; 1625 case FCOS: 1626 u = cos(getfval(x)); break; 1627 case FATAN: 1628 if (nextarg == NULL) { 1629 WARNING("atan2 requires two arguments; returning 1.0"); 1630 u = 1.0; 1631 } else { 1632 y = execute(a[1]->nnext); 1633 u = atan2(getfval(x), getfval(y)); 1634 tempfree(y); 1635 nextarg = nextarg->nnext; 1636 } 1637 break; 1638 case FCOMPL: 1639 u = ~((int)getfval(x)); 1640 break; 1641 case FAND: 1642 if (nextarg == 0) { 1643 WARNING("and requires two arguments; returning 0"); 1644 u = 0; 1645 break; 1646 } 1647 y = execute(a[1]->nnext); 1648 u = ((int)getfval(x)) & ((int)getfval(y)); 1649 tempfree(y); 1650 nextarg = nextarg->nnext; 1651 break; 1652 case FFOR: 1653 if (nextarg == 0) { 1654 WARNING("or requires two arguments; returning 0"); 1655 u = 0; 1656 break; 1657 } 1658 y = execute(a[1]->nnext); 1659 u = ((int)getfval(x)) | ((int)getfval(y)); 1660 tempfree(y); 1661 nextarg = nextarg->nnext; 1662 break; 1663 case FXOR: 1664 if (nextarg == 0) { 1665 WARNING("xor requires two arguments; returning 0"); 1666 u = 0; 1667 break; 1668 } 1669 y = execute(a[1]->nnext); 1670 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1671 tempfree(y); 1672 nextarg = nextarg->nnext; 1673 break; 1674 case FLSHIFT: 1675 if (nextarg == 0) { 1676 WARNING("lshift requires two arguments; returning 0"); 1677 u = 0; 1678 break; 1679 } 1680 y = execute(a[1]->nnext); 1681 u = ((int)getfval(x)) << ((int)getfval(y)); 1682 tempfree(y); 1683 nextarg = nextarg->nnext; 1684 break; 1685 case FRSHIFT: 1686 if (nextarg == 0) { 1687 WARNING("rshift requires two arguments; returning 0"); 1688 u = 0; 1689 break; 1690 } 1691 y = execute(a[1]->nnext); 1692 u = ((int)getfval(x)) >> ((int)getfval(y)); 1693 tempfree(y); 1694 nextarg = nextarg->nnext; 1695 break; 1696 case FSYSTEM: 1697 fflush(stdout); /* in case something is buffered already */ 1698 status = system(getsval(x)); 1699 u = status; 1700 if (status != -1) { 1701 if (WIFEXITED(status)) { 1702 u = WEXITSTATUS(status); 1703 } else if (WIFSIGNALED(status)) { 1704 u = WTERMSIG(status) + 256; 1705 #ifdef WCOREDUMP 1706 if (WCOREDUMP(status)) 1707 u += 256; 1708 #endif 1709 } else /* something else?!? */ 1710 u = 0; 1711 } 1712 break; 1713 case FRAND: 1714 /* random() returns numbers in [0..2^31-1] 1715 * in order to get a number in [0, 1), divide it by 2^31 1716 */ 1717 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1718 break; 1719 case FSRAND: 1720 if (isrec(x)) { /* no argument provided */ 1721 u = time(NULL); 1722 tmp = u; 1723 srandom((unsigned int) u); 1724 } else { 1725 u = getfval(x); 1726 tmp = u; 1727 srandom_deterministic((unsigned int) u); 1728 } 1729 u = srand_seed; 1730 srand_seed = tmp; 1731 break; 1732 case FTOUPPER: 1733 case FTOLOWER: 1734 if (t == FTOUPPER) 1735 buf = nawk_toupper(getsval(x)); 1736 else 1737 buf = nawk_tolower(getsval(x)); 1738 tempfree(x); 1739 x = gettemp(); 1740 setsval(x, buf); 1741 free(buf); 1742 return x; 1743 case FFLUSH: 1744 if (isrec(x) || strlen(getsval(x)) == 0) { 1745 flush_all(); /* fflush() or fflush("") -> all */ 1746 u = 0; 1747 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1748 u = EOF; 1749 else 1750 u = fflush(fp); 1751 break; 1752 case FMKTIME: 1753 memset(&tmbuf, 0, sizeof(tmbuf)); 1754 tm = &tmbuf; 1755 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 1756 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 1757 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 1758 switch (t) { 1759 case 6: 1760 tm->tm_isdst = -1; /* let mktime figure it out */ 1761 /* FALLTHROUGH */ 1762 case 7: 1763 tm->tm_year -= 1900; 1764 tm->tm_mon--; 1765 u = mktime(tm); 1766 break; 1767 default: 1768 u = -1; 1769 break; 1770 } 1771 break; 1772 case FSYSTIME: 1773 u = time((time_t *) 0); 1774 break; 1775 case FSTRFTIME: 1776 /* strftime([format [,timestamp]]) */ 1777 if (nextarg) { 1778 y = execute(nextarg); 1779 nextarg = nextarg->nnext; 1780 tv = (time_t) getfval(y); 1781 tempfree(y); 1782 } else 1783 tv = time((time_t *) 0); 1784 tm = localtime(&tv); 1785 if (tm == NULL) 1786 FATAL("bad time %ld", (long)tv); 1787 1788 if (isrec(x)) { 1789 /* format argument not provided, use default */ 1790 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1791 } else 1792 fmt = tostring(getsval(x)); 1793 1794 sz = 32; 1795 buf = NULL; 1796 do { 1797 if ((buf = reallocarray(buf, 2, sz)) == NULL) 1798 FATAL("out of memory in strftime"); 1799 sz *= 2; 1800 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1801 1802 y = gettemp(); 1803 setsval(y, buf); 1804 free(fmt); 1805 free(buf); 1806 1807 return y; 1808 default: /* can't happen */ 1809 FATAL("illegal function type %d", t); 1810 break; 1811 } 1812 tempfree(x); 1813 x = gettemp(); 1814 setfval(x, u); 1815 if (nextarg != NULL) { 1816 WARNING("warning: function has too many arguments"); 1817 for ( ; nextarg; nextarg = nextarg->nnext) 1818 execute(nextarg); 1819 } 1820 return(x); 1821 } 1822 1823 Cell *printstat(Node **a, int n) /* print a[0] */ 1824 { 1825 Node *x; 1826 Cell *y; 1827 FILE *fp; 1828 1829 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1830 fp = stdout; 1831 else 1832 fp = redirect(ptoi(a[1]), a[2]); 1833 for (x = a[0]; x != NULL; x = x->nnext) { 1834 y = execute(x); 1835 fputs(getpssval(y), fp); 1836 tempfree(y); 1837 if (x->nnext == NULL) 1838 fputs(getsval(orsloc), fp); 1839 else 1840 fputs(getsval(ofsloc), fp); 1841 } 1842 if (a[1] != NULL) 1843 fflush(fp); 1844 if (ferror(fp)) 1845 FATAL("write error on %s", filename(fp)); 1846 return(True); 1847 } 1848 1849 Cell *nullproc(Node **a, int n) 1850 { 1851 return 0; 1852 } 1853 1854 1855 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1856 { 1857 FILE *fp; 1858 Cell *x; 1859 char *fname; 1860 1861 x = execute(b); 1862 fname = getsval(x); 1863 fp = openfile(a, fname, NULL); 1864 if (fp == NULL) 1865 FATAL("can't open file %s", fname); 1866 tempfree(x); 1867 return fp; 1868 } 1869 1870 struct files { 1871 FILE *fp; 1872 const char *fname; 1873 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1874 } *files; 1875 1876 size_t nfiles; 1877 1878 static void stdinit(void) /* in case stdin, etc., are not constants */ 1879 { 1880 nfiles = FOPEN_MAX; 1881 files = calloc(nfiles, sizeof(*files)); 1882 if (files == NULL) 1883 FATAL("can't allocate file memory for %zu files", nfiles); 1884 files[0].fp = stdin; 1885 files[0].fname = "/dev/stdin"; 1886 files[0].mode = LT; 1887 files[1].fp = stdout; 1888 files[1].fname = "/dev/stdout"; 1889 files[1].mode = GT; 1890 files[2].fp = stderr; 1891 files[2].fname = "/dev/stderr"; 1892 files[2].mode = GT; 1893 } 1894 1895 FILE *openfile(int a, const char *us, bool *pnewflag) 1896 { 1897 const char *s = us; 1898 size_t i; 1899 int m; 1900 FILE *fp = NULL; 1901 1902 if (*s == '\0') 1903 FATAL("null file name in print or getline"); 1904 for (i = 0; i < nfiles; i++) 1905 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1906 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1907 a == FFLUSH)) { 1908 if (pnewflag) 1909 *pnewflag = false; 1910 return files[i].fp; 1911 } 1912 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1913 return NULL; 1914 1915 for (i = 0; i < nfiles; i++) 1916 if (files[i].fp == NULL) 1917 break; 1918 if (i >= nfiles) { 1919 struct files *nf; 1920 size_t nnf = nfiles + FOPEN_MAX; 1921 nf = reallocarray(files, nnf, sizeof(*nf)); 1922 if (nf == NULL) 1923 FATAL("cannot grow files for %s and %zu files", s, nnf); 1924 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1925 nfiles = nnf; 1926 files = nf; 1927 } 1928 fflush(stdout); /* force a semblance of order */ 1929 m = a; 1930 if (a == GT) { 1931 fp = fopen(s, "w"); 1932 } else if (a == APPEND) { 1933 fp = fopen(s, "a"); 1934 m = GT; /* so can mix > and >> */ 1935 } else if (a == '|') { /* output pipe */ 1936 fp = popen(s, "w"); 1937 } else if (a == LE) { /* input pipe */ 1938 fp = popen(s, "r"); 1939 } else if (a == LT) { /* getline <file */ 1940 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1941 } else /* can't happen */ 1942 FATAL("illegal redirection %d", a); 1943 if (fp != NULL) { 1944 files[i].fname = tostring(s); 1945 files[i].fp = fp; 1946 files[i].mode = m; 1947 if (pnewflag) 1948 *pnewflag = true; 1949 if (fp != stdin && fp != stdout && fp != stderr) 1950 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1951 } 1952 return fp; 1953 } 1954 1955 const char *filename(FILE *fp) 1956 { 1957 size_t i; 1958 1959 for (i = 0; i < nfiles; i++) 1960 if (fp == files[i].fp) 1961 return files[i].fname; 1962 return "???"; 1963 } 1964 1965 Cell *closefile(Node **a, int n) 1966 { 1967 Cell *x; 1968 size_t i; 1969 bool stat; 1970 1971 x = execute(a[0]); 1972 getsval(x); 1973 stat = true; 1974 for (i = 0; i < nfiles; i++) { 1975 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1976 continue; 1977 if (ferror(files[i].fp)) 1978 FATAL("i/o error occurred on %s", files[i].fname); 1979 if (files[i].fp == stdin || files[i].fp == stdout || 1980 files[i].fp == stderr) 1981 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 1982 else if (files[i].mode == '|' || files[i].mode == LE) 1983 stat = pclose(files[i].fp) == -1; 1984 else 1985 stat = fclose(files[i].fp) == EOF; 1986 if (stat) 1987 FATAL("i/o error occurred closing %s", files[i].fname); 1988 if (i > 2) /* don't do /dev/std... */ 1989 xfree(files[i].fname); 1990 files[i].fname = NULL; /* watch out for ref thru this */ 1991 files[i].fp = NULL; 1992 break; 1993 } 1994 tempfree(x); 1995 x = gettemp(); 1996 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1997 return(x); 1998 } 1999 2000 void closeall(void) 2001 { 2002 size_t i; 2003 bool stat = false; 2004 2005 for (i = 0; i < nfiles; i++) { 2006 if (! files[i].fp) 2007 continue; 2008 if (ferror(files[i].fp)) 2009 FATAL( "i/o error occurred on %s", files[i].fname ); 2010 if (files[i].fp == stdin) 2011 continue; 2012 if (files[i].mode == '|' || files[i].mode == LE) 2013 stat = pclose(files[i].fp) == -1; 2014 else if (files[i].fp == stdout || files[i].fp == stderr) 2015 stat = fflush(files[i].fp) == EOF; 2016 else 2017 stat = fclose(files[i].fp) == EOF; 2018 if (stat) 2019 FATAL( "i/o error occurred while closing %s", files[i].fname ); 2020 } 2021 } 2022 2023 static void flush_all(void) 2024 { 2025 size_t i; 2026 2027 for (i = 0; i < nfiles; i++) 2028 if (files[i].fp) 2029 fflush(files[i].fp); 2030 } 2031 2032 void backsub(char **pb_ptr, const char **sptr_ptr); 2033 2034 Cell *sub(Node **a, int nnn) /* substitute command */ 2035 { 2036 const char *sptr, *q; 2037 Cell *x, *y, *result; 2038 char *t, *buf, *pb; 2039 fa *pfa; 2040 int bufsz = recsize; 2041 2042 if ((buf = malloc(bufsz)) == NULL) 2043 FATAL("out of memory in sub"); 2044 x = execute(a[3]); /* target string */ 2045 t = getsval(x); 2046 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2047 pfa = (fa *) a[1]; /* regular expression */ 2048 else { 2049 y = execute(a[1]); 2050 pfa = makedfa(getsval(y), 1); 2051 tempfree(y); 2052 } 2053 y = execute(a[2]); /* replacement string */ 2054 result = False; 2055 if (pmatch(pfa, t)) { 2056 sptr = t; 2057 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2058 pb = buf; 2059 while (sptr < patbeg) 2060 *pb++ = *sptr++; 2061 sptr = getsval(y); 2062 while (*sptr != '\0') { 2063 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2064 if (*sptr == '\\') { 2065 backsub(&pb, &sptr); 2066 } else if (*sptr == '&') { 2067 sptr++; 2068 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2069 for (q = patbeg; q < patbeg+patlen; ) 2070 *pb++ = *q++; 2071 } else 2072 *pb++ = *sptr++; 2073 } 2074 *pb = '\0'; 2075 if (pb > buf + bufsz) 2076 FATAL("sub result1 %.30s too big; can't happen", buf); 2077 sptr = patbeg + patlen; 2078 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2079 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2080 while ((*pb++ = *sptr++) != '\0') 2081 continue; 2082 } 2083 if (pb > buf + bufsz) 2084 FATAL("sub result2 %.30s too big; can't happen", buf); 2085 setsval(x, buf); /* BUG: should be able to avoid copy */ 2086 result = True; 2087 } 2088 tempfree(x); 2089 tempfree(y); 2090 free(buf); 2091 return result; 2092 } 2093 2094 Cell *gsub(Node **a, int nnn) /* global substitute */ 2095 { 2096 Cell *x, *y; 2097 char *rptr, *pb; 2098 const char *q, *t, *sptr; 2099 char *buf; 2100 fa *pfa; 2101 int mflag, tempstat, num; 2102 int bufsz = recsize; 2103 2104 if ((buf = malloc(bufsz)) == NULL) 2105 FATAL("out of memory in gsub"); 2106 mflag = 0; /* if mflag == 0, can replace empty string */ 2107 num = 0; 2108 x = execute(a[3]); /* target string */ 2109 t = getsval(x); 2110 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2111 pfa = (fa *) a[1]; /* regular expression */ 2112 else { 2113 y = execute(a[1]); 2114 pfa = makedfa(getsval(y), 1); 2115 tempfree(y); 2116 } 2117 y = execute(a[2]); /* replacement string */ 2118 if (pmatch(pfa, t)) { 2119 tempstat = pfa->initstat; 2120 pfa->initstat = 2; 2121 pb = buf; 2122 rptr = getsval(y); 2123 do { 2124 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2125 if (mflag == 0) { /* can replace empty */ 2126 num++; 2127 sptr = rptr; 2128 while (*sptr != '\0') { 2129 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2130 if (*sptr == '\\') { 2131 backsub(&pb, &sptr); 2132 } else if (*sptr == '&') { 2133 sptr++; 2134 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2135 for (q = patbeg; q < patbeg+patlen; ) 2136 *pb++ = *q++; 2137 } else 2138 *pb++ = *sptr++; 2139 } 2140 } 2141 if (*t == '\0') /* at end */ 2142 goto done; 2143 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2144 *pb++ = *t++; 2145 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2146 FATAL("gsub result0 %.30s too big; can't happen", buf); 2147 mflag = 0; 2148 } 2149 else { /* matched nonempty string */ 2150 num++; 2151 sptr = t; 2152 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2153 while (sptr < patbeg) 2154 *pb++ = *sptr++; 2155 sptr = rptr; 2156 while (*sptr != '\0') { 2157 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2158 if (*sptr == '\\') { 2159 backsub(&pb, &sptr); 2160 } else if (*sptr == '&') { 2161 sptr++; 2162 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2163 for (q = patbeg; q < patbeg+patlen; ) 2164 *pb++ = *q++; 2165 } else 2166 *pb++ = *sptr++; 2167 } 2168 t = patbeg + patlen; 2169 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2170 goto done; 2171 if (pb > buf + bufsz) 2172 FATAL("gsub result1 %.30s too big; can't happen", buf); 2173 mflag = 1; 2174 } 2175 } while (pmatch(pfa,t)); 2176 sptr = t; 2177 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2178 while ((*pb++ = *sptr++) != '\0') 2179 continue; 2180 done: if (pb < buf + bufsz) 2181 *pb = '\0'; 2182 else if (*(pb-1) != '\0') 2183 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2184 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2185 pfa->initstat = tempstat; 2186 } 2187 tempfree(x); 2188 tempfree(y); 2189 x = gettemp(); 2190 x->tval = NUM; 2191 x->fval = num; 2192 free(buf); 2193 return(x); 2194 } 2195 2196 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2197 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2198 { 2199 Cell *x, *y, *res, *h; 2200 char *rptr; 2201 const char *sptr; 2202 char *buf, *pb; 2203 const char *t, *q; 2204 fa *pfa; 2205 int mflag, tempstat, num, whichm; 2206 int bufsz = recsize; 2207 2208 if ((buf = malloc(bufsz)) == NULL) 2209 FATAL("out of memory in gensub"); 2210 mflag = 0; /* if mflag == 0, can replace empty string */ 2211 num = 0; 2212 x = execute(a[4]); /* source string */ 2213 t = getsval(x); 2214 res = copycell(x); /* target string - initially copy of source */ 2215 res->csub = CTEMP; /* result values are temporary */ 2216 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2217 pfa = (fa *) a[1]; /* regular expression */ 2218 else { 2219 y = execute(a[1]); 2220 pfa = makedfa(getsval(y), 1); 2221 tempfree(y); 2222 } 2223 y = execute(a[2]); /* replacement string */ 2224 h = execute(a[3]); /* which matches should be replaced */ 2225 sptr = getsval(h); 2226 if (sptr[0] == 'g' || sptr[0] == 'G') 2227 whichm = -1; 2228 else { 2229 /* 2230 * The specified number is index of replacement, starting 2231 * from 1. GNU awk treats index lower than 0 same as 2232 * 1, we do same for compatibility. 2233 */ 2234 whichm = (int) getfval(h) - 1; 2235 if (whichm < 0) 2236 whichm = 0; 2237 } 2238 tempfree(h); 2239 2240 if (pmatch(pfa, t)) { 2241 char *sl; 2242 2243 tempstat = pfa->initstat; 2244 pfa->initstat = 2; 2245 pb = buf; 2246 rptr = getsval(y); 2247 /* 2248 * XXX if there are any backreferences in subst string, 2249 * complain now. 2250 */ 2251 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2252 if (strchr("0123456789", sl[1])) { 2253 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2254 } 2255 } 2256 2257 do { 2258 if (whichm >= 0 && whichm != num) { 2259 num++; 2260 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2261 2262 /* copy the part of string up to and including 2263 * match to output buffer */ 2264 while (t < patbeg + patlen) 2265 *pb++ = *t++; 2266 continue; 2267 } 2268 2269 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2270 if (mflag == 0) { /* can replace empty */ 2271 num++; 2272 sptr = rptr; 2273 while (*sptr != 0) { 2274 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2275 if (*sptr == '\\') { 2276 backsub(&pb, &sptr); 2277 } else if (*sptr == '&') { 2278 sptr++; 2279 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2280 for (q = patbeg; q < patbeg+patlen; ) 2281 *pb++ = *q++; 2282 } else 2283 *pb++ = *sptr++; 2284 } 2285 } 2286 if (*t == 0) /* at end */ 2287 goto done; 2288 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2289 *pb++ = *t++; 2290 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2291 FATAL("gensub result0 %.30s too big; can't happen", buf); 2292 mflag = 0; 2293 } 2294 else { /* matched nonempty string */ 2295 num++; 2296 sptr = t; 2297 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2298 while (sptr < patbeg) 2299 *pb++ = *sptr++; 2300 sptr = rptr; 2301 while (*sptr != 0) { 2302 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2303 if (*sptr == '\\') { 2304 backsub(&pb, &sptr); 2305 } else if (*sptr == '&') { 2306 sptr++; 2307 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2308 for (q = patbeg; q < patbeg+patlen; ) 2309 *pb++ = *q++; 2310 } else 2311 *pb++ = *sptr++; 2312 } 2313 t = patbeg + patlen; 2314 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2315 goto done; 2316 if (pb > buf + bufsz) 2317 FATAL("gensub result1 %.30s too big; can't happen", buf); 2318 mflag = 1; 2319 } 2320 } while (pmatch(pfa,t)); 2321 sptr = t; 2322 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2323 while ((*pb++ = *sptr++) != 0) 2324 ; 2325 done: if (pb > buf + bufsz) 2326 FATAL("gensub result2 %.30s too big; can't happen", buf); 2327 *pb = '\0'; 2328 setsval(res, buf); 2329 pfa->initstat = tempstat; 2330 } 2331 tempfree(x); 2332 tempfree(y); 2333 free(buf); 2334 return(res); 2335 } 2336 2337 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2338 { /* sptr[0] == '\\' */ 2339 char *pb = *pb_ptr; 2340 const char *sptr = *sptr_ptr; 2341 2342 if (sptr[1] == '\\') { 2343 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2344 *pb++ = '\\'; 2345 *pb++ = '&'; 2346 sptr += 4; 2347 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2348 *pb++ = '\\'; 2349 sptr += 2; 2350 } else if (do_posix) { /* \\x -> \x */ 2351 sptr++; 2352 *pb++ = *sptr++; 2353 } else { /* \\x -> \\x */ 2354 *pb++ = *sptr++; 2355 *pb++ = *sptr++; 2356 } 2357 } else if (sptr[1] == '&') { /* literal & */ 2358 sptr++; 2359 *pb++ = *sptr++; 2360 } else /* literal \ */ 2361 *pb++ = *sptr++; 2362 2363 *pb_ptr = pb; 2364 *sptr_ptr = sptr; 2365 } 2366