1 /* $OpenBSD: run.c,v 1.59 2020/06/13 01:21:01 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 
24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <ctype.h> 29 #include <wchar.h> 30 #include <wctype.h> 31 #include <errno.h> 32 #include <fcntl.h> 33 #include <setjmp.h> 34 #include <limits.h> 35 #include <math.h> 36 #include <string.h> 37 #include <stdlib.h> 38 #include <time.h> 39 #include <sys/types.h> 40 #include <sys/wait.h> 41 #include "awk.h" 42 #include "ytab.h" 43 44 static void stdinit(void); 45 static void flush_all(void); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = 
&nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? 
*pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = realloc(*pbuf, minlen); 123 DPRINTF( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf) ); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 stdinit(); 140 execute(a); 141 closeall(); 142 } 143 144 Cell *execute(Node *u) /* execute a node of the parse tree */ 145 { 146 Cell *(*proc)(Node **, int); 147 Cell *x; 148 Node *a; 149 150 if (u == NULL) 151 return(True); 152 for (a = u; ; a = a->nnext) { 153 curnode = a; 154 if (isvalue(a)) { 155 x = (Cell *) (a->narg[0]); 156 if (isfld(x) && !donefld) 157 fldbld(); 158 else if (isrec(x) && !donerec) 159 recbld(); 160 return(x); 161 } 162 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 163 FATAL("illegal statement"); 164 proc = proctab[a->nobj-FIRSTTOKEN]; 165 x = (*proc)(a->narg, a->nobj); 166 if (isfld(x) && !donefld) 167 fldbld(); 168 else if (isrec(x) && !donerec) 169 recbld(); 170 if (isexpr(a)) 171 return(x); 172 if (isjump(x)) 173 return(x); 174 if (a->nnext == NULL) 175 return(x); 176 tempfree(x); 177 } 178 } 179 180 181 Cell *program(Node **a, int n) /* execute an awk program */ 182 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 183 Cell *x; 184 185 if (setjmp(env) != 0) 186 goto ex; 187 if (a[0]) { /* BEGIN */ 188 x = execute(a[0]); 189 if (isexit(x)) 190 return(True); 191 if (isjump(x)) 192 FATAL("illegal break, continue, next or nextfile from BEGIN"); 193 tempfree(x); 194 } 195 if (a[1] || a[2]) 196 while (getrec(&record, &recsize, true) > 0) { 197 x = execute(a[1]); 198 if (isexit(x)) 199 break; 200 tempfree(x); 201 } 202 ex: 203 if (setjmp(env) != 0) /* handles exit within END */ 204 goto ex1; 205 if 
(a[2]) { /* END */ 206 x = execute(a[2]); 207 if (isbreak(x) || isnext(x) || iscont(x)) 208 FATAL("illegal break, continue, next or nextfile from END"); 209 tempfree(x); 210 } 211 ex1: 212 return(True); 213 } 214 215 struct Frame { /* stack frame for awk function calls */ 216 int nargs; /* number of arguments in this call */ 217 Cell *fcncell; /* pointer to Cell for function */ 218 Cell **args; /* pointer to array of arguments after execute */ 219 Cell *retval; /* return value */ 220 }; 221 222 #define NARGS 50 /* max args in a call */ 223 224 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 225 int nframe = 0; /* number of frames allocated */ 226 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 227 228 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 229 { 230 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 231 int i, ncall, ndef; 232 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 233 Node *x; 234 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 235 Cell *y, *z, *fcn; 236 char *s; 237 238 fcn = execute(a[0]); /* the function itself */ 239 s = fcn->nval; 240 if (!isfcn(fcn)) 241 FATAL("calling undefined function %s", s); 242 if (frame == NULL) { 243 frp = frame = calloc(nframe += 100, sizeof(*frame)); 244 if (frame == NULL) 245 FATAL("out of space for stack frames calling %s", s); 246 } 247 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 248 ncall++; 249 ndef = (int) fcn->fval; /* args in defn */ 250 DPRINTF( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) ); 251 if (ncall > ndef) 252 WARNING("function %s called with %d args, uses only %d", 253 s, ncall, ndef); 254 if (ncall + ndef > NARGS) 255 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 256 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 
257 DPRINTF( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) ); 258 y = execute(x); 259 oargs[i] = y; 260 DPRINTF( ("args[%d]: %s %f <%s>, t=%o\n", 261 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) ); 262 if (isfcn(y)) 263 FATAL("can't use function %s as argument in %s", y->nval, s); 264 if (isarr(y)) 265 args[i] = y; /* arrays by ref */ 266 else 267 args[i] = copycell(y); 268 tempfree(y); 269 } 270 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 271 args[i] = gettemp(); 272 *args[i] = newcopycell; 273 } 274 frp++; /* now ok to up frame */ 275 if (frp >= frame + nframe) { 276 int dfp = frp - frame; /* old index */ 277 frame = reallocarray(frame, (nframe += 100), sizeof(*frame)); 278 if (frame == NULL) 279 FATAL("out of space for stack frames in %s", s); 280 frp = frame + dfp; 281 } 282 frp->fcncell = fcn; 283 frp->args = args; 284 frp->nargs = ndef; /* number defined with (excess are locals) */ 285 frp->retval = gettemp(); 286 287 DPRINTF( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) ); 288 y = execute((Node *)(fcn->sval)); /* execute body */ 289 DPRINTF( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) ); 290 291 for (i = 0; i < ndef; i++) { 292 Cell *t = frp->args[i]; 293 if (isarr(t)) { 294 if (t->csub == CCOPY) { 295 if (i >= ncall) { 296 freesymtab(t); 297 t->csub = CTEMP; 298 tempfree(t); 299 } else { 300 oargs[i]->tval = t->tval; 301 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 302 oargs[i]->sval = t->sval; 303 tempfree(t); 304 } 305 } 306 } else if (t != y) { /* kludge to prevent freeing twice */ 307 t->csub = CTEMP; 308 tempfree(t); 309 } else if (t == y && t->csub == CCOPY) { 310 t->csub = CTEMP; 311 tempfree(t); 312 freed = 1; 313 } 314 } 315 tempfree(fcn); 316 if (isexit(y) || isnext(y)) 317 return y; 318 if (freed == 0) { 319 tempfree(y); /* don't free twice! 
*/ 320 } 321 z = frp->retval; /* return value */ 322 DPRINTF( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); 323 frp--; 324 return(z); 325 } 326 327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 328 { 329 Cell *y; 330 331 /* copy is not constant or field */ 332 333 y = gettemp(); 334 y->tval = x->tval & ~(CON|FLD|REC); 335 y->csub = CCOPY; /* prevents freeing until call is over */ 336 y->nval = x->nval; /* BUG? */ 337 if (isstr(x) /* || x->ctype == OCELL */) { 338 y->sval = tostring(x->sval); 339 y->tval &= ~DONTFREE; 340 } else 341 y->tval |= DONTFREE; 342 y->fval = x->fval; 343 return y; 344 } 345 346 Cell *arg(Node **a, int n) /* nth argument of a function */ 347 { 348 349 n = ptoi(a[0]); /* argument number, counting from 0 */ 350 DPRINTF( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) ); 351 if (n+1 > frp->nargs) 352 FATAL("argument #%d of function %s was not supplied", 353 n+1, frp->fcncell->nval); 354 return frp->args[n]; 355 } 356 357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 358 { 359 Cell *y; 360 361 switch (n) { 362 case EXIT: 363 if (a[0] != NULL) { 364 y = execute(a[0]); 365 errorflag = (int) getfval(y); 366 tempfree(y); 367 } 368 longjmp(env, 1); 369 case RETURN: 370 if (a[0] != NULL) { 371 y = execute(a[0]); 372 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 373 setsval(frp->retval, getsval(y)); 374 frp->retval->fval = getfval(y); 375 frp->retval->tval |= NUM; 376 } 377 else if (y->tval & STR) 378 setsval(frp->retval, getsval(y)); 379 else if (y->tval & NUM) 380 setfval(frp->retval, getfval(y)); 381 else /* can't happen */ 382 FATAL("bad type variable %d", y->tval); 383 tempfree(y); 384 } 385 return(jret); 386 case NEXT: 387 return(jnext); 388 case NEXTFILE: 389 nextfile(); 390 return(jnextfile); 391 case BREAK: 392 return(jbreak); 393 case CONTINUE: 394 return(jcont); 395 default: /* can't happen */ 396 FATAL("illegal jump type %d", n); 397 } 398 return 0; /* not reached */ 399 } 400 401 
Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 402 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 403 Cell *r, *x; 404 extern Cell **fldtab; 405 FILE *fp; 406 char *buf; 407 int bufsize = recsize; 408 int mode; 409 bool newflag; 410 411 if ((buf = malloc(bufsize)) == NULL) 412 FATAL("out of memory in getline"); 413 414 fflush(stdout); /* in case someone is waiting for a prompt */ 415 r = gettemp(); 416 if (a[1] != NULL) { /* getline < file */ 417 x = execute(a[2]); /* filename */ 418 mode = ptoi(a[1]); 419 if (mode == '|') /* input pipe */ 420 mode = LE; /* arbitrary flag */ 421 fp = openfile(mode, getsval(x), &newflag); 422 tempfree(x); 423 if (fp == NULL) 424 n = -1; 425 else 426 n = readrec(&buf, &bufsize, fp, newflag); 427 if (n <= 0) { 428 ; 429 } else if (a[0] != NULL) { /* getline var <file */ 430 x = execute(a[0]); 431 setsval(x, buf); 432 if (is_number(x->sval)) { 433 x->fval = atof(x->sval); 434 x->tval |= NUM; 435 } 436 tempfree(x); 437 } else { /* getline <file */ 438 setsval(fldtab[0], buf); 439 if (is_number(fldtab[0]->sval)) { 440 fldtab[0]->fval = atof(fldtab[0]->sval); 441 fldtab[0]->tval |= NUM; 442 } 443 } 444 } else { /* bare getline; use current input */ 445 if (a[0] == NULL) /* getline */ 446 n = getrec(&record, &recsize, true); 447 else { /* getline var */ 448 n = getrec(&buf, &bufsize, false); 449 x = execute(a[0]); 450 setsval(x, buf); 451 if (is_number(x->sval)) { 452 x->fval = atof(x->sval); 453 x->tval |= NUM; 454 } 455 tempfree(x); 456 } 457 } 458 setfval(r, (Awkfloat) n); 459 free(buf); 460 return r; 461 } 462 463 Cell *getnf(Node **a, int n) /* get NF */ 464 { 465 if (!donefld) 466 fldbld(); 467 return (Cell *) a[0]; 468 } 469 470 static char * 471 makearraystring(Node *p, const char *func) 472 { 473 char *buf; 474 int bufsz = recsize; 475 size_t blen, seplen; 476 477 if ((buf = malloc(bufsz)) == NULL) { 478 FATAL("%s: out of memory", func); 479 } 480 481 blen = 0; 482 buf[blen] = '\0'; 483 
seplen = strlen(getsval(subseploc)); 484 485 for (; p; p = p->nnext) { 486 Cell *x = execute(p); /* expr */ 487 char *s = getsval(x); 488 size_t nsub = p->nnext ? seplen : 0; 489 size_t slen = strlen(s); 490 size_t tlen = blen + slen + nsub; 491 492 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 493 FATAL("%s: out of memory %s[%s...]", 494 func, x->nval, buf); 495 } 496 memcpy(buf + blen, s, slen); 497 if (nsub) { 498 memcpy(buf + blen + slen, *SUBSEP, nsub); 499 } 500 buf[tlen] = '\0'; 501 blen = tlen; 502 tempfree(x); 503 } 504 return buf; 505 } 506 507 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 508 { 509 Cell *x, *z; 510 char *buf; 511 512 x = execute(a[0]); /* Cell* for symbol table */ 513 buf = makearraystring(a[1], __func__); 514 if (!isarr(x)) { 515 DPRINTF( ("making %s into an array\n", NN(x->nval)) ); 516 if (freeable(x)) 517 xfree(x->sval); 518 x->tval &= ~(STR|NUM|DONTFREE); 519 x->tval |= ARR; 520 x->sval = (char *) makesymtab(NSYMTAB); 521 } 522 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 523 z->ctype = OCELL; 524 z->csub = CVAR; 525 tempfree(x); 526 free(buf); 527 return(z); 528 } 529 530 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 531 { 532 Cell *x; 533 534 x = execute(a[0]); /* Cell* for symbol table */ 535 if (x == symtabloc) { 536 FATAL("cannot delete SYMTAB or its elements"); 537 } 538 if (!isarr(x)) 539 return True; 540 if (a[1] == NULL) { /* delete the elements, not the table */ 541 freesymtab(x); 542 x->tval &= ~STR; 543 x->tval |= ARR; 544 x->sval = (char *) makesymtab(NSYMTAB); 545 } else { 546 char *buf = makearraystring(a[1], __func__); 547 freeelem(x, buf); 548 free(buf); 549 } 550 tempfree(x); 551 return True; 552 } 553 554 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 555 { 556 Cell *ap, *k; 557 char *buf; 558 559 ap = execute(a[1]); /* array name */ 560 if (!isarr(ap)) { 561 DPRINTF( ("making %s into an array\n", 
ap->nval) ); 562 if (freeable(ap)) 563 xfree(ap->sval); 564 ap->tval &= ~(STR|NUM|DONTFREE); 565 ap->tval |= ARR; 566 ap->sval = (char *) makesymtab(NSYMTAB); 567 } 568 buf = makearraystring(a[0], __func__); 569 k = lookup(buf, (Array *) ap->sval); 570 tempfree(ap); 571 free(buf); 572 if (k == NULL) 573 return(False); 574 else 575 return(True); 576 } 577 578 579 Cell *matchop(Node **a, int n) /* ~ and match() */ 580 { 581 Cell *x, *y; 582 char *s, *t; 583 int i; 584 fa *pfa; 585 int (*mf)(fa *, const char *) = match, mode = 0; 586 587 if (n == MATCHFCN) { 588 mf = pmatch; 589 mode = 1; 590 } 591 x = execute(a[1]); /* a[1] = target text */ 592 s = getsval(x); 593 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 594 i = (*mf)((fa *) a[2], s); 595 else { 596 y = execute(a[2]); /* a[2] = regular expr */ 597 t = getsval(y); 598 pfa = makedfa(t, mode); 599 i = (*mf)(pfa, s); 600 tempfree(y); 601 } 602 tempfree(x); 603 if (n == MATCHFCN) { 604 int start = patbeg - s + 1; 605 if (patlen < 0) 606 start = 0; 607 setfval(rstartloc, (Awkfloat) start); 608 setfval(rlengthloc, (Awkfloat) patlen); 609 x = gettemp(); 610 x->tval = NUM; 611 x->fval = start; 612 return x; 613 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 614 return(True); 615 else 616 return(False); 617 } 618 619 620 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 621 { 622 Cell *x, *y; 623 int i; 624 625 x = execute(a[0]); 626 i = istrue(x); 627 tempfree(x); 628 switch (n) { 629 case BOR: 630 if (i) return(True); 631 y = execute(a[1]); 632 i = istrue(y); 633 tempfree(y); 634 if (i) return(True); 635 else return(False); 636 case AND: 637 if ( !i ) return(False); 638 y = execute(a[1]); 639 i = istrue(y); 640 tempfree(y); 641 if (i) return(True); 642 else return(False); 643 case NOT: 644 if (i) return(False); 645 else return(True); 646 default: /* can't happen */ 647 FATAL("unknown boolean operator %d", n); 648 } 649 return 0; /*NOTREACHED*/ 650 } 651 652 Cell 
*relop(Node **a, int n) /* a[0 < a[1], etc. */ 653 { 654 int i; 655 Cell *x, *y; 656 Awkfloat j; 657 658 x = execute(a[0]); 659 y = execute(a[1]); 660 if (x->tval&NUM && y->tval&NUM) { 661 j = x->fval - y->fval; 662 i = j<0? -1: (j>0? 1: 0); 663 } else { 664 i = strcmp(getsval(x), getsval(y)); 665 } 666 tempfree(x); 667 tempfree(y); 668 switch (n) { 669 case LT: if (i<0) return(True); 670 else return(False); 671 case LE: if (i<=0) return(True); 672 else return(False); 673 case NE: if (i!=0) return(True); 674 else return(False); 675 case EQ: if (i == 0) return(True); 676 else return(False); 677 case GE: if (i>=0) return(True); 678 else return(False); 679 case GT: if (i>0) return(True); 680 else return(False); 681 default: /* can't happen */ 682 FATAL("unknown relational operator %d", n); 683 } 684 return 0; /*NOTREACHED*/ 685 } 686 687 void tfree(Cell *a) /* free a tempcell */ 688 { 689 if (freeable(a)) { 690 DPRINTF( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); 691 xfree(a->sval); 692 } 693 if (a == tmps) 694 FATAL("tempcell list is curdled"); 695 a->cnext = tmps; 696 tmps = a; 697 } 698 699 Cell *gettemp(void) /* get a tempcell */ 700 { int i; 701 Cell *x; 702 703 if (!tmps) { 704 tmps = calloc(100, sizeof(*tmps)); 705 if (!tmps) 706 FATAL("out of space for temporaries"); 707 for (i = 1; i < 100; i++) 708 tmps[i-1].cnext = &tmps[i]; 709 tmps[i-1].cnext = NULL; 710 } 711 x = tmps; 712 tmps = x->cnext; 713 *x = tempcell; 714 return(x); 715 } 716 717 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 718 { 719 Awkfloat val; 720 Cell *x; 721 int m; 722 char *s; 723 724 x = execute(a[0]); 725 val = getfval(x); /* freebsd: defend against super large field numbers */ 726 if ((Awkfloat)INT_MAX < val) 727 FATAL("trying to access out of range field %s", x->nval); 728 m = (int) val; 729 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ 730 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 731 /* BUG: can x->nval ever be null??? 
*/ 732 tempfree(x); 733 x = fieldadr(m); 734 x->ctype = OCELL; /* BUG? why are these needed? */ 735 x->csub = CFLD; 736 return(x); 737 } 738 739 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 740 { 741 int k, m, n; 742 char *s; 743 int temp; 744 Cell *x, *y, *z = NULL; 745 746 x = execute(a[0]); 747 y = execute(a[1]); 748 if (a[2] != NULL) 749 z = execute(a[2]); 750 s = getsval(x); 751 k = strlen(s) + 1; 752 if (k <= 1) { 753 tempfree(x); 754 tempfree(y); 755 if (a[2] != NULL) { 756 tempfree(z); 757 } 758 x = gettemp(); 759 setsval(x, ""); 760 return(x); 761 } 762 m = (int) getfval(y); 763 if (m <= 0) 764 m = 1; 765 else if (m > k) 766 m = k; 767 tempfree(y); 768 if (a[2] != NULL) { 769 n = (int) getfval(z); 770 tempfree(z); 771 } else 772 n = k - 1; 773 if (n < 0) 774 n = 0; 775 else if (n > k - m) 776 n = k - m; 777 DPRINTF( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); 778 y = gettemp(); 779 temp = s[n+m-1]; /* with thanks to John Linderman */ 780 s[n+m-1] = '\0'; 781 setsval(y, s + m - 1); 782 s[n+m-1] = temp; 783 tempfree(x); 784 return(y); 785 } 786 787 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 788 { 789 Cell *x, *y, *z; 790 char *s1, *s2, *p1, *p2, *q; 791 Awkfloat v = 0.0; 792 793 x = execute(a[0]); 794 s1 = getsval(x); 795 y = execute(a[1]); 796 s2 = getsval(y); 797 798 z = gettemp(); 799 for (p1 = s1; *p1 != '\0'; p1++) { 800 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 801 continue; 802 if (*p2 == '\0') { 803 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 804 break; 805 } 806 } 807 tempfree(x); 808 tempfree(y); 809 setfval(z, v); 810 return(z); 811 } 812 813 #define MAXNUMSIZE 50 814 815 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 816 { 817 char *fmt; 818 char *p, *t; 819 const char *os; 820 Cell *x; 821 int flag = 0, n; 822 int fmtwd; /* format width */ 823 int fmtsz = recsize; 824 char *buf = *pbuf; 825 int bufsize = *pbufsize; 826 #define FMTSZ(a) (fmtsz - ((a) - 
fmt)) 827 #define BUFSZ(a) (bufsize - ((a) - buf)) 828 829 static bool first = true; 830 static bool have_a_format = false; 831 832 if (first) { 833 char xbuf[100]; 834 835 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 836 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 837 first = false; 838 } 839 840 os = s; 841 p = buf; 842 if ((fmt = malloc(fmtsz)) == NULL) 843 FATAL("out of memory in format()"); 844 while (*s) { 845 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 846 if (*s != '%') { 847 *p++ = *s++; 848 continue; 849 } 850 if (*(s+1) == '%') { 851 *p++ = '%'; 852 s += 2; 853 continue; 854 } 855 /* have to be real careful in case this is a huge number, eg, %100000d */ 856 fmtwd = atoi(s+1); 857 if (fmtwd < 0) 858 fmtwd = -fmtwd; 859 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 860 for (t = fmt; (*t++ = *s) != '\0'; s++) { 861 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 862 FATAL("format item %.30s... ran format() out of memory", os); 863 /* Ignore size specifiers */ 864 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 865 t--; 866 continue; 867 } 868 if (isalpha((uschar)*s)) 869 break; 870 if (*s == '$') { 871 FATAL("'$' not permitted in awk formats"); 872 } 873 if (*s == '*') { 874 if (a == NULL) { 875 FATAL("not enough args in printf(%s)", os); 876 } 877 x = execute(a); 878 a = a->nnext; 879 snprintf(t - 1, FMTSZ(t - 1), 880 "%d", fmtwd=(int) getfval(x)); 881 if (fmtwd < 0) 882 fmtwd = -fmtwd; 883 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 884 t = fmt + strlen(fmt); 885 tempfree(x); 886 } 887 } 888 *t = '\0'; 889 if (fmtwd < 0) 890 fmtwd = -fmtwd; 891 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 892 switch (*s) { 893 case 'a': case 'A': 894 if (have_a_format) 895 flag = *s; 896 else 897 flag = 'f'; 898 break; 899 case 'f': case 'e': case 'g': case 'E': case 'G': 900 flag = 'f'; 901 break; 902 case 'd': case 'i': case 'o': case 'x': case 'X': case 
'u': 903 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 904 *(t-1) = 'j'; 905 *t = *s; 906 *++t = '\0'; 907 break; 908 case 's': 909 flag = 's'; 910 break; 911 case 'c': 912 flag = 'c'; 913 break; 914 default: 915 WARNING("weird printf conversion %s", fmt); 916 flag = '?'; 917 break; 918 } 919 if (a == NULL) 920 FATAL("not enough args in printf(%s)", os); 921 x = execute(a); 922 a = a->nnext; 923 n = MAXNUMSIZE; 924 if (fmtwd > n) 925 n = fmtwd; 926 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 927 switch (flag) { 928 case '?': /* unknown, so dump it too */ 929 snprintf(p, BUFSZ(p), "%s", fmt); 930 t = getsval(x); 931 n = strlen(t); 932 if (fmtwd > n) 933 n = fmtwd; 934 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 935 p += strlen(p); 936 snprintf(p, BUFSZ(p), "%s", t); 937 break; 938 case 'a': 939 case 'A': 940 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 941 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 942 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 943 case 's': 944 t = getsval(x); 945 n = strlen(t); 946 if (fmtwd > n) 947 n = fmtwd; 948 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 949 FATAL("huge string/format (%d chars) in printf %.30s... 
ran format() out of memory", n, t); 950 snprintf(p, BUFSZ(p), fmt, t); 951 break; 952 case 'c': 953 if (isnum(x)) { 954 if ((int)getfval(x)) 955 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 956 else { 957 *p++ = '\0'; /* explicit null byte */ 958 *p = '\0'; /* next output will start here */ 959 } 960 } else 961 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 962 break; 963 default: 964 FATAL("can't happen: bad conversion %c in format()", flag); 965 } 966 tempfree(x); 967 p += strlen(p); 968 s++; 969 } 970 *p = '\0'; 971 free(fmt); 972 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 973 execute(a); 974 *pbuf = buf; 975 *pbufsize = bufsize; 976 return p - buf; 977 } 978 979 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 980 { 981 Cell *x; 982 Node *y; 983 char *buf; 984 int bufsz=3*recsize; 985 986 if ((buf = malloc(bufsz)) == NULL) 987 FATAL("out of memory in awksprintf"); 988 y = a[0]->nnext; 989 x = execute(a[0]); 990 if (format(&buf, &bufsz, getsval(x), y) == -1) 991 FATAL("sprintf string %.30s... too long. can't happen.", buf); 992 tempfree(x); 993 x = gettemp(); 994 x->sval = buf; 995 x->tval = STR; 996 return(x); 997 } 998 999 Cell *awkprintf(Node **a, int n) /* printf */ 1000 { /* a[0] is list of args, starting with format string */ 1001 /* a[1] is redirection operator, a[2] is redirection file */ 1002 FILE *fp; 1003 Cell *x; 1004 Node *y; 1005 char *buf; 1006 int len; 1007 int bufsz=3*recsize; 1008 1009 if ((buf = malloc(bufsz)) == NULL) 1010 FATAL("out of memory in awkprintf"); 1011 y = a[0]->nnext; 1012 x = execute(a[0]); 1013 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1014 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1015 tempfree(x); 1016 if (a[1] == NULL) { 1017 /* fputs(buf, stdout); */ 1018 fwrite(buf, len, 1, stdout); 1019 if (ferror(stdout)) 1020 FATAL("write error on stdout"); 1021 } else { 1022 fp = redirect(ptoi(a[1]), a[2]); 1023 /* fputs(buf, fp); */ 1024 fwrite(buf, len, 1, fp); 1025 fflush(fp); 1026 if (ferror(fp)) 1027 FATAL("write error on %s", filename(fp)); 1028 } 1029 free(buf); 1030 return(True); 1031 } 1032 1033 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1034 { 1035 Awkfloat i, j = 0; 1036 double v; 1037 Cell *x, *y, *z; 1038 1039 x = execute(a[0]); 1040 i = getfval(x); 1041 tempfree(x); 1042 if (n != UMINUS && n != UPLUS) { 1043 y = execute(a[1]); 1044 j = getfval(y); 1045 tempfree(y); 1046 } 1047 z = gettemp(); 1048 switch (n) { 1049 case ADD: 1050 i += j; 1051 break; 1052 case MINUS: 1053 i -= j; 1054 break; 1055 case MULT: 1056 i *= j; 1057 break; 1058 case DIVIDE: 1059 if (j == 0) 1060 FATAL("division by zero"); 1061 i /= j; 1062 break; 1063 case MOD: 1064 if (j == 0) 1065 FATAL("division by zero in mod"); 1066 modf(i/j, &v); 1067 i = i - j * v; 1068 break; 1069 case UMINUS: 1070 i = -i; 1071 break; 1072 case UPLUS: /* handled by getfval(), above */ 1073 break; 1074 case POWER: 1075 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1076 i = ipow(i, (int) j); 1077 else { 1078 errno = 0; 1079 i = errcheck(pow(i, j), "pow"); 1080 } 1081 break; 1082 default: /* can't happen */ 1083 FATAL("illegal arithmetic operator %d", n); 1084 } 1085 setfval(z, i); 1086 return(z); 1087 } 1088 1089 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1090 { 1091 double v; 1092 1093 if (n <= 0) 1094 return 1; 1095 v = ipow(x, n/2); 1096 if (n % 2 == 0) 1097 return v * v; 1098 else 1099 return x * v * v; 1100 } 1101 1102 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1103 { 1104 Cell *x, *z; 1105 int k; 1106 Awkfloat xf; 1107 1108 x = execute(a[0]); 1109 xf = getfval(x); 1110 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1111 if (n == PREINCR || n == PREDECR) { 1112 setfval(x, xf + k); 1113 return(x); 1114 } 1115 z = gettemp(); 1116 setfval(z, xf); 1117 setfval(x, xf + k); 1118 tempfree(x); 1119 return(z); 1120 } 1121 1122 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1123 { /* this is subtle; don't muck with it. */ 1124 Cell *x, *y; 1125 Awkfloat xf, yf; 1126 double v; 1127 1128 y = execute(a[1]); 1129 x = execute(a[0]); 1130 if (n == ASSIGN) { /* ordinary assignment */ 1131 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1132 ; /* self-assignment: leave alone unless it's a field or NF */ 1133 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1134 setsval(x, getsval(y)); 1135 x->fval = getfval(y); 1136 x->tval |= NUM; 1137 } 1138 else if (isstr(y)) 1139 setsval(x, getsval(y)); 1140 else if (isnum(y)) 1141 setfval(x, getfval(y)); 1142 else 1143 funnyvar(y, "read value of"); 1144 tempfree(y); 1145 return(x); 1146 } 1147 xf = getfval(x); 1148 yf = getfval(y); 1149 switch (n) { 1150 case ADDEQ: 1151 xf += yf; 1152 break; 1153 case SUBEQ: 1154 xf -= yf; 1155 break; 1156 case MULTEQ: 1157 xf *= yf; 1158 break; 1159 case DIVEQ: 1160 if (yf == 0) 1161 FATAL("division by zero in /="); 1162 xf /= yf; 1163 break; 1164 case MODEQ: 1165 if (yf == 0) 1166 FATAL("division by zero in %%="); 1167 modf(xf/yf, &v); 1168 xf = xf - yf * v; 1169 break; 1170 case POWEQ: 1171 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1172 xf = ipow(xf, (int) yf); 1173 else { 1174 errno = 0; 1175 xf = errcheck(pow(xf, yf), "pow"); 1176 } 1177 break; 1178 default: 1179 FATAL("illegal assignment operator %d", n); 1180 break; 1181 } 1182 tempfree(y); 1183 setfval(x, xf); 1184 return(x); 1185 } 1186 1187 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1188 { 1189 Cell *x, *y, *z; 1190 int n1, n2; 1191 char *s = NULL; 1192 int 
ssz = 0; 1193 1194 x = execute(a[0]); 1195 n1 = strlen(getsval(x)); 1196 1197 y = execute(a[1]); 1198 n2 = strlen(getsval(y)); 1199 1200 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat"); 1201 memcpy(s, x->sval, n1); 1202 memcpy(s + n1, y->sval, n2); 1203 s[n1 + n2] = '\0'; 1204 1205 tempfree(x); 1206 tempfree(y); 1207 1208 z = gettemp(); 1209 z->sval = s; 1210 z->tval = STR; 1211 1212 return(z); 1213 } 1214 1215 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1216 { 1217 Cell *x; 1218 1219 if (a[0] == NULL) 1220 x = execute(a[1]); 1221 else { 1222 x = execute(a[0]); 1223 if (istrue(x)) { 1224 tempfree(x); 1225 x = execute(a[1]); 1226 } 1227 } 1228 return x; 1229 } 1230 1231 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1232 { 1233 Cell *x; 1234 int pair; 1235 1236 pair = ptoi(a[3]); 1237 if (pairstack[pair] == 0) { 1238 x = execute(a[0]); 1239 if (istrue(x)) 1240 pairstack[pair] = 1; 1241 tempfree(x); 1242 } 1243 if (pairstack[pair] == 1) { 1244 x = execute(a[1]); 1245 if (istrue(x)) 1246 pairstack[pair] = 0; 1247 tempfree(x); 1248 x = execute(a[2]); 1249 return(x); 1250 } 1251 return(False); 1252 } 1253 1254 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1255 { 1256 Cell *x = NULL, *y, *ap; 1257 const char *s, *origs, *t; 1258 const char *fs = NULL; 1259 char *origfs = NULL; 1260 int sep; 1261 char temp, num[50]; 1262 int n, tempstat, arg3type; 1263 1264 y = execute(a[0]); /* source string */ 1265 origs = s = strdup(getsval(y)); 1266 if (s == NULL) 1267 FATAL("out of space in split"); 1268 arg3type = ptoi(a[3]); 1269 if (a[2] == NULL) /* fs string */ 1270 fs = getsval(fsloc); 1271 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1272 x = execute(a[2]); 1273 fs = origfs = strdup(getsval(x)); 1274 if (fs == NULL) 1275 FATAL("out of space in split"); 1276 tempfree(x); 1277 } else if (arg3type == REGEXPR) 1278 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1279 else 1280 FATAL("illegal type of split"); 1281 sep = 
*fs; 1282 ap = execute(a[1]); /* array name */ 1283 freesymtab(ap); 1284 DPRINTF( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); 1285 /* mark the cell as an array and attach a newly made symbol table */ ap->tval &= ~STR; 1286 ap->tval |= ARR; 1287 ap->sval = (char *) makesymtab(NSYMTAB); 1288 1289 n = 0; 1290 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1291 /* split(s, a, //); have to arrange that it looks like empty sep */ 1292 arg3type = 0; 1293 fs = ""; 1294 sep = 0; 1295 } 1296 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1297 fa *pfa; 1298 if (arg3type == REGEXPR) { /* it's ready already */ 1299 pfa = (fa *) a[2]; 1300 } else { 1301 pfa = makedfa(fs, 1); 1302 } 1303 if (nematch(pfa,s)) { 1304 tempstat = pfa->initstat; 1305 pfa->initstat = 2; 1306 do { 1307 n++; 1308 snprintf(num, sizeof(num), "%d", n); 1309 /* temporarily put a NUL at the start of the match so s is just this field, store it under index n, then restore the byte */ temp = *patbeg; 1310 setptr(patbeg, '\0'); 1311 if (is_number(s)) 1312 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1313 else 1314 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1315 setptr(patbeg, temp); 1316 s = patbeg + patlen; 1317 /* the match reached the end of the string: add an empty final element and stop */ if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1318 n++; 1319 snprintf(num, sizeof(num), "%d", n); 1320 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1321 pfa->initstat = tempstat; 1322 goto spdone; 1323 } 1324 } while (nematch(pfa,s)); 1325 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1326 /* cf gsub and refldbld */ 1327 } 1328 /* whatever remains after the last separator match (or the whole string if nothing matched) is the final element */ n++; 1329 snprintf(num, sizeof(num), "%d", n); 1330 if (is_number(s)) 1331 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1332 else 1333 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1334 spdone: 1335 pfa = NULL; 1336 } else if (sep == ' ') { /* separator starts with a space: fields are runs of characters that are not space, tab or newline */ 1337 for (n = 0; ; ) { 1338 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1339 while (ISWS(*s)) 1340 s++; 1341 if (*s == '\0') 1342 break; 1343 n++; 1344 t = s; 1345 do 1346 s++; 1347 while (*s != '\0' && !ISWS(*s)); 1348 temp = *s; 1349 setptr(s, '\0'); 1350 snprintf(num, sizeof(num), "%d", n); 1351 if
(is_number(t)) 1352 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1353 else 1354 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1355 setptr(s, temp); 1356 if (*s != '\0') 1357 s++; 1358 } 1359 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1360 for (n = 0; *s != '\0'; s++) { 1361 char buf[2]; 1362 n++; 1363 snprintf(num, sizeof(num), "%d", n); 1364 buf[0] = *s; 1365 buf[1] = '\0'; 1366 /* a single digit is stored with a numeric value as well as its string */ if (isdigit((uschar)buf[0])) 1367 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1368 else 1369 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1370 } 1371 } else if (*s != '\0') { /* remaining case: a field ends at the character sep, at a newline, or at the end of the string */ 1372 for (;;) { 1373 n++; 1374 t = s; 1375 while (*s != sep && *s != '\n' && *s != '\0') 1376 s++; 1377 temp = *s; 1378 setptr(s, '\0'); 1379 snprintf(num, sizeof(num), "%d", n); 1380 if (is_number(t)) 1381 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1382 else 1383 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1384 setptr(s, temp); 1385 if (*s++ == '\0') 1386 break; 1387 } 1388 } 1389 /* release the temporaries and both duplicated strings, then return the element count n as a numeric temporary */ tempfree(ap); 1390 tempfree(y); 1391 xfree(origs); 1392 xfree(origfs); 1393 x = gettemp(); 1394 x->tval = NUM; 1395 x->fval = n; 1396 return(x); 1397 } 1398 1399 Cell *condexpr(Node **a, int n) /* a[0] ?
a[1] : a[2] */ 1400 { 1401 Cell *x; 1402 1403 /* the condition cell is released before the chosen branch is executed */ x = execute(a[0]); 1404 if (istrue(x)) { 1405 tempfree(x); 1406 x = execute(a[1]); 1407 } else { 1408 tempfree(x); 1409 x = execute(a[2]); 1410 } 1411 return(x); 1412 } 1413 1414 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1415 { 1416 Cell *x; 1417 1418 x = execute(a[0]); 1419 if (istrue(x)) { 1420 tempfree(x); 1421 x = execute(a[1]); 1422 } else if (a[2] != NULL) { 1423 tempfree(x); 1424 x = execute(a[2]); 1425 } 1426 /* when the condition is false and there is no else part, the condition cell itself is returned */ return(x); 1427 } 1428 1429 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1430 { 1431 Cell *x; 1432 1433 for (;;) { 1434 x = execute(a[0]); 1435 if (!istrue(x)) 1436 return(x); 1437 tempfree(x); 1438 x = execute(a[1]); 1439 /* a break stops here: True is returned in place of the break cell */ if (isbreak(x)) { 1440 x = True; 1441 return(x); 1442 } 1443 /* next, exit and return results are handed to the caller unchanged */ if (isnext(x) || isexit(x) || isret(x)) 1444 return(x); 1445 tempfree(x); 1446 } 1447 } 1448 1449 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1450 { 1451 Cell *x; 1452 1453 for (;;) { 1454 /* the body a[0] is executed before the condition a[1] is first tested */ x = execute(a[0]); 1455 if (isbreak(x)) 1456 return True; 1457 if (isnext(x) || isexit(x) || isret(x)) 1458 return(x); 1459 tempfree(x); 1460 x = execute(a[1]); 1461 if (!istrue(x)) 1462 return(x); 1463 tempfree(x); 1464 } 1465 } 1466 1467 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1468 { 1469 Cell *x; 1470 1471 x = execute(a[0]); 1472 tempfree(x); 1473 for (;;) { 1474 /* with no condition the loop only ends through break, next, exit or return */ if (a[1]!=NULL) { 1475 x = execute(a[1]); 1476 if (!istrue(x)) return(x); 1477 else tempfree(x); 1478 } 1479 x = execute(a[3]); 1480 if (isbreak(x)) /* turn off break */ 1481 return True; 1482 if (isnext(x) || isexit(x) || isret(x)) 1483 return(x); 1484 tempfree(x); 1485 x = execute(a[2]); 1486 tempfree(x); 1487 } 1488 } 1489 1490 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1491 { 1492 Cell *x, *vp, *arrayp, *cp, *ncp; 1493 Array *tp; 1494 int i; 1495 1496 vp = execute(a[0]); 1497 arrayp = execute(a[1]); 1498 /* NOTE(review): vp and arrayp are not tempfree'd on this early return - confirm that is intended */ if (!isarr(arrayp)) { 1499 return True; 1500 } 1501 tp = (Array *) arrayp->sval; 1502
tempfree(arrayp); 1503 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1504 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1505 setsval(vp, cp->nval); 1506 /* the successor is read before the body runs - presumably so the body may remove the current element; confirm */ ncp = cp->cnext; 1507 x = execute(a[2]); 1508 if (isbreak(x)) { 1509 tempfree(vp); 1510 return True; 1511 } 1512 if (isnext(x) || isexit(x) || isret(x)) { 1513 tempfree(vp); 1514 return(x); 1515 } 1516 tempfree(x); 1517 } 1518 } 1519 return True; 1520 } 1521 1522 /* Return a copy of s (made with tostring/tostringN) in which every character has been mapped through fun_c when MB_CUR_MAX is 1, or through fun_wc otherwise. */ static char *nawk_convert(const char *s, int (*fun_c)(int), 1523 wint_t (*fun_wc)(wint_t)) 1524 { 1525 char *buf = NULL; 1526 char *pbuf = NULL; 1527 const char *ps = NULL; 1528 size_t n = 0; 1529 mbstate_t mbs, mbs2; 1530 wchar_t wc; 1531 size_t sz = MB_CUR_MAX; 1532 1533 if (sz == 1) { /* one byte per character: convert the copy in place, byte by byte */ 1534 buf = tostring(s); 1535 1536 for (pbuf = buf; *pbuf; pbuf++) 1537 *pbuf = fun_c((uschar)*pbuf); 1538 1539 return buf; 1540 } else { 1541 /* upper/lower character may be shorter/longer */ 1542 buf = tostringN(s, strlen(s) * sz + 1); 1543 1544 memset(&mbs, 0, sizeof(mbs)); 1545 memset(&mbs2, 0, sizeof(mbs2)); 1546 1547 ps = s; 1548 pbuf = buf; 1549 /* decode one character at a time; the loop stops on 0 (the terminating NUL), (size_t)-1 or (size_t)-2 */ while (n = mbrtowc(&wc, ps, sz, &mbs), 1550 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1551 { 1552 ps += n; 1553 1554 n = wcrtomb(pbuf, fun_wc(wc), &mbs2); 1555 if (n == (size_t)-1) 1556 FATAL("illegal wide character %s", s); 1557 1558 pbuf += n; 1559 } 1560 1561 *pbuf = '\0'; 1562 1563 /* here n is the last mbrtowc result: non-zero means decoding stopped before the end of s */ if (n) 1564 FATAL("illegal byte sequence %s", s); 1565 1566 return buf; 1567 } 1568 } 1569 1570 /* nawk_convert with the upper-case mappings */ static char *nawk_toupper(const char *s) 1571 { 1572 return nawk_convert(s, toupper, towupper); 1573 } 1574 1575 /* nawk_convert with the lower-case mappings */ static char *nawk_tolower(const char *s) 1576 { 1577 return nawk_convert(s, tolower, towlower); 1578 } 1579 1580 Cell *bltin(Node **a, int n) /* builtin functions.
a[0] is type, a[1] is arg list */ 1581 { 1582 Cell *x, *y; 1583 Awkfloat u; 1584 int t, sz; 1585 Awkfloat tmp; 1586 char *buf, *fmt; 1587 Node *nextarg; 1588 FILE *fp; 1589 int status = 0; 1590 time_t tv; 1591 struct tm *tm; 1592 1593 t = ptoi(a[0]); 1594 /* x is the first argument; nextarg points at the next argument not yet consumed */ x = execute(a[1]); 1595 nextarg = a[1]->nnext; 1596 /* most cases leave a numeric result in u for the code after the switch */ switch (t) { 1597 case FLENGTH: 1598 if (isarr(x)) 1599 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1600 else 1601 u = strlen(getsval(x)); 1602 break; 1603 case FLOG: 1604 errno = 0; 1605 u = errcheck(log(getfval(x)), "log"); break; 1606 case FINT: 1607 /* modf stores the integer part in u; its return value (the fraction) is ignored */ modf(getfval(x), &u); break; 1608 case FEXP: 1609 errno = 0; 1610 u = errcheck(exp(getfval(x)), "exp"); break; 1611 case FSQRT: 1612 errno = 0; 1613 u = errcheck(sqrt(getfval(x)), "sqrt"); break; 1614 case FSIN: 1615 u = sin(getfval(x)); break; 1616 case FCOS: 1617 u = cos(getfval(x)); break; 1618 case FATAN: 1619 if (nextarg == NULL) { 1620 WARNING("atan2 requires two arguments; returning 1.0"); 1621 u = 1.0; 1622 } else { 1623 y = execute(a[1]->nnext); 1624 u = atan2(getfval(x), getfval(y)); 1625 tempfree(y); 1626 nextarg = nextarg->nnext; 1627 } 1628 break; 1629 case FCOMPL: 1630 /* the bitwise builtins convert their operands to int before operating */ u = ~((int)getfval(x)); 1631 break; 1632 case FAND: 1633 if (nextarg == 0) { 1634 WARNING("and requires two arguments; returning 0"); 1635 u = 0; 1636 break; 1637 } 1638 y = execute(a[1]->nnext); 1639 u = ((int)getfval(x)) & ((int)getfval(y)); 1640 tempfree(y); 1641 nextarg = nextarg->nnext; 1642 break; 1643 case FFOR: 1644 if (nextarg == 0) { 1645 WARNING("or requires two arguments; returning 0"); 1646 u = 0; 1647 break; 1648 } 1649 y = execute(a[1]->nnext); 1650 u = ((int)getfval(x)) | ((int)getfval(y)); 1651 tempfree(y); 1652 nextarg = nextarg->nnext; 1653 break; 1654 case FXOR: 1655 if (nextarg == 0) { 1656 WARNING("xor requires two arguments; returning 0"); 1657 u = 0; 1658 break; 1659 } 1660 y = execute(a[1]->nnext); 1661 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1662 tempfree(y); 1663 nextarg = nextarg->nnext;
1664 break; 1665 case FLSHIFT: 1666 if (nextarg == 0) { 1667 WARNING("lshift requires two arguments; returning 0"); 1668 u = 0; 1669 break; 1670 } 1671 y = execute(a[1]->nnext); 1672 /* NOTE(review): the shift count is not range-checked here; in C a negative count or one not less than the width of int is undefined - confirm inputs are constrained */ u = ((int)getfval(x)) << ((int)getfval(y)); 1673 tempfree(y); 1674 nextarg = nextarg->nnext; 1675 break; 1676 case FRSHIFT: 1677 if (nextarg == 0) { 1678 WARNING("rshift requires two arguments; returning 0"); 1679 u = 0; 1680 break; 1681 } 1682 y = execute(a[1]->nnext); 1683 u = ((int)getfval(x)) >> ((int)getfval(y)); 1684 tempfree(y); 1685 nextarg = nextarg->nnext; 1686 break; 1687 case FSYSTEM: 1688 fflush(stdout); /* in case something is buffered already */ 1689 status = system(getsval(x)); 1690 u = status; 1691 /* result: the exit status after a normal exit; the signal number plus 256 after a signal, plus another 256 if WCOREDUMP reports a core dump; -1 is passed through when system() itself fails */ if (status != -1) { 1692 if (WIFEXITED(status)) { 1693 u = WEXITSTATUS(status); 1694 } else if (WIFSIGNALED(status)) { 1695 u = WTERMSIG(status) + 256; 1696 #ifdef WCOREDUMP 1697 if (WCOREDUMP(status)) 1698 u += 256; 1699 #endif 1700 } else /* something else?!? */ 1701 u = 0; 1702 } 1703 break; 1704 case FRAND: 1705 /* random() returns numbers in [0..2^31-1] 1706 * in order to get a number in [0, 1), divide it by 2^31 1707 */ 1708 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1709 break; 1710 case FSRAND: 1711 if (isrec(x)) { /* no argument provided */ 1712 u = time(NULL); 1713 tmp = u; 1714 srandom((unsigned int) u); 1715 } else { 1716 u = getfval(x); 1717 tmp = u; 1718 srandom_deterministic((unsigned int) u); 1719 } 1720 /* the previous seed becomes the result and the new seed is remembered in srand_seed */ u = srand_seed; 1721 srand_seed = tmp; 1722 break; 1723 case FTOUPPER: 1724 case FTOLOWER: 1725 if (t == FTOUPPER) 1726 buf = nawk_toupper(getsval(x)); 1727 else 1728 buf = nawk_tolower(getsval(x)); 1729 /* string result: stored into a new temporary and returned from inside the switch instead of going through u */ tempfree(x); 1730 x = gettemp(); 1731 setsval(x, buf); 1732 free(buf); 1733 return x; 1734 case FFLUSH: 1735 if (isrec(x) || strlen(getsval(x)) == 0) { 1736 flush_all(); /* fflush() or fflush("") -> all */ 1737 u = 0; 1738 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1739 u = EOF; 1740 else 1741 u = fflush(fp); 1742 break; 1743 case
FSYSTIME: 1744 u = time((time_t *) 0); 1745 break; 1746 case FSTRFTIME: 1747 /* strftime([format [,timestamp]]) */ 1748 if (nextarg) { 1749 y = execute(nextarg); 1750 nextarg = nextarg->nnext; 1751 tv = (time_t) getfval(y); 1752 tempfree(y); 1753 } else 1754 tv = time((time_t *) 0); 1755 tm = localtime(&tv); 1756 if (tm == NULL) 1757 FATAL("bad time %ld", (long)tv); 1758 1759 if (isrec(x)) { 1760 /* format argument not provided, use default */ 1761 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1762 } else 1763 fmt = tostring(getsval(x)); 1764 1765 sz = 32; 1766 buf = NULL; 1767 /* each pass allocates 2*sz bytes and then doubles sz, so strftime is always given the real buffer size; retry with a bigger buffer while it returns 0 for a non-empty format */ do { 1768 if ((buf = reallocarray(buf, 2, sz)) == NULL) 1769 FATAL("out of memory in strftime"); 1770 sz *= 2; 1771 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1772 1773 /* NOTE(review): x is not tempfree'd before this return, unlike the toupper/tolower case - confirm */ y = gettemp(); 1774 setsval(y, buf); 1775 free(fmt); 1776 free(buf); 1777 1778 return y; 1779 default: /* can't happen */ 1780 FATAL("illegal function type %d", t); 1781 break; 1782 } 1783 /* cases that break out of the switch end up here: release the argument and return u in a new temporary */ tempfree(x); 1784 x = gettemp(); 1785 setfval(x, u); 1786 if (nextarg != NULL) { 1787 WARNING("warning: function has too many arguments"); 1788 /* surplus arguments are still executed; NOTE(review): the cells they return are not released here - confirm */ for ( ; nextarg; nextarg = nextarg->nnext) 1789 execute(nextarg); 1790 } 1791 return(x); 1792 } 1793 1794 Cell *printstat(Node **a, int n) /* print a[0] */ 1795 { 1796 Node *x; 1797 Cell *y; 1798 FILE *fp; 1799 1800 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1801 fp = stdout; 1802 else 1803 fp = redirect(ptoi(a[1]), a[2]); 1804 /* each item is written in turn; the string value of ofsloc goes between items and that of orsloc after the last one */ for (x = a[0]; x != NULL; x = x->nnext) { 1805 y = execute(x); 1806 fputs(getpssval(y), fp); 1807 tempfree(y); 1808 if (x->nnext == NULL) 1809 fputs(getsval(orsloc), fp); 1810 else 1811 fputs(getsval(ofsloc), fp); 1812 } 1813 /* redirected output is flushed after every print; any stream error is fatal */ if (a[1] != NULL) 1814 fflush(fp); 1815 if (ferror(fp)) 1816 FATAL("write error on %s", filename(fp)); 1817 return(True); 1818 } 1819 1820 /* does nothing and returns a null pointer */ Cell *nullproc(Node **a, int n) 1821 { 1822 return 0; 1823 } 1824 1825 1826 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1827 { 1828 FILE *fp; 1829 Cell *x;
1830 char *fname; 1831 1832 /* b is executed to obtain the name; a is passed to openfile as the mode, and failure to open is fatal */ x = execute(b); 1833 fname = getsval(x); 1834 fp = openfile(a, fname, NULL); 1835 if (fp == NULL) 1836 FATAL("can't open file %s", fname); 1837 tempfree(x); 1838 return fp; 1839 } 1840 1841 /* one entry per open stream; files points to an array of nfiles such entries */ struct files { 1842 FILE *fp; 1843 const char *fname; 1844 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1845 } *files; 1846 1847 size_t nfiles; 1848 1849 static void stdinit(void) /* in case stdin, etc., are not constants */ 1850 { 1851 /* allocate FOPEN_MAX zeroed entries and fill slots 0-2 with the three standard streams */ nfiles = FOPEN_MAX; 1852 files = calloc(nfiles, sizeof(*files)); 1853 if (files == NULL) 1854 FATAL("can't allocate file memory for %zu files", nfiles); 1855 files[0].fp = stdin; 1856 files[0].fname = "/dev/stdin"; 1857 files[0].mode = LT; 1858 files[1].fp = stdout; 1859 files[1].fname = "/dev/stdout"; 1860 files[1].mode = GT; 1861 files[2].fp = stderr; 1862 files[2].fname = "/dev/stderr"; 1863 files[2].mode = GT; 1864 } 1865 1866 /* Return the stream for the name us in mode a, reusing a matching table entry if there is one; when pnewflag is non-NULL it is set to say whether a new stream was opened. Returns NULL if the open fails, or if a is FFLUSH and no entry matches. */ FILE *openfile(int a, const char *us, bool *pnewflag) 1867 { 1868 const char *s = us; 1869 size_t i; 1870 int m; 1871 FILE *fp = NULL; 1872 1873 if (*s == '\0') 1874 FATAL("null file name in print or getline"); 1875 /* look for an entry with the same name and a compatible mode: equal modes, APPEND on an entry recorded as GT, or any mode when a is FFLUSH */ for (i = 0; i < nfiles; i++) 1876 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1877 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1878 a == FFLUSH)) { 1879 if (pnewflag) 1880 *pnewflag = false; 1881 return files[i].fp; 1882 } 1883 if (a == FFLUSH) /* didn't find it, so don't create it!
*/ 1884 return NULL; 1885 1886 /* find the first unused slot; if there is none, grow the table by FOPEN_MAX zeroed entries */ for (i = 0; i < nfiles; i++) 1887 if (files[i].fp == NULL) 1888 break; 1889 if (i >= nfiles) { 1890 struct files *nf; 1891 size_t nnf = nfiles + FOPEN_MAX; 1892 nf = reallocarray(files, nnf, sizeof(*nf)); 1893 if (nf == NULL) 1894 FATAL("cannot grow files for %s and %zu files", s, nnf); 1895 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1896 nfiles = nnf; 1897 files = nf; 1898 } 1899 fflush(stdout); /* force a semblance of order */ 1900 /* m is the mode recorded in the table; it differs from a only for APPEND */ m = a; 1901 if (a == GT) { 1902 fp = fopen(s, "w"); 1903 } else if (a == APPEND) { 1904 fp = fopen(s, "a"); 1905 m = GT; /* so can mix > and >> */ 1906 } else if (a == '|') { /* output pipe */ 1907 fp = popen(s, "w"); 1908 } else if (a == LE) { /* input pipe */ 1909 fp = popen(s, "r"); 1910 } else if (a == LT) { /* getline <file */ 1911 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1912 } else /* can't happen */ 1913 FATAL("illegal redirection %d", a); 1914 /* on success record the stream under a copy of its name and set close-on-exec on its descriptor unless it is a standard stream */ if (fp != NULL) { 1915 files[i].fname = tostring(s); 1916 files[i].fp = fp; 1917 files[i].mode = m; 1918 if (pnewflag) 1919 *pnewflag = true; 1920 if (fp != stdin && fp != stdout && fp != stderr) 1921 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1922 } 1923 return fp; 1924 } 1925 1926 /* Return the name recorded for fp in the files table, or "???" when fp is not in it. */ const char *filename(FILE *fp) 1927 { 1928 size_t i; 1929 1930 for (i = 0; i < nfiles; i++) 1931 if (fp == files[i].fp) 1932 return files[i].fname; 1933 return "???"; 1934 } 1935 1936 /* Close every table entry whose name equals the string value of a[0]; the result is a numeric temporary. */ Cell *closefile(Node **a, int n) 1937 { 1938 Cell *x; 1939 size_t i; 1940 bool stat; 1941 1942 x = execute(a[0]); 1943 /* return value unused; presumably called so that x->sval, compared below, holds the string value - confirm */ getsval(x); 1944 /* stat stays true if no entry matches */ stat = true; 1945 for (i = 0; i < nfiles; i++) { 1946 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1947 continue; 1948 if (ferror(files[i].fp)) 1949 FATAL("i/o error occurred on %s", files[i].fname); 1950 /* entries opened with popen (modes '|' and LE) are closed with pclose, the rest with fclose */ if (files[i].mode == '|' || files[i].mode == LE) 1951 stat = pclose(files[i].fp) == -1; 1952 else 1953 stat = fclose(files[i].fp) == EOF; 1954 if (stat) 1955 FATAL("i/o error occurred closing %s",
files[i].fname); 1956 if (i > 2) /* don't do /dev/std... */ 1957 xfree(files[i].fname); 1958 files[i].fname = NULL; /* watch out for ref thru this */ 1959 files[i].fp = NULL; 1960 } 1961 tempfree(x); 1962 x = gettemp(); 1963 /* 0 after a successful close; -1 when stat is still true, i.e. no entry matched (assuming FATAL does not return) */ setfval(x, (Awkfloat) (stat ? -1 : 0)); 1964 return(x); 1965 } 1966 1967 /* Close every stream still recorded in the files table; a pending stream error or a failed close is fatal. */ void closeall(void) 1968 { 1969 size_t i; 1970 bool stat = false; 1971 1972 for (i = 0; i < nfiles; i++) { 1973 if (! files[i].fp) 1974 continue; 1975 if (ferror(files[i].fp)) 1976 FATAL( "i/o error occurred on %s", files[i].fname ); 1977 if (files[i].mode == '|' || files[i].mode == LE) 1978 stat = pclose(files[i].fp) == -1; 1979 else 1980 stat = fclose(files[i].fp) == EOF; 1981 if (stat) 1982 FATAL( "i/o error occurred while closing %s", files[i].fname ); 1983 } 1984 } 1985 1986 /* fflush every stream recorded in the files table; the fflush results are ignored */ static void flush_all(void) 1987 { 1988 size_t i; 1989 1990 for (i = 0; i < nfiles; i++) 1991 if (files[i].fp) 1992 fflush(files[i].fp); 1993 } 1994 1995 void backsub(char **pb_ptr, const char **sptr_ptr); 1996 1997 /* Replace the first match of the regular expression in the target a[3] with the replacement a[2]; returns True if a substitution was made, False otherwise. */ Cell *sub(Node **a, int nnn) /* substitute command */ 1998 { 1999 const char *sptr, *q; 2000 Cell *x, *y, *result; 2001 char *t, *buf, *pb; 2002 fa *pfa; 2003 int bufsz = recsize; 2004 2005 if ((buf = malloc(bufsz)) == NULL) 2006 FATAL("out of memory in sub"); 2007 x = execute(a[3]); /* target string */ 2008 t = getsval(x); 2009 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2010 pfa = (fa *) a[1]; /* regular expression */ 2011 else { 2012 y = execute(a[1]); 2013 pfa = makedfa(getsval(y), 1); 2014 tempfree(y); 2015 } 2016 y = execute(a[2]); /* replacement string */ 2017 result = False; 2018 if (pmatch(pfa, t)) { 2019 /* first copy the part of the target that precedes the match */ sptr = t; 2020 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2021 pb = buf; 2022 while (sptr < patbeg) 2023 *pb++ = *sptr++; 2024 /* then expand the replacement: a backslash is handled by backsub, an unescaped & inserts the matched text, anything else is copied */ sptr = getsval(y); 2025 while (*sptr != '\0') { 2026 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2027 if (*sptr == '\\') { 2028 backsub(&pb, &sptr); 2029 } else if (*sptr == '&') { 2030 sptr++; 2031 adjbuf(&buf,
&bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2032 for (q = patbeg; q < patbeg+patlen; ) 2033 *pb++ = *q++; 2034 } else 2035 *pb++ = *sptr++; 2036 } 2037 *pb = '\0'; 2038 if (pb > buf + bufsz) 2039 FATAL("sub result1 %.30s too big; can't happen", buf); 2040 /* finally append whatever follows the match, if anything does */ sptr = patbeg + patlen; 2041 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2042 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2043 while ((*pb++ = *sptr++) != '\0') 2044 continue; 2045 } 2046 if (pb > buf + bufsz) 2047 FATAL("sub result2 %.30s too big; can't happen", buf); 2048 setsval(x, buf); /* BUG: should be able to avoid copy */ 2049 result = True; 2050 } 2051 tempfree(x); 2052 tempfree(y); 2053 free(buf); 2054 return result; 2055 } 2056 2057 /* Replace every match of the regular expression in the target a[3] with the replacement a[2]; returns the number of replacements as a numeric temporary. */ Cell *gsub(Node **a, int nnn) /* global substitute */ 2058 { 2059 Cell *x, *y; 2060 char *rptr, *pb; 2061 const char *q, *t, *sptr; 2062 char *buf; 2063 fa *pfa; 2064 int mflag, tempstat, num; 2065 int bufsz = recsize; 2066 2067 if ((buf = malloc(bufsz)) == NULL) 2068 FATAL("out of memory in gsub"); 2069 mflag = 0; /* if mflag == 0, can replace empty string */ 2070 /* num counts the replacements made */ num = 0; 2071 x = execute(a[3]); /* target string */ 2072 t = getsval(x); 2073 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2074 pfa = (fa *) a[1]; /* regular expression */ 2075 else { 2076 y = execute(a[1]); 2077 pfa = makedfa(getsval(y), 1); 2078 tempfree(y); 2079 } 2080 y = execute(a[2]); /* replacement string */ 2081 if (pmatch(pfa, t)) { 2082 /* initstat is saved, forced to 2 for the duration of the scan, and restored once the result has been stored */ tempstat = pfa->initstat; 2083 pfa->initstat = 2; 2084 pb = buf; 2085 rptr = getsval(y); 2086 do { 2087 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2088 if (mflag == 0) { /* can replace empty */ 2089 num++; 2090 sptr = rptr; 2091 while (*sptr != '\0') { 2092 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2093 if (*sptr == '\\') { 2094 backsub(&pb, &sptr); 2095 } else if (*sptr == '&') { 2096 sptr++; 2097 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2098 for (q = patbeg; q <
patbeg+patlen; ) 2099 *pb++ = *q++; 2100 } else 2101 *pb++ = *sptr++; 2102 } 2103 } 2104 if (*t == '\0') /* at end */ 2105 goto done; 2106 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2107 /* copy one target character and step past it so the next match attempt starts further on */ *pb++ = *t++; 2108 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2109 FATAL("gsub result0 %.30s too big; can't happen", buf); 2110 mflag = 0; 2111 } 2112 else { /* matched nonempty string */ 2113 num++; 2114 /* copy the text between the previous position and the match, then the expanded replacement */ sptr = t; 2115 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2116 while (sptr < patbeg) 2117 *pb++ = *sptr++; 2118 sptr = rptr; 2119 while (*sptr != '\0') { 2120 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2121 if (*sptr == '\\') { 2122 backsub(&pb, &sptr); 2123 } else if (*sptr == '&') { 2124 sptr++; 2125 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2126 for (q = patbeg; q < patbeg+patlen; ) 2127 *pb++ = *q++; 2128 } else 2129 *pb++ = *sptr++; 2130 } 2131 t = patbeg + patlen; 2132 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2133 goto done; 2134 if (pb > buf + bufsz) 2135 FATAL("gsub result1 %.30s too big; can't happen", buf); 2136 /* with mflag set, an empty match found next is not replaced */ mflag = 1; 2137 } 2138 } while (pmatch(pfa,t)); 2139 /* no further match: copy the rest of the target, including its terminating NUL */ sptr = t; 2140 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2141 while ((*pb++ = *sptr++) != '\0') 2142 continue; 2143 done: if (pb < buf + bufsz) 2144 *pb = '\0'; 2145 else if (*(pb-1) != '\0') 2146 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2147 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2148 pfa->initstat = tempstat; 2149 } 2150 /* the target cell now holds the new string; the value returned is the replacement count */ tempfree(x); 2151 tempfree(y); 2152 x = gettemp(); 2153 x->tval = NUM; 2154 x->fval = num; 2155 free(buf); 2156 return(x); 2157 } 2158 2159 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2160 /* XXX incomplete - doesn't support backreferences \0 ...
\9 */ 2161 { 2162 Cell *x, *y, *res, *h; 2163 char *rptr; 2164 const char *sptr; 2165 char *buf, *pb; 2166 const char *t, *q; 2167 fa *pfa; 2168 int mflag, tempstat, num, whichm; 2169 int bufsz = recsize; 2170 2171 if ((buf = malloc(bufsz)) == NULL) 2172 FATAL("out of memory in gensub"); 2173 mflag = 0; /* if mflag == 0, can replace empty string */ 2174 num = 0; 2175 /* the source cell x is only read; the substituted text is stored in the separate cell res, which is what the function returns */ x = execute(a[4]); /* source string */ 2176 t = getsval(x); 2177 res = copycell(x); /* target string - initially copy of source */ 2178 res->csub = CTEMP; /* result values are temporary */ 2179 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2180 pfa = (fa *) a[1]; /* regular expression */ 2181 else { 2182 y = execute(a[1]); 2183 pfa = makedfa(getsval(y), 1); 2184 tempfree(y); 2185 } 2186 y = execute(a[2]); /* replacement string */ 2187 h = execute(a[3]); /* which matches should be replaced */ 2188 sptr = getsval(h); 2189 /* whichm is -1 to replace every match, otherwise the zero-based index of the single match to replace */ if (sptr[0] == 'g' || sptr[0] == 'G') 2190 whichm = -1; 2191 else { 2192 /* 2193 * The specified number is index of replacement, starting 2194 * from 1. GNU awk treats index lower than 0 same as 2195 * 1, we do same for compatibility. 2196 */ 2197 whichm = (int) getfval(h) - 1; 2198 if (whichm < 0) 2199 whichm = 0; 2200 } 2201 tempfree(h); 2202 2203 if (pmatch(pfa, t)) { 2204 char *sl; 2205 2206 tempstat = pfa->initstat; 2207 pfa->initstat = 2; 2208 pb = buf; 2209 rptr = getsval(y); 2210 /* 2211 * XXX if there are any backreferences in subst string, 2212 * complain now.
2213 */ 2214 /* scan the replacement for a backslash followed by a digit; finding one is fatal */ for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2215 if (strchr("0123456789", sl[1])) { 2216 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2217 } 2218 } 2219 2220 do { 2221 /* a single occurrence was requested and this is not it: count it, copy it through unchanged, and let continue jump to the loop condition for the next match */ if (whichm >= 0 && whichm != num) { 2222 num++; 2223 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2224 2225 /* copy the part of string up to and including 2226 * match to output buffer */ 2227 while (t < patbeg + patlen) 2228 *pb++ = *t++; 2229 continue; 2230 } 2231 2232 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2233 if (mflag == 0) { /* can replace empty */ 2234 num++; 2235 sptr = rptr; 2236 while (*sptr != 0) { 2237 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2238 if (*sptr == '\\') { 2239 backsub(&pb, &sptr); 2240 } else if (*sptr == '&') { 2241 sptr++; 2242 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2243 for (q = patbeg; q < patbeg+patlen; ) 2244 *pb++ = *q++; 2245 } else 2246 *pb++ = *sptr++; 2247 } 2248 } 2249 if (*t == 0) /* at end */ 2250 goto done; 2251 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2252 /* copy one source character and step past it so the next match attempt starts further on */ *pb++ = *t++; 2253 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2254 FATAL("gensub result0 %.30s too big; can't happen", buf); 2255 mflag = 0; 2256 } 2257 else { /* matched nonempty string */ 2258 num++; 2259 /* copy the text before the match, then the expanded replacement */ sptr = t; 2260 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2261 while (sptr < patbeg) 2262 *pb++ = *sptr++; 2263 sptr = rptr; 2264 while (*sptr != 0) { 2265 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2266 if (*sptr == '\\') { 2267 backsub(&pb, &sptr); 2268 } else if (*sptr == '&') { 2269 sptr++; 2270 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2271 for (q = patbeg; q < patbeg+patlen; ) 2272 *pb++ = *q++; 2273 } else 2274 *pb++ = *sptr++; 2275 } 2276 t = patbeg + patlen; 2277 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2278 goto done; 2279 if (pb > buf + bufsz) 2280
FATAL("gensub result1 %.30s too big; can't happen", buf); 2281 mflag = 1; 2282 } 2283 } while (pmatch(pfa,t)); 2284 /* no further match: copy the rest of the source, including its terminating NUL */ sptr = t; 2285 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2286 while ((*pb++ = *sptr++) != 0) 2287 ; 2288 done: if (pb > buf + bufsz) 2289 FATAL("gensub result2 %.30s too big; can't happen", buf); 2290 *pb = '\0'; 2291 /* the new string goes into the result cell res; the source cell x is not modified here */ setsval(res, buf); 2292 pfa->initstat = tempstat; 2293 } 2294 tempfree(x); 2295 tempfree(y); 2296 free(buf); 2297 return(res); 2298 } 2299 2300 /* Translate one backslash sequence of a replacement string: reads at *sptr_ptr, writes at *pb_ptr, and stores the advanced positions back through both pointers. */ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2301 { /* sptr[0] == '\\' */ 2302 char *pb = *pb_ptr; 2303 const char *sptr = *sptr_ptr; 2304 2305 if (sptr[1] == '\\') { 2306 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2307 *pb++ = '\\'; 2308 *pb++ = '&'; 2309 sptr += 4; 2310 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2311 *pb++ = '\\'; 2312 /* sptr is left pointing at the &, so the caller's own & handling inserts the matched text */ sptr += 2; 2313 } else if (do_posix) { /* \\x -> \x */ 2314 sptr++; 2315 *pb++ = *sptr++; 2316 } else { /* \\x -> \\x */ 2317 *pb++ = *sptr++; 2318 *pb++ = *sptr++; 2319 } 2320 } else if (sptr[1] == '&') { /* literal & */ 2321 sptr++; 2322 *pb++ = *sptr++; 2323 } else /* literal \ */ 2324 *pb++ = *sptr++; 2325 2326 /* hand the advanced output and input positions back to the caller */ *pb_ptr = pb; 2327 *sptr_ptr = sptr; 2328 } 2329