1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 
23 ****************************************************************/ 24 25 #if HAVE_NBTOOL_CONFIG_H 26 #include "nbtool_config.h" 27 #endif 28 29 #define DEBUG 30 #include <stdio.h> 31 #include <ctype.h> 32 #include <errno.h> 33 #include <wctype.h> 34 #include <fcntl.h> 35 #include <setjmp.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <string.h> 39 #include <stdlib.h> 40 #include <time.h> 41 #include <sys/types.h> 42 #include <sys/wait.h> 43 #include "awk.h" 44 #include "awkgram.h" 45 46 47 static void stdinit(void); 48 static void flush_all(void); 49 static char *wide_char_to_byte_str(int rune, size_t *outlen); 50 51 #if 1 52 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 53 #else 54 void tempfree(Cell *p) { 55 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 56 WARNING("bad csub %d in Cell %d %s", 57 p->csub, p->ctype, p->sval); 58 } 59 if (istemp(p)) 60 tfree(p); 61 } 62 #endif 63 64 /* do we really need these? */ 65 /* #ifdef _NFILE */ 66 /* #ifndef FOPEN_MAX */ 67 /* #define FOPEN_MAX _NFILE */ 68 /* #endif */ 69 /* #endif */ 70 /* */ 71 /* #ifndef FOPEN_MAX */ 72 /* #define FOPEN_MAX 40 */ /* max number of open files */ 73 /* #endif */ 74 /* */ 75 /* #ifndef RAND_MAX */ 76 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 77 /* #endif */ 78 79 jmp_buf env; 80 extern int pairstack[]; 81 extern Awkfloat srand_seed; 82 83 Node *winner = NULL; /* root of parse tree */ 84 Cell *tmps; /* free temporary cells for execution */ 85 86 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 87 Cell *True = &truecell; 88 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *False = &falsecell; 90 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jbreak = &breakcell; 92 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jcont = &contcell; 94 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell 
*jnext = &nextcell; 96 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jnextfile = &nextfilecell; 98 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 99 Cell *jexit = &exitcell; 100 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 101 Cell *jret = &retcell; 102 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 103 104 Node *curnode = NULL; /* the node being executed, for debugging */ 105 106 /* buffer memory management */ 107 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 108 const char *whatrtn) 109 /* pbuf: address of pointer to buffer being managed 110 * psiz: address of buffer size variable 111 * minlen: minimum length of buffer needed 112 * quantum: buffer size quantum 113 * pbptr: address of movable pointer into buffer, or 0 if none 114 * whatrtn: name of the calling routine if failure should cause fatal error 115 * 116 * return 0 for realloc failure, !=0 for success 117 */ 118 { 119 if (minlen > *psiz) { 120 char *tbuf; 121 int rminlen = quantum ? minlen % quantum : 0; 122 int boff = pbptr ? 
*pbptr - *pbuf : 0; 123 /* round up to next multiple of quantum */ 124 if (rminlen) 125 minlen += quantum - rminlen; 126 tbuf = (char *) realloc(*pbuf, minlen); 127 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 128 if (tbuf == NULL) { 129 if (whatrtn) 130 FATAL("out of memory in %s", whatrtn); 131 return 0; 132 } 133 *pbuf = tbuf; 134 *psiz = minlen; 135 if (pbptr) 136 *pbptr = tbuf + boff; 137 } 138 return 1; 139 } 140 141 void run(Node *a) /* execution of parse tree starts here */ 142 { 143 144 stdinit(); 145 execute(a); 146 closeall(); 147 } 148 149 Cell *execute(Node *u) /* execute a node of the parse tree */ 150 { 151 Cell *(*proc)(Node **, int); 152 Cell *x; 153 Node *a; 154 155 if (u == NULL) 156 return(True); 157 for (a = u; ; a = a->nnext) { 158 curnode = a; 159 if (isvalue(a)) { 160 x = (Cell *) (a->narg[0]); 161 if (isfld(x) && !donefld) 162 fldbld(); 163 else if (isrec(x) && !donerec) 164 recbld(); 165 return(x); 166 } 167 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 168 FATAL("illegal statement"); 169 proc = proctab[a->nobj-FIRSTTOKEN]; 170 x = (*proc)(a->narg, a->nobj); 171 if (isfld(x) && !donefld) 172 fldbld(); 173 else if (isrec(x) && !donerec) 174 recbld(); 175 if (isexpr(a)) 176 return(x); 177 if (isjump(x)) 178 return(x); 179 if (a->nnext == NULL) 180 return(x); 181 tempfree(x); 182 } 183 } 184 185 186 Cell *program(Node **a, int n) /* execute an awk program */ 187 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 188 Cell *x; 189 190 if (setjmp(env) != 0) 191 goto ex; 192 if (a[0]) { /* BEGIN */ 193 x = execute(a[0]); 194 if (isexit(x)) 195 return(True); 196 if (isjump(x)) 197 FATAL("illegal break, continue, next or nextfile from BEGIN"); 198 tempfree(x); 199 } 200 if (a[1] || a[2]) 201 while (getrec(&record, &recsize, true) > 0) { 202 x = execute(a[1]); 203 if (isexit(x)) 204 break; 205 tempfree(x); 206 } 207 ex: 208 if (setjmp(env) != 0) /* handles exit within END */ 
209 goto ex1; 210 if (a[2]) { /* END */ 211 x = execute(a[2]); 212 if (isbreak(x) || isnext(x) || iscont(x)) 213 FATAL("illegal break, continue, next or nextfile from END"); 214 tempfree(x); 215 } 216 ex1: 217 return(True); 218 } 219 220 struct Frame { /* stack frame for awk function calls */ 221 int nargs; /* number of arguments in this call */ 222 Cell *fcncell; /* pointer to Cell for function */ 223 Cell **args; /* pointer to array of arguments after execute */ 224 Cell *retval; /* return value */ 225 }; 226 227 #define NARGS 50 /* max args in a call */ 228 229 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 230 int nframe = 0; /* number of frames allocated */ 231 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 232 233 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 234 { 235 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 236 int i, ncall, ndef; 237 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 238 Node *x; 239 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 240 Cell *y, *z, *fcn; 241 char *s; 242 243 fcn = execute(a[0]); /* the function itself */ 244 s = fcn->nval; 245 if (!isfcn(fcn)) 246 FATAL("calling undefined function %s", s); 247 if (frame == NULL) { 248 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 249 if (frame == NULL) 250 FATAL("out of space for stack frames calling %s", s); 251 } 252 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 253 ncall++; 254 ndef = (int) fcn->fval; /* args in defn */ 255 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 256 if (ncall > ndef) 257 WARNING("function %s called with %d args, uses only %d", 258 s, ncall, ndef); 259 if (ncall + ndef > NARGS) 260 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 261 for (i = 0, x = a[1]; x != NULL; i++, x 
= x->nnext) { /* get call args */ 262 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 263 y = execute(x); 264 oargs[i] = y; 265 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 266 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 267 if (isfcn(y)) 268 FATAL("can't use function %s as argument in %s", y->nval, s); 269 if (isarr(y)) 270 args[i] = y; /* arrays by ref */ 271 else 272 args[i] = copycell(y); 273 tempfree(y); 274 } 275 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 276 args[i] = gettemp(); 277 *args[i] = newcopycell; 278 } 279 frp++; /* now ok to up frame */ 280 if (frp >= frame + nframe) { 281 int dfp = frp - frame; /* old index */ 282 frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); 283 if (frame == NULL) 284 FATAL("out of space for stack frames in %s", s); 285 frp = frame + dfp; 286 } 287 frp->fcncell = fcn; 288 frp->args = args; 289 frp->nargs = ndef; /* number defined with (excess are locals) */ 290 frp->retval = gettemp(); 291 292 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 293 y = execute((Node *)(fcn->sval)); /* execute body */ 294 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 295 296 for (i = 0; i < ndef; i++) { 297 Cell *t = frp->args[i]; 298 if (isarr(t)) { 299 if (t->csub == CCOPY) { 300 if (i >= ncall) { 301 freesymtab(t); 302 t->csub = CTEMP; 303 tempfree(t); 304 } else { 305 oargs[i]->tval = t->tval; 306 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 307 oargs[i]->sval = t->sval; 308 tempfree(t); 309 } 310 } 311 } else if (t != y) { /* kludge to prevent freeing twice */ 312 t->csub = CTEMP; 313 tempfree(t); 314 } else if (t == y && t->csub == CCOPY) { 315 t->csub = CTEMP; 316 tempfree(t); 317 freed = 1; 318 } 319 } 320 tempfree(fcn); 321 if (isexit(y) || isnext(y)) 322 return y; 323 if (freed == 0) { 324 tempfree(y); /* don't free twice! 
*/ 325 } 326 z = frp->retval; /* return value */ 327 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 328 frp--; 329 return(z); 330 } 331 332 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 333 { 334 Cell *y; 335 336 /* copy is not constant or field */ 337 338 y = gettemp(); 339 y->tval = x->tval & ~(CON|FLD|REC); 340 y->csub = CCOPY; /* prevents freeing until call is over */ 341 y->nval = x->nval; /* BUG? */ 342 if (isstr(x) /* || x->ctype == OCELL */) { 343 y->sval = tostring(x->sval); 344 y->tval &= ~DONTFREE; 345 } else 346 y->tval |= DONTFREE; 347 y->fval = x->fval; 348 return y; 349 } 350 351 Cell *arg(Node **a, int n) /* nth argument of a function */ 352 { 353 354 n = ptoi(a[0]); /* argument number, counting from 0 */ 355 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 356 if (n+1 > frp->nargs) 357 FATAL("argument #%d of function %s was not supplied", 358 n+1, frp->fcncell->nval); 359 return frp->args[n]; 360 } 361 362 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 363 { 364 Cell *y; 365 366 switch (n) { 367 case EXIT: 368 if (a[0] != NULL) { 369 y = execute(a[0]); 370 errorflag = (int) getfval(y); 371 tempfree(y); 372 } 373 longjmp(env, 1); 374 case RETURN: 375 if (a[0] != NULL) { 376 y = execute(a[0]); 377 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 378 setsval(frp->retval, getsval(y)); 379 frp->retval->fval = getfval(y); 380 frp->retval->tval |= NUM; 381 } 382 else if (y->tval & STR) 383 setsval(frp->retval, getsval(y)); 384 else if (y->tval & NUM) 385 setfval(frp->retval, getfval(y)); 386 else /* can't happen */ 387 FATAL("bad type variable %d", y->tval); 388 tempfree(y); 389 } 390 return(jret); 391 case NEXT: 392 return(jnext); 393 case NEXTFILE: 394 nextfile(); 395 return(jnextfile); 396 case BREAK: 397 return(jbreak); 398 case CONTINUE: 399 return(jcont); 400 default: /* can't happen */ 401 FATAL("illegal jump type %d", n); 402 } 403 return 0; /* not reached */ 404 } 405 406 Cell 
*awkgetline(Node **a, int n) /* get next line from specific input */ 407 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 408 Cell *r, *x; 409 extern Cell **fldtab; 410 FILE *fp; 411 char *buf; 412 int bufsize = recsize; 413 int mode; 414 bool newflag; 415 416 if ((buf = (char *) malloc(bufsize)) == NULL) 417 FATAL("out of memory in getline"); 418 419 fflush(stdout); /* in case someone is waiting for a prompt */ 420 r = gettemp(); 421 if (a[1] != NULL) { /* getline < file */ 422 x = execute(a[2]); /* filename */ 423 mode = ptoi(a[1]); 424 if (mode == '|') /* input pipe */ 425 mode = LE; /* arbitrary flag */ 426 fp = openfile(mode, getsval(x), &newflag); 427 tempfree(x); 428 if (fp == NULL) 429 n = -1; 430 else 431 n = readrec(&buf, &bufsize, fp, newflag); 432 if (n <= 0) { 433 ; 434 } else if (a[0] != NULL) { /* getline var <file */ 435 x = execute(a[0]); 436 setsval(x, buf); 437 check_number(x); 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 check_number(fldtab[0]); 442 } 443 } else { /* bare getline; use current input */ 444 if (a[0] == NULL) /* getline */ 445 n = getrec(&record, &recsize, true); 446 else { /* getline var */ 447 n = getrec(&buf, &bufsize, false); 448 if (n > 0) { 449 x = execute(a[0]); 450 setsval(x, buf); 451 check_number(x); 452 tempfree(x); 453 } 454 } 455 } 456 setfval(r, (Awkfloat) n); 457 free(buf); 458 return r; 459 } 460 461 Cell *getnf(Node **a, int n) /* get NF */ 462 { 463 if (!donefld) 464 fldbld(); 465 return (Cell *) a[0]; 466 } 467 468 static char * 469 makearraystring(Node *p, const char *func) 470 { 471 char *buf; 472 int bufsz = recsize; 473 size_t blen; 474 475 if ((buf = (char *) malloc(bufsz)) == NULL) { 476 FATAL("%s: out of memory", func); 477 } 478 479 blen = 0; 480 buf[blen] = '\0'; 481 482 for (; p; p = p->nnext) { 483 Cell *x = execute(p); /* expr */ 484 char *s = getsval(x); 485 size_t seplen = strlen(getsval(subseploc)); 486 size_t nsub = p->nnext ? 
seplen : 0; 487 size_t slen = strlen(s); 488 size_t tlen = blen + slen + nsub; 489 490 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 491 FATAL("%s: out of memory %s[%s...]", 492 func, x->nval, buf); 493 } 494 memcpy(buf + blen, s, slen); 495 if (nsub) { 496 memcpy(buf + blen + slen, *SUBSEP, nsub); 497 } 498 buf[tlen] = '\0'; 499 blen = tlen; 500 tempfree(x); 501 } 502 return buf; 503 } 504 505 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 506 { 507 Cell *x, *z; 508 char *buf; 509 510 x = execute(a[0]); /* Cell* for symbol table */ 511 buf = makearraystring(a[1], __func__); 512 if (!isarr(x)) { 513 DPRINTF("making %s into an array\n", NN(x->nval)); 514 if (freeable(x)) 515 xfree(x->sval); 516 x->tval &= ~(STR|NUM|DONTFREE); 517 x->tval |= ARR; 518 x->sval = (char *) makesymtab(NSYMTAB); 519 } 520 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 521 z->ctype = OCELL; 522 z->csub = CVAR; 523 tempfree(x); 524 free(buf); 525 return(z); 526 } 527 528 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 529 { 530 Cell *x; 531 532 x = execute(a[0]); /* Cell* for symbol table */ 533 if (x == symtabloc) { 534 FATAL("cannot delete SYMTAB or its elements"); 535 } 536 if (!isarr(x)) 537 return True; 538 if (a[1] == NULL) { /* delete the elements, not the table */ 539 freesymtab(x); 540 x->tval &= ~STR; 541 x->tval |= ARR; 542 x->sval = (char *) makesymtab(NSYMTAB); 543 } else { 544 char *buf = makearraystring(a[1], __func__); 545 freeelem(x, buf); 546 free(buf); 547 } 548 tempfree(x); 549 return True; 550 } 551 552 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 553 { 554 Cell *ap, *k; 555 char *buf; 556 557 ap = execute(a[1]); /* array name */ 558 if (!isarr(ap)) { 559 DPRINTF("making %s into an array\n", ap->nval); 560 if (freeable(ap)) 561 xfree(ap->sval); 562 ap->tval &= ~(STR|NUM|DONTFREE); 563 ap->tval |= ARR; 564 ap->sval = (char *) makesymtab(NSYMTAB); 565 } 566 buf 
= makearraystring(a[0], __func__); 567 k = lookup(buf, (Array *) ap->sval); 568 tempfree(ap); 569 free(buf); 570 if (k == NULL) 571 return(False); 572 else 573 return(True); 574 } 575 576 577 /* ======== utf-8 code ========== */ 578 579 /* 580 * Awk strings can contain ascii, random 8-bit items (eg Latin-1), 581 * or utf-8. u8_isutf tests whether a string starts with a valid 582 * utf-8 sequence, and returns 0 if not (e.g., high bit set). 583 * u8_nextlen returns length of next valid sequence, which is 584 * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf. 585 * u8_strlen returns length of string in valid utf-8 sequences 586 * and/or high-bit bytes. Conversion functions go between byte 587 * number and character number. 588 * 589 * In theory, this behaves the same as before for non-utf8 bytes. 590 * 591 * Limited checking! This is a potential security hole. 592 */ 593 594 /* is s the beginning of a valid utf-8 string? */ 595 /* return length 1..4 if yes, 0 if no */ 596 static int u8_isutf(const char *s) 597 { 598 int n, ret; 599 unsigned char c; 600 601 c = s[0]; 602 if (c < 128 || awk_mb_cur_max == 1) 603 return 1; /* what if it's 0? */ 604 605 n = strlen(s); 606 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 607 ret = 2; /* 110xxxxx 10xxxxxx */ 608 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 609 && (s[2] & 0xC0) == 0x80) { 610 ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */ 611 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 612 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 613 ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 614 } else { 615 ret = 0; 616 } 617 return ret; 618 } 619 620 /* Convert (prefix of) utf8 string to utf-32 rune. */ 621 /* Sets *rune to the value, returns the length. */ 622 /* No error checking: watch out. 
*/ 623 int u8_rune(int *rune, const char *s) 624 { 625 int n, ret; 626 unsigned char c; 627 628 c = s[0]; 629 if (c < 128 || awk_mb_cur_max == 1) { 630 *rune = c; 631 return 1; 632 } 633 634 n = strlen(s); 635 if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) { 636 *rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */ 637 ret = 2; 638 } else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80 639 && (s[2] & 0xC0) == 0x80) { 640 *rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); 641 /* 1110xxxx 10xxxxxx 10xxxxxx */ 642 ret = 3; 643 } else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80 644 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) { 645 *rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); 646 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 647 ret = 4; 648 } else { 649 *rune = c; 650 ret = 1; 651 } 652 return ret; /* returns one byte if sequence doesn't look like utf */ 653 } 654 655 /* return length of next sequence: 1 for ascii or random, 2..4 for valid utf8 */ 656 int u8_nextlen(const char *s) 657 { 658 int len; 659 660 len = u8_isutf(s); 661 if (len == 0) 662 len = 1; 663 return len; 664 } 665 666 /* return number of utf characters or single non-utf bytes */ 667 static int u8_strlen(const char *s) 668 { 669 int i, len, n, totlen; 670 unsigned char c; 671 672 n = strlen(s); 673 totlen = 0; 674 for (i = 0; i < n; i += len) { 675 c = s[i]; 676 if (c < 128 || awk_mb_cur_max == 1) { 677 len = 1; 678 } else { 679 len = u8_nextlen(&s[i]); 680 } 681 totlen++; 682 if (i > n) 683 FATAL("bad utf count [%s] n=%d i=%d\n", s, n, i); 684 } 685 return totlen; 686 } 687 688 /* convert utf-8 char number in a string to its byte offset */ 689 static int u8_char2byte(const char *s, int charnum) 690 { 691 int n; 692 int bytenum = 0; 693 694 while (charnum > 0) { 695 n = u8_nextlen(s); 696 s += n; 697 bytenum += n; 698 charnum--; 699 } 700 return bytenum; 701 } 702 703 /* convert 
byte offset in s to utf-8 char number that starts there */ 704 static int u8_byte2char(const char *s, int bytenum) 705 { 706 int i, len, b; 707 int charnum = 0; /* BUG: what origin? */ 708 /* should be 0 to match start==0 which means no match */ 709 710 b = strlen(s); 711 if (bytenum > b) { 712 return -1; /* ??? */ 713 } 714 for (i = 0; i <= bytenum; i += len) { 715 len = u8_nextlen(s+i); 716 charnum++; 717 } 718 return charnum; 719 } 720 721 /* runetochar() adapted from rune.c in the Plan 9 distribution */ 722 723 enum 724 { 725 Runeerror = 128, /* from somewhere else */ 726 Runemax = 0x10FFFF, 727 728 Bit1 = 7, 729 Bitx = 6, 730 Bit2 = 5, 731 Bit3 = 4, 732 Bit4 = 3, 733 Bit5 = 2, 734 735 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ 736 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ 737 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ 738 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ 739 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ 740 T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ 741 742 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ 743 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ 744 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ 745 Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ 746 747 Maskx = (1<<Bitx)-1, /* 0011 1111 */ 748 Testx = Maskx ^ 0xFF, /* 1100 0000 */ 749 750 }; 751 752 int runetochar(char *str, int c) 753 { 754 /* one character sequence 00000-0007F => 00-7F */ 755 if (c <= Rune1) { 756 str[0] = c; 757 return 1; 758 } 759 760 /* two character sequence 00080-007FF => T2 Tx */ 761 if (c <= Rune2) { 762 str[0] = T2 | (c >> 1*Bitx); 763 str[1] = Tx | (c & Maskx); 764 return 2; 765 } 766 767 /* three character sequence 00800-0FFFF => T3 Tx Tx */ 768 if (c > Runemax) 769 c = Runeerror; 770 if (c <= Rune3) { 771 str[0] = T3 | (c >> 2*Bitx); 772 str[1] = Tx | ((c >> 1*Bitx) & Maskx); 773 str[2] = Tx | (c & Maskx); 774 return 3; 775 } 776 777 /* four character sequence 
010000-1FFFFF => T4 Tx Tx Tx */ 778 str[0] = T4 | (c >> 3*Bitx); 779 str[1] = Tx | ((c >> 2*Bitx) & Maskx); 780 str[2] = Tx | ((c >> 1*Bitx) & Maskx); 781 str[3] = Tx | (c & Maskx); 782 return 4; 783 } 784 785 786 /* ========== end of utf8 code =========== */ 787 788 789 790 Cell *matchop(Node **a, int n) /* ~ and match() */ 791 { 792 Cell *x, *y, *z; 793 char *s, *t; 794 int i; 795 int cstart, cpatlen, len; 796 fa *pfa; 797 int (*mf)(fa *, const char *) = match, mode = 0; 798 799 if (n == MATCHFCN) { 800 mf = pmatch; 801 mode = 1; 802 } 803 x = execute(a[1]); /* a[1] = target text */ 804 s = getsval(x); 805 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 806 i = (*mf)((fa *) a[2], s); 807 else { 808 y = execute(a[2]); /* a[2] = regular expr */ 809 t = getsval(y); 810 pfa = makedfa(t, mode); 811 i = (*mf)(pfa, s); 812 tempfree(y); 813 } 814 z = x; 815 if (n == MATCHFCN) { 816 int start = patbeg - s + 1; /* origin 1 */ 817 if (patlen < 0) { 818 start = 0; /* not found */ 819 } else { 820 cstart = u8_byte2char(s, start-1); 821 cpatlen = 0; 822 for (i = 0; i < patlen; i += len) { 823 len = u8_nextlen(patbeg+i); 824 cpatlen++; 825 } 826 827 start = cstart; 828 patlen = cpatlen; 829 } 830 831 setfval(rstartloc, (Awkfloat) start); 832 setfval(rlengthloc, (Awkfloat) patlen); 833 x = gettemp(); 834 x->tval = NUM; 835 x->fval = start; 836 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 837 x = True; 838 else 839 x = False; 840 841 tempfree(z); 842 return x; 843 } 844 845 846 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 847 { 848 Cell *x, *y; 849 int i; 850 851 x = execute(a[0]); 852 i = istrue(x); 853 tempfree(x); 854 switch (n) { 855 case BOR: 856 if (i) return(True); 857 y = execute(a[1]); 858 i = istrue(y); 859 tempfree(y); 860 if (i) return(True); 861 else return(False); 862 case AND: 863 if ( !i ) return(False); 864 y = execute(a[1]); 865 i = istrue(y); 866 tempfree(y); 867 if (i) return(True); 868 else 
return(False); 869 case NOT: 870 if (i) return(False); 871 else return(True); 872 default: /* can't happen */ 873 FATAL("unknown boolean operator %d", n); 874 } 875 return 0; /*NOTREACHED*/ 876 } 877 878 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 879 { 880 int i; 881 Cell *x, *y; 882 Awkfloat j; 883 bool x_is_nan, y_is_nan; 884 885 x = execute(a[0]); 886 y = execute(a[1]); 887 x_is_nan = isnan(x->fval); 888 y_is_nan = isnan(y->fval); 889 if (x->tval&NUM && y->tval&NUM) { 890 if ((x_is_nan || y_is_nan) && n != NE) 891 return(False); 892 j = x->fval - y->fval; 893 i = j<0? -1: (j>0? 1: 0); 894 } else { 895 i = strcmp(getsval(x), getsval(y)); 896 } 897 tempfree(x); 898 tempfree(y); 899 switch (n) { 900 case LT: if (i<0) return(True); 901 else return(False); 902 case LE: if (i<=0) return(True); 903 else return(False); 904 case NE: if (x_is_nan && y_is_nan) return(True); 905 else if (i!=0) return(True); 906 else return(False); 907 case EQ: if (i == 0) return(True); 908 else return(False); 909 case GE: if (i>=0) return(True); 910 else return(False); 911 case GT: if (i>0) return(True); 912 else return(False); 913 default: /* can't happen */ 914 FATAL("unknown relational operator %d", n); 915 } 916 return 0; /*NOTREACHED*/ 917 } 918 919 void tfree(Cell *a) /* free a tempcell */ 920 { 921 if (freeable(a)) { 922 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 923 xfree(a->sval); 924 } 925 if (a == tmps) 926 FATAL("tempcell list is curdled"); 927 a->cnext = tmps; 928 tmps = a; 929 } 930 931 Cell *gettemp(void) /* get a tempcell */ 932 { int i; 933 Cell *x; 934 935 if (!tmps) { 936 tmps = (Cell *) calloc(100, sizeof(*tmps)); 937 if (!tmps) 938 FATAL("out of space for temporaries"); 939 for (i = 1; i < 100; i++) 940 tmps[i-1].cnext = &tmps[i]; 941 tmps[i-1].cnext = NULL; 942 } 943 x = tmps; 944 tmps = x->cnext; 945 *x = tempcell; 946 return(x); 947 } 948 949 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 950 { 951 Awkfloat val; 952 Cell *x; 953 int 
m; 954 char *s; 955 956 x = execute(a[0]); 957 val = getfval(x); /* freebsd: defend against super large field numbers */ 958 if ((Awkfloat)INT_MAX < val) 959 FATAL("trying to access out of range field %s", x->nval); 960 m = (int) val; 961 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 962 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 963 /* BUG: can x->nval ever be null??? */ 964 tempfree(x); 965 x = fieldadr(m); 966 x->ctype = OCELL; /* BUG? why are these needed? */ 967 x->csub = CFLD; 968 return(x); 969 } 970 971 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 972 { 973 int k, m, n; 974 int mb, nb; 975 char *s; 976 int temp; 977 Cell *x, *y, *z = NULL; 978 979 x = execute(a[0]); 980 y = execute(a[1]); 981 if (a[2] != NULL) 982 z = execute(a[2]); 983 s = getsval(x); 984 k = u8_strlen(s) + 1; 985 if (k <= 1) { 986 tempfree(x); 987 tempfree(y); 988 if (a[2] != NULL) { 989 tempfree(z); 990 } 991 x = gettemp(); 992 setsval(x, ""); 993 return(x); 994 } 995 m = (int) getfval(y); 996 if (m <= 0) 997 m = 1; 998 else if (m > k) 999 m = k; 1000 tempfree(y); 1001 if (a[2] != NULL) { 1002 n = (int) getfval(z); 1003 tempfree(z); 1004 } else 1005 n = k - 1; 1006 if (n < 0) 1007 n = 0; 1008 else if (n > k - m) 1009 n = k - m; 1010 /* m is start, n is length from there */ 1011 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 1012 y = gettemp(); 1013 mb = u8_char2byte(s, m-1); /* byte offset of start char in s */ 1014 nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */ 1015 1016 temp = s[nb]; /* with thanks to John Linderman */ 1017 s[nb] = '\0'; 1018 setsval(y, s + mb); 1019 s[nb] = temp; 1020 tempfree(x); 1021 return(y); 1022 } 1023 1024 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 1025 { 1026 Cell *x, *y, *z; 1027 char *s1, *s2, *p1, *p2, *q; 1028 Awkfloat v = 0.0; 1029 1030 x = execute(a[0]); 1031 s1 = getsval(x); 1032 y = execute(a[1]); 1033 s2 = getsval(y); 1034 1035 z = gettemp(); 1036 for (p1 = s1; *p1 != 
'\0'; p1++) { 1037 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 1038 continue; 1039 if (*p2 == '\0') { 1040 /* v = (Awkfloat) (p1 - s1 + 1); origin 1 */ 1041 1042 /* should be a function: used in match() as well */ 1043 int i, len; 1044 v = 0; 1045 for (i = 0; i < p1-s1+1; i += len) { 1046 len = u8_nextlen(s1+i); 1047 v++; 1048 } 1049 break; 1050 } 1051 } 1052 tempfree(x); 1053 tempfree(y); 1054 setfval(z, v); 1055 return(z); 1056 } 1057 1058 static int has_utf8(char *s) /* return 1 if s contains any utf-8 (2 bytes or more) character */ 1059 { 1060 int n; 1061 1062 for (n = 0; *s != 0; s += n) { 1063 n = u8_nextlen(s); 1064 if (n > 1) 1065 return 1; 1066 } 1067 return 0; 1068 } 1069 1070 #define MAXNUMSIZE 50 1071 1072 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 1073 { 1074 char *fmt; 1075 char *p, *t; 1076 const char *os; 1077 Cell *x; 1078 int flag = 0, n; 1079 int fmtwd; /* format width */ 1080 int fmtsz = recsize; 1081 char *buf = *pbuf; 1082 int bufsize = *pbufsize; 1083 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 1084 #define BUFSZ(a) (bufsize - ((a) - buf)) 1085 1086 static bool first = true; 1087 static bool have_a_format = false; 1088 1089 if (first) { 1090 char xbuf[100]; 1091 1092 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 1093 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 1094 first = false; 1095 } 1096 1097 os = s; 1098 p = buf; 1099 if ((fmt = (char *) malloc(fmtsz)) == NULL) 1100 FATAL("out of memory in format()"); 1101 while (*s) { 1102 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 1103 if (*s != '%') { 1104 *p++ = *s++; 1105 continue; 1106 } 1107 if (*(s+1) == '%') { 1108 *p++ = '%'; 1109 s += 2; 1110 continue; 1111 } 1112 fmtwd = atoi(s+1); 1113 if (fmtwd < 0) 1114 fmtwd = -fmtwd; 1115 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 1116 for (t = fmt; (*t++ = *s) != '\0'; s++) { 1117 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, 
"format3")) 1118 FATAL("format item %.30s... ran format() out of memory", os); 1119 /* Ignore size specifiers */ 1120 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 1121 t--; 1122 continue; 1123 } 1124 if (isalpha((uschar)*s)) 1125 break; 1126 if (*s == '$') { 1127 FATAL("'$' not permitted in awk formats"); 1128 } 1129 if (*s == '*') { 1130 if (a == NULL) { 1131 FATAL("not enough args in printf(%s)", os); 1132 } 1133 x = execute(a); 1134 a = a->nnext; 1135 snprintf(t - 1, FMTSZ(t - 1), 1136 "%d", fmtwd=(int) getfval(x)); 1137 if (fmtwd < 0) 1138 fmtwd = -fmtwd; 1139 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 1140 t = fmt + strlen(fmt); 1141 tempfree(x); 1142 } 1143 } 1144 *t = '\0'; 1145 if (fmtwd < 0) 1146 fmtwd = -fmtwd; 1147 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 1148 switch (*s) { 1149 case 'a': case 'A': 1150 if (have_a_format) 1151 flag = *s; 1152 else 1153 flag = 'f'; 1154 break; 1155 case 'f': case 'e': case 'g': case 'E': case 'G': 1156 flag = 'f'; 1157 break; 1158 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 1159 flag = (*s == 'd' || *s == 'i') ? 
'd' : 'u'; 1160 *(t-1) = 'j'; 1161 *t = *s; 1162 *++t = '\0'; 1163 break; 1164 case 's': 1165 flag = 's'; 1166 break; 1167 case 'c': 1168 flag = 'c'; 1169 break; 1170 default: 1171 WARNING("weird printf conversion %s", fmt); 1172 flag = '?'; 1173 break; 1174 } 1175 if (a == NULL) 1176 FATAL("not enough args in printf(%s)", os); 1177 x = execute(a); 1178 a = a->nnext; 1179 n = MAXNUMSIZE; 1180 if (fmtwd > n) 1181 n = fmtwd; 1182 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 1183 switch (flag) { 1184 case '?': 1185 snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 1186 t = getsval(x); 1187 n = strlen(t); 1188 if (fmtwd > n) 1189 n = fmtwd; 1190 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 1191 p += strlen(p); 1192 snprintf(p, BUFSZ(p), "%s", t); 1193 break; 1194 case 'a': 1195 case 'A': 1196 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 1197 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 1198 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 1199 1200 case 's': { 1201 t = getsval(x); 1202 n = strlen(t); 1203 /* if simple format or no utf-8 in the string, sprintf works */ 1204 if (!has_utf8(t) || strcmp(fmt,"%s") == 0) { 1205 if (fmtwd > n) 1206 n = fmtwd; 1207 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 1208 FATAL("huge string/format (%d chars) in printf %.30s..." 
\ 1209 " ran format() out of memory", n, t); 1210 snprintf(p, BUFSZ(p), fmt, t); 1211 break; 1212 } 1213 1214 /* get here if string has utf-8 chars and fmt is not plain %s */ 1215 /* "%-w.ps", where -, w and .p are all optional */ 1216 /* '0' before the w is a flag character */ 1217 /* fmt points at % */ 1218 int ljust = 0, wid = 0, prec = n, pad = 0; 1219 char *f = fmt+1; 1220 if (f[0] == '-') { 1221 ljust = 1; 1222 f++; 1223 } 1224 // flags '0' and '+' are recognized but skipped 1225 if (f[0] == '0') { 1226 f++; 1227 if (f[0] == '+') 1228 f++; 1229 } 1230 if (f[0] == '+') { 1231 f++; 1232 if (f[0] == '0') 1233 f++; 1234 } 1235 if (isdigit((unsigned char)f[0])) { /* there is a wid */ 1236 wid = strtol(f, &f, 10); 1237 } 1238 if (f[0] == '.') { /* there is a .prec */ 1239 prec = strtol(++f, &f, 10); 1240 } 1241 if (prec > u8_strlen(t)) 1242 prec = u8_strlen(t); 1243 pad = wid>prec ? wid - prec : 0; // has to be >= 0 1244 int i, k, l; 1245 1246 if (ljust) { // print prec chars from t, then pad blanks 1247 l = u8_char2byte(t, prec); 1248 for (k = 0; k < l; k++) { 1249 //putchar(t[k]); 1250 *p++ = t[k]; 1251 } 1252 for (i = 0; i < pad; i++) { 1253 //printf(" "); 1254 *p++ = ' '; 1255 } 1256 } else { // print pad blanks, then prec chars from t 1257 for (i = 0; i < pad; i++) { 1258 //printf(" "); 1259 *p++ = ' '; 1260 } 1261 l = u8_char2byte(t, prec); 1262 for (k = 0; k < l; k++) { 1263 //putchar(t[k]); 1264 *p++ = t[k]; 1265 } 1266 } 1267 *p = 0; 1268 break; 1269 } 1270 1271 case 'c': { 1272 /* 1273 * If a numeric value is given, awk should just turn 1274 * it into a character and print it: 1275 * BEGIN { printf("%c\n", 65) } 1276 * prints "A". 1277 * 1278 * But what if the numeric value is > 128 and 1279 * represents a valid Unicode code point?!? We do 1280 * our best to convert it back into UTF-8. If we 1281 * can't, we output the encoding of the Unicode 1282 * "invalid character", 0xFFFD. 
1283 */ 1284 if (isnum(x)) { 1285 int charval = (int) getfval(x); 1286 1287 if (charval != 0) { 1288 if (charval < 128 || awk_mb_cur_max == 1) 1289 snprintf(p, BUFSZ(p), fmt, charval); 1290 else { 1291 // possible unicode character 1292 size_t count; 1293 char *bs = wide_char_to_byte_str(charval, &count); 1294 1295 if (bs == NULL) { // invalid character 1296 // use unicode invalid character, 0xFFFD 1297 static char invalid_char[] = "\357\277\275"; 1298 bs = invalid_char; 1299 count = 3; 1300 } 1301 t = bs; 1302 n = count; 1303 goto format_percent_c; 1304 } 1305 } else { 1306 *p++ = '\0'; /* explicit null byte */ 1307 *p = '\0'; /* next output will start here */ 1308 } 1309 break; 1310 } 1311 t = getsval(x); 1312 n = u8_nextlen(t); 1313 format_percent_c: 1314 if (n < 2) { /* not utf8 */ 1315 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 1316 break; 1317 } 1318 1319 // utf8 character, almost same song and dance as for %s 1320 int ljust = 0, wid = 0, prec = n, pad = 0; 1321 char *f = fmt+1; 1322 if (f[0] == '-') { 1323 ljust = 1; 1324 f++; 1325 } 1326 // flags '0' and '+' are recognized but skipped 1327 if (f[0] == '0') { 1328 f++; 1329 if (f[0] == '+') 1330 f++; 1331 } 1332 if (f[0] == '+') { 1333 f++; 1334 if (f[0] == '0') 1335 f++; 1336 } 1337 if (isdigit((unsigned char)f[0])) { /* there is a wid */ 1338 wid = strtol(f, &f, 10); 1339 } 1340 if (f[0] == '.') { /* there is a .prec */ 1341 prec = strtol(++f, &f, 10); 1342 } 1343 if (prec > 1) // %c --> only one character 1344 prec = 1; 1345 pad = wid>prec ? 
wid - prec : 0; // has to be >= 0 1346 int i; 1347 1348 if (ljust) { // print one char from t, then pad blanks 1349 for (i = 0; i < n; i++) 1350 *p++ = t[i]; 1351 for (i = 0; i < pad; i++) { 1352 //printf(" "); 1353 *p++ = ' '; 1354 } 1355 } else { // print pad blanks, then prec chars from t 1356 for (i = 0; i < pad; i++) { 1357 //printf(" "); 1358 *p++ = ' '; 1359 } 1360 for (i = 0; i < n; i++) 1361 *p++ = t[i]; 1362 } 1363 *p = 0; 1364 break; 1365 } 1366 default: 1367 FATAL("can't happen: bad conversion %c in format()", flag); 1368 } 1369 1370 tempfree(x); 1371 p += strlen(p); 1372 s++; 1373 } 1374 *p = '\0'; 1375 free(fmt); 1376 for ( ; a; a = a->nnext) { /* evaluate any remaining args */ 1377 x = execute(a); 1378 tempfree(x); 1379 } 1380 *pbuf = buf; 1381 *pbufsize = bufsize; 1382 return p - buf; 1383 } 1384 1385 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 1386 { 1387 Cell *x; 1388 Node *y; 1389 char *buf; 1390 int bufsz=3*recsize; 1391 1392 if ((buf = (char *) malloc(bufsz)) == NULL) 1393 FATAL("out of memory in awksprintf"); 1394 y = a[0]->nnext; 1395 x = execute(a[0]); 1396 if (format(&buf, &bufsz, getsval(x), y) == -1) 1397 FATAL("sprintf string %.30s... too long. can't happen.", buf); 1398 tempfree(x); 1399 x = gettemp(); 1400 x->sval = buf; 1401 x->tval = STR; 1402 return(x); 1403 } 1404 1405 Cell *awkprintf(Node **a, int n) /* printf */ 1406 { /* a[0] is list of args, starting with format string */ 1407 /* a[1] is redirection operator, a[2] is redirection file */ 1408 FILE *fp; 1409 Cell *x; 1410 Node *y; 1411 char *buf; 1412 int len; 1413 int bufsz=3*recsize; 1414 1415 if ((buf = (char *) malloc(bufsz)) == NULL) 1416 FATAL("out of memory in awkprintf"); 1417 y = a[0]->nnext; 1418 x = execute(a[0]); 1419 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1420 FATAL("printf string %.30s... too long. 
can't happen.", buf); 1421 tempfree(x); 1422 if (a[1] == NULL) { 1423 /* fputs(buf, stdout); */ 1424 fwrite(buf, len, 1, stdout); 1425 if (ferror(stdout)) 1426 FATAL("write error on stdout"); 1427 } else { 1428 fp = redirect(ptoi(a[1]), a[2]); 1429 /* fputs(buf, fp); */ 1430 fwrite(buf, len, 1, fp); 1431 fflush(fp); 1432 if (ferror(fp)) 1433 FATAL("write error on %s", filename(fp)); 1434 } 1435 free(buf); 1436 return(True); 1437 } 1438 1439 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1440 { 1441 Awkfloat i, j = 0; 1442 double v; 1443 Cell *x, *y, *z; 1444 1445 x = execute(a[0]); 1446 i = getfval(x); 1447 tempfree(x); 1448 if (n != UMINUS && n != UPLUS) { 1449 y = execute(a[1]); 1450 j = getfval(y); 1451 tempfree(y); 1452 } 1453 z = gettemp(); 1454 switch (n) { 1455 case ADD: 1456 i += j; 1457 break; 1458 case MINUS: 1459 i -= j; 1460 break; 1461 case MULT: 1462 i *= j; 1463 break; 1464 case DIVIDE: 1465 if (j == 0) 1466 FATAL("division by zero"); 1467 i /= j; 1468 break; 1469 case MOD: 1470 if (j == 0) 1471 FATAL("division by zero in mod"); 1472 modf(i/j, &v); 1473 i = i - j * v; 1474 break; 1475 case UMINUS: 1476 i = -i; 1477 break; 1478 case UPLUS: /* handled by getfval(), above */ 1479 break; 1480 case POWER: 1481 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1482 i = ipow(i, (int) j); 1483 else { 1484 errno = 0; 1485 i = errcheck(pow(i, j), "pow"); 1486 } 1487 break; 1488 default: /* can't happen */ 1489 FATAL("illegal arithmetic operator %d", n); 1490 } 1491 setfval(z, i); 1492 return(z); 1493 } 1494 1495 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1496 { 1497 double v; 1498 1499 if (n <= 0) 1500 return 1; 1501 v = ipow(x, n/2); 1502 if (n % 2 == 0) 1503 return v * v; 1504 else 1505 return x * v * v; 1506 } 1507 1508 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. 
*/ 1509 { 1510 Cell *x, *z; 1511 int k; 1512 Awkfloat xf; 1513 1514 x = execute(a[0]); 1515 xf = getfval(x); 1516 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1517 if (n == PREINCR || n == PREDECR) { 1518 setfval(x, xf + k); 1519 return(x); 1520 } 1521 z = gettemp(); 1522 setfval(z, xf); 1523 setfval(x, xf + k); 1524 tempfree(x); 1525 return(z); 1526 } 1527 1528 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1529 { /* this is subtle; don't muck with it. */ 1530 Cell *x, *y; 1531 Awkfloat xf, yf; 1532 double v; 1533 1534 y = execute(a[1]); 1535 x = execute(a[0]); 1536 if (n == ASSIGN) { /* ordinary assignment */ 1537 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1538 ; /* self-assignment: leave alone unless it's a field or NF */ 1539 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1540 yf = getfval(y); 1541 setsval(x, getsval(y)); 1542 x->fval = yf; 1543 x->tval |= NUM; 1544 } 1545 else if (isstr(y)) 1546 setsval(x, getsval(y)); 1547 else if (isnum(y)) 1548 setfval(x, getfval(y)); 1549 else 1550 funnyvar(y, "read value of"); 1551 tempfree(y); 1552 return(x); 1553 } 1554 xf = getfval(x); 1555 yf = getfval(y); 1556 switch (n) { 1557 case ADDEQ: 1558 xf += yf; 1559 break; 1560 case SUBEQ: 1561 xf -= yf; 1562 break; 1563 case MULTEQ: 1564 xf *= yf; 1565 break; 1566 case DIVEQ: 1567 if (yf == 0) 1568 FATAL("division by zero in /="); 1569 xf /= yf; 1570 break; 1571 case MODEQ: 1572 if (yf == 0) 1573 FATAL("division by zero in %%="); 1574 modf(xf/yf, &v); 1575 xf = xf - yf * v; 1576 break; 1577 case POWEQ: 1578 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1579 xf = ipow(xf, (int) yf); 1580 else { 1581 errno = 0; 1582 xf = errcheck(pow(xf, yf), "pow"); 1583 } 1584 break; 1585 default: 1586 FATAL("illegal assignment operator %d", n); 1587 break; 1588 } 1589 tempfree(y); 1590 setfval(x, xf); 1591 return(x); 1592 } 1593 1594 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1595 { 1596 Cell *x, *y, *z; 1597 int n1, n2; 1598 char *s = 
NULL; 1599 int ssz = 0; 1600 1601 x = execute(a[0]); 1602 n1 = strlen(getsval(x)); 1603 adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); 1604 memcpy(s, x->sval, n1); 1605 1606 tempfree(x); 1607 1608 y = execute(a[1]); 1609 n2 = strlen(getsval(y)); 1610 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1611 memcpy(s + n1, y->sval, n2); 1612 s[n1 + n2] = '\0'; 1613 1614 tempfree(y); 1615 1616 z = gettemp(); 1617 z->sval = s; 1618 z->tval = STR; 1619 1620 return(z); 1621 } 1622 1623 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1624 { 1625 Cell *x; 1626 1627 if (a[0] == NULL) 1628 x = execute(a[1]); 1629 else { 1630 x = execute(a[0]); 1631 if (istrue(x)) { 1632 tempfree(x); 1633 x = execute(a[1]); 1634 } 1635 } 1636 return x; 1637 } 1638 1639 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1640 { 1641 Cell *x; 1642 int pair; 1643 1644 pair = ptoi(a[3]); 1645 if (pairstack[pair] == 0) { 1646 x = execute(a[0]); 1647 if (istrue(x)) 1648 pairstack[pair] = 1; 1649 tempfree(x); 1650 } 1651 if (pairstack[pair] == 1) { 1652 x = execute(a[1]); 1653 if (istrue(x)) 1654 pairstack[pair] = 0; 1655 tempfree(x); 1656 x = execute(a[2]); 1657 return(x); 1658 } 1659 return(False); 1660 } 1661 1662 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1663 { 1664 Cell *x = NULL, *y, *ap; 1665 const char *s, *origs, *t; 1666 const char *fs = NULL; 1667 char *origfs = NULL; 1668 int sep; 1669 char temp, num[50]; 1670 int n, tempstat, arg3type; 1671 int j; 1672 double result; 1673 1674 y = execute(a[0]); /* source string */ 1675 origs = s = strdup(getsval(y)); 1676 tempfree(y); 1677 arg3type = ptoi(a[3]); 1678 if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */ 1679 fs = getsval(fsloc); 1680 } else if (arg3type == STRING) { /* split(str,arr,"string") */ 1681 x = execute(a[2]); 1682 fs = origfs = strdup(getsval(x)); 1683 tempfree(x); 1684 } else if (arg3type == REGEXPR) { 1685 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1686 } 
else { 1687 FATAL("illegal type of split"); 1688 } 1689 sep = *fs; 1690 ap = execute(a[1]); /* array name */ 1691 /* BUG 7/26/22: this appears not to reset array: see C1/asplit */ 1692 freesymtab(ap); 1693 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1694 ap->tval &= ~STR; 1695 ap->tval |= ARR; 1696 ap->sval = (char *) makesymtab(NSYMTAB); 1697 1698 n = 0; 1699 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1700 /* split(s, a, //); have to arrange that it looks like empty sep */ 1701 arg3type = 0; 1702 fs = ""; 1703 sep = 0; 1704 } 1705 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1706 fa *pfa; 1707 if (arg3type == REGEXPR) { /* it's ready already */ 1708 pfa = (fa *) a[2]; 1709 } else { 1710 pfa = makedfa(fs, 1); 1711 } 1712 if (nematch(pfa,s)) { 1713 tempstat = pfa->initstat; 1714 pfa->initstat = 2; 1715 do { 1716 n++; 1717 snprintf(num, sizeof(num), "%d", n); 1718 temp = *patbeg; 1719 setptr(patbeg, '\0'); 1720 if (is_number(s, & result)) 1721 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1722 else 1723 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1724 setptr(patbeg, temp); 1725 s = patbeg + patlen; 1726 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1727 n++; 1728 snprintf(num, sizeof(num), "%d", n); 1729 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1730 pfa->initstat = tempstat; 1731 goto spdone; 1732 } 1733 } while (nematch(pfa,s)); 1734 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1735 /* cf gsub and refldbld */ 1736 } 1737 n++; 1738 snprintf(num, sizeof(num), "%d", n); 1739 if (is_number(s, & result)) 1740 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1741 else 1742 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1743 spdone: 1744 pfa = NULL; 1745 1746 } else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */ 1747 char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */ 1748 for (;;) { 
1749 char *fr = newt; 1750 n++; 1751 if (*s == '"' ) { /* start of "..." */ 1752 for (s++ ; *s != '\0'; ) { 1753 if (*s == '"' && s[1] != '\0' && s[1] == '"') { 1754 s += 2; /* doubled quote */ 1755 *fr++ = '"'; 1756 } else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) { 1757 s++; /* skip over closing quote */ 1758 break; 1759 } else { 1760 *fr++ = *s++; 1761 } 1762 } 1763 *fr++ = 0; 1764 } else { /* unquoted field */ 1765 while (*s != ',' && *s != '\0') 1766 *fr++ = *s++; 1767 *fr++ = 0; 1768 } 1769 snprintf(num, sizeof(num), "%d", n); 1770 if (is_number(newt, &result)) 1771 setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval); 1772 else 1773 setsymtab(num, newt, 0.0, STR, (Array *) ap->sval); 1774 if (*s++ == '\0') 1775 break; 1776 } 1777 free(newt); 1778 1779 } else if (!CSV && sep == ' ') { /* usual case: split on white space */ 1780 for (n = 0; ; ) { 1781 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1782 while (ISWS(*s)) 1783 s++; 1784 if (*s == '\0') 1785 break; 1786 n++; 1787 t = s; 1788 do 1789 s++; 1790 while (*s != '\0' && !ISWS(*s)); 1791 temp = *s; 1792 setptr(s, '\0'); 1793 snprintf(num, sizeof(num), "%d", n); 1794 if (is_number(t, & result)) 1795 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1796 else 1797 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1798 setptr(s, temp); 1799 if (*s != '\0') 1800 s++; 1801 } 1802 1803 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1804 for (n = 0; *s != '\0'; s += u8_nextlen(s)) { 1805 char buf[10]; 1806 n++; 1807 snprintf(num, sizeof(num), "%d", n); 1808 1809 for (j = 0; j < u8_nextlen(s); j++) { 1810 buf[j] = s[j]; 1811 } 1812 buf[j] = '\0'; 1813 1814 if (isdigit((uschar)buf[0])) 1815 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1816 else 1817 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1818 } 1819 1820 } else if (*s != '\0') { /* some random single character */ 1821 for (;;) { 1822 n++; 1823 t = s; 1824 while (*s != sep && *s != '\0') 1825 
s++; 1826 temp = *s; 1827 setptr(s, '\0'); 1828 snprintf(num, sizeof(num), "%d", n); 1829 if (is_number(t, & result)) 1830 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1831 else 1832 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1833 setptr(s, temp); 1834 if (*s++ == '\0') 1835 break; 1836 } 1837 } 1838 tempfree(ap); 1839 xfree(origs); 1840 xfree(origfs); 1841 x = gettemp(); 1842 x->tval = NUM; 1843 x->fval = n; 1844 return(x); 1845 } 1846 1847 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1848 { 1849 Cell *x; 1850 1851 x = execute(a[0]); 1852 if (istrue(x)) { 1853 tempfree(x); 1854 x = execute(a[1]); 1855 } else { 1856 tempfree(x); 1857 x = execute(a[2]); 1858 } 1859 return(x); 1860 } 1861 1862 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1863 { 1864 Cell *x; 1865 1866 x = execute(a[0]); 1867 if (istrue(x)) { 1868 tempfree(x); 1869 x = execute(a[1]); 1870 } else if (a[2] != NULL) { 1871 tempfree(x); 1872 x = execute(a[2]); 1873 } 1874 return(x); 1875 } 1876 1877 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1878 { 1879 Cell *x; 1880 1881 for (;;) { 1882 x = execute(a[0]); 1883 if (!istrue(x)) 1884 return(x); 1885 tempfree(x); 1886 x = execute(a[1]); 1887 if (isbreak(x)) { 1888 x = True; 1889 return(x); 1890 } 1891 if (isnext(x) || isexit(x) || isret(x)) 1892 return(x); 1893 tempfree(x); 1894 } 1895 } 1896 1897 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1898 { 1899 Cell *x; 1900 1901 for (;;) { 1902 x = execute(a[0]); 1903 if (isbreak(x)) 1904 return True; 1905 if (isnext(x) || isexit(x) || isret(x)) 1906 return(x); 1907 tempfree(x); 1908 x = execute(a[1]); 1909 if (!istrue(x)) 1910 return(x); 1911 tempfree(x); 1912 } 1913 } 1914 1915 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1916 { 1917 Cell *x; 1918 1919 x = execute(a[0]); 1920 tempfree(x); 1921 for (;;) { 1922 if (a[1]!=NULL) { 1923 x = execute(a[1]); 1924 if (!istrue(x)) return(x); 1925 else tempfree(x); 1926 } 1927 x = 
execute(a[3]); 1928 if (isbreak(x)) /* turn off break */ 1929 return True; 1930 if (isnext(x) || isexit(x) || isret(x)) 1931 return(x); 1932 tempfree(x); 1933 x = execute(a[2]); 1934 tempfree(x); 1935 } 1936 } 1937 1938 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1939 { 1940 Cell *x, *vp, *arrayp, *cp, *ncp; 1941 Array *tp; 1942 int i; 1943 1944 vp = execute(a[0]); 1945 arrayp = execute(a[1]); 1946 if (!isarr(arrayp)) { 1947 return True; 1948 } 1949 tp = (Array *) arrayp->sval; 1950 tempfree(arrayp); 1951 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1952 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1953 setsval(vp, cp->nval); 1954 ncp = cp->cnext; 1955 x = execute(a[2]); 1956 if (isbreak(x)) { 1957 tempfree(vp); 1958 return True; 1959 } 1960 if (isnext(x) || isexit(x) || isret(x)) { 1961 tempfree(vp); 1962 return(x); 1963 } 1964 tempfree(x); 1965 } 1966 } 1967 return True; 1968 } 1969 1970 static char *nawk_convert(const char *s, int (*fun_c)(int), 1971 wint_t (*fun_wc)(wint_t)) 1972 { 1973 char *buf = NULL; 1974 char *pbuf = NULL; 1975 const char *ps = NULL; 1976 size_t n = 0; 1977 wchar_t wc; 1978 const size_t sz = awk_mb_cur_max; 1979 int unused; 1980 1981 if (sz == 1) { 1982 buf = tostring(s); 1983 1984 for (pbuf = buf; *pbuf; pbuf++) 1985 *pbuf = fun_c((uschar)*pbuf); 1986 1987 return buf; 1988 } else { 1989 /* upper/lower character may be shorter/longer */ 1990 buf = tostringN(s, strlen(s) * sz + 1); 1991 1992 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1993 /* 1994 * Reset internal state here too. 1995 * Assign result to avoid a compiler warning. (Casting to void 1996 * doesn't work.) 1997 * Increment said variable to avoid a different warning. 
1998 */ 1999 unused = wctomb(NULL, L'\0'); 2000 unused++; 2001 2002 ps = s; 2003 pbuf = buf; 2004 while (n = mbtowc(&wc, ps, sz), 2005 n > 0 && n != (size_t)-1 && n != (size_t)-2) 2006 { 2007 ps += n; 2008 2009 n = wctomb(pbuf, fun_wc(wc)); 2010 if (n == (size_t)-1) 2011 FATAL("illegal wide character %s", s); 2012 2013 pbuf += n; 2014 } 2015 2016 *pbuf = '\0'; 2017 2018 if (n) 2019 FATAL("illegal byte sequence %s", s); 2020 2021 return buf; 2022 } 2023 } 2024 2025 #ifdef __DJGPP__ 2026 static wint_t towupper(wint_t wc) 2027 { 2028 if (wc >= 0 && wc < 256) 2029 return toupper(wc & 0xFF); 2030 2031 return wc; 2032 } 2033 2034 static wint_t towlower(wint_t wc) 2035 { 2036 if (wc >= 0 && wc < 256) 2037 return tolower(wc & 0xFF); 2038 2039 return wc; 2040 } 2041 #endif 2042 2043 static char *nawk_toupper(const char *s) 2044 { 2045 return nawk_convert(s, toupper, towupper); 2046 } 2047 2048 static char *nawk_tolower(const char *s) 2049 { 2050 return nawk_convert(s, tolower, towlower); 2051 } 2052 2053 2054 2055 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 2056 { 2057 Cell *x, *y; 2058 Awkfloat u = 0; 2059 int t, sz; 2060 Awkfloat tmp; 2061 char *buf, *fmt; 2062 Node *nextarg; 2063 FILE *fp; 2064 int status = 0; 2065 time_t tv; 2066 struct tm *tm, tmbuf; 2067 int estatus = 0; 2068 2069 t = ptoi(a[0]); 2070 x = execute(a[1]); 2071 nextarg = a[1]->nnext; 2072 switch (t) { 2073 case FLENGTH: 2074 if (isarr(x)) 2075 u = ((Array *) x->sval)->nelem; /* GROT. 
should be function*/ 2076 else 2077 u = u8_strlen(getsval(x)); 2078 break; 2079 case FLOG: 2080 errno = 0; 2081 u = errcheck(log(getfval(x)), "log"); 2082 break; 2083 case FINT: 2084 modf(getfval(x), &u); break; 2085 case FEXP: 2086 errno = 0; 2087 u = errcheck(exp(getfval(x)), "exp"); 2088 break; 2089 case FSQRT: 2090 errno = 0; 2091 u = errcheck(sqrt(getfval(x)), "sqrt"); 2092 break; 2093 case FSIN: 2094 u = sin(getfval(x)); break; 2095 case FCOS: 2096 u = cos(getfval(x)); break; 2097 case FATAN: 2098 if (nextarg == NULL) { 2099 WARNING("atan2 requires two arguments; returning 1.0"); 2100 u = 1.0; 2101 } else { 2102 y = execute(a[1]->nnext); 2103 u = atan2(getfval(x), getfval(y)); 2104 tempfree(y); 2105 nextarg = nextarg->nnext; 2106 } 2107 break; 2108 case FCOMPL: 2109 u = ~((int)getfval(x)); 2110 break; 2111 case FAND: 2112 if (nextarg == 0) { 2113 WARNING("and requires two arguments; returning 0"); 2114 u = 0; 2115 break; 2116 } 2117 y = execute(a[1]->nnext); 2118 u = ((int)getfval(x)) & ((int)getfval(y)); 2119 tempfree(y); 2120 nextarg = nextarg->nnext; 2121 break; 2122 case FFOR: 2123 if (nextarg == 0) { 2124 WARNING("or requires two arguments; returning 0"); 2125 u = 0; 2126 break; 2127 } 2128 y = execute(a[1]->nnext); 2129 u = ((int)getfval(x)) | ((int)getfval(y)); 2130 tempfree(y); 2131 nextarg = nextarg->nnext; 2132 break; 2133 case FXOR: 2134 if (nextarg == 0) { 2135 WARNING("xor requires two arguments; returning 0"); 2136 u = 0; 2137 break; 2138 } 2139 y = execute(a[1]->nnext); 2140 u = ((int)getfval(x)) ^ ((int)getfval(y)); 2141 tempfree(y); 2142 nextarg = nextarg->nnext; 2143 break; 2144 case FLSHIFT: 2145 if (nextarg == 0) { 2146 WARNING("lshift requires two arguments; returning 0"); 2147 u = 0; 2148 break; 2149 } 2150 y = execute(a[1]->nnext); 2151 u = ((int)getfval(x)) << ((int)getfval(y)); 2152 tempfree(y); 2153 nextarg = nextarg->nnext; 2154 break; 2155 case FRSHIFT: 2156 if (nextarg == 0) { 2157 WARNING("rshift requires two arguments; returning 
0"); 2158 u = 0; 2159 break; 2160 } 2161 y = execute(a[1]->nnext); 2162 u = ((int)getfval(x)) >> ((int)getfval(y)); 2163 tempfree(y); 2164 nextarg = nextarg->nnext; 2165 break; 2166 case FSYSTEM: 2167 fflush(stdout); /* in case something is buffered already */ 2168 estatus = status = system(getsval(x)); 2169 if (status != -1) { 2170 if (WIFEXITED(status)) { 2171 estatus = WEXITSTATUS(status); 2172 } else if (WIFSIGNALED(status)) { 2173 estatus = WTERMSIG(status) + 256; 2174 #ifdef WCOREDUMP 2175 if (WCOREDUMP(status)) 2176 estatus += 256; 2177 #endif 2178 } else /* something else?!? */ 2179 estatus = 0; 2180 } 2181 /* else estatus was set to -1 */ 2182 u = estatus; 2183 break; 2184 case FRAND: 2185 /* random() returns numbers in [0..2^31-1] 2186 * in order to get a number in [0, 1), divide it by 2^31 2187 */ 2188 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 2189 break; 2190 case FSRAND: 2191 if (isrec(x)) /* no argument provided */ 2192 u = time((time_t *)0); 2193 else 2194 u = getfval(x); 2195 tmp = u; 2196 srandom((unsigned long) u); 2197 u = srand_seed; 2198 srand_seed = tmp; 2199 break; 2200 case FTOUPPER: 2201 case FTOLOWER: 2202 if (t == FTOUPPER) 2203 buf = nawk_toupper(getsval(x)); 2204 else 2205 buf = nawk_tolower(getsval(x)); 2206 tempfree(x); 2207 x = gettemp(); 2208 setsval(x, buf); 2209 free(buf); 2210 return x; 2211 case FFLUSH: 2212 if (isrec(x) || strlen(getsval(x)) == 0) { 2213 flush_all(); /* fflush() or fflush("") -> all */ 2214 u = 0; 2215 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 2216 u = EOF; 2217 else 2218 u = fflush(fp); 2219 break; 2220 case FMKTIME: 2221 memset(&tmbuf, 0, sizeof(tmbuf)); 2222 tm = &tmbuf; 2223 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 2224 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 2225 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 2226 switch (t) { 2227 case 6: 2228 tm->tm_isdst = -1; /* let mktime figure it out */ 2229 /* FALLTHROUGH */ 2230 case 7: 2231 tm->tm_year -= 1900; 2232 
tm->tm_mon--; 2233 u = mktime(tm); 2234 break; 2235 default: 2236 u = -1; 2237 break; 2238 } 2239 break; 2240 case FSYSTIME: 2241 u = time((time_t *) 0); 2242 break; 2243 case FSTRFTIME: 2244 /* strftime([format [,timestamp]]) */ 2245 if (nextarg) { 2246 y = execute(nextarg); 2247 nextarg = nextarg->nnext; 2248 tv = (time_t) getfval(y); 2249 tempfree(y); 2250 } else 2251 tv = time((time_t *) 0); 2252 tm = localtime(&tv); 2253 if (tm == NULL) 2254 FATAL("bad time %ld", (long)tv); 2255 2256 if (isrec(x)) { 2257 /* format argument not provided, use default */ 2258 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 2259 } else 2260 fmt = tostring(getsval(x)); 2261 2262 sz = 32; 2263 buf = NULL; 2264 do { 2265 if ((buf = realloc(buf, (sz *= 2))) == NULL) 2266 FATAL("out of memory in strftime"); 2267 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 2268 2269 y = gettemp(); 2270 setsval(y, buf); 2271 free(fmt); 2272 free(buf); 2273 2274 return y; 2275 default: /* can't happen */ 2276 FATAL("illegal function type %d", t); 2277 break; 2278 } 2279 tempfree(x); 2280 x = gettemp(); 2281 setfval(x, u); 2282 if (nextarg != NULL) { 2283 WARNING("warning: function has too many arguments"); 2284 for ( ; nextarg; nextarg = nextarg->nnext) { 2285 y = execute(nextarg); 2286 tempfree(y); 2287 } 2288 } 2289 return(x); 2290 } 2291 2292 Cell *printstat(Node **a, int n) /* print a[0] */ 2293 { 2294 Node *x; 2295 Cell *y; 2296 FILE *fp; 2297 2298 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 2299 fp = stdout; 2300 else 2301 fp = redirect(ptoi(a[1]), a[2]); 2302 for (x = a[0]; x != NULL; x = x->nnext) { 2303 y = execute(x); 2304 fputs(getpssval(y), fp); 2305 tempfree(y); 2306 if (x->nnext == NULL) 2307 fputs(getsval(orsloc), fp); 2308 else 2309 fputs(getsval(ofsloc), fp); 2310 } 2311 if (a[1] != NULL) 2312 fflush(fp); 2313 if (ferror(fp)) 2314 FATAL("write error on %s", filename(fp)); 2315 return(True); 2316 } 2317 2318 Cell *nullproc(Node **a, int n) 2319 { 2320 
return 0; 2321 } 2322 2323 2324 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 2325 { 2326 FILE *fp; 2327 Cell *x; 2328 char *fname; 2329 2330 x = execute(b); 2331 fname = getsval(x); 2332 fp = openfile(a, fname, NULL); 2333 if (fp == NULL) 2334 FATAL("can't open file %s", fname); 2335 tempfree(x); 2336 return fp; 2337 } 2338 2339 struct files { 2340 FILE *fp; 2341 const char *fname; 2342 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 2343 } *files; 2344 2345 size_t nfiles; 2346 2347 static void stdinit(void) /* in case stdin, etc., are not constants */ 2348 { 2349 nfiles = FOPEN_MAX; 2350 files = (struct files *) calloc(nfiles, sizeof(*files)); 2351 if (files == NULL) 2352 FATAL("can't allocate file memory for %zu files", nfiles); 2353 files[0].fp = stdin; 2354 files[0].fname = tostring("/dev/stdin"); 2355 files[0].mode = LT; 2356 files[1].fp = stdout; 2357 files[1].fname = tostring("/dev/stdout"); 2358 files[1].mode = GT; 2359 files[2].fp = stderr; 2360 files[2].fname = tostring("/dev/stderr"); 2361 files[2].mode = GT; 2362 } 2363 2364 FILE *openfile(int a, const char *us, bool *pnewflag) 2365 { 2366 const char *s = us; 2367 size_t i; 2368 int m; 2369 FILE *fp = NULL; 2370 2371 if (*s == '\0') 2372 FATAL("null file name in print or getline"); 2373 for (i = 0; i < nfiles; i++) 2374 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 2375 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 2376 a == FFLUSH)) { 2377 if (pnewflag) 2378 *pnewflag = false; 2379 return files[i].fp; 2380 } 2381 if (a == FFLUSH) /* didn't find it, so don't create it! 
*/ 2382 return NULL; 2383 2384 for (i = 0; i < nfiles; i++) 2385 if (files[i].fp == NULL) 2386 break; 2387 if (i >= nfiles) { 2388 struct files *nf; 2389 size_t nnf = nfiles + FOPEN_MAX; 2390 nf = (struct files *) realloc(files, nnf * sizeof(*nf)); 2391 if (nf == NULL) 2392 FATAL("cannot grow files for %s and %zu files", s, nnf); 2393 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 2394 nfiles = nnf; 2395 files = nf; 2396 } 2397 fflush(stdout); /* force a semblance of order */ 2398 m = a; 2399 if (a == GT) { 2400 fp = fopen(s, "w"); 2401 } else if (a == APPEND) { 2402 fp = fopen(s, "a"); 2403 m = GT; /* so can mix > and >> */ 2404 } else if (a == '|') { /* output pipe */ 2405 fp = popen(s, "w"); 2406 } else if (a == LE) { /* input pipe */ 2407 fp = popen(s, "r"); 2408 } else if (a == LT) { /* getline <file */ 2409 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 2410 } else /* can't happen */ 2411 FATAL("illegal redirection %d", a); 2412 if (fp != NULL) { 2413 files[i].fname = tostring(s); 2414 files[i].fp = fp; 2415 files[i].mode = m; 2416 if (pnewflag) 2417 *pnewflag = true; 2418 if (fp != stdin && fp != stdout && fp != stderr) 2419 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 2420 } 2421 return fp; 2422 } 2423 2424 const char *filename(FILE *fp) 2425 { 2426 size_t i; 2427 2428 for (i = 0; i < nfiles; i++) 2429 if (fp == files[i].fp) 2430 return files[i].fname; 2431 return "???"; 2432 } 2433 2434 Cell *closefile(Node **a, int n) 2435 { 2436 Cell *x; 2437 size_t i; 2438 bool stat; 2439 2440 x = execute(a[0]); 2441 getsval(x); 2442 stat = true; 2443 for (i = 0; i < nfiles; i++) { 2444 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 2445 continue; 2446 if (files[i].mode == GT || files[i].mode == '|') 2447 fflush(files[i].fp); 2448 if (ferror(files[i].fp)) { 2449 if ((files[i].mode == GT && files[i].fp != stderr) 2450 || files[i].mode == '|') 2451 FATAL("write error on %s", files[i].fname); 2452 else 2453 WARNING("i/o error 
occurred on %s", files[i].fname);
		}
		/* the standard streams are re-pointed at /dev/null rather than closed */
		if (files[i].fp == stdin || files[i].fp == stdout ||
		    files[i].fp == stderr)
			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
		else if (files[i].mode == '|' || files[i].mode == LE)
			stat = pclose(files[i].fp) == -1;
		else
			stat = fclose(files[i].fp) == EOF;
		if (stat)
			WARNING("i/o error occurred closing %s", files[i].fname);
		xfree(files[i].fname);
		files[i].fname = NULL;	/* watch out for ref thru this */
		files[i].fp = NULL;
		break;	/* only the first matching entry is closed */
 	}
 	tempfree(x);
 	x = gettemp();
	/* result is -1 on a close error, or when no entry matched (stat stays true) */
	setfval(x, (Awkfloat) (stat ? -1 : 0));
 	return(x);
}

/*
 * Flush and close every open entry of the files table.
 * A pending write error on an output file or pipe is fatal (stderr
 * excepted); other stream errors only warn.  stdin, stdout and stderr
 * are flushed/checked but never closed here.
 */
void closeall(void)
{
	size_t i;
	bool stat = false;

	for (i = 0; i < nfiles; i++) {
		if (! files[i].fp)
			continue;
		if (files[i].mode == GT || files[i].mode == '|')
			fflush(files[i].fp);
		if (ferror(files[i].fp)) {
			if ((files[i].mode == GT && files[i].fp != stderr)
			  || files[i].mode == '|')
				FATAL("write error on %s", files[i].fname);
			else
				WARNING("i/o error occurred on %s", files[i].fname);
		}
		if (files[i].fp == stdin || files[i].fp == stdout ||
		    files[i].fp == stderr)
			continue;
		if (files[i].mode == '|' || files[i].mode == LE)
			stat = pclose(files[i].fp) == -1;
		else
			stat = fclose(files[i].fp) == EOF;
		if (stat)
			WARNING("i/o error occurred while closing %s", files[i].fname);
	}
}

/* fflush() every stream currently recorded in the files table. */
static void flush_all(void)
{
	size_t i;

	for (i = 0; i < nfiles; i++)
		if (files[i].fp)
			fflush(files[i].fp);
}

void backsub(char **pb_ptr, const char **sptr_ptr);

/*
 * sub() and gsub().
 * a[0] == NULL means a[1] is an already-compiled regular expression;
 * otherwise a[1] is an expression whose string value is compiled with
 * makedfa().  a[2] is the replacement text and a[3] the target, which is
 * rewritten in place with setsval() when at least one match was found.
 * subop selects SUB (only the first match is replaced) or GSUB (all).
 * In the replacement text '&' stands for the matched text and backslash
 * sequences are handed to backsub().
 * Returns a temp cell holding the match counter m.
 */
Cell *dosub(Node **a, int subop)        /* sub and gsub */
{
	fa *pfa;
	int tempstat = 0;
	char *repl;
	Cell *x;

	char *buf = NULL;	/* output under construction; NULL until the first match */
	char *pb = NULL;	/* write position inside buf */
	int bufsz = recsize;

	const char *r, *s;
	const char *start;	/* where the next search begins */
	const char *noempty = NULL;      /* empty match disallowed here */
	size_t m = 0;                    /* match count */
	size_t whichm = 0;               /* which match to select, 0 = global */
	int mtype;                       /* match type */

	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];
	} else {
		x = execute(a[1]);
		pfa = makedfa(getsval(x), 1);
		tempfree(x);
	}

	x = execute(a[2]);	/* replacement string */
	repl = tostring(getsval(x));
	tempfree(x);

	switch (subop) {
	case SUB:
		whichm = 1;
		x = execute(a[3]);    /* source string */
		break;
	case GSUB:
		whichm = 0;
		x = execute(a[3]);    /* source string */
		break;
	default:
		FATAL("dosub: unrecognized subop: %d", subop);
	}

	start = getsval(x);
	while (pmatch(pfa, start)) {
		/* first match: allocate the output buffer and save the dfa's initstat */
		if (buf == NULL) {
			if ((pb = buf = (char *) malloc(bufsz)) == NULL)
				FATAL("out of memory in dosub");
			tempstat = pfa->initstat;
			pfa->initstat = 2;
		}

		/* match types */
		#define	MT_IGNORE  0  /* unselected or invalid */
		#define MT_INSERT  1  /* selected, empty */
		#define MT_REPLACE 2  /* selected, not empty */

		/* an empty match just after replacement is invalid */

		if (patbeg == noempty && patlen == 0) {
			mtype = MT_IGNORE;    /* invalid, not counted */
		} else if (whichm == ++m || whichm == 0) {
			mtype = patlen ? MT_REPLACE : MT_INSERT;
		} else {
			mtype = MT_IGNORE;    /* unselected, but counted */
		}

		/* leading text: */
		if (patbeg > start) {
			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
				recsize, &pb, "dosub");
			s = start;
			while (s < patbeg)
				*pb++ = *s++;
		}

		if (mtype == MT_IGNORE)
			goto matching_text;   /* skip replacement text */

		/* emit the replacement, expanding '&' and backslash sequences */
		r = repl;
		while (*r != 0) {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
			if (*r == '\\') {
				backsub(&pb, &r);
			} else if (*r == '&') {
				r++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
					&pb, "dosub");
				for (s = patbeg; s < patbeg+patlen; )
					*pb++ = *s++;
			} else {
				*pb++ = *r++;
			}
		}

matching_text:
		if (mtype == MT_REPLACE || *patbeg == '\0')
			goto next_search;     /* skip matching text */

		/*
		 * The matched text is kept.  For an empty match, one
		 * character (u8_nextlen() bytes) is copied so that the
		 * search position still advances.
		 */
		if (patlen == 0)
			patlen = u8_nextlen(patbeg);
		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
		s = patbeg;
		while (s < patbeg + patlen)
			*pb++ = *s++;

next_search:
		start = patbeg + patlen;
		/* stop after the selected match, or at the end of the source */
		if (m == whichm || *patbeg == '\0')
			break;
		if (mtype == MT_REPLACE)
			noempty = start;

		#undef MT_IGNORE
		#undef MT_INSERT
		#undef MT_REPLACE
	}

	xfree(repl);

	if (buf != NULL) {
		pfa->initstat = tempstat;

		/* trailing text */
		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
		while ((*pb++ = *start++) != '\0')
			;

		setsval(x, buf);
		free(buf);
	}

	tempfree(x);
	x = gettemp();
	x->tval = NUM;
	x->fval = m;
	return x;
}

Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... 
\9 */ 2656 { 2657 Cell *x, *y, *res, *h; 2658 char *rptr; 2659 const char *sptr; 2660 char *buf, *pb; 2661 const char *t, *q; 2662 fa *pfa; 2663 int mflag, tempstat, num, whichm; 2664 int bufsz = recsize; 2665 2666 if ((buf = malloc(bufsz)) == NULL) 2667 FATAL("out of memory in gensub"); 2668 mflag = 0; /* if mflag == 0, can replace empty string */ 2669 num = 0; 2670 x = execute(a[4]); /* source string */ 2671 t = getsval(x); 2672 res = copycell(x); /* target string - initially copy of source */ 2673 res->csub = CTEMP; /* result values are temporary */ 2674 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2675 pfa = (fa *) a[1]; /* regular expression */ 2676 else { 2677 y = execute(a[1]); 2678 pfa = makedfa(getsval(y), 1); 2679 tempfree(y); 2680 } 2681 y = execute(a[2]); /* replacement string */ 2682 h = execute(a[3]); /* which matches should be replaced */ 2683 sptr = getsval(h); 2684 if (sptr[0] == 'g' || sptr[0] == 'G') 2685 whichm = -1; 2686 else { 2687 /* 2688 * The specified number is index of replacement, starting 2689 * from 1. GNU awk treats index lower than 0 same as 2690 * 1, we do same for compatibility. 2691 */ 2692 whichm = (int) getfval(h) - 1; 2693 if (whichm < 0) 2694 whichm = 0; 2695 } 2696 tempfree(h); 2697 2698 if (pmatch(pfa, t)) { 2699 char *sl; 2700 2701 tempstat = pfa->initstat; 2702 pfa->initstat = 2; 2703 pb = buf; 2704 rptr = getsval(y); 2705 /* 2706 * XXX if there are any backreferences in subst string, 2707 * complain now. 
2708 */ 2709 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2710 if (strchr("0123456789", sl[1])) { 2711 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2712 } 2713 } 2714 2715 do { 2716 if (whichm >= 0 && whichm != num) { 2717 num++; 2718 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2719 2720 /* copy the part of string up to and including 2721 * match to output buffer */ 2722 while (t < patbeg + patlen) 2723 *pb++ = *t++; 2724 continue; 2725 } 2726 2727 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2728 if (mflag == 0) { /* can replace empty */ 2729 num++; 2730 sptr = rptr; 2731 while (*sptr != 0) { 2732 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2733 if (*sptr == '\\') { 2734 backsub(&pb, &sptr); 2735 } else if (*sptr == '&') { 2736 sptr++; 2737 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2738 for (q = patbeg; q < patbeg+patlen; ) 2739 *pb++ = *q++; 2740 } else 2741 *pb++ = *sptr++; 2742 } 2743 } 2744 if (*t == 0) /* at end */ 2745 goto done; 2746 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2747 *pb++ = *t++; 2748 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2749 FATAL("gensub result0 %.30s too big; can't happen", buf); 2750 mflag = 0; 2751 } 2752 else { /* matched nonempty string */ 2753 num++; 2754 sptr = t; 2755 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2756 while (sptr < patbeg) 2757 *pb++ = *sptr++; 2758 sptr = rptr; 2759 while (*sptr != 0) { 2760 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2761 if (*sptr == '\\') { 2762 backsub(&pb, &sptr); 2763 } else if (*sptr == '&') { 2764 sptr++; 2765 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2766 for (q = patbeg; q < patbeg+patlen; ) 2767 *pb++ = *q++; 2768 } else 2769 *pb++ = *sptr++; 2770 } 2771 t = patbeg + patlen; 2772 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2773 goto done; 2774 if (pb > buf + bufsz) 2775 
FATAL("gensub result1 %.30s too big; can't happen", buf); 2776 mflag = 1; 2777 } 2778 } while (pmatch(pfa,t)); 2779 sptr = t; 2780 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2781 while ((*pb++ = *sptr++) != 0) 2782 ; 2783 done: if (pb > buf + bufsz) 2784 FATAL("gensub result2 %.30s too big; can't happen", buf); 2785 *pb = '\0'; 2786 setsval(res, buf); 2787 pfa->initstat = tempstat; 2788 } 2789 tempfree(x); 2790 tempfree(y); 2791 free(buf); 2792 return(res); 2793 } 2794 2795 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2796 { /* sptr[0] == '\\' */ 2797 char *pb = *pb_ptr; 2798 const char *sptr = *sptr_ptr; 2799 static bool first = true; 2800 static bool do_posix = false; 2801 2802 if (first) { 2803 first = false; 2804 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2805 } 2806 2807 if (sptr[1] == '\\') { 2808 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2809 *pb++ = '\\'; 2810 *pb++ = '&'; 2811 sptr += 4; 2812 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2813 *pb++ = '\\'; 2814 sptr += 2; 2815 } else if (do_posix) { /* \\x -> \x */ 2816 sptr++; 2817 *pb++ = *sptr++; 2818 } else { /* \\x -> \\x */ 2819 *pb++ = *sptr++; 2820 *pb++ = *sptr++; 2821 } 2822 } else if (sptr[1] == '&') { /* literal & */ 2823 sptr++; 2824 *pb++ = *sptr++; 2825 } else /* literal \ */ 2826 *pb++ = *sptr++; 2827 2828 *pb_ptr = pb; 2829 *sptr_ptr = sptr; 2830 } 2831 2832 static char *wide_char_to_byte_str(int rune, size_t *outlen) 2833 { 2834 static char buf[5]; 2835 int len; 2836 2837 if (rune < 0 || rune > 0x10FFFF) 2838 return NULL; 2839 2840 memset(buf, 0, sizeof(buf)); 2841 2842 len = 0; 2843 if (rune <= 0x0000007F) { 2844 buf[len++] = rune; 2845 } else if (rune <= 0x000007FF) { 2846 // 110xxxxx 10xxxxxx 2847 buf[len++] = 0xC0 | (rune >> 6); 2848 buf[len++] = 0x80 | (rune & 0x3F); 2849 } else if (rune <= 0x0000FFFF) { 2850 // 1110xxxx 10xxxxxx 10xxxxxx 2851 buf[len++] = 0xE0 | (rune >> 12); 2852 buf[len++] = 0x80 | 
((rune >> 6) & 0x3F); 2853 buf[len++] = 0x80 | (rune & 0x3F); 2854 2855 } else { 2856 // 0x00010000 - 0x10FFFF 2857 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 2858 buf[len++] = 0xF0 | (rune >> 18); 2859 buf[len++] = 0x80 | ((rune >> 12) & 0x3F); 2860 buf[len++] = 0x80 | ((rune >> 6) & 0x3F); 2861 buf[len++] = 0x80 | (rune & 0x3F); 2862 } 2863 2864 *outlen = len; 2865 buf[len++] = '\0'; 2866 2867 return buf; 2868 } 2869