1 /* $OpenBSD: gnum4.c,v 1.50 2015/04/29 00:13:26 millert Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * functions needed to support gnu-m4 extensions, including a fake freezing 30 */ 31 32 #include <sys/types.h> 33 #include <sys/wait.h> 34 #include <ctype.h> 35 #include <err.h> 36 #include <paths.h> 37 #include <regex.h> 38 #include <stddef.h> 39 #include <stdlib.h> 40 #include <stdint.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <unistd.h> 45 #include <limits.h> 46 #include "mdef.h" 47 #include "stdd.h" 48 #include "extern.h" 49 50 51 int mimic_gnu = 0; 52 53 /* 54 * Support for include path search 55 * First search in the current directory. 56 * If not found, and the path is not absolute, include path kicks in. 57 * First, -I options, in the order found on the command line. 58 * Then M4PATH env variable 59 */ 60 61 struct path_entry { 62 char *name; 63 struct path_entry *next; 64 } *first, *last; 65 66 static struct path_entry *new_path_entry(const char *); 67 static void ensure_m4path(void); 68 static struct input_file *dopath(struct input_file *, const char *); 69 70 static struct path_entry * 71 new_path_entry(const char *dirname) 72 { 73 struct path_entry *n; 74 75 n = malloc(sizeof(struct path_entry)); 76 if (!n) 77 errx(1, "out of memory"); 78 n->name = xstrdup(dirname); 79 n->next = 0; 80 return n; 81 } 82 83 void 84 addtoincludepath(const char *dirname) 85 { 86 struct path_entry *n; 87 88 n = new_path_entry(dirname); 89 90 if (last) { 91 last->next = n; 92 last = n; 93 } 94 else 95 last = first = n; 96 } 97 98 static void 99 ensure_m4path() 100 { 101 static int envpathdone = 0; 102 char *envpath; 103 char *sweep; 104 char *path; 105 106 if (envpathdone) 107 return; 108 envpathdone = TRUE; 109 envpath = getenv("M4PATH"); 110 if (!envpath) 111 return; 112 /* for portability: getenv result is read-only */ 113 envpath = xstrdup(envpath); 114 for (sweep = envpath; 115 (path = strsep(&sweep, ":")) != NULL;) 116 addtoincludepath(path); 117 free(envpath); 118 } 119 120 static 121 struct input_file * 122 dopath(struct input_file *i, const char *filename) 123 { 124 char path[PATH_MAX]; 125 struct path_entry *pe; 126 FILE *f; 127 128 for (pe = first; pe; pe = pe->next) { 129 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 130 if ((f = fopen(path, "r")) != 0) { 131 set_input(i, f, path); 132 return i; 133 } 134 } 135 return NULL; 136 } 137 138 struct input_file * 139 fopen_trypath(struct input_file *i, const char *filename) 140 { 141 FILE *f; 142 143 f = fopen(filename, "r"); 144 if (f != NULL) { 145 set_input(i, f, filename); 146 return i; 147 } 148 if (filename[0] == '/') 149 return NULL; 150 151 ensure_m4path(); 152 153 return dopath(i, filename); 154 } 155 156 void 157 doindir(const char *argv[], int argc) 158 { 159 ndptr n; 160 struct macro_definition *p; 161 162 n = lookup(argv[2]); 163 if (n == NULL || (p = macro_getdef(n)) == NULL) 164 m4errx(1, "indir: undefined macro %s.", argv[2]); 165 argv[1] = p->defn; 166 167 eval(argv+1, argc-1, p->type, is_traced(n)); 168 } 169 170 void 171 dobuiltin(const char *argv[], int argc) 172 { 173 ndptr p; 174 175 argv[1] = NULL; 176 p = macro_getbuiltin(argv[2]); 177 if (p != NULL) 178 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 179 else 180 m4errx(1, "unknown builtin %s.", argv[2]); 181 } 182 183 184 /* We need some temporary buffer space, as pb pushes BACK and substitution 185 * proceeds forward... */ 186 static char *buffer; 187 static size_t bufsize = 0; 188 static size_t current = 0; 189 190 static void addchars(const char *, size_t); 191 static void addchar(int); 192 static char *twiddle(const char *); 193 static char *getstring(void); 194 static void exit_regerror(int, regex_t *, const char *); 195 static void do_subst(const char *, regex_t *, const char *, const char *, 196 regmatch_t *); 197 static void do_regexpindex(const char *, regex_t *, const char *, regmatch_t *); 198 static void do_regexp(const char *, regex_t *, const char *, const char *, 199 regmatch_t *); 200 static void add_sub(int, const char *, regex_t *, regmatch_t *); 201 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 202 #define addconstantstring(s) addchars((s), sizeof(s)-1) 203 204 static void 205 addchars(const char *c, size_t n) 206 { 207 if (n == 0) 208 return; 209 while (current + n > bufsize) { 210 if (bufsize == 0) 211 bufsize = 1024; 212 else if (bufsize <= SIZE_MAX/2) { 213 bufsize *= 2; 214 } else { 215 errx(1, "size overflow"); 216 } 217 buffer = xrealloc(buffer, bufsize, NULL); 218 } 219 memcpy(buffer+current, c, n); 220 current += n; 221 } 222 223 static void 224 addchar(int c) 225 { 226 if (current +1 > bufsize) { 227 if (bufsize == 0) 228 bufsize = 1024; 229 else 230 bufsize *= 2; 231 buffer = xrealloc(buffer, bufsize, NULL); 232 } 233 buffer[current++] = c; 234 } 235 236 static char * 237 getstring() 238 { 239 addchar('\0'); 240 current = 0; 241 return buffer; 242 } 243 244 245 static void 246 exit_regerror(int er, regex_t *re, const char *source) 247 { 248 size_t errlen; 249 char *errbuf; 250 251 errlen = regerror(er, re, NULL, 0); 252 errbuf = xalloc(errlen, 253 "malloc in regerror: %lu", (unsigned long)errlen); 254 regerror(er, re, errbuf, errlen); 255 m4errx(1, "regular expression error in %s: %s.", source, errbuf); 256 } 257 258 static void 259 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 260 { 261 if (n > re->re_nsub) 262 warnx("No subexpression %d", n); 263 /* Subexpressions that did not match are 264 * not an error. */ 265 else if (pm[n].rm_so != -1 && 266 pm[n].rm_eo != -1) { 267 addchars(string + pm[n].rm_so, 268 pm[n].rm_eo - pm[n].rm_so); 269 } 270 } 271 272 /* Add replacement string to the output buffer, recognizing special 273 * constructs and replacing them with substrings of the original string. 274 */ 275 static void 276 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 277 { 278 const char *p; 279 280 for (p = replace; *p != '\0'; p++) { 281 if (*p == '&' && !mimic_gnu) { 282 add_sub(0, string, re, pm); 283 continue; 284 } 285 if (*p == '\\') { 286 if (p[1] == '\\') { 287 addchar(p[1]); 288 p++; 289 continue; 290 } 291 if (p[1] == '&') { 292 if (mimic_gnu) 293 add_sub(0, string, re, pm); 294 else 295 addchar(p[1]); 296 p++; 297 continue; 298 } 299 if (isdigit((unsigned char)p[1])) { 300 add_sub(*(++p) - '0', string, re, pm); 301 continue; 302 } 303 } 304 addchar(*p); 305 } 306 } 307 308 static void 309 do_subst(const char *string, regex_t *re, const char *source, 310 const char *replace, regmatch_t *pm) 311 { 312 int error; 313 int flags = 0; 314 const char *last_match = NULL; 315 316 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 317 if (pm[0].rm_eo != 0) { 318 if (string[pm[0].rm_eo-1] == '\n') 319 flags = 0; 320 else 321 flags = REG_NOTBOL; 322 } 323 324 /* NULL length matches are special... We use the `vi-mode' 325 * rule: don't allow a NULL-match at the last match 326 * position. 327 */ 328 if (pm[0].rm_so == pm[0].rm_eo && 329 string + pm[0].rm_so == last_match) { 330 if (*string == '\0') 331 return; 332 addchar(*string); 333 if (*string++ == '\n') 334 flags = 0; 335 else 336 flags = REG_NOTBOL; 337 continue; 338 } 339 last_match = string + pm[0].rm_so; 340 addchars(string, pm[0].rm_so); 341 add_replace(string, re, replace, pm); 342 string += pm[0].rm_eo; 343 } 344 if (error != REG_NOMATCH) 345 exit_regerror(error, re, source); 346 pbstr(string); 347 } 348 349 static void 350 do_regexp(const char *string, regex_t *re, const char *source, 351 const char *replace, regmatch_t *pm) 352 { 353 int error; 354 355 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 356 case 0: 357 add_replace(string, re, replace, pm); 358 pbstr(getstring()); 359 break; 360 case REG_NOMATCH: 361 break; 362 default: 363 exit_regerror(error, re, source); 364 } 365 } 366 367 static void 368 do_regexpindex(const char *string, regex_t *re, const char *source, 369 regmatch_t *pm) 370 { 371 int error; 372 373 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 374 case 0: 375 pbunsigned(pm[0].rm_so); 376 break; 377 case REG_NOMATCH: 378 pbnum(-1); 379 break; 380 default: 381 exit_regerror(error, re, source); 382 } 383 } 384 385 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 386 * says. So we twiddle with the regexp before passing it to regcomp. 387 */ 388 static char * 389 twiddle(const char *p) 390 { 391 /* + at start of regexp is a normal character for Gnu m4 */ 392 if (*p == '^') { 393 addchar(*p); 394 p++; 395 } 396 if (*p == '+') { 397 addchar('\\'); 398 } 399 /* This could use strcspn for speed... */ 400 while (*p != '\0') { 401 if (*p == '\\') { 402 switch(p[1]) { 403 case '(': 404 case ')': 405 case '|': 406 addchar(p[1]); 407 break; 408 case 'w': 409 addconstantstring("[_a-zA-Z0-9]"); 410 break; 411 case 'W': 412 addconstantstring("[^_a-zA-Z0-9]"); 413 break; 414 case '<': 415 addconstantstring("[[:<:]]"); 416 break; 417 case '>': 418 addconstantstring("[[:>:]]"); 419 break; 420 default: 421 addchars(p, 2); 422 break; 423 } 424 p+=2; 425 continue; 426 } 427 if (*p == '(' || *p == ')' || *p == '|') 428 addchar('\\'); 429 430 addchar(*p); 431 p++; 432 } 433 return getstring(); 434 } 435 436 /* patsubst(string, regexp, opt replacement) */ 437 /* argv[2]: string 438 * argv[3]: regexp 439 * argv[4]: opt rep 440 */ 441 void 442 dopatsubst(const char *argv[], int argc) 443 { 444 if (argc <= 3) { 445 warnx("Too few arguments to patsubst"); 446 return; 447 } 448 /* special case: empty regexp */ 449 if (argv[3][0] == '\0') { 450 const char *s; 451 size_t len; 452 if (argc > 4 && argv[4]) 453 len = strlen(argv[4]); 454 else 455 len = 0; 456 for (s = argv[2]; *s != '\0'; s++) { 457 addchars(argv[4], len); 458 addchar(*s); 459 } 460 } else { 461 int error; 462 regex_t re; 463 regmatch_t *pmatch; 464 int mode = REG_EXTENDED; 465 const char *source; 466 size_t l = strlen(argv[3]); 467 468 if (!mimic_gnu || 469 (argv[3][0] == '^') || 470 (l > 0 && argv[3][l-1] == '$')) 471 mode |= REG_NEWLINE; 472 473 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 474 error = regcomp(&re, source, mode); 475 if (error != 0) 476 exit_regerror(error, &re, source); 477 478 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), 479 NULL); 480 do_subst(argv[2], &re, source, 481 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 482 free(pmatch); 483 regfree(&re); 484 } 485 pbstr(getstring()); 486 } 487 488 void 489 doregexp(const char *argv[], int argc) 490 { 491 int error; 492 regex_t re; 493 regmatch_t *pmatch; 494 const char *source; 495 496 if (argc <= 3) { 497 warnx("Too few arguments to regexp"); 498 return; 499 } 500 /* special gnu case */ 501 if (argv[3][0] == '\0' && mimic_gnu) { 502 if (argc == 4 || argv[4] == NULL) 503 return; 504 else 505 pbstr(argv[4]); 506 } 507 source = mimic_gnu ? twiddle(argv[3]) : argv[3]; 508 error = regcomp(&re, source, REG_EXTENDED|REG_NEWLINE); 509 if (error != 0) 510 exit_regerror(error, &re, source); 511 512 pmatch = xreallocarray(NULL, re.re_nsub+1, sizeof(regmatch_t), NULL); 513 if (argc == 4 || argv[4] == NULL) 514 do_regexpindex(argv[2], &re, source, pmatch); 515 else 516 do_regexp(argv[2], &re, source, argv[4], pmatch); 517 free(pmatch); 518 regfree(&re); 519 } 520 521 void 522 doformat(const char *argv[], int argc) 523 { 524 const char *format = argv[2]; 525 int pos = 3; 526 int left_padded; 527 long width; 528 size_t l; 529 const char *thisarg; 530 char temp[2]; 531 long extra; 532 533 while (*format != 0) { 534 if (*format != '%') { 535 addchar(*format++); 536 continue; 537 } 538 539 format++; 540 if (*format == '%') { 541 addchar(*format++); 542 continue; 543 } 544 if (*format == 0) { 545 addchar('%'); 546 break; 547 } 548 549 if (*format == '*') { 550 format++; 551 if (pos >= argc) 552 m4errx(1, 553 "Format with too many format specifiers."); 554 width = strtol(argv[pos++], NULL, 10); 555 } else { 556 width = strtol(format, (char **)&format, 10); 557 } 558 if (width < 0) { 559 left_padded = 1; 560 width = -width; 561 } else { 562 left_padded = 0; 563 } 564 if (*format == '.') { 565 format++; 566 if (*format == '*') { 567 format++; 568 if (pos >= argc) 569 m4errx(1, 570 "Format with too many format specifiers."); 571 extra = strtol(argv[pos++], NULL, 10); 572 } else { 573 extra = strtol(format, (char **)&format, 10); 574 } 575 } else { 576 extra = LONG_MAX; 577 } 578 if (pos >= argc) 579 m4errx(1, "Format with too many format specifiers."); 580 switch(*format) { 581 case 's': 582 thisarg = argv[pos++]; 583 break; 584 case 'c': 585 temp[0] = strtoul(argv[pos++], NULL, 10); 586 temp[1] = 0; 587 thisarg = temp; 588 break; 589 default: 590 m4errx(1, "Unsupported format specification: %s.", 591 argv[2]); 592 } 593 format++; 594 l = strlen(thisarg); 595 if (l > extra) 596 l = extra; 597 if (!left_padded) { 598 while (l < width--) 599 addchar(' '); 600 } 601 addchars(thisarg, l); 602 if (left_padded) { 603 while (l < width--) 604 addchar(' '); 605 } 606 } 607 pbstr(getstring()); 608 } 609 610 void 611 doesyscmd(const char *cmd) 612 { 613 int p[2]; 614 pid_t pid, cpid; 615 char *argv[4]; 616 int cc; 617 int status; 618 619 /* Follow gnu m4 documentation: first flush buffers. */ 620 fflush(NULL); 621 622 argv[0] = "sh"; 623 argv[1] = "-c"; 624 argv[2] = (char *)cmd; 625 argv[3] = NULL; 626 627 /* Just set up standard output, share stderr and stdin with m4 */ 628 if (pipe(p) == -1) 629 err(1, "bad pipe"); 630 switch(cpid = fork()) { 631 case -1: 632 err(1, "bad fork"); 633 /* NOTREACHED */ 634 case 0: 635 (void) close(p[0]); 636 (void) dup2(p[1], 1); 637 (void) close(p[1]); 638 execv(_PATH_BSHELL, argv); 639 exit(1); 640 default: 641 /* Read result in two stages, since m4's buffer is 642 * pushback-only. */ 643 (void) close(p[1]); 644 do { 645 char result[BUFSIZE]; 646 cc = read(p[0], result, sizeof result); 647 if (cc > 0) 648 addchars(result, cc); 649 } while (cc > 0 || (cc == -1 && errno == EINTR)); 650 651 (void) close(p[0]); 652 while ((pid = wait(&status)) != cpid && pid >= 0) 653 continue; 654 pbstr(getstring()); 655 } 656 } 657 658 void 659 getdivfile(const char *name) 660 { 661 FILE *f; 662 int c; 663 664 f = fopen(name, "r"); 665 if (!f) 666 return; 667 668 while ((c = getc(f))!= EOF) 669 putc(c, active); 670 (void) fclose(f); 671 } 672