1 /* $OpenBSD: gnum4.c,v 1.41 2010/09/07 19:58:09 marco Exp $ */ 2 3 /* 4 * Copyright (c) 1999 Marc Espie 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * functions needed to support gnu-m4 extensions, including a fake freezing 30 */ 31 32 #include <sys/param.h> 33 #include <sys/types.h> 34 #include <sys/wait.h> 35 #include <ctype.h> 36 #include <err.h> 37 #include <paths.h> 38 #include <regex.h> 39 #include <stddef.h> 40 #include <stdlib.h> 41 #include <stdio.h> 42 #include <string.h> 43 #include <errno.h> 44 #include <unistd.h> 45 #include "mdef.h" 46 #include "stdd.h" 47 #include "extern.h" 48 49 50 int mimic_gnu = 0; 51 52 /* 53 * Support for include path search 54 * First search in the current directory. 55 * If not found, and the path is not absolute, include path kicks in. 56 * First, -I options, in the order found on the command line. 57 * Then M4PATH env variable 58 */ 59 60 struct path_entry { 61 char *name; 62 struct path_entry *next; 63 } *first, *last; 64 65 static struct path_entry *new_path_entry(const char *); 66 static void ensure_m4path(void); 67 static struct input_file *dopath(struct input_file *, const char *); 68 69 static struct path_entry * 70 new_path_entry(const char *dirname) 71 { 72 struct path_entry *n; 73 74 n = malloc(sizeof(struct path_entry)); 75 if (!n) 76 errx(1, "out of memory"); 77 n->name = strdup(dirname); 78 if (!n->name) 79 errx(1, "out of memory"); 80 n->next = 0; 81 return n; 82 } 83 84 void 85 addtoincludepath(const char *dirname) 86 { 87 struct path_entry *n; 88 89 n = new_path_entry(dirname); 90 91 if (last) { 92 last->next = n; 93 last = n; 94 } 95 else 96 last = first = n; 97 } 98 99 static void 100 ensure_m4path() 101 { 102 static int envpathdone = 0; 103 char *envpath; 104 char *sweep; 105 char *path; 106 107 if (envpathdone) 108 return; 109 envpathdone = TRUE; 110 envpath = getenv("M4PATH"); 111 if (!envpath) 112 return; 113 /* for portability: getenv result is read-only */ 114 envpath = strdup(envpath); 115 if (!envpath) 116 errx(1, "out of memory"); 117 for (sweep = envpath; 118 (path = strsep(&sweep, ":")) != NULL;) 119 addtoincludepath(path); 120 free(envpath); 121 } 122 123 static 124 struct input_file * 125 dopath(struct input_file *i, const char *filename) 126 { 127 char path[MAXPATHLEN]; 128 struct path_entry *pe; 129 FILE *f; 130 131 for (pe = first; pe; pe = pe->next) { 132 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 133 if ((f = fopen(path, "r")) != 0) { 134 set_input(i, f, path); 135 return i; 136 } 137 } 138 return NULL; 139 } 140 141 struct input_file * 142 fopen_trypath(struct input_file *i, const char *filename) 143 { 144 FILE *f; 145 146 f = fopen(filename, "r"); 147 if (f != NULL) { 148 set_input(i, f, filename); 149 return i; 150 } 151 if (filename[0] == '/') 152 return NULL; 153 154 ensure_m4path(); 155 156 return dopath(i, filename); 157 } 158 159 void 160 doindir(const char *argv[], int argc) 161 { 162 ndptr n; 163 struct macro_definition *p; 164 165 n = lookup(argv[2]); 166 if (n == NULL || (p = macro_getdef(n)) == NULL) 167 m4errx(1, "indir: undefined macro %s.", argv[2]); 168 argv[1] = p->defn; 169 170 eval(argv+1, argc-1, p->type, is_traced(n)); 171 } 172 173 void 174 dobuiltin(const char *argv[], int argc) 175 { 176 ndptr p; 177 178 argv[1] = NULL; 179 p = macro_getbuiltin(argv[2]); 180 if (p != NULL) 181 eval(argv+1, argc-1, macro_builtin_type(p), is_traced(p)); 182 else 183 m4errx(1, "unknown builtin %s.", argv[2]); 184 } 185 186 187 /* We need some temporary buffer space, as pb pushes BACK and substitution 188 * proceeds forward... */ 189 static char *buffer; 190 static size_t bufsize = 0; 191 static size_t current = 0; 192 193 static void addchars(const char *, size_t); 194 static void addchar(int); 195 static char *twiddle(const char *); 196 static char *getstring(void); 197 static void exit_regerror(int, regex_t *); 198 static void do_subst(const char *, regex_t *, const char *, regmatch_t *); 199 static void do_regexpindex(const char *, regex_t *, regmatch_t *); 200 static void do_regexp(const char *, regex_t *, const char *, regmatch_t *); 201 static void add_sub(int, const char *, regex_t *, regmatch_t *); 202 static void add_replace(const char *, regex_t *, const char *, regmatch_t *); 203 #define addconstantstring(s) addchars((s), sizeof(s)-1) 204 205 static void 206 addchars(const char *c, size_t n) 207 { 208 if (n == 0) 209 return; 210 while (current + n > bufsize) { 211 if (bufsize == 0) 212 bufsize = 1024; 213 else 214 bufsize *= 2; 215 buffer = xrealloc(buffer, bufsize, NULL); 216 } 217 memcpy(buffer+current, c, n); 218 current += n; 219 } 220 221 static void 222 addchar(int c) 223 { 224 if (current +1 > bufsize) { 225 if (bufsize == 0) 226 bufsize = 1024; 227 else 228 bufsize *= 2; 229 buffer = xrealloc(buffer, bufsize, NULL); 230 } 231 buffer[current++] = c; 232 } 233 234 static char * 235 getstring() 236 { 237 addchar('\0'); 238 current = 0; 239 return buffer; 240 } 241 242 243 static void 244 exit_regerror(int er, regex_t *re) 245 { 246 size_t errlen; 247 char *errbuf; 248 249 errlen = regerror(er, re, NULL, 0); 250 errbuf = xalloc(errlen, 251 "malloc in regerror: %lu", (unsigned long)errlen); 252 regerror(er, re, errbuf, errlen); 253 m4errx(1, "regular expression error: %s.", errbuf); 254 } 255 256 static void 257 add_sub(int n, const char *string, regex_t *re, regmatch_t *pm) 258 { 259 if (n > re->re_nsub) 260 warnx("No subexpression %d", n); 261 /* Subexpressions that did not match are 262 * not an error. */ 263 else if (pm[n].rm_so != -1 && 264 pm[n].rm_eo != -1) { 265 addchars(string + pm[n].rm_so, 266 pm[n].rm_eo - pm[n].rm_so); 267 } 268 } 269 270 /* Add replacement string to the output buffer, recognizing special 271 * constructs and replacing them with substrings of the original string. 272 */ 273 static void 274 add_replace(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 275 { 276 const char *p; 277 278 for (p = replace; *p != '\0'; p++) { 279 if (*p == '&' && !mimic_gnu) { 280 add_sub(0, string, re, pm); 281 continue; 282 } 283 if (*p == '\\') { 284 if (p[1] == '\\') { 285 addchar(p[1]); 286 p++; 287 continue; 288 } 289 if (p[1] == '&') { 290 if (mimic_gnu) 291 add_sub(0, string, re, pm); 292 else 293 addchar(p[1]); 294 p++; 295 continue; 296 } 297 if (isdigit(p[1])) { 298 add_sub(*(++p) - '0', string, re, pm); 299 continue; 300 } 301 } 302 addchar(*p); 303 } 304 } 305 306 static void 307 do_subst(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 308 { 309 int error; 310 int flags = 0; 311 const char *last_match = NULL; 312 313 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 314 if (pm[0].rm_eo != 0) { 315 if (string[pm[0].rm_eo-1] == '\n') 316 flags = 0; 317 else 318 flags = REG_NOTBOL; 319 } 320 321 /* NULL length matches are special... We use the `vi-mode' 322 * rule: don't allow a NULL-match at the last match 323 * position. 324 */ 325 if (pm[0].rm_so == pm[0].rm_eo && 326 string + pm[0].rm_so == last_match) { 327 if (*string == '\0') 328 return; 329 addchar(*string); 330 if (*string++ == '\n') 331 flags = 0; 332 else 333 flags = REG_NOTBOL; 334 continue; 335 } 336 last_match = string + pm[0].rm_so; 337 addchars(string, pm[0].rm_so); 338 add_replace(string, re, replace, pm); 339 string += pm[0].rm_eo; 340 } 341 if (error != REG_NOMATCH) 342 exit_regerror(error, re); 343 pbstr(string); 344 } 345 346 static void 347 do_regexp(const char *string, regex_t *re, const char *replace, regmatch_t *pm) 348 { 349 int error; 350 351 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 352 case 0: 353 add_replace(string, re, replace, pm); 354 pbstr(getstring()); 355 break; 356 case REG_NOMATCH: 357 break; 358 default: 359 exit_regerror(error, re); 360 } 361 } 362 363 static void 364 do_regexpindex(const char *string, regex_t *re, regmatch_t *pm) 365 { 366 int error; 367 368 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 369 case 0: 370 pbunsigned(pm[0].rm_so); 371 break; 372 case REG_NOMATCH: 373 pbnum(-1); 374 break; 375 default: 376 exit_regerror(error, re); 377 } 378 } 379 380 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 381 * says. So we twiddle with the regexp before passing it to regcomp. 382 */ 383 static char * 384 twiddle(const char *p) 385 { 386 /* + at start of regexp is a normal character for Gnu m4 */ 387 if (*p == '^') { 388 addchar(*p); 389 p++; 390 } 391 if (*p == '+') { 392 addchar('\\'); 393 } 394 /* This could use strcspn for speed... */ 395 while (*p != '\0') { 396 if (*p == '\\') { 397 switch(p[1]) { 398 case '(': 399 case ')': 400 case '|': 401 addchar(p[1]); 402 break; 403 case 'w': 404 addconstantstring("[_a-zA-Z0-9]"); 405 break; 406 case 'W': 407 addconstantstring("[^_a-zA-Z0-9]"); 408 break; 409 case '<': 410 addconstantstring("[[:<:]]"); 411 break; 412 case '>': 413 addconstantstring("[[:>:]]"); 414 break; 415 default: 416 addchars(p, 2); 417 break; 418 } 419 p+=2; 420 continue; 421 } 422 if (*p == '(' || *p == ')' || *p == '|') 423 addchar('\\'); 424 425 addchar(*p); 426 p++; 427 } 428 return getstring(); 429 } 430 431 /* patsubst(string, regexp, opt replacement) */ 432 /* argv[2]: string 433 * argv[3]: regexp 434 * argv[4]: opt rep 435 */ 436 void 437 dopatsubst(const char *argv[], int argc) 438 { 439 if (argc <= 3) { 440 warnx("Too few arguments to patsubst"); 441 return; 442 } 443 /* special case: empty regexp */ 444 if (argv[3][0] == '\0') { 445 const char *s; 446 size_t len; 447 if (argc > 4 && argv[4]) 448 len = strlen(argv[4]); 449 else 450 len = 0; 451 for (s = argv[2]; *s != '\0'; s++) { 452 addchars(argv[4], len); 453 addchar(*s); 454 } 455 } else { 456 int error; 457 regex_t re; 458 regmatch_t *pmatch; 459 int mode = REG_EXTENDED; 460 size_t l = strlen(argv[3]); 461 462 if (!mimic_gnu || 463 (argv[3][0] == '^') || 464 (l > 0 && argv[3][l-1] == '$')) 465 mode |= REG_NEWLINE; 466 467 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 468 mode); 469 if (error != 0) 470 exit_regerror(error, &re); 471 472 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 473 do_subst(argv[2], &re, 474 argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch); 475 free(pmatch); 476 regfree(&re); 477 } 478 pbstr(getstring()); 479 } 480 481 void 482 doregexp(const char *argv[], int argc) 483 { 484 int error; 485 regex_t re; 486 regmatch_t *pmatch; 487 488 if (argc <= 3) { 489 warnx("Too few arguments to regexp"); 490 return; 491 } 492 /* special gnu case */ 493 if (argv[3][0] == '\0' && mimic_gnu) { 494 if (argc == 4 || argv[4] == NULL) 495 return; 496 else 497 pbstr(argv[4]); 498 } 499 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 500 REG_EXTENDED); 501 if (error != 0) 502 exit_regerror(error, &re); 503 504 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1), NULL); 505 if (argc == 4 || argv[4] == NULL) 506 do_regexpindex(argv[2], &re, pmatch); 507 else 508 do_regexp(argv[2], &re, argv[4], pmatch); 509 free(pmatch); 510 regfree(&re); 511 } 512 513 void 514 doformat(const char *argv[], int argc) 515 { 516 const char *format = argv[2]; 517 int pos = 3; 518 int left_padded; 519 long width; 520 size_t l; 521 const char *thisarg; 522 char temp[2]; 523 long extra; 524 525 while (*format != 0) { 526 if (*format != '%') { 527 addchar(*format++); 528 continue; 529 } 530 531 format++; 532 if (*format == '%') { 533 addchar(*format++); 534 continue; 535 } 536 if (*format == 0) { 537 addchar('%'); 538 break; 539 } 540 541 if (*format == '*') { 542 format++; 543 if (pos >= argc) 544 m4errx(1, 545 "Format with too many format specifiers."); 546 width = strtol(argv[pos++], NULL, 10); 547 } else { 548 width = strtol(format, (char **)&format, 10); 549 } 550 if (width < 0) { 551 left_padded = 1; 552 width = -width; 553 } else { 554 left_padded = 0; 555 } 556 if (*format == '.') { 557 format++; 558 if (*format == '*') { 559 format++; 560 if (pos >= argc) 561 m4errx(1, 562 "Format with too many format specifiers."); 563 extra = strtol(argv[pos++], NULL, 10); 564 } else { 565 extra = strtol(format, (char **)&format, 10); 566 } 567 } else { 568 extra = LONG_MAX; 569 } 570 if (pos >= argc) 571 m4errx(1, "Format with too many format specifiers."); 572 switch(*format) { 573 case 's': 574 thisarg = argv[pos++]; 575 break; 576 case 'c': 577 temp[0] = strtoul(argv[pos++], NULL, 10); 578 temp[1] = 0; 579 thisarg = temp; 580 break; 581 default: 582 m4errx(1, "Unsupported format specification: %s.", 583 argv[2]); 584 } 585 format++; 586 l = strlen(thisarg); 587 if (l > extra) 588 l = extra; 589 if (!left_padded) { 590 while (l < width--) 591 addchar(' '); 592 } 593 addchars(thisarg, l); 594 if (left_padded) { 595 while (l < width--) 596 addchar(' '); 597 } 598 } 599 pbstr(getstring()); 600 } 601 602 void 603 doesyscmd(const char *cmd) 604 { 605 int p[2]; 606 pid_t pid, cpid; 607 char *argv[4]; 608 int cc; 609 int status; 610 611 /* Follow gnu m4 documentation: first flush buffers. */ 612 fflush(NULL); 613 614 argv[0] = "sh"; 615 argv[1] = "-c"; 616 argv[2] = (char *)cmd; 617 argv[3] = NULL; 618 619 /* Just set up standard output, share stderr and stdin with m4 */ 620 if (pipe(p) == -1) 621 err(1, "bad pipe"); 622 switch(cpid = fork()) { 623 case -1: 624 err(1, "bad fork"); 625 /* NOTREACHED */ 626 case 0: 627 (void) close(p[0]); 628 (void) dup2(p[1], 1); 629 (void) close(p[1]); 630 execv(_PATH_BSHELL, argv); 631 exit(1); 632 default: 633 /* Read result in two stages, since m4's buffer is 634 * pushback-only. */ 635 (void) close(p[1]); 636 do { 637 char result[BUFSIZE]; 638 cc = read(p[0], result, sizeof result); 639 if (cc > 0) 640 addchars(result, cc); 641 } while (cc > 0 || (cc == -1 && errno == EINTR)); 642 643 (void) close(p[0]); 644 while ((pid = wait(&status)) != cpid && pid >= 0) 645 continue; 646 pbstr(getstring()); 647 } 648 } 649 650 void 651 getdivfile(const char *name) 652 { 653 FILE *f; 654 int c; 655 656 f = fopen(name, "r"); 657 if (!f) 658 return; 659 660 while ((c = getc(f))!= EOF) 661 putc(c, active); 662 (void) fclose(f); 663 } 664