1 /* $NetBSD: gnum4.c,v 1.5 2004/10/30 20:39:35 dsl Exp $ */ 2 /* $OpenBSD: gnum4.c,v 1.15 2001/10/13 20:18:48 espie Exp $ */ 3 4 /* 5 * Copyright (c) 1999 Marc Espie 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #if HAVE_NBTOOL_CONFIG_H 30 #include "nbtool_config.h" 31 #endif 32 33 /* 34 * functions needed to support gnu-m4 extensions, including a fake freezing 35 */ 36 37 #include <sys/param.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include <ctype.h> 41 #include <errno.h> 42 #include <paths.h> 43 #include <regex.h> 44 #include <stddef.h> 45 #include <stdlib.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include "mdef.h" 49 #include "stdd.h" 50 #include "extern.h" 51 52 53 int mimic_gnu = 0; 54 55 /* 56 * Support for include path search 57 * First search in the current directory. 58 * If not found, and the path is not absolute, include path kicks in. 59 * First, -I options, in the order found on the command line. 60 * Then M4PATH env variable 61 */ 62 63 struct path_entry { 64 char *name; 65 struct path_entry *next; 66 } *first, *last; 67 68 static struct path_entry *new_path_entry __P((const char *)); 69 static void ensure_m4path __P((void)); 70 static struct input_file *dopath __P((struct input_file *, const char *)); 71 72 static struct path_entry * 73 new_path_entry(dirname) 74 const char *dirname; 75 { 76 struct path_entry *n; 77 78 n = malloc(sizeof(struct path_entry)); 79 if (!n) 80 errx(1, "out of memory"); 81 n->name = strdup(dirname); 82 if (!n->name) 83 errx(1, "out of memory"); 84 n->next = 0; 85 return n; 86 } 87 88 void 89 addtoincludepath(dirname) 90 const char *dirname; 91 { 92 struct path_entry *n; 93 94 n = new_path_entry(dirname); 95 96 if (last) { 97 last->next = n; 98 last = n; 99 } 100 else 101 last = first = n; 102 } 103 104 static void 105 ensure_m4path() 106 { 107 static int envpathdone = 0; 108 char *envpath; 109 char *sweep; 110 char *path; 111 112 if (envpathdone) 113 return; 114 envpathdone = TRUE; 115 envpath = getenv("M4PATH"); 116 if (!envpath) 117 return; 118 /* for portability: getenv result is read-only */ 119 envpath = strdup(envpath); 120 if (!envpath) 121 errx(1, "out of memory"); 122 for (sweep = envpath; 123 (path = strsep(&sweep, ":")) != NULL;) 124 addtoincludepath(path); 125 free(envpath); 126 } 127 128 static 129 struct input_file * 130 dopath(i, filename) 131 struct input_file *i; 132 const char *filename; 133 { 134 char path[MAXPATHLEN]; 135 struct path_entry *pe; 136 FILE *f; 137 138 for (pe = first; pe; pe = pe->next) { 139 snprintf(path, sizeof(path), "%s/%s", pe->name, filename); 140 if ((f = fopen(path, "r")) != 0) { 141 set_input(i, f, path); 142 return i; 143 } 144 } 145 return NULL; 146 } 147 148 struct input_file * 149 fopen_trypath(i, filename) 150 struct input_file *i; 151 const char *filename; 152 { 153 FILE *f; 154 155 f = fopen(filename, "r"); 156 if (f != NULL) { 157 set_input(i, f, filename); 158 return i; 159 } 160 if (filename[0] == '/') 161 return NULL; 162 163 ensure_m4path(); 164 165 return dopath(i, filename); 166 } 167 168 void 169 doindir(argv, argc) 170 const char *argv[]; 171 int argc; 172 { 173 ndptr p; 174 175 p = lookup(argv[2]); 176 if (p == NULL) 177 errx(1, "undefined macro %s", argv[2]); 178 argv[1] = p->defn; 179 eval(argv+1, argc-1, p->type); 180 } 181 182 void 183 dobuiltin(argv, argc) 184 const char *argv[]; 185 int argc; 186 { 187 int n; 188 argv[1] = NULL; 189 n = builtin_type(argv[2]); 190 if (n != -1) 191 eval(argv+1, argc-1, n); 192 else 193 errx(1, "unknown builtin %s", argv[2]); 194 } 195 196 197 /* We need some temporary buffer space, as pb pushes BACK and substitution 198 * proceeds forward... */ 199 static char *buffer; 200 static size_t bufsize = 0; 201 static size_t current = 0; 202 203 static void addchars __P((const char *, size_t)); 204 static void addchar __P((char)); 205 static char *twiddle __P((const char *)); 206 static char *getstring __P((void)); 207 static void exit_regerror __P((int, regex_t *)); 208 static void do_subst __P((const char *, regex_t *, const char *, regmatch_t *)); 209 static void do_regexpindex __P((const char *, regex_t *, regmatch_t *)); 210 static void do_regexp __P((const char *, regex_t *, const char *, regmatch_t *)); 211 static void add_sub __P((int, const char *, regex_t *, regmatch_t *)); 212 static void add_replace __P((const char *, regex_t *, const char *, regmatch_t *)); 213 #define addconstantstring(s) addchars((s), sizeof(s)-1) 214 215 static void 216 addchars(c, n) 217 const char *c; 218 size_t n; 219 { 220 if (n == 0) 221 return; 222 while (current + n > bufsize) { 223 if (bufsize == 0) 224 bufsize = 1024; 225 else 226 bufsize *= 2; 227 buffer = realloc(buffer, bufsize); 228 if (buffer == NULL) 229 errx(1, "out of memory"); 230 } 231 memcpy(buffer+current, c, n); 232 current += n; 233 } 234 235 static void 236 addchar(c) 237 char c; 238 { 239 if (current +1 > bufsize) { 240 if (bufsize == 0) 241 bufsize = 1024; 242 else 243 bufsize *= 2; 244 buffer = realloc(buffer, bufsize); 245 if (buffer == NULL) 246 errx(1, "out of memory"); 247 } 248 buffer[current++] = c; 249 } 250 251 static char * 252 getstring() 253 { 254 addchar('\0'); 255 current = 0; 256 return buffer; 257 } 258 259 260 static void 261 exit_regerror(er, re) 262 int er; 263 regex_t *re; 264 { 265 size_t errlen; 266 char *errbuf; 267 268 errlen = regerror(er, re, NULL, 0); 269 errbuf = xalloc(errlen); 270 regerror(er, re, errbuf, errlen); 271 errx(1, "regular expression error: %s", errbuf); 272 } 273 274 static void 275 add_sub(n, string, re, pm) 276 int n; 277 const char *string; 278 regex_t *re; 279 regmatch_t *pm; 280 { 281 if (n > re->re_nsub) 282 warnx("No subexpression %d", n); 283 /* Subexpressions that did not match are 284 * not an error. */ 285 else if (pm[n].rm_so != -1 && 286 pm[n].rm_eo != -1) { 287 addchars(string + pm[n].rm_so, 288 pm[n].rm_eo - pm[n].rm_so); 289 } 290 } 291 292 /* Add replacement string to the output buffer, recognizing special 293 * constructs and replacing them with substrings of the original string. 294 */ 295 static void 296 add_replace(string, re, replace, pm) 297 const char *string; 298 regex_t *re; 299 const char *replace; 300 regmatch_t *pm; 301 { 302 const char *p; 303 304 for (p = replace; *p != '\0'; p++) { 305 if (*p == '&' && !mimic_gnu) { 306 add_sub(0, string, re, pm); 307 continue; 308 } 309 if (*p == '\\') { 310 if (p[1] == '\\') { 311 addchar(p[1]); 312 p++; 313 continue; 314 } 315 if (p[1] == '&') { 316 if (mimic_gnu) 317 add_sub(0, string, re, pm); 318 else 319 addchar(p[1]); 320 p++; 321 continue; 322 } 323 if (isdigit((unsigned char)p[1])) { 324 add_sub(*(++p) - '0', string, re, pm); 325 continue; 326 } 327 } 328 addchar(*p); 329 } 330 } 331 332 static void 333 do_subst(string, re, replace, pm) 334 const char *string; 335 regex_t *re; 336 const char *replace; 337 regmatch_t *pm; 338 { 339 int error; 340 int flags = 0; 341 const char *last_match = NULL; 342 343 while ((error = regexec(re, string, re->re_nsub+1, pm, flags)) == 0) { 344 if (pm[0].rm_eo != 0) { 345 if (string[pm[0].rm_eo-1] == '\n') 346 flags = 0; 347 else 348 flags = REG_NOTBOL; 349 } 350 351 /* NULL length matches are special... We use the `vi-mode' 352 * rule: don't allow a NULL-match at the last match 353 * position. 354 */ 355 if (pm[0].rm_so == pm[0].rm_eo && 356 string + pm[0].rm_so == last_match) { 357 if (*string == '\0') 358 return; 359 addchar(*string); 360 if (*string++ == '\n') 361 flags = 0; 362 else 363 flags = REG_NOTBOL; 364 continue; 365 } 366 last_match = string + pm[0].rm_so; 367 addchars(string, pm[0].rm_so); 368 add_replace(string, re, replace, pm); 369 string += pm[0].rm_eo; 370 } 371 if (error != REG_NOMATCH) 372 exit_regerror(error, re); 373 pbstr(string); 374 } 375 376 static void 377 do_regexp(string, re, replace, pm) 378 const char *string; 379 regex_t *re; 380 const char *replace; 381 regmatch_t *pm; 382 { 383 int error; 384 385 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 386 case 0: 387 add_replace(string, re, replace, pm); 388 pbstr(getstring()); 389 break; 390 case REG_NOMATCH: 391 break; 392 default: 393 exit_regerror(error, re); 394 } 395 } 396 397 static void 398 do_regexpindex(string, re, pm) 399 const char *string; 400 regex_t *re; 401 regmatch_t *pm; 402 { 403 int error; 404 405 switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) { 406 case 0: 407 pbunsigned(pm[0].rm_so); 408 break; 409 case REG_NOMATCH: 410 pbnum(-1); 411 break; 412 default: 413 exit_regerror(error, re); 414 } 415 } 416 417 /* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2 418 * says. So we twiddle with the regexp before passing it to regcomp. 419 */ 420 static char * 421 twiddle(p) 422 const char *p; 423 { 424 /* This could use strcspn for speed... */ 425 while (*p != '\0') { 426 if (*p == '\\') { 427 switch(p[1]) { 428 case '(': 429 case ')': 430 case '|': 431 addchar(p[1]); 432 break; 433 case 'w': 434 addconstantstring("[_a-zA-Z0-9]"); 435 break; 436 case 'W': 437 addconstantstring("[^_a-zA-Z0-9]"); 438 break; 439 case '<': 440 addconstantstring("[[:<:]]"); 441 break; 442 case '>': 443 addconstantstring("[[:>:]]"); 444 break; 445 default: 446 addchars(p, 2); 447 break; 448 } 449 p+=2; 450 continue; 451 } 452 if (*p == '(' || *p == ')' || *p == '|') 453 addchar('\\'); 454 455 addchar(*p); 456 p++; 457 } 458 return getstring(); 459 } 460 461 /* patsubst(string, regexp, opt replacement) */ 462 /* argv[2]: string 463 * argv[3]: regexp 464 * argv[4]: opt rep 465 */ 466 void 467 dopatsubst(argv, argc) 468 const char *argv[]; 469 int argc; 470 { 471 int error; 472 regex_t re; 473 regmatch_t *pmatch; 474 475 if (argc <= 3) { 476 warnx("Too few arguments to patsubst"); 477 return; 478 } 479 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 480 REG_NEWLINE | REG_EXTENDED); 481 if (error != 0) 482 exit_regerror(error, &re); 483 484 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 485 do_subst(argv[2], &re, 486 argc != 4 && argv[4] != NULL ? argv[4] : "", pmatch); 487 pbstr(getstring()); 488 free(pmatch); 489 regfree(&re); 490 } 491 492 void 493 doregexp(argv, argc) 494 const char *argv[]; 495 int argc; 496 { 497 int error; 498 regex_t re; 499 regmatch_t *pmatch; 500 501 if (argc <= 3) { 502 warnx("Too few arguments to regexp"); 503 return; 504 } 505 error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3], 506 REG_EXTENDED); 507 if (error != 0) 508 exit_regerror(error, &re); 509 510 pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1)); 511 if (argv[4] == NULL || argc == 4) 512 do_regexpindex(argv[2], &re, pmatch); 513 else 514 do_regexp(argv[2], &re, argv[4], pmatch); 515 free(pmatch); 516 regfree(&re); 517 } 518 519 void 520 doesyscmd(cmd) 521 const char *cmd; 522 { 523 int p[2]; 524 pid_t pid, cpid; 525 char *argv[4]; 526 int cc; 527 int status; 528 529 /* Follow gnu m4 documentation: first flush buffers. */ 530 fflush(NULL); 531 532 argv[0] = "sh"; 533 argv[1] = "-c"; 534 argv[2] = (char *)cmd; 535 argv[3] = NULL; 536 537 /* Just set up standard output, share stderr and stdin with m4 */ 538 if (pipe(p) == -1) 539 err(1, "bad pipe"); 540 switch(cpid = fork()) { 541 case -1: 542 err(1, "bad fork"); 543 /* NOTREACHED */ 544 case 0: 545 (void) close(p[0]); 546 (void) dup2(p[1], 1); 547 (void) close(p[1]); 548 execv(_PATH_BSHELL, argv); 549 exit(1); 550 default: 551 /* Read result in two stages, since m4's buffer is 552 * pushback-only. */ 553 (void) close(p[1]); 554 do { 555 char result[BUFSIZE]; 556 cc = read(p[0], result, sizeof result); 557 if (cc > 0) 558 addchars(result, cc); 559 } while (cc > 0 || (cc == -1 && errno == EINTR)); 560 561 (void) close(p[0]); 562 while ((pid = wait(&status)) != cpid && pid >= 0) 563 continue; 564 pbstr(getstring()); 565 } 566 } 567