1*55992Sbostic /*- 2*55992Sbostic * Copyright (c) 1992 Diomidis Spinellis. 3*55992Sbostic * Copyright (c) 1992 The Regents of the University of California. 4*55992Sbostic * All rights reserved. 5*55992Sbostic * 6*55992Sbostic * This code is derived from software contributed to Berkeley by 7*55992Sbostic * Diomidis Spinellis of Imperial College, University of London. 8*55992Sbostic * 9*55992Sbostic * %sccs.include.redist.c% 10*55992Sbostic */ 11*55992Sbostic 12*55992Sbostic #ifndef lint 13*55992Sbostic static char sccsid[] = "@(#)compile.c 5.1 (Berkeley) 08/23/92"; 14*55992Sbostic #endif /* not lint */ 15*55992Sbostic 16*55992Sbostic #include <sys/types.h> 17*55992Sbostic #include <sys/stat.h> 18*55992Sbostic 19*55992Sbostic #include <ctype.h> 20*55992Sbostic #include <errno.h> 21*55992Sbostic #include <fcntl.h> 22*55992Sbostic #include <limits.h> 23*55992Sbostic #include <regex.h> 24*55992Sbostic #include <stdio.h> 25*55992Sbostic #include <stdlib.h> 26*55992Sbostic #include <string.h> 27*55992Sbostic 28*55992Sbostic #include "defs.h" 29*55992Sbostic #include "extern.h" 30*55992Sbostic 31*55992Sbostic static char *compile_addr __P((char *, struct s_addr *)); 32*55992Sbostic static char *compile_delimited __P((char *, char *)); 33*55992Sbostic static char *compile_flags __P((char *, struct s_subst *)); 34*55992Sbostic static char *compile_re __P((char *, regex_t *, int)); 35*55992Sbostic static char *compile_subst __P((char *, char **, size_t)); 36*55992Sbostic static char *compile_text __P((void)); 37*55992Sbostic static char *compile_tr __P((char *, char **)); 38*55992Sbostic static struct s_command 39*55992Sbostic **compile_stream __P((char *, struct s_command **, char *)); 40*55992Sbostic static char *duptoeol __P((char *)); 41*55992Sbostic static struct s_command 42*55992Sbostic *findlabel __P((struct s_command *, struct s_command *)); 43*55992Sbostic static void fixuplabel __P((struct s_command *, struct s_command *)); 44*55992Sbostic 45*55992Sbostic /* 46*55992Sbostic * Command specification. This is used to drive the command parser. 47*55992Sbostic */ 48*55992Sbostic struct s_format { 49*55992Sbostic char code; /* Command code */ 50*55992Sbostic int naddr; /* Number of address args */ 51*55992Sbostic enum e_args args; /* Argument type */ 52*55992Sbostic }; 53*55992Sbostic 54*55992Sbostic static struct s_format cmd_fmts[] = { 55*55992Sbostic {'{', 2, GROUP}, 56*55992Sbostic {'a', 1, TEXT}, 57*55992Sbostic {'b', 2, BRANCH}, 58*55992Sbostic {'c', 2, TEXT}, 59*55992Sbostic {'d', 2, EMPTY}, 60*55992Sbostic {'D', 2, EMPTY}, 61*55992Sbostic {'g', 2, EMPTY}, 62*55992Sbostic {'G', 2, EMPTY}, 63*55992Sbostic {'h', 2, EMPTY}, 64*55992Sbostic {'H', 2, EMPTY}, 65*55992Sbostic {'i', 1, TEXT}, 66*55992Sbostic {'l', 2, EMPTY}, 67*55992Sbostic {'n', 2, EMPTY}, 68*55992Sbostic {'N', 2, EMPTY}, 69*55992Sbostic {'p', 2, EMPTY}, 70*55992Sbostic {'P', 2, EMPTY}, 71*55992Sbostic {'q', 1, EMPTY}, 72*55992Sbostic {'r', 1, RFILE}, 73*55992Sbostic {'s', 2, SUBST}, 74*55992Sbostic {'t', 2, BRANCH}, 75*55992Sbostic {'w', 2, WFILE}, 76*55992Sbostic {'x', 2, EMPTY}, 77*55992Sbostic {'y', 2, TR}, 78*55992Sbostic {'!', 2, NONSEL}, 79*55992Sbostic {':', 0, LABEL}, 80*55992Sbostic {'#', 0, COMMENT}, 81*55992Sbostic {'=', 1, EMPTY}, 82*55992Sbostic {'\0', 0, COMMENT}, 83*55992Sbostic }; 84*55992Sbostic 85*55992Sbostic /* The compiled program */ 86*55992Sbostic struct s_command *prog; 87*55992Sbostic 88*55992Sbostic /* 89*55992Sbostic * Compile the program into prog. 90*55992Sbostic * Initialise appends 91*55992Sbostic */ 92*55992Sbostic void 93*55992Sbostic compile() 94*55992Sbostic { 95*55992Sbostic *compile_stream(NULL, &prog, NULL) = NULL; 96*55992Sbostic fixuplabel(prog, prog); 97*55992Sbostic appends = xmalloc(sizeof(struct s_appends) * appendnum); 98*55992Sbostic } 99*55992Sbostic 100*55992Sbostic #define EATSPACE() do { \ 101*55992Sbostic if (p) \ 102*55992Sbostic while (*p && isascii(*p) && isspace(*p)) \ 103*55992Sbostic p++; \ 104*55992Sbostic } while (0) 105*55992Sbostic 106*55992Sbostic static struct s_command ** 107*55992Sbostic compile_stream(terminator, link, p) 108*55992Sbostic char *terminator; 109*55992Sbostic struct s_command **link; 110*55992Sbostic register char *p; 111*55992Sbostic { 112*55992Sbostic static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 113*55992Sbostic struct s_command *cmd, *cmd2; 114*55992Sbostic struct s_format *fp; 115*55992Sbostic int naddr; /* Number of addresses */ 116*55992Sbostic 117*55992Sbostic if (p != NULL) 118*55992Sbostic goto semicolon; 119*55992Sbostic for (;;) { 120*55992Sbostic if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) { 121*55992Sbostic if (terminator != NULL) 122*55992Sbostic err(COMPILE, "unexpected EOF (pending }'s)"); 123*55992Sbostic return (link); 124*55992Sbostic } 125*55992Sbostic 126*55992Sbostic semicolon: EATSPACE(); 127*55992Sbostic if (p && (*p == '#' || *p == '\0')) 128*55992Sbostic continue; 129*55992Sbostic if (*p == '}') { 130*55992Sbostic if (terminator == NULL) 131*55992Sbostic err(COMPILE, "unexpected }"); 132*55992Sbostic return (link); 133*55992Sbostic } 134*55992Sbostic *link = cmd = xmalloc(sizeof(struct s_command)); 135*55992Sbostic link = &cmd->next; 136*55992Sbostic cmd->nonsel = cmd->inrange = 0; 137*55992Sbostic /* First parse the addresses */ 138*55992Sbostic naddr = 0; 139*55992Sbostic cmd->a1 = cmd->a2 = NULL; 140*55992Sbostic 141*55992Sbostic /* Valid characters to start an address */ 142*55992Sbostic #define addrchar(c) (strchr("0123456789/\\$", (c))) 143*55992Sbostic if (addrchar(*p)) { 144*55992Sbostic naddr++; 145*55992Sbostic cmd->a1 = xmalloc(sizeof(struct s_addr)); 146*55992Sbostic p = compile_addr(p, cmd->a1); 147*55992Sbostic EATSPACE(); /* EXTENSION */ 148*55992Sbostic if (*p == ',') { 149*55992Sbostic naddr++; 150*55992Sbostic p++; 151*55992Sbostic EATSPACE(); /* EXTENSION */ 152*55992Sbostic cmd->a2 = xmalloc(sizeof(struct s_addr)); 153*55992Sbostic p = compile_addr(p, cmd->a2); 154*55992Sbostic } 155*55992Sbostic } 156*55992Sbostic 157*55992Sbostic nonsel: /* Now parse the command */ 158*55992Sbostic EATSPACE(); 159*55992Sbostic if (!*p) 160*55992Sbostic err(COMPILE, "command expected"); 161*55992Sbostic cmd->code = *p; 162*55992Sbostic for (fp = cmd_fmts; fp->code; fp++) 163*55992Sbostic if (fp->code == *p) 164*55992Sbostic break; 165*55992Sbostic if (!fp->code) 166*55992Sbostic err(COMPILE, "invalid command code %c", *p); 167*55992Sbostic if (naddr > fp->naddr) 168*55992Sbostic err(COMPILE, 169*55992Sbostic "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr); 170*55992Sbostic switch (fp->args) { 171*55992Sbostic case NONSEL: /* ! */ 172*55992Sbostic cmd->nonsel = ! cmd->nonsel; 173*55992Sbostic p++; 174*55992Sbostic goto nonsel; 175*55992Sbostic case GROUP: /* { */ 176*55992Sbostic p++; 177*55992Sbostic EATSPACE(); 178*55992Sbostic if (!*p) 179*55992Sbostic p = NULL; 180*55992Sbostic cmd2 = xmalloc(sizeof(struct s_command)); 181*55992Sbostic cmd2->code = '}'; 182*55992Sbostic *compile_stream("}", &cmd->u.c, p) = cmd2; 183*55992Sbostic cmd->next = cmd2; 184*55992Sbostic link = &cmd2->next; 185*55992Sbostic break; 186*55992Sbostic case EMPTY: /* d D g G h H l n N p P q x = \0 */ 187*55992Sbostic p++; 188*55992Sbostic EATSPACE(); 189*55992Sbostic if (*p == ';') { 190*55992Sbostic p++; 191*55992Sbostic link = &cmd->next; 192*55992Sbostic goto semicolon; 193*55992Sbostic } 194*55992Sbostic if (*p) 195*55992Sbostic err(COMPILE, 196*55992Sbostic "extra characters at the end of %c command", cmd->code); 197*55992Sbostic break; 198*55992Sbostic case TEXT: /* a c i */ 199*55992Sbostic p++; 200*55992Sbostic EATSPACE(); 201*55992Sbostic if (*p != '\\') 202*55992Sbostic err(COMPILE, 203*55992Sbostic "command %c expects \\ followed by text", cmd->code); 204*55992Sbostic p++; 205*55992Sbostic EATSPACE(); 206*55992Sbostic if (*p) 207*55992Sbostic err(COMPILE, 208*55992Sbostic "extra characters after \\ at the end of %c command", cmd->code); 209*55992Sbostic cmd->t = compile_text(); 210*55992Sbostic break; 211*55992Sbostic case COMMENT: /* \0 # */ 212*55992Sbostic break; 213*55992Sbostic case WFILE: /* w */ 214*55992Sbostic p++; 215*55992Sbostic EATSPACE(); 216*55992Sbostic if (*p == '\0') 217*55992Sbostic err(COMPILE, "filename expected"); 218*55992Sbostic cmd->t = duptoeol(p); 219*55992Sbostic if (aflag) 220*55992Sbostic cmd->u.fd = -1; 221*55992Sbostic else if ((cmd->u.fd = open(p, 222*55992Sbostic O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 223*55992Sbostic DEFFILEMODE)) == -1) 224*55992Sbostic err(FATAL, "%s: %s\n", p, strerror(errno)); 225*55992Sbostic break; 226*55992Sbostic case RFILE: /* r */ 227*55992Sbostic p++; 228*55992Sbostic EATSPACE(); 229*55992Sbostic if (*p == '\0') 230*55992Sbostic err(COMPILE, "filename expected"); 231*55992Sbostic else 232*55992Sbostic cmd->t = duptoeol(p); 233*55992Sbostic break; 234*55992Sbostic case BRANCH: /* b t */ 235*55992Sbostic p++; 236*55992Sbostic EATSPACE(); 237*55992Sbostic if (*p == '\0') 238*55992Sbostic cmd->t = NULL; 239*55992Sbostic else 240*55992Sbostic cmd->t = duptoeol(p); 241*55992Sbostic break; 242*55992Sbostic case LABEL: /* : */ 243*55992Sbostic p++; 244*55992Sbostic EATSPACE(); 245*55992Sbostic cmd->t = duptoeol(p); 246*55992Sbostic if (strlen(p) == 0) 247*55992Sbostic err(COMPILE, "empty label"); 248*55992Sbostic break; 249*55992Sbostic case SUBST: /* s */ 250*55992Sbostic p++; 251*55992Sbostic if (*p == '\0' || *p == '\\') 252*55992Sbostic err(COMPILE, 253*55992Sbostic "substitute pattern can not be delimited by newline or backslash"); 254*55992Sbostic cmd->u.s = xmalloc(sizeof(struct s_subst)); 255*55992Sbostic p = compile_re(p, &cmd->u.s->re, 0); 256*55992Sbostic if (p == NULL) 257*55992Sbostic err(COMPILE, "newline in substitution pattern"); 258*55992Sbostic cmd->u.s->pmatch = xmalloc((cmd->u.s->re.re_nsub + 1) * 259*55992Sbostic sizeof(regmatch_t)); 260*55992Sbostic p--; 261*55992Sbostic p = compile_subst(p, 262*55992Sbostic &cmd->u.s->new, cmd->u.s->re.re_nsub); 263*55992Sbostic if (p == NULL) 264*55992Sbostic err(COMPILE, 265*55992Sbostic "unterminated substitute replace in regular expression"); 266*55992Sbostic p = compile_flags(p, cmd->u.s); 267*55992Sbostic EATSPACE(); 268*55992Sbostic if (*p == ';') { 269*55992Sbostic p++; 270*55992Sbostic link = &cmd->next; 271*55992Sbostic goto semicolon; 272*55992Sbostic } 273*55992Sbostic break; 274*55992Sbostic case TR: /* y */ 275*55992Sbostic p++; 276*55992Sbostic p = compile_tr(p, (char **)&cmd->u.y); 277*55992Sbostic EATSPACE(); 278*55992Sbostic if (*p == ';') { 279*55992Sbostic p++; 280*55992Sbostic link = &cmd->next; 281*55992Sbostic goto semicolon; 282*55992Sbostic } 283*55992Sbostic if (*p) 284*55992Sbostic err(COMPILE, 285*55992Sbostic "extra text at the end of a transform command"); 286*55992Sbostic break; 287*55992Sbostic } 288*55992Sbostic } 289*55992Sbostic } 290*55992Sbostic 291*55992Sbostic /* 292*55992Sbostic * Get a delimited string. P points to the delimeter of the string; d points 293*55992Sbostic * to a buffer area. Newline and delimiter escapes are processed; other 294*55992Sbostic * escapes are ignored. 295*55992Sbostic * 296*55992Sbostic * Returns a pointer to the first character after the final delimiter or NULL 297*55992Sbostic * in the case of a non-terminated string. The character array d is filled 298*55992Sbostic * with the processed string. 299*55992Sbostic */ 300*55992Sbostic static char * 301*55992Sbostic compile_delimited(p, d) 302*55992Sbostic char *p, *d; 303*55992Sbostic { 304*55992Sbostic char c; 305*55992Sbostic 306*55992Sbostic c = *p++; 307*55992Sbostic if (c == '\0') 308*55992Sbostic return (NULL); 309*55992Sbostic else if (c == '\\') 310*55992Sbostic err(COMPILE, "\\ can not be used as a string delimiter"); 311*55992Sbostic else if (c == '\n') 312*55992Sbostic err(COMPILE, "newline can not be used as a string delimiter"); 313*55992Sbostic while (*p) { 314*55992Sbostic if (*p == '\\' && p[1] == c) 315*55992Sbostic p++; 316*55992Sbostic else if (*p == '\\' && p[1] == 'n') { 317*55992Sbostic *d++ = '\n'; 318*55992Sbostic p += 2; 319*55992Sbostic continue; 320*55992Sbostic } else if (*p == c) { 321*55992Sbostic *d = '\0'; 322*55992Sbostic return (p + 1); 323*55992Sbostic } 324*55992Sbostic *d++ = *p++; 325*55992Sbostic } 326*55992Sbostic return (NULL); 327*55992Sbostic } 328*55992Sbostic 329*55992Sbostic /* 330*55992Sbostic * Get a regular expression. P points to the delimeter of the regular 331*55992Sbostic * expression; d points a regexp pointer. Newline and delimiter escapes 332*55992Sbostic * are processed; other escapes are ignored. 333*55992Sbostic * Returns a pointer to the first character after the final delimiter 334*55992Sbostic * or NULL in the case of a non terminated regular expression. 335*55992Sbostic * The regexp pointer is set to the compiled regular expression. 336*55992Sbostic * Cflags are passed to regcomp. 337*55992Sbostic */ 338*55992Sbostic static char * 339*55992Sbostic compile_re(p, rep, cflags) 340*55992Sbostic char *p; 341*55992Sbostic regex_t *rep; 342*55992Sbostic int cflags; 343*55992Sbostic { 344*55992Sbostic int eval; 345*55992Sbostic char re[_POSIX2_LINE_MAX + 1]; 346*55992Sbostic 347*55992Sbostic p = compile_delimited(p, re); 348*55992Sbostic if (p && (eval = regcomp(rep, re, cflags)) != 0) 349*55992Sbostic err(COMPILE, "RE error: %s", strregerror(eval, rep)); 350*55992Sbostic return (p); 351*55992Sbostic } 352*55992Sbostic 353*55992Sbostic /* 354*55992Sbostic * Compile the substitution string of a regular expression and set res to 355*55992Sbostic * point to a saved copy of it. Nsub is the number of parenthesized regular 356*55992Sbostic * expressions. 357*55992Sbostic */ 358*55992Sbostic static char * 359*55992Sbostic compile_subst(p, res, nsub) 360*55992Sbostic char *p, **res; 361*55992Sbostic size_t nsub; 362*55992Sbostic { 363*55992Sbostic static char lbuf[_POSIX2_LINE_MAX + 1]; 364*55992Sbostic int asize, size; 365*55992Sbostic char c, *text, *op, *s; 366*55992Sbostic 367*55992Sbostic c = *p++; /* Terminator character */ 368*55992Sbostic if (c == '\0') 369*55992Sbostic return (NULL); 370*55992Sbostic 371*55992Sbostic asize = 2 * _POSIX2_LINE_MAX + 1; 372*55992Sbostic text = xmalloc(asize); 373*55992Sbostic size = 0; 374*55992Sbostic do { 375*55992Sbostic op = s = text + size; 376*55992Sbostic for (; *p; p++) { 377*55992Sbostic if (*p == '\\') { 378*55992Sbostic p++; 379*55992Sbostic if (strchr("123456789", *p) != NULL) { 380*55992Sbostic *s++ = '\\'; 381*55992Sbostic if (*p - '1' > nsub) 382*55992Sbostic err(COMPILE, 383*55992Sbostic "\\%c not defined in regular expression (use \\1-\\%d)", *p, nsub + 1); 384*55992Sbostic } else if (*p == '&') 385*55992Sbostic *s++ = '\\'; 386*55992Sbostic } else if (*p == c) { 387*55992Sbostic p++; 388*55992Sbostic *s++ = '\0'; 389*55992Sbostic size += s - op; 390*55992Sbostic *res = xrealloc(text, size); 391*55992Sbostic return (p); 392*55992Sbostic } else if (*p == '\n') { 393*55992Sbostic err(COMPILE, 394*55992Sbostic "unescaped newline inside substitute pattern"); 395*55992Sbostic return (NULL); 396*55992Sbostic } 397*55992Sbostic *s++ = *p; 398*55992Sbostic } 399*55992Sbostic size += s - op; 400*55992Sbostic if (asize - size < _POSIX2_LINE_MAX + 1) { 401*55992Sbostic asize *= 2; 402*55992Sbostic text = xmalloc(asize); 403*55992Sbostic } 404*55992Sbostic } while (cu_fgets(p = lbuf, sizeof(lbuf))); 405*55992Sbostic err(COMPILE, "EOF in substitute pattern"); 406*55992Sbostic return (NULL); 407*55992Sbostic } 408*55992Sbostic 409*55992Sbostic /* 410*55992Sbostic * Compile the flags of the s command 411*55992Sbostic */ 412*55992Sbostic static char * 413*55992Sbostic compile_flags(p, s) 414*55992Sbostic char *p; 415*55992Sbostic struct s_subst *s; 416*55992Sbostic { 417*55992Sbostic int gn; /* True if we have seen g or n */ 418*55992Sbostic char wfile[_POSIX2_LINE_MAX + 1], *q; 419*55992Sbostic 420*55992Sbostic s->n = 1; /* Default */ 421*55992Sbostic s->p = 0; 422*55992Sbostic s->wfile = NULL; 423*55992Sbostic s->wfd = -1; 424*55992Sbostic for (gn = 0;;) { 425*55992Sbostic EATSPACE(); /* EXTENSION */ 426*55992Sbostic switch (*p) { 427*55992Sbostic case 'g': 428*55992Sbostic if (gn) 429*55992Sbostic err(WARNING, 430*55992Sbostic "both g and number in substitute flags"); 431*55992Sbostic gn = 1; 432*55992Sbostic s->n = 0; 433*55992Sbostic break; 434*55992Sbostic case '\0': 435*55992Sbostic case '\n': 436*55992Sbostic case ';': 437*55992Sbostic return (p); 438*55992Sbostic case 'p': 439*55992Sbostic s->p = 1; 440*55992Sbostic break; 441*55992Sbostic case '1': case '2': case '3': 442*55992Sbostic case '4': case '5': case '6': 443*55992Sbostic case '7': case '8': case '9': 444*55992Sbostic if (gn) 445*55992Sbostic err(WARNING, 446*55992Sbostic "both g and number in substitute flags"); 447*55992Sbostic gn = 1; 448*55992Sbostic /* XXX Check for overflow */ 449*55992Sbostic s->n = (int)strtol(p, &p, 10); 450*55992Sbostic break; 451*55992Sbostic case 'w': 452*55992Sbostic p++; 453*55992Sbostic #ifdef HISTORIC_PRACTICE 454*55992Sbostic if (*p != ' ') { 455*55992Sbostic err(WARNING, "space missing before w wfile"); 456*55992Sbostic return (p); 457*55992Sbostic } 458*55992Sbostic #endif 459*55992Sbostic EATSPACE(); 460*55992Sbostic q = wfile; 461*55992Sbostic while (*p) { 462*55992Sbostic if (*p == '\n') 463*55992Sbostic break; 464*55992Sbostic *q++ = *p++; 465*55992Sbostic } 466*55992Sbostic *q = '\0'; 467*55992Sbostic if (q == wfile) 468*55992Sbostic err(COMPILE, "empty wfile specified"); 469*55992Sbostic s->wfile = strdup(wfile); 470*55992Sbostic if (!aflag && (s->wfd = open(wfile, 471*55992Sbostic O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 472*55992Sbostic DEFFILEMODE)) == -1) 473*55992Sbostic err(FATAL, "%s: %s\n", wfile, strerror(errno)); 474*55992Sbostic return (p); 475*55992Sbostic default: 476*55992Sbostic err(COMPILE, 477*55992Sbostic "bad flag in substitute command: '%c'", p[-1]); 478*55992Sbostic break; 479*55992Sbostic } 480*55992Sbostic p++; 481*55992Sbostic } 482*55992Sbostic } 483*55992Sbostic 484*55992Sbostic /* 485*55992Sbostic * Compile a translation set of strings into a lookup table. 486*55992Sbostic */ 487*55992Sbostic static char * 488*55992Sbostic compile_tr(p, transtab) 489*55992Sbostic char *p; 490*55992Sbostic char **transtab; 491*55992Sbostic { 492*55992Sbostic int i; 493*55992Sbostic char *lt, *op, *np; 494*55992Sbostic char old[_POSIX2_LINE_MAX + 1]; 495*55992Sbostic char new[_POSIX2_LINE_MAX + 1]; 496*55992Sbostic 497*55992Sbostic if (*p == '\0' || *p == '\\') 498*55992Sbostic err(COMPILE, 499*55992Sbostic "transform pattern can not be delimited by newline or backslash"); 500*55992Sbostic p = compile_delimited(p, old); 501*55992Sbostic if (p == NULL) { 502*55992Sbostic err(COMPILE, "unterminated transform source string"); 503*55992Sbostic return (NULL); 504*55992Sbostic } 505*55992Sbostic p = compile_delimited(--p, new); 506*55992Sbostic if (p == NULL) { 507*55992Sbostic err(COMPILE, "unterminated transform target string"); 508*55992Sbostic return (NULL); 509*55992Sbostic } 510*55992Sbostic EATSPACE(); 511*55992Sbostic if (strlen(new) != strlen(old)) { 512*55992Sbostic err(COMPILE, "transform strings are not the same length"); 513*55992Sbostic return (NULL); 514*55992Sbostic } 515*55992Sbostic /* We assume characters are 8 bits */ 516*55992Sbostic lt = xmalloc(UCHAR_MAX); 517*55992Sbostic for (i = 0; i <= UCHAR_MAX; i++) 518*55992Sbostic lt[i] = (char)i; 519*55992Sbostic for (op = old, np = new; *op; op++, np++) 520*55992Sbostic lt[(u_char)*op] = *np; 521*55992Sbostic *transtab = lt; 522*55992Sbostic return (p); 523*55992Sbostic } 524*55992Sbostic 525*55992Sbostic /* 526*55992Sbostic * Compile the text following an a or i command. 527*55992Sbostic */ 528*55992Sbostic static char * 529*55992Sbostic compile_text() 530*55992Sbostic { 531*55992Sbostic int asize, size; 532*55992Sbostic char *text, *p, *op, *s; 533*55992Sbostic char lbuf[_POSIX2_LINE_MAX + 1]; 534*55992Sbostic 535*55992Sbostic asize = 2 * _POSIX2_LINE_MAX + 1; 536*55992Sbostic text = xmalloc(asize); 537*55992Sbostic size = 0; 538*55992Sbostic while (cu_fgets(lbuf, sizeof(lbuf))) { 539*55992Sbostic op = s = text + size; 540*55992Sbostic p = lbuf; 541*55992Sbostic EATSPACE(); 542*55992Sbostic for (; *p; p++) { 543*55992Sbostic if (*p == '\\') 544*55992Sbostic p++; 545*55992Sbostic *s++ = *p; 546*55992Sbostic } 547*55992Sbostic size += s - op; 548*55992Sbostic if (p[-2] != '\\') { 549*55992Sbostic *s = '\0'; 550*55992Sbostic break; 551*55992Sbostic } 552*55992Sbostic if (asize - size < _POSIX2_LINE_MAX + 1) { 553*55992Sbostic asize *= 2; 554*55992Sbostic text = xmalloc(asize); 555*55992Sbostic } 556*55992Sbostic } 557*55992Sbostic return (xrealloc(text, size + 1)); 558*55992Sbostic } 559*55992Sbostic 560*55992Sbostic /* 561*55992Sbostic * Get an address and return a pointer to the first character after 562*55992Sbostic * it. Fill the structure pointed to according to the address. 563*55992Sbostic */ 564*55992Sbostic static char * 565*55992Sbostic compile_addr(p, a) 566*55992Sbostic char *p; 567*55992Sbostic struct s_addr *a; 568*55992Sbostic { 569*55992Sbostic regex_t *re; 570*55992Sbostic char *end; 571*55992Sbostic 572*55992Sbostic switch (*p) { 573*55992Sbostic case '\\': /* Context address */ 574*55992Sbostic re = xmalloc(sizeof(regex_t)); 575*55992Sbostic a->u.r = re; 576*55992Sbostic p = compile_re(p + 1, re, REG_NOSUB); 577*55992Sbostic if (p == NULL) 578*55992Sbostic err(COMPILE, "unterminated regular expression"); 579*55992Sbostic a->type = AT_RE; 580*55992Sbostic return (p); 581*55992Sbostic case '/': /* Context address */ 582*55992Sbostic re = xmalloc(sizeof(regex_t)); 583*55992Sbostic a->u.r = re; 584*55992Sbostic p = compile_re(p, a->u.r, REG_NOSUB); 585*55992Sbostic if (p == NULL) 586*55992Sbostic err(COMPILE, "unterminated regular expression"); 587*55992Sbostic a->type = AT_RE; 588*55992Sbostic return (p); 589*55992Sbostic case '$': /* Last line */ 590*55992Sbostic a->type = AT_LAST; 591*55992Sbostic return (p + 1); 592*55992Sbostic /* Line number */ 593*55992Sbostic case '0': case '1': case '2': case '3': case '4': 594*55992Sbostic case '5': case '6': case '7': case '8': case '9': 595*55992Sbostic a->type = AT_LINE; 596*55992Sbostic a->u.l = strtol(p, &end, 10); 597*55992Sbostic return (end); 598*55992Sbostic default: 599*55992Sbostic err(COMPILE, "expected context address"); 600*55992Sbostic return (NULL); 601*55992Sbostic } 602*55992Sbostic } 603*55992Sbostic 604*55992Sbostic /* 605*55992Sbostic * Return a copy of all the characters up to \n or \0 606*55992Sbostic */ 607*55992Sbostic static char * 608*55992Sbostic duptoeol(s) 609*55992Sbostic register char *s; 610*55992Sbostic { 611*55992Sbostic size_t len; 612*55992Sbostic char *start; 613*55992Sbostic 614*55992Sbostic for (start = s; *s != '\0' && *s != '\n'; ++s); 615*55992Sbostic *s = '\0'; 616*55992Sbostic len = s - start + 1; 617*55992Sbostic return (memmove(xmalloc(len), start, len)); 618*55992Sbostic } 619*55992Sbostic 620*55992Sbostic /* 621*55992Sbostic * Find the label contained in the command l in the command linked list cp. 622*55992Sbostic * L is excluded from the search. Return NULL if not found. 623*55992Sbostic */ 624*55992Sbostic static struct s_command * 625*55992Sbostic findlabel(l, cp) 626*55992Sbostic struct s_command *l, *cp; 627*55992Sbostic { 628*55992Sbostic struct s_command *r; 629*55992Sbostic 630*55992Sbostic for (; cp; cp = cp->next) 631*55992Sbostic if (cp->code == ':' && cp != l && strcmp(l->t, cp->t) == 0) 632*55992Sbostic return (cp); 633*55992Sbostic else if (cp->code == '{' && (r = findlabel(l, cp->u.c))) 634*55992Sbostic return (r); 635*55992Sbostic return (NULL); 636*55992Sbostic } 637*55992Sbostic 638*55992Sbostic /* 639*55992Sbostic * Convert goto label names to addresses. 640*55992Sbostic * Detect duplicate labels. 641*55992Sbostic * Set appendnum to the number of a and r commands in the script. 642*55992Sbostic * Free the memory used by labels in b and t commands (but not by :) 643*55992Sbostic * Root is a pointer to the script linked list; cp points to the 644*55992Sbostic * search start. 645*55992Sbostic * TODO: Remove } nodes 646*55992Sbostic */ 647*55992Sbostic static void 648*55992Sbostic fixuplabel(root, cp) 649*55992Sbostic struct s_command *root, *cp; 650*55992Sbostic { 651*55992Sbostic struct s_command *cp2; 652*55992Sbostic 653*55992Sbostic for (; cp; cp = cp->next) 654*55992Sbostic switch (cp->code) { 655*55992Sbostic case 'a': 656*55992Sbostic case 'r': 657*55992Sbostic appendnum++; 658*55992Sbostic break; 659*55992Sbostic case 'b': 660*55992Sbostic case 't': 661*55992Sbostic if (cp->t == NULL) { 662*55992Sbostic cp->u.c = NULL; 663*55992Sbostic break; 664*55992Sbostic } 665*55992Sbostic if ((cp2 = findlabel(cp, root)) == NULL) 666*55992Sbostic err(COMPILE2, "unspecified label %s", cp->t); 667*55992Sbostic free(cp->t); 668*55992Sbostic cp->u.c = cp2; 669*55992Sbostic break; 670*55992Sbostic case '{': 671*55992Sbostic fixuplabel(root, cp->u.c); 672*55992Sbostic break; 673*55992Sbostic case ':': 674*55992Sbostic if (findlabel(cp, root)) 675*55992Sbostic err(COMPILE2, "duplicate label %s", cp->t); 676*55992Sbostic break; 677*55992Sbostic } 678*55992Sbostic } 679