155992Sbostic /*- 255992Sbostic * Copyright (c) 1992 Diomidis Spinellis. 3*62229Sbostic * Copyright (c) 1992, 1993 4*62229Sbostic * The Regents of the University of California. All rights reserved. 555992Sbostic * 655992Sbostic * This code is derived from software contributed to Berkeley by 755992Sbostic * Diomidis Spinellis of Imperial College, University of London. 855992Sbostic * 955992Sbostic * %sccs.include.redist.c% 1055992Sbostic */ 1155992Sbostic 1255992Sbostic #ifndef lint 13*62229Sbostic static char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 06/06/93"; 1455992Sbostic #endif /* not lint */ 1555992Sbostic 1655992Sbostic #include <sys/types.h> 1756019Sbostic #include <sys/stat.h> 1855992Sbostic 1955992Sbostic #include <ctype.h> 2055992Sbostic #include <errno.h> 2155992Sbostic #include <fcntl.h> 2255992Sbostic #include <limits.h> 2355992Sbostic #include <regex.h> 2455992Sbostic #include <stdio.h> 2555992Sbostic #include <stdlib.h> 2655992Sbostic #include <string.h> 2755992Sbostic 2855992Sbostic #include "defs.h" 2955992Sbostic #include "extern.h" 3055992Sbostic 3159141Storek #define LHSZ 128 3259141Storek #define LHMASK (LHSZ - 1) 3359141Storek static struct labhash { 3459141Storek struct labhash *lh_next; 3559141Storek u_int lh_hash; 3659141Storek struct s_command *lh_cmd; 3759141Storek int lh_ref; 3859141Storek } *labels[LHSZ]; 3959141Storek 4055992Sbostic static char *compile_addr __P((char *, struct s_addr *)); 4155992Sbostic static char *compile_delimited __P((char *, char *)); 4255992Sbostic static char *compile_flags __P((char *, struct s_subst *)); 4356077Sbostic static char *compile_re __P((char *, regex_t **)); 4456019Sbostic static char *compile_subst __P((char *, struct s_subst *)); 4555992Sbostic static char *compile_text __P((void)); 4655992Sbostic static char *compile_tr __P((char *, char **)); 4755992Sbostic static struct s_command 4855992Sbostic **compile_stream __P((char *, struct s_command **, char *)); 4958540Sbostic static char *duptoeol __P((char *, char *)); 5059141Storek static void enterlabel __P((struct s_command *)); 5155992Sbostic static struct s_command 5259141Storek *findlabel __P((char *)); 5359141Storek static void fixuplabel __P((struct s_command *, struct s_command *)); 5459141Storek static void uselabel __P((void)); 5555992Sbostic 5655992Sbostic /* 5755992Sbostic * Command specification. This is used to drive the command parser. 5855992Sbostic */ 5955992Sbostic struct s_format { 6055992Sbostic char code; /* Command code */ 6155992Sbostic int naddr; /* Number of address args */ 6255992Sbostic enum e_args args; /* Argument type */ 6355992Sbostic }; 6455992Sbostic 6555992Sbostic static struct s_format cmd_fmts[] = { 6655992Sbostic {'{', 2, GROUP}, 6755992Sbostic {'a', 1, TEXT}, 6855992Sbostic {'b', 2, BRANCH}, 6955992Sbostic {'c', 2, TEXT}, 7055992Sbostic {'d', 2, EMPTY}, 7155992Sbostic {'D', 2, EMPTY}, 7255992Sbostic {'g', 2, EMPTY}, 7355992Sbostic {'G', 2, EMPTY}, 7455992Sbostic {'h', 2, EMPTY}, 7555992Sbostic {'H', 2, EMPTY}, 7655992Sbostic {'i', 1, TEXT}, 7755992Sbostic {'l', 2, EMPTY}, 7855992Sbostic {'n', 2, EMPTY}, 7955992Sbostic {'N', 2, EMPTY}, 8055992Sbostic {'p', 2, EMPTY}, 8155992Sbostic {'P', 2, EMPTY}, 8255992Sbostic {'q', 1, EMPTY}, 8355992Sbostic {'r', 1, RFILE}, 8455992Sbostic {'s', 2, SUBST}, 8555992Sbostic {'t', 2, BRANCH}, 8655992Sbostic {'w', 2, WFILE}, 8755992Sbostic {'x', 2, EMPTY}, 8855992Sbostic {'y', 2, TR}, 8955992Sbostic {'!', 2, NONSEL}, 9055992Sbostic {':', 0, LABEL}, 9155992Sbostic {'#', 0, COMMENT}, 9255992Sbostic {'=', 1, EMPTY}, 9355992Sbostic {'\0', 0, COMMENT}, 9455992Sbostic }; 9555992Sbostic 9656019Sbostic /* The compiled program. */ 9755992Sbostic struct s_command *prog; 9855992Sbostic 9955992Sbostic /* 10055992Sbostic * Compile the program into prog. 10156019Sbostic * Initialise appends. 10255992Sbostic */ 10355992Sbostic void 10455992Sbostic compile() 10555992Sbostic { 10655992Sbostic *compile_stream(NULL, &prog, NULL) = NULL; 10759141Storek fixuplabel(prog, NULL); 10859141Storek uselabel(); 10955992Sbostic appends = xmalloc(sizeof(struct s_appends) * appendnum); 11056077Sbostic match = xmalloc((maxnsub + 1) * sizeof(regmatch_t)); 11155992Sbostic } 11255992Sbostic 11355992Sbostic #define EATSPACE() do { \ 11455992Sbostic if (p) \ 11555992Sbostic while (*p && isascii(*p) && isspace(*p)) \ 11655992Sbostic p++; \ 11755992Sbostic } while (0) 11855992Sbostic 11955992Sbostic static struct s_command ** 12055992Sbostic compile_stream(terminator, link, p) 12155992Sbostic char *terminator; 12255992Sbostic struct s_command **link; 12355992Sbostic register char *p; 12455992Sbostic { 12555992Sbostic static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 12655992Sbostic struct s_command *cmd, *cmd2; 12755992Sbostic struct s_format *fp; 12855992Sbostic int naddr; /* Number of addresses */ 12955992Sbostic 13055992Sbostic if (p != NULL) 13155992Sbostic goto semicolon; 13255992Sbostic for (;;) { 13355992Sbostic if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) { 13455992Sbostic if (terminator != NULL) 13555992Sbostic err(COMPILE, "unexpected EOF (pending }'s)"); 13655992Sbostic return (link); 13755992Sbostic } 13855992Sbostic 13955992Sbostic semicolon: EATSPACE(); 14055992Sbostic if (p && (*p == '#' || *p == '\0')) 14155992Sbostic continue; 14255992Sbostic if (*p == '}') { 14355992Sbostic if (terminator == NULL) 14455992Sbostic err(COMPILE, "unexpected }"); 14555992Sbostic return (link); 14655992Sbostic } 14755992Sbostic *link = cmd = xmalloc(sizeof(struct s_command)); 14855992Sbostic link = &cmd->next; 14959141Storek cmd->nonsel = cmd->inrange = 0; 15055992Sbostic /* First parse the addresses */ 15155992Sbostic naddr = 0; 15255992Sbostic cmd->a1 = cmd->a2 = NULL; 15355992Sbostic 15455992Sbostic /* Valid characters to start an address */ 15555992Sbostic #define addrchar(c) (strchr("0123456789/\\$", (c))) 15655992Sbostic if (addrchar(*p)) { 15755992Sbostic naddr++; 15855992Sbostic cmd->a1 = xmalloc(sizeof(struct s_addr)); 15955992Sbostic p = compile_addr(p, cmd->a1); 16055992Sbostic EATSPACE(); /* EXTENSION */ 16155992Sbostic if (*p == ',') { 16255992Sbostic naddr++; 16355992Sbostic p++; 16455992Sbostic EATSPACE(); /* EXTENSION */ 16555992Sbostic cmd->a2 = xmalloc(sizeof(struct s_addr)); 16655992Sbostic p = compile_addr(p, cmd->a2); 16755992Sbostic } 16855992Sbostic } 16955992Sbostic 17055992Sbostic nonsel: /* Now parse the command */ 17155992Sbostic EATSPACE(); 17255992Sbostic if (!*p) 17355992Sbostic err(COMPILE, "command expected"); 17455992Sbostic cmd->code = *p; 17555992Sbostic for (fp = cmd_fmts; fp->code; fp++) 17655992Sbostic if (fp->code == *p) 17755992Sbostic break; 17855992Sbostic if (!fp->code) 17955992Sbostic err(COMPILE, "invalid command code %c", *p); 18055992Sbostic if (naddr > fp->naddr) 18155992Sbostic err(COMPILE, 18255992Sbostic "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr); 18355992Sbostic switch (fp->args) { 18455992Sbostic case NONSEL: /* ! */ 18555992Sbostic cmd->nonsel = ! cmd->nonsel; 18655992Sbostic p++; 18755992Sbostic goto nonsel; 18855992Sbostic case GROUP: /* { */ 18955992Sbostic p++; 19055992Sbostic EATSPACE(); 19155992Sbostic if (!*p) 19255992Sbostic p = NULL; 19355992Sbostic cmd2 = xmalloc(sizeof(struct s_command)); 19455992Sbostic cmd2->code = '}'; 19555992Sbostic *compile_stream("}", &cmd->u.c, p) = cmd2; 19655992Sbostic cmd->next = cmd2; 19755992Sbostic link = &cmd2->next; 19855992Sbostic break; 19955992Sbostic case EMPTY: /* d D g G h H l n N p P q x = \0 */ 20055992Sbostic p++; 20155992Sbostic EATSPACE(); 20255992Sbostic if (*p == ';') { 20355992Sbostic p++; 20455992Sbostic link = &cmd->next; 20555992Sbostic goto semicolon; 20655992Sbostic } 20755992Sbostic if (*p) 20855992Sbostic err(COMPILE, 20955992Sbostic "extra characters at the end of %c command", cmd->code); 21055992Sbostic break; 21155992Sbostic case TEXT: /* a c i */ 21255992Sbostic p++; 21355992Sbostic EATSPACE(); 21455992Sbostic if (*p != '\\') 21555992Sbostic err(COMPILE, 21655992Sbostic "command %c expects \\ followed by text", cmd->code); 21755992Sbostic p++; 21855992Sbostic EATSPACE(); 21955992Sbostic if (*p) 22055992Sbostic err(COMPILE, 22155992Sbostic "extra characters after \\ at the end of %c command", cmd->code); 22255992Sbostic cmd->t = compile_text(); 22355992Sbostic break; 22455992Sbostic case COMMENT: /* \0 # */ 22555992Sbostic break; 22655992Sbostic case WFILE: /* w */ 22755992Sbostic p++; 22855992Sbostic EATSPACE(); 22955992Sbostic if (*p == '\0') 23055992Sbostic err(COMPILE, "filename expected"); 23158540Sbostic cmd->t = duptoeol(p, "w command"); 23255992Sbostic if (aflag) 23355992Sbostic cmd->u.fd = -1; 23455992Sbostic else if ((cmd->u.fd = open(p, 23555992Sbostic O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 23655992Sbostic DEFFILEMODE)) == -1) 23755992Sbostic err(FATAL, "%s: %s\n", p, strerror(errno)); 23855992Sbostic break; 23955992Sbostic case RFILE: /* r */ 24055992Sbostic p++; 24155992Sbostic EATSPACE(); 24255992Sbostic if (*p == '\0') 24355992Sbostic err(COMPILE, "filename expected"); 24455992Sbostic else 24558540Sbostic cmd->t = duptoeol(p, "read command"); 24655992Sbostic break; 24755992Sbostic case BRANCH: /* b t */ 24855992Sbostic p++; 24955992Sbostic EATSPACE(); 25055992Sbostic if (*p == '\0') 25155992Sbostic cmd->t = NULL; 25255992Sbostic else 25358540Sbostic cmd->t = duptoeol(p, "branch"); 25455992Sbostic break; 25555992Sbostic case LABEL: /* : */ 25655992Sbostic p++; 25755992Sbostic EATSPACE(); 25858540Sbostic cmd->t = duptoeol(p, "label"); 25955992Sbostic if (strlen(p) == 0) 26055992Sbostic err(COMPILE, "empty label"); 26159141Storek enterlabel(cmd); 26255992Sbostic break; 26355992Sbostic case SUBST: /* s */ 26455992Sbostic p++; 26555992Sbostic if (*p == '\0' || *p == '\\') 26655992Sbostic err(COMPILE, 26755992Sbostic "substitute pattern can not be delimited by newline or backslash"); 26855992Sbostic cmd->u.s = xmalloc(sizeof(struct s_subst)); 26956077Sbostic p = compile_re(p, &cmd->u.s->re); 27055992Sbostic if (p == NULL) 27156004Sbostic err(COMPILE, "unterminated substitute pattern"); 27256019Sbostic --p; 27356019Sbostic p = compile_subst(p, cmd->u.s); 27455992Sbostic p = compile_flags(p, cmd->u.s); 27555992Sbostic EATSPACE(); 27655992Sbostic if (*p == ';') { 27755992Sbostic p++; 27855992Sbostic link = &cmd->next; 27955992Sbostic goto semicolon; 28055992Sbostic } 28155992Sbostic break; 28255992Sbostic case TR: /* y */ 28355992Sbostic p++; 28455992Sbostic p = compile_tr(p, (char **)&cmd->u.y); 28555992Sbostic EATSPACE(); 28655992Sbostic if (*p == ';') { 28755992Sbostic p++; 28855992Sbostic link = &cmd->next; 28955992Sbostic goto semicolon; 29055992Sbostic } 29155992Sbostic if (*p) 29255992Sbostic err(COMPILE, 29355992Sbostic "extra text at the end of a transform command"); 29455992Sbostic break; 29555992Sbostic } 29655992Sbostic } 29755992Sbostic } 29855992Sbostic 29955992Sbostic /* 30055992Sbostic * Get a delimited string. P points to the delimeter of the string; d points 30155992Sbostic * to a buffer area. Newline and delimiter escapes are processed; other 30255992Sbostic * escapes are ignored. 30355992Sbostic * 30455992Sbostic * Returns a pointer to the first character after the final delimiter or NULL 30555992Sbostic * in the case of a non-terminated string. The character array d is filled 30655992Sbostic * with the processed string. 30755992Sbostic */ 30855992Sbostic static char * 30955992Sbostic compile_delimited(p, d) 31055992Sbostic char *p, *d; 31155992Sbostic { 31255992Sbostic char c; 31355992Sbostic 31455992Sbostic c = *p++; 31555992Sbostic if (c == '\0') 31655992Sbostic return (NULL); 31755992Sbostic else if (c == '\\') 31855992Sbostic err(COMPILE, "\\ can not be used as a string delimiter"); 31955992Sbostic else if (c == '\n') 32055992Sbostic err(COMPILE, "newline can not be used as a string delimiter"); 32155992Sbostic while (*p) { 32255992Sbostic if (*p == '\\' && p[1] == c) 32356019Sbostic p++; 32455992Sbostic else if (*p == '\\' && p[1] == 'n') { 32556019Sbostic *d++ = '\n'; 32656019Sbostic p += 2; 32756019Sbostic continue; 32856663Sbostic } else if (*p == '\\' && p[1] == '\\') 32956663Sbostic *d++ = *p++; 33056663Sbostic else if (*p == c) { 33155992Sbostic *d = '\0'; 33255992Sbostic return (p + 1); 33355992Sbostic } 33455992Sbostic *d++ = *p++; 33555992Sbostic } 33655992Sbostic return (NULL); 33755992Sbostic } 33855992Sbostic 33955992Sbostic /* 34056019Sbostic * Get a regular expression. P points to the delimiter of the regular 34156019Sbostic * expression; repp points to the address of a regexp pointer. Newline 34256019Sbostic * and delimiter escapes are processed; other escapes are ignored. 34355992Sbostic * Returns a pointer to the first character after the final delimiter 34456019Sbostic * or NULL in the case of a non terminated regular expression. The regexp 34556019Sbostic * pointer is set to the compiled regular expression. 34655992Sbostic * Cflags are passed to regcomp. 34755992Sbostic */ 34855992Sbostic static char * 34956077Sbostic compile_re(p, repp) 35055992Sbostic char *p; 35156019Sbostic regex_t **repp; 35255992Sbostic { 35355992Sbostic int eval; 35455992Sbostic char re[_POSIX2_LINE_MAX + 1]; 35555992Sbostic 35655992Sbostic p = compile_delimited(p, re); 35756019Sbostic if (p && strlen(re) == 0) { 35856019Sbostic *repp = NULL; 35956019Sbostic return (p); 36056019Sbostic } 36156019Sbostic *repp = xmalloc(sizeof(regex_t)); 36256077Sbostic if (p && (eval = regcomp(*repp, re, 0)) != 0) 36356019Sbostic err(COMPILE, "RE error: %s", strregerror(eval, *repp)); 36456077Sbostic if (maxnsub < (*repp)->re_nsub) 36556077Sbostic maxnsub = (*repp)->re_nsub; 36655992Sbostic return (p); 36755992Sbostic } 36855992Sbostic 36955992Sbostic /* 37055992Sbostic * Compile the substitution string of a regular expression and set res to 37155992Sbostic * point to a saved copy of it. Nsub is the number of parenthesized regular 37255992Sbostic * expressions. 37355992Sbostic */ 37455992Sbostic static char * 37556019Sbostic compile_subst(p, s) 37656019Sbostic char *p; 37756019Sbostic struct s_subst *s; 37855992Sbostic { 37955992Sbostic static char lbuf[_POSIX2_LINE_MAX + 1]; 38056019Sbostic int asize, ref, size; 38156019Sbostic char c, *text, *op, *sp; 38255992Sbostic 38355992Sbostic c = *p++; /* Terminator character */ 38455992Sbostic if (c == '\0') 38555992Sbostic return (NULL); 38655992Sbostic 38756019Sbostic s->maxbref = 0; 38856019Sbostic s->linenum = linenum; 38955992Sbostic asize = 2 * _POSIX2_LINE_MAX + 1; 39055992Sbostic text = xmalloc(asize); 39155992Sbostic size = 0; 39255992Sbostic do { 39356019Sbostic op = sp = text + size; 39455992Sbostic for (; *p; p++) { 39555992Sbostic if (*p == '\\') { 39655992Sbostic p++; 39755992Sbostic if (strchr("123456789", *p) != NULL) { 39856019Sbostic *sp++ = '\\'; 39956019Sbostic ref = *p - '0'; 40056019Sbostic if (s->re != NULL && 40156019Sbostic ref > s->re->re_nsub) 40255992Sbostic err(COMPILE, 40356019Sbostic "\\%c not defined in the RE", *p); 40456077Sbostic if (s->maxbref < ref) 40556077Sbostic s->maxbref = ref; 40656663Sbostic } else if (*p == '&' || *p == '\\') 40756019Sbostic *sp++ = '\\'; 40855992Sbostic } else if (*p == c) { 40955992Sbostic p++; 41056019Sbostic *sp++ = '\0'; 41156019Sbostic size += sp - op; 41256019Sbostic s->new = xrealloc(text, size); 41355992Sbostic return (p); 41455992Sbostic } else if (*p == '\n') { 41555992Sbostic err(COMPILE, 41655992Sbostic "unescaped newline inside substitute pattern"); 41756019Sbostic /* NOTREACHED */ 41855992Sbostic } 41956019Sbostic *sp++ = *p; 42055992Sbostic } 42156019Sbostic size += sp - op; 42255992Sbostic if (asize - size < _POSIX2_LINE_MAX + 1) { 42355992Sbostic asize *= 2; 42455992Sbostic text = xmalloc(asize); 42555992Sbostic } 42655992Sbostic } while (cu_fgets(p = lbuf, sizeof(lbuf))); 42756019Sbostic err(COMPILE, "unterminated substitute in regular expression"); 42856019Sbostic /* NOTREACHED */ 42955992Sbostic } 43055992Sbostic 43155992Sbostic /* 43255992Sbostic * Compile the flags of the s command 43355992Sbostic */ 43455992Sbostic static char * 43555992Sbostic compile_flags(p, s) 43655992Sbostic char *p; 43755992Sbostic struct s_subst *s; 43855992Sbostic { 43955992Sbostic int gn; /* True if we have seen g or n */ 44055992Sbostic char wfile[_POSIX2_LINE_MAX + 1], *q; 44155992Sbostic 44255992Sbostic s->n = 1; /* Default */ 44355992Sbostic s->p = 0; 44455992Sbostic s->wfile = NULL; 44555992Sbostic s->wfd = -1; 44655992Sbostic for (gn = 0;;) { 44755992Sbostic EATSPACE(); /* EXTENSION */ 44855992Sbostic switch (*p) { 44955992Sbostic case 'g': 45055992Sbostic if (gn) 45156004Sbostic err(COMPILE, 45256004Sbostic "more than one number or 'g' in substitute flags"); 45355992Sbostic gn = 1; 45455992Sbostic s->n = 0; 45555992Sbostic break; 45655992Sbostic case '\0': 45755992Sbostic case '\n': 45855992Sbostic case ';': 45955992Sbostic return (p); 46055992Sbostic case 'p': 46155992Sbostic s->p = 1; 46255992Sbostic break; 46355992Sbostic case '1': case '2': case '3': 46455992Sbostic case '4': case '5': case '6': 46555992Sbostic case '7': case '8': case '9': 46655992Sbostic if (gn) 46756004Sbostic err(COMPILE, 46856004Sbostic "more than one number or 'g' in substitute flags"); 46955992Sbostic gn = 1; 47055992Sbostic /* XXX Check for overflow */ 47155992Sbostic s->n = (int)strtol(p, &p, 10); 47255992Sbostic break; 47355992Sbostic case 'w': 47455992Sbostic p++; 47555992Sbostic #ifdef HISTORIC_PRACTICE 47655992Sbostic if (*p != ' ') { 47755992Sbostic err(WARNING, "space missing before w wfile"); 47855992Sbostic return (p); 47955992Sbostic } 48055992Sbostic #endif 48155992Sbostic EATSPACE(); 48255992Sbostic q = wfile; 48355992Sbostic while (*p) { 48455992Sbostic if (*p == '\n') 48555992Sbostic break; 48655992Sbostic *q++ = *p++; 48755992Sbostic } 48855992Sbostic *q = '\0'; 48955992Sbostic if (q == wfile) 49056004Sbostic err(COMPILE, "no wfile specified"); 49155992Sbostic s->wfile = strdup(wfile); 49255992Sbostic if (!aflag && (s->wfd = open(wfile, 49355992Sbostic O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 49455992Sbostic DEFFILEMODE)) == -1) 49555992Sbostic err(FATAL, "%s: %s\n", wfile, strerror(errno)); 49655992Sbostic return (p); 49755992Sbostic default: 49855992Sbostic err(COMPILE, 49956004Sbostic "bad flag in substitute command: '%c'", *p); 50055992Sbostic break; 50155992Sbostic } 50255992Sbostic p++; 50355992Sbostic } 50455992Sbostic } 50555992Sbostic 50655992Sbostic /* 50755992Sbostic * Compile a translation set of strings into a lookup table. 50855992Sbostic */ 50955992Sbostic static char * 51055992Sbostic compile_tr(p, transtab) 51155992Sbostic char *p; 51255992Sbostic char **transtab; 51355992Sbostic { 51455992Sbostic int i; 51555992Sbostic char *lt, *op, *np; 51655992Sbostic char old[_POSIX2_LINE_MAX + 1]; 51755992Sbostic char new[_POSIX2_LINE_MAX + 1]; 51855992Sbostic 51955992Sbostic if (*p == '\0' || *p == '\\') 52055992Sbostic err(COMPILE, 52155992Sbostic "transform pattern can not be delimited by newline or backslash"); 52255992Sbostic p = compile_delimited(p, old); 52355992Sbostic if (p == NULL) { 52455992Sbostic err(COMPILE, "unterminated transform source string"); 52555992Sbostic return (NULL); 52655992Sbostic } 52755992Sbostic p = compile_delimited(--p, new); 52855992Sbostic if (p == NULL) { 52955992Sbostic err(COMPILE, "unterminated transform target string"); 53055992Sbostic return (NULL); 53155992Sbostic } 53255992Sbostic EATSPACE(); 53355992Sbostic if (strlen(new) != strlen(old)) { 53455992Sbostic err(COMPILE, "transform strings are not the same length"); 53555992Sbostic return (NULL); 53655992Sbostic } 53755992Sbostic /* We assume characters are 8 bits */ 53855992Sbostic lt = xmalloc(UCHAR_MAX); 53955992Sbostic for (i = 0; i <= UCHAR_MAX; i++) 54055992Sbostic lt[i] = (char)i; 54155992Sbostic for (op = old, np = new; *op; op++, np++) 54255992Sbostic lt[(u_char)*op] = *np; 54355992Sbostic *transtab = lt; 54455992Sbostic return (p); 54555992Sbostic } 54655992Sbostic 54755992Sbostic /* 54855992Sbostic * Compile the text following an a or i command. 54955992Sbostic */ 55055992Sbostic static char * 55155992Sbostic compile_text() 55255992Sbostic { 55355992Sbostic int asize, size; 55455992Sbostic char *text, *p, *op, *s; 55555992Sbostic char lbuf[_POSIX2_LINE_MAX + 1]; 55655992Sbostic 55755992Sbostic asize = 2 * _POSIX2_LINE_MAX + 1; 55855992Sbostic text = xmalloc(asize); 55955992Sbostic size = 0; 56055992Sbostic while (cu_fgets(lbuf, sizeof(lbuf))) { 56155992Sbostic op = s = text + size; 56255992Sbostic p = lbuf; 56355992Sbostic EATSPACE(); 56455992Sbostic for (; *p; p++) { 56555992Sbostic if (*p == '\\') 56655992Sbostic p++; 56755992Sbostic *s++ = *p; 56855992Sbostic } 56955992Sbostic size += s - op; 57055992Sbostic if (p[-2] != '\\') { 57155992Sbostic *s = '\0'; 57255992Sbostic break; 57355992Sbostic } 57455992Sbostic if (asize - size < _POSIX2_LINE_MAX + 1) { 57555992Sbostic asize *= 2; 57655992Sbostic text = xmalloc(asize); 57755992Sbostic } 57855992Sbostic } 57955992Sbostic return (xrealloc(text, size + 1)); 58055992Sbostic } 58155992Sbostic 58255992Sbostic /* 58355992Sbostic * Get an address and return a pointer to the first character after 58455992Sbostic * it. Fill the structure pointed to according to the address. 58555992Sbostic */ 58655992Sbostic static char * 58755992Sbostic compile_addr(p, a) 58855992Sbostic char *p; 58955992Sbostic struct s_addr *a; 59055992Sbostic { 59155992Sbostic char *end; 59255992Sbostic 59355992Sbostic switch (*p) { 59455992Sbostic case '\\': /* Context address */ 59556019Sbostic ++p; 59656019Sbostic /* FALLTHROUGH */ 59755992Sbostic case '/': /* Context address */ 59856077Sbostic p = compile_re(p, &a->u.r); 59955992Sbostic if (p == NULL) 60055992Sbostic err(COMPILE, "unterminated regular expression"); 60155992Sbostic a->type = AT_RE; 60255992Sbostic return (p); 60356019Sbostic 60455992Sbostic case '$': /* Last line */ 60555992Sbostic a->type = AT_LAST; 60655992Sbostic return (p + 1); 60755992Sbostic /* Line number */ 60855992Sbostic case '0': case '1': case '2': case '3': case '4': 60955992Sbostic case '5': case '6': case '7': case '8': case '9': 61055992Sbostic a->type = AT_LINE; 61155992Sbostic a->u.l = strtol(p, &end, 10); 61255992Sbostic return (end); 61355992Sbostic default: 61455992Sbostic err(COMPILE, "expected context address"); 61555992Sbostic return (NULL); 61655992Sbostic } 61755992Sbostic } 61855992Sbostic 61955992Sbostic /* 62058540Sbostic * duptoeol -- 62158540Sbostic * Return a copy of all the characters up to \n or \0. 62255992Sbostic */ 62355992Sbostic static char * 62458540Sbostic duptoeol(s, ctype) 62555992Sbostic register char *s; 62658540Sbostic char *ctype; 62755992Sbostic { 62855992Sbostic size_t len; 62958540Sbostic int ws; 63055992Sbostic char *start; 63155992Sbostic 63258540Sbostic ws = 0; 63358540Sbostic for (start = s; *s != '\0' && *s != '\n'; ++s) 63458540Sbostic ws = isspace(*s); 63555992Sbostic *s = '\0'; 63658540Sbostic if (ws) 63758540Sbostic err(WARNING, "whitespace after %s", ctype); 63855992Sbostic len = s - start + 1; 63955992Sbostic return (memmove(xmalloc(len), start, len)); 64055992Sbostic } 64155992Sbostic 64255992Sbostic /* 64359141Storek * Convert goto label names to addresses, and count a and r commands, in 64459141Storek * the given subset of the script. Free the memory used by labels in b 64559141Storek * and t commands (but not by :). 64658540Sbostic * 64755992Sbostic * TODO: Remove } nodes 64855992Sbostic */ 64955992Sbostic static void 65059141Storek fixuplabel(cp, end) 65159141Storek struct s_command *cp, *end; 65255992Sbostic { 65355992Sbostic 65456093Sbostic for (; cp != end; cp = cp->next) 65555992Sbostic switch (cp->code) { 65655992Sbostic case 'a': 65755992Sbostic case 'r': 65855992Sbostic appendnum++; 65955992Sbostic break; 66055992Sbostic case 'b': 66155992Sbostic case 't': 66259141Storek /* Resolve branch target. */ 66355992Sbostic if (cp->t == NULL) { 66455992Sbostic cp->u.c = NULL; 66555992Sbostic break; 66655992Sbostic } 66759141Storek if ((cp->u.c = findlabel(cp->t)) == NULL) 66856004Sbostic err(COMPILE2, "undefined label '%s'", cp->t); 66955992Sbostic free(cp->t); 67055992Sbostic break; 67155992Sbostic case '{': 67259141Storek /* Do interior commands. */ 67359141Storek fixuplabel(cp->u.c, cp->next); 67455992Sbostic break; 67555992Sbostic } 67655992Sbostic } 67758540Sbostic 67858540Sbostic /* 67959141Storek * Associate the given command label for later lookup. 68059141Storek */ 68159141Storek static void 68259141Storek enterlabel(cp) 68359141Storek struct s_command *cp; 68459141Storek { 68559141Storek register struct labhash **lhp, *lh; 68659141Storek register u_char *p; 68759141Storek register u_int h, c; 68859141Storek 68959141Storek for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) 69059141Storek h = (h << 5) + h + c; 69159141Storek lhp = &labels[h & LHMASK]; 69259141Storek for (lh = *lhp; lh != NULL; lh = lh->lh_next) 69359141Storek if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) 69459141Storek err(COMPILE2, "duplicate label '%s'", cp->t); 69559141Storek lh = xmalloc(sizeof *lh); 69659141Storek lh->lh_next = *lhp; 69759141Storek lh->lh_hash = h; 69859141Storek lh->lh_cmd = cp; 69959141Storek lh->lh_ref = 0; 70059141Storek *lhp = lh; 70159141Storek } 70259141Storek 70359141Storek /* 70458540Sbostic * Find the label contained in the command l in the command linked 70558540Sbostic * list cp. L is excluded from the search. Return NULL if not found. 70658540Sbostic */ 70758540Sbostic static struct s_command * 70859141Storek findlabel(name) 70959141Storek char *name; 71058540Sbostic { 71159141Storek register struct labhash *lh; 71259141Storek register u_char *p; 71359141Storek register u_int h, c; 71458540Sbostic 71559141Storek for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) 71659141Storek h = (h << 5) + h + c; 71759141Storek for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { 71859141Storek if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { 71959141Storek lh->lh_ref = 1; 72059141Storek return (lh->lh_cmd); 72159141Storek } 72258540Sbostic } 72358540Sbostic return (NULL); 72458540Sbostic } 72558540Sbostic 72658540Sbostic /* 72759141Storek * Warn about any unused labels. As a side effect, release the label hash 72859141Storek * table space. 72958540Sbostic */ 73058540Sbostic static void 73159141Storek uselabel() 73258540Sbostic { 73359141Storek register struct labhash *lh, *next; 73459141Storek register int i; 73559141Storek 73659141Storek for (i = 0; i < LHSZ; i++) { 73759141Storek for (lh = labels[i]; lh != NULL; lh = next) { 73859141Storek next = lh->lh_next; 73959141Storek if (!lh->lh_ref) 74059141Storek err(WARNING, "unused label '%s'", 74159141Storek lh->lh_cmd->t); 74259141Storek free(lh); 74359141Storek } 74458540Sbostic } 74558540Sbostic } 746