11175Sbill /* 21175Sbill * egrep -- print lines containing (or not containing) a regular expression 31175Sbill * 41175Sbill * status returns: 51175Sbill * 0 - ok, and some matches 61175Sbill * 1 - ok, but no matches 71175Sbill * 2 - some error 81175Sbill */ 91175Sbill %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 101175Sbill %left OR 111175Sbill %left CHAR DOT CCL NCCL '(' 121175Sbill %left CAT 131175Sbill %left STAR PLUS QUEST 141175Sbill 151175Sbill %{ 16*9117Srrh static char *sccsid = "@(#)old.egrep.y 4.2 (Berkeley) 11/08/82"; 171175Sbill #include <stdio.h> 181175Sbill 191175Sbill #define MAXLIN 350 201175Sbill #define MAXPOS 4000 211175Sbill #define NCHARS 128 221175Sbill #define NSTATES 128 231175Sbill #define FINAL -1 241175Sbill char gotofn[NSTATES][NCHARS]; 251175Sbill int state[NSTATES]; 261175Sbill char out[NSTATES]; 271175Sbill int line = 1; 281175Sbill int name[MAXLIN]; 291175Sbill int left[MAXLIN]; 301175Sbill int right[MAXLIN]; 311175Sbill int parent[MAXLIN]; 321175Sbill int foll[MAXLIN]; 331175Sbill int positions[MAXPOS]; 341175Sbill char chars[MAXLIN]; 351175Sbill int nxtpos; 361175Sbill int nxtchar = 0; 371175Sbill int tmpstat[MAXLIN]; 381175Sbill int initstat[MAXLIN]; 391175Sbill int xstate; 401175Sbill int count; 411175Sbill int icount; 421175Sbill char *input; 43*9117Srrh FILE *exprfile; 441175Sbill 451175Sbill long lnum; 461175Sbill int bflag; 471175Sbill int cflag; 481175Sbill int fflag; 491175Sbill int lflag; 501175Sbill int nflag; 511175Sbill int hflag = 1; 521175Sbill int sflag; 531175Sbill int vflag; 541175Sbill int nfile; 551175Sbill int blkno; 561175Sbill long tln; 571175Sbill int nsucc; 581175Sbill 591175Sbill int f; 60*9117Srrh char *fname; 611175Sbill %} 621175Sbill 631175Sbill %% 641175Sbill s: t 651175Sbill ={ unary(FINAL, $1); 661175Sbill line--; 671175Sbill } 681175Sbill ; 691175Sbill t: b r 701175Sbill ={ $$ = node(CAT, $1, $2); } 711175Sbill | OR b r OR 721175Sbill ={ $$ = node(CAT, $2, $3); } 731175Sbill | OR b r 741175Sbill ={ $$ = node(CAT, $2, $3); } 751175Sbill | b r OR 761175Sbill ={ $$ = node(CAT, $1, $2); } 771175Sbill ; 781175Sbill b: 791175Sbill ={ $$ = enter(DOT); 801175Sbill $$ = unary(STAR, $$); } 811175Sbill ; 821175Sbill r: CHAR 831175Sbill ={ $$ = enter($1); } 841175Sbill | DOT 851175Sbill ={ $$ = enter(DOT); } 861175Sbill | CCL 871175Sbill ={ $$ = cclenter(CCL); } 881175Sbill | NCCL 891175Sbill ={ $$ = cclenter(NCCL); } 901175Sbill ; 911175Sbill 921175Sbill r: r OR r 931175Sbill ={ $$ = node(OR, $1, $3); } 941175Sbill | r r %prec CAT 951175Sbill ={ $$ = node(CAT, $1, $2); } 961175Sbill | r STAR 971175Sbill ={ $$ = unary(STAR, $1); } 981175Sbill | r PLUS 991175Sbill ={ $$ = unary(PLUS, $1); } 1001175Sbill | r QUEST 1011175Sbill ={ $$ = unary(QUEST, $1); } 1021175Sbill | '(' r ')' 1031175Sbill ={ $$ = $2; } 1041175Sbill | error 1051175Sbill ; 1061175Sbill 1071175Sbill %% 1081175Sbill yyerror(s) { 1091175Sbill fprintf(stderr, "egrep: %s\n", s); 1101175Sbill exit(2); 1111175Sbill } 1121175Sbill 1131175Sbill yylex() { 1141175Sbill extern int yylval; 1151175Sbill int cclcnt, x; 1161175Sbill register char c, d; 1171175Sbill switch(c = nextch()) { 1181175Sbill case '$': 1191175Sbill case '^': c = '\n'; 1201175Sbill goto defchar; 1211175Sbill case '|': return (OR); 1221175Sbill case '*': return (STAR); 1231175Sbill case '+': return (PLUS); 1241175Sbill case '?': return (QUEST); 1251175Sbill case '(': return (c); 1261175Sbill case ')': return (c); 1271175Sbill case '.': return (DOT); 1281175Sbill case '\0': return (0); 1291175Sbill case '\n': return (OR); 1301175Sbill case '[': 1311175Sbill x = CCL; 1321175Sbill cclcnt = 0; 1331175Sbill count = nxtchar++; 1341175Sbill if ((c = nextch()) == '^') { 1351175Sbill x = NCCL; 1361175Sbill c = nextch(); 1371175Sbill } 1381175Sbill do { 1391175Sbill if (c == '\0') synerror(); 1401175Sbill if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 1411175Sbill if ((d = nextch()) != 0) { 1421175Sbill c = chars[nxtchar-1]; 1431175Sbill while (c < d) { 1441175Sbill if (nxtchar >= MAXLIN) overflo(); 1451175Sbill chars[nxtchar++] = ++c; 1461175Sbill cclcnt++; 1471175Sbill } 1481175Sbill continue; 1491175Sbill } 1501175Sbill } 1511175Sbill if (nxtchar >= MAXLIN) overflo(); 1521175Sbill chars[nxtchar++] = c; 1531175Sbill cclcnt++; 1541175Sbill } while ((c = nextch()) != ']'); 1551175Sbill chars[count] = cclcnt; 1561175Sbill return (x); 1571175Sbill case '\\': 1581175Sbill if ((c = nextch()) == '\0') synerror(); 1591175Sbill defchar: 1601175Sbill default: yylval = c; return (CHAR); 1611175Sbill } 1621175Sbill } 1631175Sbill nextch() { 1641175Sbill register char c; 1651175Sbill if (fflag) { 166*9117Srrh if ((c = getc(exprfile)) == EOF) { 167*9117Srrh fclose(exprfile); 168*9117Srrh return(0); 169*9117Srrh } 1701175Sbill } 1711175Sbill else c = *input++; 1721175Sbill return(c); 1731175Sbill } 1741175Sbill 1751175Sbill synerror() { 1761175Sbill fprintf(stderr, "egrep: syntax error\n"); 1771175Sbill exit(2); 1781175Sbill } 1791175Sbill 1801175Sbill enter(x) int x; { 1811175Sbill if(line >= MAXLIN) overflo(); 1821175Sbill name[line] = x; 1831175Sbill left[line] = 0; 1841175Sbill right[line] = 0; 1851175Sbill return(line++); 1861175Sbill } 1871175Sbill 1881175Sbill cclenter(x) int x; { 1891175Sbill register linno; 1901175Sbill linno = enter(x); 1911175Sbill right[linno] = count; 1921175Sbill return (linno); 1931175Sbill } 1941175Sbill 1951175Sbill node(x, l, r) { 1961175Sbill if(line >= MAXLIN) overflo(); 1971175Sbill name[line] = x; 1981175Sbill left[line] = l; 1991175Sbill right[line] = r; 2001175Sbill parent[l] = line; 2011175Sbill parent[r] = line; 2021175Sbill return(line++); 2031175Sbill } 2041175Sbill 2051175Sbill unary(x, d) { 2061175Sbill if(line >= MAXLIN) overflo(); 2071175Sbill name[line] = x; 2081175Sbill left[line] = d; 2091175Sbill right[line] = 0; 2101175Sbill parent[d] = line; 2111175Sbill return(line++); 2121175Sbill } 2131175Sbill overflo() { 2141175Sbill fprintf(stderr, "egrep: regular expression too long\n"); 2151175Sbill exit(2); 2161175Sbill } 2171175Sbill 2181175Sbill cfoll(v) { 2191175Sbill register i; 2201175Sbill if (left[v] == 0) { 2211175Sbill count = 0; 2221175Sbill for (i=1; i<=line; i++) tmpstat[i] = 0; 2231175Sbill follow(v); 2241175Sbill add(foll, v); 2251175Sbill } 2261175Sbill else if (right[v] == 0) cfoll(left[v]); 2271175Sbill else { 2281175Sbill cfoll(left[v]); 2291175Sbill cfoll(right[v]); 2301175Sbill } 2311175Sbill } 2321175Sbill cgotofn() { 2331175Sbill register c, i, k; 2341175Sbill int n, s; 2351175Sbill char symbol[NCHARS]; 2361175Sbill int j, nc, pc, pos; 2371175Sbill int curpos, num; 2381175Sbill int number, newpos; 2391175Sbill count = 0; 2401175Sbill for (n=3; n<=line; n++) tmpstat[n] = 0; 2411175Sbill if (cstate(line-1)==0) { 2421175Sbill tmpstat[line] = 1; 2431175Sbill count++; 2441175Sbill out[0] = 1; 2451175Sbill } 2461175Sbill for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 2471175Sbill count--; /*leave out position 1 */ 2481175Sbill icount = count; 2491175Sbill tmpstat[1] = 0; 2501175Sbill add(state, 0); 2511175Sbill n = 0; 2521175Sbill for (s=0; s<=n; s++) { 2531175Sbill if (out[s] == 1) continue; 2541175Sbill for (i=0; i<NCHARS; i++) symbol[i] = 0; 2551175Sbill num = positions[state[s]]; 2561175Sbill count = icount; 2571175Sbill for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 2581175Sbill pos = state[s] + 1; 2591175Sbill for (i=0; i<num; i++) { 2601175Sbill curpos = positions[pos]; 2611175Sbill if ((c = name[curpos]) >= 0) { 2621175Sbill if (c < NCHARS) symbol[c] = 1; 2631175Sbill else if (c == DOT) { 2641175Sbill for (k=0; k<NCHARS; k++) 2651175Sbill if (k!='\n') symbol[k] = 1; 2661175Sbill } 2671175Sbill else if (c == CCL) { 2681175Sbill nc = chars[right[curpos]]; 2691175Sbill pc = right[curpos] + 1; 2701175Sbill for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 2711175Sbill } 2721175Sbill else if (c == NCCL) { 2731175Sbill nc = chars[right[curpos]]; 2741175Sbill for (j = 0; j < NCHARS; j++) { 2751175Sbill pc = right[curpos] + 1; 2761175Sbill for (k = 0; k < nc; k++) 2771175Sbill if (j==chars[pc++]) goto cont; 2781175Sbill if (j!='\n') symbol[j] = 1; 2791175Sbill cont:; 2801175Sbill } 2811175Sbill } 2821175Sbill else printf("something's funny\n"); 2831175Sbill } 2841175Sbill pos++; 2851175Sbill } 2861175Sbill for (c=0; c<NCHARS; c++) { 2871175Sbill if (symbol[c] == 1) { /* nextstate(s,c) */ 2881175Sbill count = icount; 2891175Sbill for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 2901175Sbill pos = state[s] + 1; 2911175Sbill for (i=0; i<num; i++) { 2921175Sbill curpos = positions[pos]; 2931175Sbill if ((k = name[curpos]) >= 0) 2941175Sbill if ( 2951175Sbill (k == c) 2961175Sbill | (k == DOT) 2971175Sbill | (k == CCL && member(c, right[curpos], 1)) 2981175Sbill | (k == NCCL && member(c, right[curpos], 0)) 2991175Sbill ) { 3001175Sbill number = positions[foll[curpos]]; 3011175Sbill newpos = foll[curpos] + 1; 3021175Sbill for (k=0; k<number; k++) { 3031175Sbill if (tmpstat[positions[newpos]] != 1) { 3041175Sbill tmpstat[positions[newpos]] = 1; 3051175Sbill count++; 3061175Sbill } 3071175Sbill newpos++; 3081175Sbill } 3091175Sbill } 3101175Sbill pos++; 3111175Sbill } /* end nextstate */ 3121175Sbill if (notin(n)) { 3131175Sbill if (n >= NSTATES) overflo(); 3141175Sbill add(state, ++n); 3151175Sbill if (tmpstat[line] == 1) out[n] = 1; 3161175Sbill gotofn[s][c] = n; 3171175Sbill } 3181175Sbill else { 3191175Sbill gotofn[s][c] = xstate; 3201175Sbill } 3211175Sbill } 3221175Sbill } 3231175Sbill } 3241175Sbill } 3251175Sbill 3261175Sbill cstate(v) { 3271175Sbill register b; 3281175Sbill if (left[v] == 0) { 3291175Sbill if (tmpstat[v] != 1) { 3301175Sbill tmpstat[v] = 1; 3311175Sbill count++; 3321175Sbill } 3331175Sbill return(1); 3341175Sbill } 3351175Sbill else if (right[v] == 0) { 3361175Sbill if (cstate(left[v]) == 0) return (0); 3371175Sbill else if (name[v] == PLUS) return (1); 3381175Sbill else return (0); 3391175Sbill } 3401175Sbill else if (name[v] == CAT) { 3411175Sbill if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 3421175Sbill else return (1); 3431175Sbill } 3441175Sbill else { /* name[v] == OR */ 3451175Sbill b = cstate(right[v]); 3461175Sbill if (cstate(left[v]) == 0 || b == 0) return (0); 3471175Sbill else return (1); 3481175Sbill } 3491175Sbill } 3501175Sbill 3511175Sbill 3521175Sbill member(symb, set, torf) { 3531175Sbill register i, num, pos; 3541175Sbill num = chars[set]; 3551175Sbill pos = set + 1; 3561175Sbill for (i=0; i<num; i++) 3571175Sbill if (symb == chars[pos++]) return (torf); 3581175Sbill return (!torf); 3591175Sbill } 3601175Sbill 3611175Sbill notin(n) { 3621175Sbill register i, j, pos; 3631175Sbill for (i=0; i<=n; i++) { 3641175Sbill if (positions[state[i]] == count) { 3651175Sbill pos = state[i] + 1; 3661175Sbill for (j=0; j < count; j++) 3671175Sbill if (tmpstat[positions[pos++]] != 1) goto nxt; 3681175Sbill xstate = i; 3691175Sbill return (0); 3701175Sbill } 3711175Sbill nxt: ; 3721175Sbill } 3731175Sbill return (1); 3741175Sbill } 3751175Sbill 3761175Sbill add(array, n) int *array; { 3771175Sbill register i; 3781175Sbill if (nxtpos + count > MAXPOS) overflo(); 3791175Sbill array[n] = nxtpos; 3801175Sbill positions[nxtpos++] = count; 3811175Sbill for (i=3; i <= line; i++) { 3821175Sbill if (tmpstat[i] == 1) { 3831175Sbill positions[nxtpos++] = i; 3841175Sbill } 3851175Sbill } 3861175Sbill } 3871175Sbill 3881175Sbill follow(v) int v; { 3891175Sbill int p; 3901175Sbill if (v == line) return; 3911175Sbill p = parent[v]; 3921175Sbill switch(name[p]) { 3931175Sbill case STAR: 3941175Sbill case PLUS: cstate(v); 3951175Sbill follow(p); 3961175Sbill return; 3971175Sbill 3981175Sbill case OR: 3991175Sbill case QUEST: follow(p); 4001175Sbill return; 4011175Sbill 4021175Sbill case CAT: if (v == left[p]) { 4031175Sbill if (cstate(right[p]) == 0) { 4041175Sbill follow(p); 4051175Sbill return; 4061175Sbill } 4071175Sbill } 4081175Sbill else follow(p); 4091175Sbill return; 4101175Sbill case FINAL: if (tmpstat[line] != 1) { 4111175Sbill tmpstat[line] = 1; 4121175Sbill count++; 4131175Sbill } 4141175Sbill return; 4151175Sbill } 4161175Sbill } 4171175Sbill 4181175Sbill 4191175Sbill main(argc, argv) 4201175Sbill char **argv; 4211175Sbill { 4221175Sbill while (--argc > 0 && (++argv)[0][0]=='-') 4231175Sbill switch (argv[0][1]) { 4241175Sbill 4251175Sbill case 's': 4261175Sbill sflag++; 4271175Sbill continue; 4281175Sbill 4291175Sbill case 'h': 4301175Sbill hflag = 0; 4311175Sbill continue; 4321175Sbill 4331175Sbill case 'b': 4341175Sbill bflag++; 4351175Sbill continue; 4361175Sbill 4371175Sbill case 'c': 4381175Sbill cflag++; 4391175Sbill continue; 4401175Sbill 4411175Sbill case 'e': 4421175Sbill argc--; 4431175Sbill argv++; 4441175Sbill goto out; 4451175Sbill 4461175Sbill case 'f': 4471175Sbill fflag++; 4481175Sbill continue; 4491175Sbill 4501175Sbill case 'l': 4511175Sbill lflag++; 4521175Sbill continue; 4531175Sbill 4541175Sbill case 'n': 4551175Sbill nflag++; 4561175Sbill continue; 4571175Sbill 4581175Sbill case 'v': 4591175Sbill vflag++; 4601175Sbill continue; 4611175Sbill 4621175Sbill default: 4631175Sbill fprintf(stderr, "egrep: unknown flag\n"); 4641175Sbill continue; 4651175Sbill } 4661175Sbill out: 4671175Sbill if (argc<=0) 4681175Sbill exit(2); 4691175Sbill if (fflag) { 470*9117Srrh fname = *argv; 471*9117Srrh exprfile = fopen(fname, "r"); 472*9117Srrh if (exprfile == (FILE *)NULL) { 4731175Sbill fprintf(stderr, "egrep: can't open %s\n", fname); 4741175Sbill exit(2); 4751175Sbill } 4761175Sbill } 4771175Sbill else input = *argv; 4781175Sbill argc--; 4791175Sbill argv++; 4801175Sbill 4811175Sbill yyparse(); 4821175Sbill 4831175Sbill cfoll(line-1); 4841175Sbill cgotofn(); 4851175Sbill nfile = argc; 4861175Sbill if (argc<=0) { 4871175Sbill if (lflag) exit(1); 4881175Sbill execute(0); 4891175Sbill } 4901175Sbill else while (--argc >= 0) { 4911175Sbill execute(*argv); 4921175Sbill argv++; 4931175Sbill } 4941175Sbill exit(nsucc == 0); 4951175Sbill } 4961175Sbill 4971175Sbill execute(file) 4981175Sbill char *file; 4991175Sbill { 5001175Sbill register char *p; 5011175Sbill register cstat; 5021175Sbill register ccount; 5031175Sbill char buf[1024]; 5041175Sbill char *nlp; 5051175Sbill int istat; 5061175Sbill if (file) { 5071175Sbill if ((f = open(file, 0)) < 0) { 5081175Sbill fprintf(stderr, "egrep: can't open %s\n", file); 5091175Sbill exit(2); 5101175Sbill } 5111175Sbill } 5121175Sbill else f = 0; 5131175Sbill ccount = 0; 5141175Sbill lnum = 1; 5151175Sbill tln = 0; 5161175Sbill blkno = 0; 5171175Sbill p = buf; 5181175Sbill nlp = p; 5191175Sbill if ((ccount = read(f,p,512))<=0) goto done; 5201175Sbill istat = cstat = gotofn[0]['\n']; 5211175Sbill if (out[cstat]) goto found; 5221175Sbill for (;;) { 5231175Sbill cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 5241175Sbill if (out[cstat]) { 5251175Sbill found: for(;;) { 5261175Sbill if (*p++ == '\n') { 5271175Sbill if (vflag == 0) { 5281175Sbill succeed: nsucc = 1; 5291175Sbill if (cflag) tln++; 5301175Sbill else if (sflag) 5311175Sbill ; /* ugh */ 5321175Sbill else if (lflag) { 5331175Sbill printf("%s\n", file); 5341175Sbill close(f); 5351175Sbill return; 5361175Sbill } 5371175Sbill else { 5381175Sbill if (nfile > 1 && hflag) printf("%s:", file); 5391175Sbill if (bflag) printf("%d:", blkno); 5401175Sbill if (nflag) printf("%ld:", lnum); 5411175Sbill if (p <= nlp) { 5421175Sbill while (nlp < &buf[1024]) putchar(*nlp++); 5431175Sbill nlp = buf; 5441175Sbill } 5451175Sbill while (nlp < p) putchar(*nlp++); 5461175Sbill } 5471175Sbill } 5481175Sbill lnum++; 5491175Sbill nlp = p; 5501175Sbill if ((out[(cstat=istat)]) == 0) goto brk2; 5511175Sbill } 5521175Sbill cfound: 5531175Sbill if (--ccount <= 0) { 5541175Sbill if (p <= &buf[512]) { 5551175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 5561175Sbill } 5571175Sbill else if (p == &buf[1024]) { 5581175Sbill p = buf; 5591175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 5601175Sbill } 5611175Sbill else { 5621175Sbill if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 5631175Sbill } 5641175Sbill blkno++; 5651175Sbill } 5661175Sbill } 5671175Sbill } 5681175Sbill if (*p++ == '\n') { 5691175Sbill if (vflag) goto succeed; 5701175Sbill else { 5711175Sbill lnum++; 5721175Sbill nlp = p; 5731175Sbill if (out[(cstat=istat)]) goto cfound; 5741175Sbill } 5751175Sbill } 5761175Sbill brk2: 5771175Sbill if (--ccount <= 0) { 5781175Sbill if (p <= &buf[512]) { 5791175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 5801175Sbill } 5811175Sbill else if (p == &buf[1024]) { 5821175Sbill p = buf; 5831175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 5841175Sbill } 5851175Sbill else { 5861175Sbill if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 5871175Sbill } 5881175Sbill blkno++; 5891175Sbill } 5901175Sbill } 5911175Sbill done: close(f); 5921175Sbill if (cflag) { 5931175Sbill if (nfile > 1) 5941175Sbill printf("%s:", file); 5951175Sbill printf("%ld\n", tln); 5961175Sbill } 5971175Sbill } 598