11175Sbill /* 21175Sbill * egrep -- print lines containing (or not containing) a regular expression 31175Sbill * 41175Sbill * status returns: 51175Sbill * 0 - ok, and some matches 61175Sbill * 1 - ok, but no matches 71175Sbill * 2 - some error 81175Sbill */ 91175Sbill %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 101175Sbill %left OR 111175Sbill %left CHAR DOT CCL NCCL '(' 121175Sbill %left CAT 131175Sbill %left STAR PLUS QUEST 141175Sbill 151175Sbill %{ 16*11561Sedward static char *sccsid = "@(#)old.egrep.y 4.3 (Berkeley) 03/13/83"; 171175Sbill #include <stdio.h> 181175Sbill 191175Sbill #define MAXLIN 350 201175Sbill #define MAXPOS 4000 211175Sbill #define NCHARS 128 221175Sbill #define NSTATES 128 231175Sbill #define FINAL -1 241175Sbill char gotofn[NSTATES][NCHARS]; 251175Sbill int state[NSTATES]; 261175Sbill char out[NSTATES]; 271175Sbill int line = 1; 281175Sbill int name[MAXLIN]; 291175Sbill int left[MAXLIN]; 301175Sbill int right[MAXLIN]; 311175Sbill int parent[MAXLIN]; 321175Sbill int foll[MAXLIN]; 331175Sbill int positions[MAXPOS]; 341175Sbill char chars[MAXLIN]; 351175Sbill int nxtpos; 361175Sbill int nxtchar = 0; 371175Sbill int tmpstat[MAXLIN]; 381175Sbill int initstat[MAXLIN]; 391175Sbill int xstate; 401175Sbill int count; 411175Sbill int icount; 421175Sbill char *input; 439117Srrh FILE *exprfile; 441175Sbill 451175Sbill long lnum; 461175Sbill int bflag; 471175Sbill int cflag; 481175Sbill int fflag; 491175Sbill int lflag; 501175Sbill int nflag; 511175Sbill int hflag = 1; 521175Sbill int sflag; 531175Sbill int vflag; 54*11561Sedward int retcode = 0; 551175Sbill int nfile; 561175Sbill int blkno; 571175Sbill long tln; 581175Sbill int nsucc; 591175Sbill 601175Sbill int f; 619117Srrh char *fname; 621175Sbill %} 631175Sbill 641175Sbill %% 651175Sbill s: t 661175Sbill ={ unary(FINAL, $1); 671175Sbill line--; 681175Sbill } 691175Sbill ; 701175Sbill t: b r 711175Sbill ={ $$ = node(CAT, $1, $2); } 721175Sbill | OR b r OR 731175Sbill ={ $$ = node(CAT, $2, $3); } 741175Sbill | OR b r 751175Sbill ={ $$ = node(CAT, $2, $3); } 761175Sbill | b r OR 771175Sbill ={ $$ = node(CAT, $1, $2); } 781175Sbill ; 791175Sbill b: 801175Sbill ={ $$ = enter(DOT); 811175Sbill $$ = unary(STAR, $$); } 821175Sbill ; 831175Sbill r: CHAR 841175Sbill ={ $$ = enter($1); } 851175Sbill | DOT 861175Sbill ={ $$ = enter(DOT); } 871175Sbill | CCL 881175Sbill ={ $$ = cclenter(CCL); } 891175Sbill | NCCL 901175Sbill ={ $$ = cclenter(NCCL); } 911175Sbill ; 921175Sbill 931175Sbill r: r OR r 941175Sbill ={ $$ = node(OR, $1, $3); } 951175Sbill | r r %prec CAT 961175Sbill ={ $$ = node(CAT, $1, $2); } 971175Sbill | r STAR 981175Sbill ={ $$ = unary(STAR, $1); } 991175Sbill | r PLUS 1001175Sbill ={ $$ = unary(PLUS, $1); } 1011175Sbill | r QUEST 1021175Sbill ={ $$ = unary(QUEST, $1); } 1031175Sbill | '(' r ')' 1041175Sbill ={ $$ = $2; } 1051175Sbill | error 1061175Sbill ; 1071175Sbill 1081175Sbill %% 1091175Sbill yyerror(s) { 1101175Sbill fprintf(stderr, "egrep: %s\n", s); 1111175Sbill exit(2); 1121175Sbill } 1131175Sbill 1141175Sbill yylex() { 1151175Sbill extern int yylval; 1161175Sbill int cclcnt, x; 1171175Sbill register char c, d; 1181175Sbill switch(c = nextch()) { 1191175Sbill case '$': 1201175Sbill case '^': c = '\n'; 1211175Sbill goto defchar; 1221175Sbill case '|': return (OR); 1231175Sbill case '*': return (STAR); 1241175Sbill case '+': return (PLUS); 1251175Sbill case '?': return (QUEST); 1261175Sbill case '(': return (c); 1271175Sbill case ')': return (c); 1281175Sbill case '.': return (DOT); 1291175Sbill case '\0': return (0); 1301175Sbill case '\n': return (OR); 1311175Sbill case '[': 1321175Sbill x = CCL; 1331175Sbill cclcnt = 0; 1341175Sbill count = nxtchar++; 1351175Sbill if ((c = nextch()) == '^') { 1361175Sbill x = NCCL; 1371175Sbill c = nextch(); 1381175Sbill } 1391175Sbill do { 1401175Sbill if (c == '\0') synerror(); 1411175Sbill if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 1421175Sbill if ((d = nextch()) != 0) { 1431175Sbill c = chars[nxtchar-1]; 1441175Sbill while (c < d) { 1451175Sbill if (nxtchar >= MAXLIN) overflo(); 1461175Sbill chars[nxtchar++] = ++c; 1471175Sbill cclcnt++; 1481175Sbill } 1491175Sbill continue; 1501175Sbill } 1511175Sbill } 1521175Sbill if (nxtchar >= MAXLIN) overflo(); 1531175Sbill chars[nxtchar++] = c; 1541175Sbill cclcnt++; 1551175Sbill } while ((c = nextch()) != ']'); 1561175Sbill chars[count] = cclcnt; 1571175Sbill return (x); 1581175Sbill case '\\': 1591175Sbill if ((c = nextch()) == '\0') synerror(); 1601175Sbill defchar: 1611175Sbill default: yylval = c; return (CHAR); 1621175Sbill } 1631175Sbill } 1641175Sbill nextch() { 1651175Sbill register char c; 1661175Sbill if (fflag) { 1679117Srrh if ((c = getc(exprfile)) == EOF) { 1689117Srrh fclose(exprfile); 1699117Srrh return(0); 1709117Srrh } 1711175Sbill } 1721175Sbill else c = *input++; 1731175Sbill return(c); 1741175Sbill } 1751175Sbill 1761175Sbill synerror() { 1771175Sbill fprintf(stderr, "egrep: syntax error\n"); 1781175Sbill exit(2); 1791175Sbill } 1801175Sbill 1811175Sbill enter(x) int x; { 1821175Sbill if(line >= MAXLIN) overflo(); 1831175Sbill name[line] = x; 1841175Sbill left[line] = 0; 1851175Sbill right[line] = 0; 1861175Sbill return(line++); 1871175Sbill } 1881175Sbill 1891175Sbill cclenter(x) int x; { 1901175Sbill register linno; 1911175Sbill linno = enter(x); 1921175Sbill right[linno] = count; 1931175Sbill return (linno); 1941175Sbill } 1951175Sbill 1961175Sbill node(x, l, r) { 1971175Sbill if(line >= MAXLIN) overflo(); 1981175Sbill name[line] = x; 1991175Sbill left[line] = l; 2001175Sbill right[line] = r; 2011175Sbill parent[l] = line; 2021175Sbill parent[r] = line; 2031175Sbill return(line++); 2041175Sbill } 2051175Sbill 2061175Sbill unary(x, d) { 2071175Sbill if(line >= MAXLIN) overflo(); 2081175Sbill name[line] = x; 2091175Sbill left[line] = d; 2101175Sbill right[line] = 0; 2111175Sbill parent[d] = line; 2121175Sbill return(line++); 2131175Sbill } 2141175Sbill overflo() { 2151175Sbill fprintf(stderr, "egrep: regular expression too long\n"); 2161175Sbill exit(2); 2171175Sbill } 2181175Sbill 2191175Sbill cfoll(v) { 2201175Sbill register i; 2211175Sbill if (left[v] == 0) { 2221175Sbill count = 0; 2231175Sbill for (i=1; i<=line; i++) tmpstat[i] = 0; 2241175Sbill follow(v); 2251175Sbill add(foll, v); 2261175Sbill } 2271175Sbill else if (right[v] == 0) cfoll(left[v]); 2281175Sbill else { 2291175Sbill cfoll(left[v]); 2301175Sbill cfoll(right[v]); 2311175Sbill } 2321175Sbill } 2331175Sbill cgotofn() { 2341175Sbill register c, i, k; 2351175Sbill int n, s; 2361175Sbill char symbol[NCHARS]; 2371175Sbill int j, nc, pc, pos; 2381175Sbill int curpos, num; 2391175Sbill int number, newpos; 2401175Sbill count = 0; 2411175Sbill for (n=3; n<=line; n++) tmpstat[n] = 0; 2421175Sbill if (cstate(line-1)==0) { 2431175Sbill tmpstat[line] = 1; 2441175Sbill count++; 2451175Sbill out[0] = 1; 2461175Sbill } 2471175Sbill for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 2481175Sbill count--; /*leave out position 1 */ 2491175Sbill icount = count; 2501175Sbill tmpstat[1] = 0; 2511175Sbill add(state, 0); 2521175Sbill n = 0; 2531175Sbill for (s=0; s<=n; s++) { 2541175Sbill if (out[s] == 1) continue; 2551175Sbill for (i=0; i<NCHARS; i++) symbol[i] = 0; 2561175Sbill num = positions[state[s]]; 2571175Sbill count = icount; 2581175Sbill for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 2591175Sbill pos = state[s] + 1; 2601175Sbill for (i=0; i<num; i++) { 2611175Sbill curpos = positions[pos]; 2621175Sbill if ((c = name[curpos]) >= 0) { 2631175Sbill if (c < NCHARS) symbol[c] = 1; 2641175Sbill else if (c == DOT) { 2651175Sbill for (k=0; k<NCHARS; k++) 2661175Sbill if (k!='\n') symbol[k] = 1; 2671175Sbill } 2681175Sbill else if (c == CCL) { 2691175Sbill nc = chars[right[curpos]]; 2701175Sbill pc = right[curpos] + 1; 2711175Sbill for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 2721175Sbill } 2731175Sbill else if (c == NCCL) { 2741175Sbill nc = chars[right[curpos]]; 2751175Sbill for (j = 0; j < NCHARS; j++) { 2761175Sbill pc = right[curpos] + 1; 2771175Sbill for (k = 0; k < nc; k++) 2781175Sbill if (j==chars[pc++]) goto cont; 2791175Sbill if (j!='\n') symbol[j] = 1; 2801175Sbill cont:; 2811175Sbill } 2821175Sbill } 2831175Sbill else printf("something's funny\n"); 2841175Sbill } 2851175Sbill pos++; 2861175Sbill } 2871175Sbill for (c=0; c<NCHARS; c++) { 2881175Sbill if (symbol[c] == 1) { /* nextstate(s,c) */ 2891175Sbill count = icount; 2901175Sbill for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 2911175Sbill pos = state[s] + 1; 2921175Sbill for (i=0; i<num; i++) { 2931175Sbill curpos = positions[pos]; 2941175Sbill if ((k = name[curpos]) >= 0) 2951175Sbill if ( 2961175Sbill (k == c) 2971175Sbill | (k == DOT) 2981175Sbill | (k == CCL && member(c, right[curpos], 1)) 2991175Sbill | (k == NCCL && member(c, right[curpos], 0)) 3001175Sbill ) { 3011175Sbill number = positions[foll[curpos]]; 3021175Sbill newpos = foll[curpos] + 1; 3031175Sbill for (k=0; k<number; k++) { 3041175Sbill if (tmpstat[positions[newpos]] != 1) { 3051175Sbill tmpstat[positions[newpos]] = 1; 3061175Sbill count++; 3071175Sbill } 3081175Sbill newpos++; 3091175Sbill } 3101175Sbill } 3111175Sbill pos++; 3121175Sbill } /* end nextstate */ 3131175Sbill if (notin(n)) { 3141175Sbill if (n >= NSTATES) overflo(); 3151175Sbill add(state, ++n); 3161175Sbill if (tmpstat[line] == 1) out[n] = 1; 3171175Sbill gotofn[s][c] = n; 3181175Sbill } 3191175Sbill else { 3201175Sbill gotofn[s][c] = xstate; 3211175Sbill } 3221175Sbill } 3231175Sbill } 3241175Sbill } 3251175Sbill } 3261175Sbill 3271175Sbill cstate(v) { 3281175Sbill register b; 3291175Sbill if (left[v] == 0) { 3301175Sbill if (tmpstat[v] != 1) { 3311175Sbill tmpstat[v] = 1; 3321175Sbill count++; 3331175Sbill } 3341175Sbill return(1); 3351175Sbill } 3361175Sbill else if (right[v] == 0) { 3371175Sbill if (cstate(left[v]) == 0) return (0); 3381175Sbill else if (name[v] == PLUS) return (1); 3391175Sbill else return (0); 3401175Sbill } 3411175Sbill else if (name[v] == CAT) { 3421175Sbill if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 3431175Sbill else return (1); 3441175Sbill } 3451175Sbill else { /* name[v] == OR */ 3461175Sbill b = cstate(right[v]); 3471175Sbill if (cstate(left[v]) == 0 || b == 0) return (0); 3481175Sbill else return (1); 3491175Sbill } 3501175Sbill } 3511175Sbill 3521175Sbill 3531175Sbill member(symb, set, torf) { 3541175Sbill register i, num, pos; 3551175Sbill num = chars[set]; 3561175Sbill pos = set + 1; 3571175Sbill for (i=0; i<num; i++) 3581175Sbill if (symb == chars[pos++]) return (torf); 3591175Sbill return (!torf); 3601175Sbill } 3611175Sbill 3621175Sbill notin(n) { 3631175Sbill register i, j, pos; 3641175Sbill for (i=0; i<=n; i++) { 3651175Sbill if (positions[state[i]] == count) { 3661175Sbill pos = state[i] + 1; 3671175Sbill for (j=0; j < count; j++) 3681175Sbill if (tmpstat[positions[pos++]] != 1) goto nxt; 3691175Sbill xstate = i; 3701175Sbill return (0); 3711175Sbill } 3721175Sbill nxt: ; 3731175Sbill } 3741175Sbill return (1); 3751175Sbill } 3761175Sbill 3771175Sbill add(array, n) int *array; { 3781175Sbill register i; 3791175Sbill if (nxtpos + count > MAXPOS) overflo(); 3801175Sbill array[n] = nxtpos; 3811175Sbill positions[nxtpos++] = count; 3821175Sbill for (i=3; i <= line; i++) { 3831175Sbill if (tmpstat[i] == 1) { 3841175Sbill positions[nxtpos++] = i; 3851175Sbill } 3861175Sbill } 3871175Sbill } 3881175Sbill 3891175Sbill follow(v) int v; { 3901175Sbill int p; 3911175Sbill if (v == line) return; 3921175Sbill p = parent[v]; 3931175Sbill switch(name[p]) { 3941175Sbill case STAR: 3951175Sbill case PLUS: cstate(v); 3961175Sbill follow(p); 3971175Sbill return; 3981175Sbill 3991175Sbill case OR: 4001175Sbill case QUEST: follow(p); 4011175Sbill return; 4021175Sbill 4031175Sbill case CAT: if (v == left[p]) { 4041175Sbill if (cstate(right[p]) == 0) { 4051175Sbill follow(p); 4061175Sbill return; 4071175Sbill } 4081175Sbill } 4091175Sbill else follow(p); 4101175Sbill return; 4111175Sbill case FINAL: if (tmpstat[line] != 1) { 4121175Sbill tmpstat[line] = 1; 4131175Sbill count++; 4141175Sbill } 4151175Sbill return; 4161175Sbill } 4171175Sbill } 4181175Sbill 4191175Sbill 4201175Sbill main(argc, argv) 4211175Sbill char **argv; 4221175Sbill { 4231175Sbill while (--argc > 0 && (++argv)[0][0]=='-') 4241175Sbill switch (argv[0][1]) { 4251175Sbill 4261175Sbill case 's': 4271175Sbill sflag++; 4281175Sbill continue; 4291175Sbill 4301175Sbill case 'h': 4311175Sbill hflag = 0; 4321175Sbill continue; 4331175Sbill 4341175Sbill case 'b': 4351175Sbill bflag++; 4361175Sbill continue; 4371175Sbill 4381175Sbill case 'c': 4391175Sbill cflag++; 4401175Sbill continue; 4411175Sbill 4421175Sbill case 'e': 4431175Sbill argc--; 4441175Sbill argv++; 4451175Sbill goto out; 4461175Sbill 4471175Sbill case 'f': 4481175Sbill fflag++; 4491175Sbill continue; 4501175Sbill 4511175Sbill case 'l': 4521175Sbill lflag++; 4531175Sbill continue; 4541175Sbill 4551175Sbill case 'n': 4561175Sbill nflag++; 4571175Sbill continue; 4581175Sbill 4591175Sbill case 'v': 4601175Sbill vflag++; 4611175Sbill continue; 4621175Sbill 4631175Sbill default: 4641175Sbill fprintf(stderr, "egrep: unknown flag\n"); 4651175Sbill continue; 4661175Sbill } 4671175Sbill out: 4681175Sbill if (argc<=0) 4691175Sbill exit(2); 4701175Sbill if (fflag) { 4719117Srrh fname = *argv; 4729117Srrh exprfile = fopen(fname, "r"); 4739117Srrh if (exprfile == (FILE *)NULL) { 4741175Sbill fprintf(stderr, "egrep: can't open %s\n", fname); 4751175Sbill exit(2); 4761175Sbill } 4771175Sbill } 4781175Sbill else input = *argv; 4791175Sbill argc--; 4801175Sbill argv++; 4811175Sbill 4821175Sbill yyparse(); 4831175Sbill 4841175Sbill cfoll(line-1); 4851175Sbill cgotofn(); 4861175Sbill nfile = argc; 4871175Sbill if (argc<=0) { 4881175Sbill if (lflag) exit(1); 4891175Sbill execute(0); 4901175Sbill } 4911175Sbill else while (--argc >= 0) { 4921175Sbill execute(*argv); 4931175Sbill argv++; 4941175Sbill } 495*11561Sedward exit(retcode != 0 ? retcode : nsucc == 0); 4961175Sbill } 4971175Sbill 4981175Sbill execute(file) 4991175Sbill char *file; 5001175Sbill { 5011175Sbill register char *p; 5021175Sbill register cstat; 5031175Sbill register ccount; 5041175Sbill char buf[1024]; 5051175Sbill char *nlp; 5061175Sbill int istat; 5071175Sbill if (file) { 5081175Sbill if ((f = open(file, 0)) < 0) { 5091175Sbill fprintf(stderr, "egrep: can't open %s\n", file); 510*11561Sedward retcode = 2; 511*11561Sedward return; 5121175Sbill } 5131175Sbill } 5141175Sbill else f = 0; 5151175Sbill ccount = 0; 5161175Sbill lnum = 1; 5171175Sbill tln = 0; 5181175Sbill blkno = 0; 5191175Sbill p = buf; 5201175Sbill nlp = p; 5211175Sbill if ((ccount = read(f,p,512))<=0) goto done; 5221175Sbill istat = cstat = gotofn[0]['\n']; 5231175Sbill if (out[cstat]) goto found; 5241175Sbill for (;;) { 5251175Sbill cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 5261175Sbill if (out[cstat]) { 5271175Sbill found: for(;;) { 5281175Sbill if (*p++ == '\n') { 5291175Sbill if (vflag == 0) { 5301175Sbill succeed: nsucc = 1; 5311175Sbill if (cflag) tln++; 5321175Sbill else if (sflag) 5331175Sbill ; /* ugh */ 5341175Sbill else if (lflag) { 5351175Sbill printf("%s\n", file); 5361175Sbill close(f); 5371175Sbill return; 5381175Sbill } 5391175Sbill else { 5401175Sbill if (nfile > 1 && hflag) printf("%s:", file); 5411175Sbill if (bflag) printf("%d:", blkno); 5421175Sbill if (nflag) printf("%ld:", lnum); 5431175Sbill if (p <= nlp) { 5441175Sbill while (nlp < &buf[1024]) putchar(*nlp++); 5451175Sbill nlp = buf; 5461175Sbill } 5471175Sbill while (nlp < p) putchar(*nlp++); 5481175Sbill } 5491175Sbill } 5501175Sbill lnum++; 5511175Sbill nlp = p; 5521175Sbill if ((out[(cstat=istat)]) == 0) goto brk2; 5531175Sbill } 5541175Sbill cfound: 5551175Sbill if (--ccount <= 0) { 5561175Sbill if (p <= &buf[512]) { 5571175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 5581175Sbill } 5591175Sbill else if (p == &buf[1024]) { 5601175Sbill p = buf; 5611175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 5621175Sbill } 5631175Sbill else { 5641175Sbill if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 5651175Sbill } 5661175Sbill blkno++; 5671175Sbill } 5681175Sbill } 5691175Sbill } 5701175Sbill if (*p++ == '\n') { 5711175Sbill if (vflag) goto succeed; 5721175Sbill else { 5731175Sbill lnum++; 5741175Sbill nlp = p; 5751175Sbill if (out[(cstat=istat)]) goto cfound; 5761175Sbill } 5771175Sbill } 5781175Sbill brk2: 5791175Sbill if (--ccount <= 0) { 5801175Sbill if (p <= &buf[512]) { 5811175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 5821175Sbill } 5831175Sbill else if (p == &buf[1024]) { 5841175Sbill p = buf; 5851175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 5861175Sbill } 5871175Sbill else { 5881175Sbill if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 5891175Sbill } 5901175Sbill blkno++; 5911175Sbill } 5921175Sbill } 5931175Sbill done: close(f); 5941175Sbill if (cflag) { 5951175Sbill if (nfile > 1) 5961175Sbill printf("%s:", file); 5971175Sbill printf("%ld\n", tln); 5981175Sbill } 5991175Sbill } 600