#ifndef lint
static char sccsid[] = "@(#)dprog.c 4.1 (Berkeley) 82/11/06";
#endif /* not lint */

/*
 * diction -- print all sentences containing one of default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 *
 * Phrases are read one per line from a pattern file and compiled into a
 * table of `struct words' nodes (cgotofn), failure links are added
 * (cfail), and each input file is then scanned (execute).  A phrase line
 * starting with '~' is stored with the marker value 0377 instead of a
 * length; execute() does not report a match that ends on such a node.
 */

#include <ctype.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * NOTE(review): DICT is not defined anywhere in this file; it is
 * presumably supplied on the compiler command line (-DDICT=...).  The
 * fallback path below is an assumption -- confirm against the build.
 */
#ifndef DICT
#define DICT	"/usr/lib/dict.d"
#endif

#define MAXSIZ	6500		/* number of nodes available in w[] */
#define QSIZE	650		/* capacity of the work queue in cfail() */
#define BUFLEN	1024		/* size of the circular text buffer */

struct words {
	char	inp;		/* character that selects this node (0 = unused) */
	char	out;		/* 0 = no phrase ends here, 0377 = '~' phrase,
				 * otherwise phrase length minus one */
	struct words *nst;	/* node reached after matching inp */
	struct words *link;	/* next alternative at the same position */
	struct words *fail;	/* node to resume from on mismatch */
} w[MAXSIZ], *smax, *q;

int	fflag;
int	nflag = 1;		/*use default file*/
char	*filename;
int	nfile;
int	nsucc;
long	nsent = 0;
long	nhits = 0;
char	*nlp;			/* next character of the buffer still to print */
char	*begp, *endp;		/* first/last character of the current match */
int	oct = 0;		/* characters printed since the last line break */
FILE	*wordf;
char	*argptr;

void	execute(char *file);
int	getargc(void);
void	cgotofn(void);
void	cfail(void);
void	overflo(void);
void	convert(char *p, int ccount);
void	outc(char *addr);

/*
 * Parse the flags, open the pattern source, build the matcher tables and
 * scan every named file (or standard input when none is named).
 *
 *	-f file	use `file' as a pattern file (read after the default one,
 *		or instead of it when -n is also given)
 *	-n	do not read the default pattern file
 *	-d	accepted and ignored
 */
int
main(int argc, char **argv)
{
	while (--argc > 0 && (++argv)[0][0] == '-')
		switch (argv[0][1]) {

		case 'f':
			/* argc still counts "-f" itself, so a name needs >= 2. */
			if (argc < 2) {
				fprintf(stderr, "diction: -f requires a file name\n");
				exit(2);
			}
			fflag++;
			/* Store the argument string, not the argv slot address. */
			filename = *++argv;
			argc--;
			continue;

		case 'n':
			nflag = 0;
			continue;
		case 'd':
			continue;
		default:
			fprintf(stderr, "diction: unknown flag\n");
			continue;
		}

	if (nflag) {
		wordf = fopen(DICT, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open default dictionary\n");
			exit(2);
		}
	} else {
		/* -n without -f leaves no pattern source at all. */
		if (filename == NULL) {
			fprintf(stderr, "diction: -n given without a -f pattern file\n");
			exit(2);
		}
		wordf = fopen(filename, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open %s\n", filename);
			exit(2);
		}
	}

	cgotofn();
	cfail();
	nfile = argc;
	if (argc <= 0) {
		execute((char *)NULL);
	} else {
		while (--argc >= 0) {
			execute(*argv);
			argv++;
		}
	}
	printf("number of sentences %ld number of hits %ld\n", nsent, nhits);
	exit(nsucc == 0);
}

/*
 * Read the next chunk of input into the circular buffer.
 *
 * *pp is wrapped to the start of the buffer when it sits at the end.  At
 * most half a buffer is read, and never past the end of the buffer.  The
 * bytes read are normalised with convert().  Returns the byte count from
 * read(), i.e. <= 0 on end of file or error.
 */
static int
fill_buf(int fd, char *buf, char **pp)
{
	char *p = *pp;
	char *end = buf + BUFLEN;
	int n;

	if (p == end)
		p = buf;
	if (p > buf + BUFLEN / 2)
		n = (int)read(fd, p, (size_t)(end - p));
	else
		n = (int)read(fd, p, BUFLEN / 2);
	*pp = p;
	if (n > 0)
		convert(p, n);
	return (n);
}

/*
 * Scan one input file (standard input when `file' is NULL) and print
 * every sentence that contains a phrase, with the phrase in brackets.
 *
 * The statement order and the gotos below are those of the original; only
 * the points called out in comments differ.
 */
void
execute(char *file)
{
	char *p;
	struct words *c;
	int ccount;
	struct words *savc;
	char *savp;
	int savct;
	/*
	 * One spare, always-zero byte follows the BUFLEN data bytes so the
	 * one-character lookahead *(p+1) below never reads outside the
	 * array; zero-initialising keeps that lookahead from reading
	 * indeterminate bytes before the buffer has been filled.
	 */
	char buf[BUFLEN + 1] = {0};
	char *const bufend = buf + BUFLEN;
	long off;
	int f;
	int hit;

	if (file) {
		if ((f = open(file, O_RDONLY)) < 0) {
			fprintf(stderr, "diction: can't open %s\n", file);
			exit(2);
		}
	} else
		f = 0;
	ccount = 0;
	p = buf;
	nlp = p;
	c = w;
	oct = hit = 0;
	savc = NULL;
	savp = NULL;
	savct = 0;
	for (;;) {
		if (--ccount <= 0) {
			if ((ccount = fill_buf(f, buf, &p)) <= 0)
				break;
		}
		if (p == bufend)
			p = buf;
	nstate:
		if (c->inp == *p) {
			c = c->nst;
		} else if (c->link != 0) {
			c = c->link;
			goto nstate;
		} else {
			if (savp != 0) {
				/*
				 * A longer match was being tried and failed:
				 * go back to the saved shorter match.
				 * NOTE(review): the ccount adjustment is kept
				 * exactly as in the original.
				 */
				c = savc;
				p = savp;
				if (ccount > savct)
					ccount += savct;
				else
					ccount = savct;
				savc = NULL;
				savp = NULL;
				goto hadone;
			}
			c = c->fail;
			if (c == 0) {
				c = w;
	istate:
				if (c->inp == *p) {
					c = c->nst;
				} else if (c->link != 0) {
					c = c->link;
					goto istate;
				}
			} else
				goto nstate;
		}
		if (c->out) {
			/*
			 * A phrase ends here.  If the next character could
			 * extend it, remember this position and keep going.
			 */
			if ((c->inp == *(p + 1)) && (c->nst != 0)) {
				savp = p;
				savc = c;
				savct = ccount;
				goto cont;
			} else if (c->link != 0) {
				savc = c;
				while ((savc = savc->link) != 0) {
					if (savc->inp == *(p + 1)) {
						savp = p;
						savc = c;
						savct = ccount;
						goto cont;
					}
				}
			}
	hadone:
			savc = NULL;
			savp = NULL;
			if (c->out == (char)(0377)) {
				/* '~' phrase: not reported. */
				c = w;
				goto nstate;
			}
			/*
			 * Start of the match, wrapped around the circular
			 * buffer; computed as an offset so that no pointer
			 * before buf[0] is ever formed.
			 */
			off = (long)(p - buf) - c->out;
			if (off < 0)
				off += BUFLEN;
			begp = buf + off;
			endp = p;
			hit = 1;
			nhits++;
			if (*p++ == '.') {
				if (--ccount <= 0) {
					if ((ccount = fill_buf(f, buf, &p)) <= 0)
						break;
				}
			}
			nsucc = 1;
			if (p <= nlp) {
				/* Pending text wraps: flush to the end first. */
				outc(bufend);
				nlp = buf;
			}
			outc(p);
			nlp = p;
			c = w;
			begp = endp = NULL;
			continue;
		}
	cont:
		if (*p++ == '.') {
			/* End of sentence: print the rest of it if it had a hit. */
			if (hit) {
				if (p <= nlp) {
					outc(bufend);
					nlp = buf;
				}
				outc(p);
				putchar('\n');
				putchar('\n');
			}
			hit = 0;
			oct = 0;
			nlp = p;
			c = w;
			begp = endp = NULL;
		}
	}
	close(f);
}

/*
 * Return the next character of the pattern source, or EOF.
 *
 * Characters come from wordf while it is open.  When the default
 * dictionary is exhausted and -f was given, reading switches to the -f
 * file.  With no open file the string at argptr is used; nothing in this
 * file sets argptr, so a NULL argptr is treated as end of input.
 */
int
getargc(void)
{
	int c;

	if (wordf) {
		if ((c = getc(wordf)) != EOF)
			return (c);
		fclose(wordf);
		wordf = NULL;		/* never touch the closed stream again */
		if (nflag && fflag) {
			nflag = 0;
			wordf = fopen(filename, "r");
			if (wordf == NULL) {
				fprintf(stderr, "can't open %s\n", filename);
				exit(2);
			}
			return (getc(wordf));
		}
		return (EOF);
	}
	if (argptr == NULL)
		return (EOF);
	if ((c = *argptr++) == '\0')
		return (EOF);
	return (c);
}

/*
 * Build the node table in w[] from the pattern source.
 *
 * Each phrase is threaded through the table one character per node:
 * `nst' leads to the node for the next character, `link' chains the
 * alternatives at the same position.  The node reached after the last
 * character gets out = phrase length - 1, or 0377 for a '~' phrase.  A
 * one-character phrase therefore gets out == 0 and is never reported.
 */
void
cgotofn(void)
{
	int c;
	struct words *s;
	int ct;
	int neg;

	s = smax = w;
	neg = ct = 0;
nword:
	for (;;) {
		c = getargc();
		if (c == '~') {
			neg++;
			c = getargc();
		}
		if (c == EOF)
			return;
		if (c == '\n') {
			/*
			 * ct == 0 means a blank (or "~"-only) line; s is
			 * still the root then, and marking the root as the
			 * end of a phrase would corrupt the table.
			 */
			if (ct > 0) {
				if (neg)
					s->out = (char)0377;
				else
					s->out = ct - 1;
			}
			neg = ct = 0;
			s = w;
		} else {
	loop:
			if (s->inp == c) {
				s = s->nst;
				ct++;
				continue;
			}
			if (s->inp == 0)
				goto enter;
			if (s->link == 0) {
				if (smax >= &w[MAXSIZ - 1])
					overflo();
				s->link = ++smax;
				s = smax;
				goto enter;
			}
			s = s->link;
			goto loop;
		}
	}

enter:
	/* The rest of the phrase is new: append one fresh node per character. */
	do {
		s->inp = c;
		ct++;
		if (smax >= &w[MAXSIZ - 1])
			overflo();
		s->nst = ++smax;
		s = smax;
	} while ((c = getargc()) != '\n' && c != EOF);
	if (neg)
		smax->out = (char)0377;
	else
		smax->out = ct - 1;
	neg = ct = 0;
	s = w;
	if (c != EOF)
		goto nword;
}

/*
 * Report that a fixed-size table is full and exit with status 2.
 */
void
overflo(void)
{
	fprintf(stderr, "wordlist too large\n");
	exit(2);
}

/*
 * Fill in the `fail' links of the table built by cgotofn().
 *
 * Node lists are visited first-in first-out, starting with the lists
 * that hang off the top-level nodes.  For each node the candidate list
 * (the node's own fail target, then the top level) is searched for the
 * same character; on a match every node in the `nst' list gets that
 * candidate's `nst' as its fail link and inherits its `out' if it has
 * none of its own.
 *
 * The work queue is a ring buffer with an explicit element count, so a
 * push can never write outside queue[]; overflo() is called when it is
 * full.
 */
void
cfail(void)
{
	struct words *queue[QSIZE];
	int head = 0;		/* index of the oldest entry */
	int tail = 0;		/* index of the next free slot */
	int count = 0;		/* entries currently queued */
	struct words *state;
	int bstart;		/* set once the top level has been tried */
	char c;
	struct words *s;

	for (s = w; s != NULL; s = s->link) {
		if (s->inp != 0) {
			if (count == QSIZE)
				overflo();
			queue[tail] = s->nst;
			tail = (tail + 1) % QSIZE;
			count++;
		}
	}

	while (count > 0) {
		s = queue[head];
		head = (head + 1) % QSIZE;
		count--;
		for (; s != NULL; s = s->link) {
			if ((c = s->inp) == 0)
				continue;
			bstart = 0;
			q = s->nst;
			if (count == QSIZE)
				overflo();
			queue[tail] = q;
			tail = (tail + 1) % QSIZE;
			count++;
			state = s->fail;
			for (;;) {
				if (state == NULL) {
					state = w;
					bstart = 1;
				}
				if (state->inp == c) {
					do {
						q->fail = state->nst;
						if ((state->nst)->out != 0 && q->out == 0)
							q->out = (state->nst)->out;
					} while ((q = q->link) != NULL);
					break;
				}
				state = state->link;
				if (state != NULL)
					continue;
				if (bstart)
					break;
				/* state is NULL: the next pass restarts at the top level. */
			}
		}
	}
}

/*
 * Normalise `ccount' characters starting at `p' in place: upper case
 * becomes lower case, white space becomes a blank, '.', '?' and '!'
 * become '.' (each one counted in nsent), and any other punctuation
 * becomes a blank.
 */
void
convert(char *p, int ccount)
{
	int ct;
	char *pt;

	for (pt = p, ct = ccount; --ct >= 0; pt++) {
		/* <ctype.h> functions need an unsigned char value. */
		unsigned char ch = (unsigned char)*pt;

		if (isupper(ch))
			*pt = (char)tolower(ch);
		else if (isspace(ch))
			*pt = ' ';
		else if (ch == '.' || ch == '?' || ch == '!') {
			*pt = '.';
			nsent++;
		} else if (ispunct(ch))
			*pt = ' ';
	}
}

/*
 * Print the buffered text from nlp up to (not including) `addr',
 * advancing nlp.  '[' is written before the character at begp and ']'
 * after the character at endp.  Once more than 70 characters have been
 * written since the last break, the line is broken in front of the next
 * blank that is not itself a match boundary.
 */
void
outc(char *addr)
{
	while (nlp < addr) {
		if (oct++ > 70 && *nlp == ' ' && nlp != begp && nlp != endp) {
			oct = 0;
			putchar('\n');
		}
		if (nlp == begp) {
			putchar('[');
		}
		putchar(*nlp);
		if (nlp == endp) {
			putchar(']');
		}
		nlp++;
	}
}