1 #ifndef lint 2 static char sccsid[] = "@(#)b.c 4.3 07/07/86"; 3 #endif 4 5 #include "awk.def" 6 #include "stdio.h" 7 #include "awk.h" 8 9 extern node *op2(); 10 extern struct fa *cgotofn(); 11 #define MAXLIN 256 12 #define NCHARS 128 13 #define NSTATES 256 14 15 #define type(v) v->nobj 16 #define left(v) v->narg[0] 17 #define right(v) v->narg[1] 18 #define parent(v) v->nnext 19 20 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 21 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 22 23 /* encoding in tree nodes: 24 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 25 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 26 binary (CAT, OR): left and right are children 27 parent contains pointer to parent 28 */ 29 30 struct fa { 31 int cch; 32 struct fa *st; 33 }; 34 35 int *state[NSTATES]; 36 int *foll[MAXLIN]; 37 char chars[MAXLIN]; 38 int setvec[MAXLIN]; 39 node *point[MAXLIN]; 40 41 int setcnt; 42 int line; 43 int maxfoll; /* index of highest foll[] entry set by cfoll() */ 44 45 46 struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 47 node *p; 48 { 49 node *p1; 50 struct fa *fap; 51 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 52 /* put DOT STAR in front of reg. exp. */ 53 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 54 55 line = 0; 56 penter(p1); /* enter parent pointers and leaf indices */ 57 point[line] = p1; /* FINAL node */ 58 setvec[0] = 1; /* for initial DOT STAR */ 59 cfoll(p1); /* set up follow sets */ 60 fap = cgotofn(); 61 freetr(p1); /* add this when alloc works */ 62 return(fap); 63 } 64 65 penter(p) /* set up parent pointers and leaf indices */ 66 node *p; 67 { 68 switch(type(p)) { 69 LEAF 70 left(p) = (node *) line; 71 point[line++] = p; 72 break; 73 UNARY 74 penter(left(p)); 75 parent(left(p)) = p; 76 break; 77 case CAT: 78 case OR: 79 penter(left(p)); 80 penter(right(p)); 81 parent(left(p)) = p; 82 parent(right(p)) = p; 83 break; 84 default: 85 error(FATAL, "unknown type %d in penter\n", type(p)); 86 break; 87 } 88 } 89 90 freetr(p) /* free parse tree and follow sets */ 91 node *p; 92 { 93 switch(type(p)) { 94 LEAF 95 foll_free((int) left(p)); 96 xfree(p); 97 break; 98 UNARY 99 freetr(left(p)); 100 xfree(p); 101 break; 102 case CAT: 103 case OR: 104 freetr(left(p)); 105 freetr(right(p)); 106 xfree(p); 107 break; 108 default: 109 error(FATAL, "unknown type %d in freetr", type(p)); 110 break; 111 } 112 } 113 char *cclenter(p) 114 register char *p; 115 { 116 register i, c; 117 char *op; 118 119 op = p; 120 i = 0; 121 while ((c = *p++) != 0) { 122 if (c == '-' && i > 0 && chars[i-1] != 0) { 123 if (*p != 0) { 124 c = chars[i-1]; 125 while (c < *p) { 126 if (i >= MAXLIN) 127 overflo(); 128 chars[i++] = ++c; 129 } 130 p++; 131 continue; 132 } 133 } 134 if (i >= MAXLIN) 135 overflo(); 136 chars[i++] = c; 137 } 138 chars[i++] = '\0'; 139 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 140 xfree(op); 141 return(tostring(chars)); 142 } 143 144 overflo() 145 { 146 error(FATAL, "regular expression too long\n"); 147 } 148 149 cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 150 register node *v; 151 { 152 register i; 153 int prev; 154 int *add(); 155 156 maxfoll = -1; 157 switch(type(v)) { 158 LEAF 159 setcnt = 0; 160 for (i=1; i<=line; i++) 161 setvec[i] = 0; 162 follow(v); 163 if (notin(foll, ( (int) left(v))-1, &prev)) { 164 foll[(int) left(v)] = add(setcnt); 165 } 166 else 167 foll[ (int) left(v)] = foll[prev]; 168 if ((int)left(v) > maxfoll) 169 maxfoll = (int)left(v); 170 break; 171 UNARY 172 cfoll(left(v)); 173 break; 174 case CAT: 175 case OR: 176 cfoll(left(v)); 177 cfoll(right(v)); 178 break; 179 default: 180 error(FATAL, "unknown type %d in cfoll", type(v)); 181 } 182 } 183 184 first(p) /* collects initially active leaves of p into setvec */ 185 register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 186 { 187 register b; 188 189 switch(type(p)) { 190 LEAF 191 if (setvec[(int) left(p)] != 1) { 192 setvec[(int) left(p)] = 1; 193 setcnt++; 194 } 195 if (type(p) == CCL && (*(char *) right(p)) == '\0') 196 return(0); /* empty CCL */ 197 else return(1); 198 case FINAL: 199 case PLUS: 200 if (first(left(p)) == 0) return(0); 201 return(1); 202 case STAR: 203 case QUEST: 204 first(left(p)); 205 return(0); 206 case CAT: 207 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 208 return(1); 209 case OR: 210 b = first(right(p)); 211 if (first(left(p)) == 0 || b == 0) return(0); 212 return(1); 213 } 214 error(FATAL, "unknown type %d in first\n", type(p)); 215 return(-1); 216 } 217 218 follow(v) 219 node *v; /* collects leaves that can follow v into setvec */ 220 { 221 node *p; 222 223 if (type(v) == FINAL) 224 return; 225 p = parent(v); 226 switch (type(p)) { 227 case STAR: 228 case PLUS: first(v); 229 follow(p); 230 return; 231 232 case OR: 233 case QUEST: follow(p); 234 return; 235 236 case CAT: if (v == left(p)) { /* v is left child of p */ 237 if (first(right(p)) == 0) { 238 follow(p); 239 return; 240 } 241 } 242 else /* v is right child */ 243 follow(p); 244 return; 245 case FINAL: if (setvec[line] != 1) { 246 setvec[line] = 1; 247 setcnt++; 248 } 249 return; 250 } 251 } 252 253 member(c, s) /* is c in s? */ 254 register char c, *s; 255 { 256 while (*s) 257 if (c == *s++) 258 return(1); 259 return(0); 260 } 261 262 notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 263 int **array; 264 int *prev; { 265 register i, j; 266 int *ptr; 267 for (i=0; i<=n; i++) { 268 ptr = array[i]; 269 if (*ptr == setcnt) { 270 for (j=0; j < setcnt; j++) 271 if (setvec[*(++ptr)] != 1) goto nxt; 272 *prev = i; 273 return(0); 274 } 275 nxt: ; 276 } 277 return(1); 278 } 279 280 int *add(n) { /* remember setvec */ 281 int *ptr, *p; 282 register i; 283 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 284 overflo(); 285 *ptr = n; 286 dprintf("add(%d)\n", n, NULL, NULL); 287 for (i=1; i <= line; i++) 288 if (setvec[i] == 1) { 289 *(++ptr) = i; 290 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 291 } 292 dprintf("\n", NULL, NULL, NULL); 293 return(p); 294 } 295 296 struct fa *cgotofn() 297 { 298 register i, k; 299 register int *ptr; 300 char c; 301 char *p; 302 node *cp; 303 int j, n, s, ind, numtrans; 304 int finflg; 305 int curpos, num, prev; 306 struct fa *where[NSTATES]; 307 308 int fatab[257]; 309 struct fa *pfa; 310 311 char index[MAXLIN]; 312 char iposns[MAXLIN]; 313 int sposns[MAXLIN]; 314 int spmax, spinit; 315 316 char symbol[NCHARS]; 317 char isyms[NCHARS]; 318 char ssyms[NCHARS]; 319 int ssmax, ssinit; 320 321 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 322 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 323 setcnt = 0; 324 /* compute initial positions and symbols of state 0 */ 325 ssmax = 0; 326 ptr = state[0] = foll[0]; 327 spinit = *ptr; 328 for (i=0; i<spinit; i++) { 329 curpos = *(++ptr); 330 sposns[i] = curpos; 331 iposns[curpos] = 1; 332 cp = point[curpos]; 333 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 334 switch (type(cp)) { 335 case CHAR: 336 k = (int) right(cp); 337 if (isyms[k] != 1) { 338 isyms[k] = 1; 339 ssyms[ssmax++] = k; 340 } 341 break; 342 case DOT: 343 for (k=1; k<NCHARS; k++) { 344 if (k != HAT) { 345 if (isyms[k] != 1) { 346 isyms[k] = 1; 347 ssyms[ssmax++] = k; 348 } 349 } 350 } 351 break; 352 case CCL: 353 for (p = (char *) right(cp); *p; p++) { 354 if (*p != HAT) { 355 if (isyms[*p] != 1) { 356 isyms[*p] = 1; 357 ssyms[ssmax++] = *p; 358 } 359 } 360 } 361 break; 362 case NCCL: 363 for (k=1; k<NCHARS; k++) { 364 if (k != HAT && !member(k, (char *) right(cp))) { 365 if (isyms[k] != 1) { 366 isyms[k] = 1; 367 ssyms[ssmax++] = k; 368 } 369 } 370 } 371 } 372 } 373 ssinit = ssmax; 374 n = 0; 375 for (s=0; s<=n; s++) { 376 dprintf("s = %d\n", s, NULL, NULL); 377 ind = 0; 378 numtrans = 0; 379 finflg = 0; 380 if (*(state[s] + *state[s]) == line) { /* s final? */ 381 finflg = 1; 382 goto tenter; 383 } 384 spmax = spinit; 385 ssmax = ssinit; 386 ptr = state[s]; 387 num = *ptr; 388 for (i=0; i<num; i++) { 389 curpos = *(++ptr); 390 if (iposns[curpos] != 1 && index[curpos] != 1) { 391 index[curpos] = 1; 392 sposns[spmax++] = curpos; 393 } 394 cp = point[curpos]; 395 switch (type(cp)) { 396 case CHAR: 397 k = (int) right(cp); 398 if (isyms[k] == 0 && symbol[k] == 0) { 399 symbol[k] = 1; 400 ssyms[ssmax++] = k; 401 } 402 break; 403 case DOT: 404 for (k=1; k<NCHARS; k++) { 405 if (k != HAT) { 406 if (isyms[k] == 0 && symbol[k] == 0) { 407 symbol[k] = 1; 408 ssyms[ssmax++] = k; 409 } 410 } 411 } 412 break; 413 case CCL: 414 for (p = (char *) right(cp); *p; p++) { 415 if (*p != HAT) { 416 if (isyms[*p] == 0 && symbol[*p] == 0) { 417 symbol[*p] = 1; 418 ssyms[ssmax++] = *p; 419 } 420 } 421 } 422 break; 423 case NCCL: 424 for (k=1; k<NCHARS; k++) { 425 if (k != HAT && !member(k, (char *) right(cp))) { 426 if (isyms[k] == 0 && symbol[k] == 0) { 427 symbol[k] = 1; 428 ssyms[ssmax++] = k; 429 } 430 } 431 } 432 } 433 } 434 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 435 c = ssyms[j]; 436 symbol[c] = 0; 437 setcnt = 0; 438 for (k=0; k<=line; k++) setvec[k] = 0; 439 for (i=0; i<spmax; i++) { 440 index[sposns[i]] = 0; 441 cp = point[sposns[i]]; 442 if ((k = type(cp)) != FINAL) 443 if (k == CHAR && c == (int) right(cp) 444 || k == DOT 445 || k == CCL && member(c, (char *) right(cp)) 446 || k == NCCL && !member(c, (char *) right(cp))) { 447 ptr = foll[sposns[i]]; 448 num = *ptr; 449 for (k=0; k<num; k++) { 450 if (setvec[*(++ptr)] != 1 451 && iposns[*ptr] != 1) { 452 setvec[*ptr] = 1; 453 setcnt++; 454 } 455 } 456 } 457 } /* end nextstate */ 458 if (notin(state, n, &prev)) { 459 if (n >= NSTATES) { 460 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 461 overflo(); 462 } 463 state[++n] = add(setcnt); 464 dprintf(" delta(%d,%o) = %d", s,c,n); 465 dprintf(", ind = %d\n", ind+1, NULL, NULL); 466 fatab[++ind] = c; 467 fatab[++ind] = n; 468 numtrans++; 469 } 470 else { 471 if (prev != 0) { 472 dprintf(" delta(%d,%o) = %d", s,c,prev); 473 dprintf(", ind = %d\n", ind+1, NULL, NULL); 474 fatab[++ind] = c; 475 fatab[++ind] = prev; 476 numtrans++; 477 } 478 } 479 } 480 tenter: 481 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 482 overflo(); 483 where[s] = pfa; 484 if (finflg) 485 pfa->cch = -1; /* s is a final state */ 486 else 487 pfa->cch = numtrans; 488 pfa->st = 0; 489 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 490 pfa->cch = fatab[2*i-1]; 491 pfa->st = (struct fa *) fatab[2*i]; 492 } 493 } 494 for (i=0; i<=n; i++) { 495 /* N.b. state[0] == foll[0], not separately allocated */ 496 if (i>0) 497 xfree(state[i]); /* free state[i] */ 498 pfa = where[i]; 499 pfa->st = where[0]; 500 dprintf("state %d: (%o)\n", i, pfa, NULL); 501 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 502 for (k=1; k<=pfa->cch; k++) { 503 (pfa+k)->st = where[ (int) (pfa+k)->st]; 504 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 505 } 506 } 507 pfa = where[0]; 508 if ((num = pfa->cch) < 0) 509 return(where[0]); 510 for (pfa += num; num; num--, pfa--) 511 if (pfa->cch == HAT) { 512 return(pfa->st); 513 } 514 return(where[0]); 515 } 516 517 match(pfa, p) 518 register struct fa *pfa; 519 register char *p; 520 { 521 register count; 522 char c; 523 if (p == 0) return(0); 524 if (pfa->cch == 1) { /* fast test for first character, if possible */ 525 c = (++pfa)->cch; 526 do 527 if (c == *p) { 528 p++; 529 pfa = pfa->st; 530 goto adv; 531 } 532 while (*p++ != 0); 533 return(0); 534 } 535 adv: if ((count = pfa->cch) < 0) return(1); 536 do { 537 for (pfa += count; count; count--, pfa--) 538 if (pfa->cch == *p) { 539 break; 540 } 541 pfa = pfa->st; 542 if ((count = pfa->cch) < 0) return(1); 543 } while(*p++ != 0); 544 return(0); 545 } 546 547 /* 548 * Free foll[i], taking into account identical foll[] entries. 549 * This is necessary because cfoll() uses the same physical follow set for 550 * several foll[] entries when the set is identical. Called by freetr(). 551 */ 552 foll_free(i) 553 int i; 554 { 555 register int j; 556 int *p = foll[i]; 557 if (p==NULL) return; 558 for (j=0; j<=maxfoll; j++) 559 if (foll[j]==p) foll[j]=NULL; 560 xfree(p); 561 } 562