1 #ifndef lint 2 static char sccsid[] = "@(#)b.c 4.2 08/11/83"; 3 #endif 4 5 #include "awk.def" 6 #include "stdio.h" 7 #include "awk.h" 8 9 extern node *op2(); 10 extern struct fa *cgotofn(); 11 #define MAXLIN 256 12 #define NCHARS 128 13 #define NSTATES 256 14 15 #define type(v) v->nobj 16 #define left(v) v->narg[0] 17 #define right(v) v->narg[1] 18 #define parent(v) v->nnext 19 20 #define LEAF case CCL: case NCCL: case CHAR: case DOT: 21 #define UNARY case FINAL: case STAR: case PLUS: case QUEST: 22 23 /* encoding in tree nodes: 24 leaf (CCL, NCCL, CHAR, DOT): left is index, right contains value or pointer to value 25 unary (FINAL, STAR, PLUS, QUEST): left is child, right is null 26 binary (CAT, OR): left and right are children 27 parent contains pointer to parent 28 */ 29 30 struct fa { 31 int cch; 32 struct fa *st; 33 }; 34 35 int *state[NSTATES]; 36 int *foll[MAXLIN]; 37 char chars[MAXLIN]; 38 int setvec[MAXLIN]; 39 node *point[MAXLIN]; 40 41 int setcnt; 42 int line; 43 44 45 struct fa *makedfa(p) /* returns dfa for tree pointed to by p */ 46 node *p; 47 { 48 node *p1; 49 struct fa *fap; 50 p1 = op2(CAT, op2(STAR, op2(DOT, (node *) 0, (node *) 0), (node *) 0), p); 51 /* put DOT STAR in front of reg. exp. */ 52 p1 = op2(FINAL, p1, (node *) 0); /* install FINAL node */ 53 54 line = 0; 55 penter(p1); /* enter parent pointers and leaf indices */ 56 point[line] = p1; /* FINAL node */ 57 setvec[0] = 1; /* for initial DOT STAR */ 58 cfoll(p1); /* set up follow sets */ 59 fap = cgotofn(); 60 freetr(p1); /* add this when alloc works */ 61 return(fap); 62 } 63 64 penter(p) /* set up parent pointers and leaf indices */ 65 node *p; 66 { 67 switch(type(p)) { 68 LEAF 69 left(p) = (node *) line; 70 point[line++] = p; 71 break; 72 UNARY 73 penter(left(p)); 74 parent(left(p)) = p; 75 break; 76 case CAT: 77 case OR: 78 penter(left(p)); 79 penter(right(p)); 80 parent(left(p)) = p; 81 parent(right(p)) = p; 82 break; 83 default: 84 error(FATAL, "unknown type %d in penter\n", type(p)); 85 break; 86 } 87 } 88 89 freetr(p) /* free parse tree and follow sets */ 90 node *p; 91 { 92 switch(type(p)) { 93 LEAF 94 xfree(foll[(int) left(p)]); 95 xfree(p); 96 break; 97 UNARY 98 freetr(left(p)); 99 xfree(p); 100 break; 101 case CAT: 102 case OR: 103 freetr(left(p)); 104 freetr(right(p)); 105 xfree(p); 106 break; 107 default: 108 error(FATAL, "unknown type %d in freetr", type(p)); 109 break; 110 } 111 } 112 char *cclenter(p) 113 register char *p; 114 { 115 register i, c; 116 char *op; 117 118 op = p; 119 i = 0; 120 while ((c = *p++) != 0) { 121 if (c == '-' && i > 0 && chars[i-1] != 0) { 122 if (*p != 0) { 123 c = chars[i-1]; 124 while (c < *p) { 125 if (i >= MAXLIN) 126 overflo(); 127 chars[i++] = ++c; 128 } 129 p++; 130 continue; 131 } 132 } 133 if (i >= MAXLIN) 134 overflo(); 135 chars[i++] = c; 136 } 137 chars[i++] = '\0'; 138 dprintf("cclenter: in = |%s|, out = |%s|\n", op, chars, NULL); 139 xfree(op); 140 return(tostring(chars)); 141 } 142 143 overflo() 144 { 145 error(FATAL, "regular expression too long\n"); 146 } 147 148 cfoll(v) /* enter follow set of each leaf of vertex v into foll[leaf] */ 149 register node *v; 150 { 151 register i; 152 int prev; 153 int *add(); 154 155 switch(type(v)) { 156 LEAF 157 setcnt = 0; 158 for (i=1; i<=line; i++) 159 setvec[i] = 0; 160 follow(v); 161 if (notin(foll, ( (int) left(v))-1, &prev)) { 162 foll[(int) left(v)] = add(setcnt); 163 } 164 else 165 foll[ (int) left(v)] = foll[prev]; 166 break; 167 UNARY 168 cfoll(left(v)); 169 break; 170 case CAT: 171 case OR: 172 cfoll(left(v)); 173 cfoll(right(v)); 174 break; 175 default: 176 error(FATAL, "unknown type %d in cfoll", type(v)); 177 } 178 } 179 180 first(p) /* collects initially active leaves of p into setvec */ 181 register node *p; /* returns 0 or 1 depending on whether p matches empty string */ 182 { 183 register b; 184 185 switch(type(p)) { 186 LEAF 187 if (setvec[(int) left(p)] != 1) { 188 setvec[(int) left(p)] = 1; 189 setcnt++; 190 } 191 if (type(p) == CCL && (*(char *) right(p)) == '\0') 192 return(0); /* empty CCL */ 193 else return(1); 194 case FINAL: 195 case PLUS: 196 if (first(left(p)) == 0) return(0); 197 return(1); 198 case STAR: 199 case QUEST: 200 first(left(p)); 201 return(0); 202 case CAT: 203 if (first(left(p)) == 0 && first(right(p)) == 0) return(0); 204 return(1); 205 case OR: 206 b = first(right(p)); 207 if (first(left(p)) == 0 || b == 0) return(0); 208 return(1); 209 } 210 error(FATAL, "unknown type %d in first\n", type(p)); 211 return(-1); 212 } 213 214 follow(v) 215 node *v; /* collects leaves that can follow v into setvec */ 216 { 217 node *p; 218 219 if (type(v) == FINAL) 220 return; 221 p = parent(v); 222 switch (type(p)) { 223 case STAR: 224 case PLUS: first(v); 225 follow(p); 226 return; 227 228 case OR: 229 case QUEST: follow(p); 230 return; 231 232 case CAT: if (v == left(p)) { /* v is left child of p */ 233 if (first(right(p)) == 0) { 234 follow(p); 235 return; 236 } 237 } 238 else /* v is right child */ 239 follow(p); 240 return; 241 case FINAL: if (setvec[line] != 1) { 242 setvec[line] = 1; 243 setcnt++; 244 } 245 return; 246 } 247 } 248 249 member(c, s) /* is c in s? */ 250 register char c, *s; 251 { 252 while (*s) 253 if (c == *s++) 254 return(1); 255 return(0); 256 } 257 258 notin(array, n, prev) /* is setvec in array[0] thru array[n]? */ 259 int **array; 260 int *prev; { 261 register i, j; 262 int *ptr; 263 for (i=0; i<=n; i++) { 264 ptr = array[i]; 265 if (*ptr == setcnt) { 266 for (j=0; j < setcnt; j++) 267 if (setvec[*(++ptr)] != 1) goto nxt; 268 *prev = i; 269 return(0); 270 } 271 nxt: ; 272 } 273 return(1); 274 } 275 276 int *add(n) { /* remember setvec */ 277 int *ptr, *p; 278 register i; 279 if ((p = ptr = (int *) malloc((n+1)*sizeof(int))) == NULL) 280 overflo(); 281 *ptr = n; 282 dprintf("add(%d)\n", n, NULL, NULL); 283 for (i=1; i <= line; i++) 284 if (setvec[i] == 1) { 285 *(++ptr) = i; 286 dprintf(" ptr = %o, *ptr = %d, i = %d\n", ptr, *ptr, i); 287 } 288 dprintf("\n", NULL, NULL, NULL); 289 return(p); 290 } 291 292 struct fa *cgotofn() 293 { 294 register i, k; 295 register int *ptr; 296 char c; 297 char *p; 298 node *cp; 299 int j, n, s, ind, numtrans; 300 int finflg; 301 int curpos, num, prev; 302 struct fa *where[NSTATES]; 303 304 int fatab[257]; 305 struct fa *pfa; 306 307 char index[MAXLIN]; 308 char iposns[MAXLIN]; 309 int sposns[MAXLIN]; 310 int spmax, spinit; 311 312 char symbol[NCHARS]; 313 char isyms[NCHARS]; 314 char ssyms[NCHARS]; 315 int ssmax, ssinit; 316 317 for (i=0; i<=line; i++) index[i] = iposns[i] = setvec[i] = 0; 318 for (i=0; i<NCHARS; i++) isyms[i] = symbol[i] = 0; 319 setcnt = 0; 320 /* compute initial positions and symbols of state 0 */ 321 ssmax = 0; 322 ptr = state[0] = foll[0]; 323 spinit = *ptr; 324 for (i=0; i<spinit; i++) { 325 curpos = *(++ptr); 326 sposns[i] = curpos; 327 iposns[curpos] = 1; 328 cp = point[curpos]; 329 dprintf("i = %d, spinit = %d, curpos = %d\n", i, spinit, curpos); 330 switch (type(cp)) { 331 case CHAR: 332 k = (int) right(cp); 333 if (isyms[k] != 1) { 334 isyms[k] = 1; 335 ssyms[ssmax++] = k; 336 } 337 break; 338 case DOT: 339 for (k=1; k<NCHARS; k++) { 340 if (k != HAT) { 341 if (isyms[k] != 1) { 342 isyms[k] = 1; 343 ssyms[ssmax++] = k; 344 } 345 } 346 } 347 break; 348 case CCL: 349 for (p = (char *) right(cp); *p; p++) { 350 if (*p != HAT) { 351 if (isyms[*p] != 1) { 352 isyms[*p] = 1; 353 ssyms[ssmax++] = *p; 354 } 355 } 356 } 357 break; 358 case NCCL: 359 for (k=1; k<NCHARS; k++) { 360 if (k != HAT && !member(k, (char *) right(cp))) { 361 if (isyms[k] != 1) { 362 isyms[k] = 1; 363 ssyms[ssmax++] = k; 364 } 365 } 366 } 367 } 368 } 369 ssinit = ssmax; 370 n = 0; 371 for (s=0; s<=n; s++) { 372 dprintf("s = %d\n", s, NULL, NULL); 373 ind = 0; 374 numtrans = 0; 375 finflg = 0; 376 if (*(state[s] + *state[s]) == line) { /* s final? */ 377 finflg = 1; 378 goto tenter; 379 } 380 spmax = spinit; 381 ssmax = ssinit; 382 ptr = state[s]; 383 num = *ptr; 384 for (i=0; i<num; i++) { 385 curpos = *(++ptr); 386 if (iposns[curpos] != 1 && index[curpos] != 1) { 387 index[curpos] = 1; 388 sposns[spmax++] = curpos; 389 } 390 cp = point[curpos]; 391 switch (type(cp)) { 392 case CHAR: 393 k = (int) right(cp); 394 if (isyms[k] == 0 && symbol[k] == 0) { 395 symbol[k] = 1; 396 ssyms[ssmax++] = k; 397 } 398 break; 399 case DOT: 400 for (k=1; k<NCHARS; k++) { 401 if (k != HAT) { 402 if (isyms[k] == 0 && symbol[k] == 0) { 403 symbol[k] = 1; 404 ssyms[ssmax++] = k; 405 } 406 } 407 } 408 break; 409 case CCL: 410 for (p = (char *) right(cp); *p; p++) { 411 if (*p != HAT) { 412 if (isyms[*p] == 0 && symbol[*p] == 0) { 413 symbol[*p] = 1; 414 ssyms[ssmax++] = *p; 415 } 416 } 417 } 418 break; 419 case NCCL: 420 for (k=1; k<NCHARS; k++) { 421 if (k != HAT && !member(k, (char *) right(cp))) { 422 if (isyms[k] == 0 && symbol[k] == 0) { 423 symbol[k] = 1; 424 ssyms[ssmax++] = k; 425 } 426 } 427 } 428 } 429 } 430 for (j=0; j<ssmax; j++) { /* nextstate(s, ssyms[j]) */ 431 c = ssyms[j]; 432 symbol[c] = 0; 433 setcnt = 0; 434 for (k=0; k<=line; k++) setvec[k] = 0; 435 for (i=0; i<spmax; i++) { 436 index[sposns[i]] = 0; 437 cp = point[sposns[i]]; 438 if ((k = type(cp)) != FINAL) 439 if (k == CHAR && c == (int) right(cp) 440 || k == DOT 441 || k == CCL && member(c, (char *) right(cp)) 442 || k == NCCL && !member(c, (char *) right(cp))) { 443 ptr = foll[sposns[i]]; 444 num = *ptr; 445 for (k=0; k<num; k++) { 446 if (setvec[*(++ptr)] != 1 447 && iposns[*ptr] != 1) { 448 setvec[*ptr] = 1; 449 setcnt++; 450 } 451 } 452 } 453 } /* end nextstate */ 454 if (notin(state, n, &prev)) { 455 if (n >= NSTATES) { 456 dprintf("cgotofn: notin; state = %d, n = %d\n", state, n, NULL); 457 overflo(); 458 } 459 state[++n] = add(setcnt); 460 dprintf(" delta(%d,%o) = %d", s,c,n); 461 dprintf(", ind = %d\n", ind+1, NULL, NULL); 462 fatab[++ind] = c; 463 fatab[++ind] = n; 464 numtrans++; 465 } 466 else { 467 if (prev != 0) { 468 dprintf(" delta(%d,%o) = %d", s,c,prev); 469 dprintf(", ind = %d\n", ind+1, NULL, NULL); 470 fatab[++ind] = c; 471 fatab[++ind] = prev; 472 numtrans++; 473 } 474 } 475 } 476 tenter: 477 if ((pfa = (struct fa *) malloc((numtrans + 1) * sizeof(struct fa))) == NULL) 478 overflo(); 479 where[s] = pfa; 480 if (finflg) 481 pfa->cch = -1; /* s is a final state */ 482 else 483 pfa->cch = numtrans; 484 pfa->st = 0; 485 for (i=1, pfa += 1; i<=numtrans; i++, pfa++) { 486 pfa->cch = fatab[2*i-1]; 487 pfa->st = (struct fa *) fatab[2*i]; 488 } 489 } 490 for (i=0; i<=n; i++) { 491 xfree(state[i]); /* free state[i] */ 492 pfa = where[i]; 493 pfa->st = where[0]; 494 dprintf("state %d: (%o)\n", i, pfa, NULL); 495 dprintf(" numtrans = %d, default = %o\n", pfa->cch, pfa->st, NULL); 496 for (k=1; k<=pfa->cch; k++) { 497 (pfa+k)->st = where[ (int) (pfa+k)->st]; 498 dprintf(" char = %o, nextstate = %o\n",(pfa+k)->cch, (pfa+k)->st, NULL); 499 } 500 } 501 pfa = where[0]; 502 if ((num = pfa->cch) < 0) 503 return(where[0]); 504 for (pfa += num; num; num--, pfa--) 505 if (pfa->cch == HAT) { 506 return(pfa->st); 507 } 508 return(where[0]); 509 } 510 511 match(pfa, p) 512 register struct fa *pfa; 513 register char *p; 514 { 515 register count; 516 char c; 517 if (p == 0) return(0); 518 if (pfa->cch == 1) { /* fast test for first character, if possible */ 519 c = (++pfa)->cch; 520 do 521 if (c == *p) { 522 p++; 523 pfa = pfa->st; 524 goto adv; 525 } 526 while (*p++ != 0); 527 return(0); 528 } 529 adv: if ((count = pfa->cch) < 0) return(1); 530 do { 531 for (pfa += count; count; count--, pfa--) 532 if (pfa->cch == *p) { 533 break; 534 } 535 pfa = pfa->st; 536 if ((count = pfa->cch) < 0) return(1); 537 } while(*p++ != 0); 538 return(0); 539 } 540