1 /* $NetBSD: lexer.c,v 1.2 2014/12/20 13:15:48 prlw1 Exp $ */ 2 3 /* 4 * Copyright (C) 2012 by Darren Reed. 5 * 6 * See the IPFILTER.LICENCE file for details on licencing. 7 */ 8 #include <ctype.h> 9 #include "ipf.h" 10 #ifdef IPFILTER_SCAN 11 # include "netinet/ip_scan.h" 12 #endif 13 #include <sys/ioctl.h> 14 #include <syslog.h> 15 #ifdef TEST_LEXER 16 # define NO_YACC 17 union { 18 int num; 19 char *str; 20 struct in_addr ipa; 21 i6addr_t ip6; 22 } yylval; 23 #endif 24 #include "lexer.h" 25 #include "y.tab.h" 26 27 FILE *yyin; 28 29 #define ishex(c) (ISDIGIT(c) || ((c) >= 'a' && (c) <= 'f') || \ 30 ((c) >= 'A' && (c) <= 'F')) 31 #define TOOLONG -3 32 33 extern int string_start; 34 extern int string_end; 35 extern char *string_val; 36 extern int pos; 37 extern int yydebug; 38 39 char *yystr = NULL; 40 int yytext[YYBUFSIZ+1]; 41 char yychars[YYBUFSIZ+1]; 42 int yylineNum = 1; 43 int yypos = 0; 44 int yylast = -1; 45 int yydictfixed = 0; 46 int yyexpectaddr = 0; 47 int yybreakondot = 0; 48 int yyvarnext = 0; 49 int yytokentype = 0; 50 wordtab_t *yywordtab = NULL; 51 int yysavedepth = 0; 52 wordtab_t *yysavewords[30]; 53 54 55 static wordtab_t *yyfindkey __P((char *)); 56 static int yygetc __P((int)); 57 static void yyunputc __P((int)); 58 static int yyswallow __P((int)); 59 static char *yytexttostr __P((int, int)); 60 static void yystrtotext __P((char *)); 61 static char *yytexttochar __P((void)); 62 63 static int yygetc(docont) 64 int docont; 65 { 66 int c; 67 68 if (yypos < yylast) { 69 c = yytext[yypos++]; 70 if (c == '\n') 71 yylineNum++; 72 return c; 73 } 74 75 if (yypos == YYBUFSIZ) 76 return TOOLONG; 77 78 if (pos >= string_start && pos <= string_end) { 79 c = string_val[pos - string_start]; 80 yypos++; 81 } else { 82 c = fgetc(yyin); 83 if (docont && (c == '\\')) { 84 c = fgetc(yyin); 85 if (c == '\n') { 86 yylineNum++; 87 c = fgetc(yyin); 88 } 89 } 90 } 91 if (c == '\n') 92 yylineNum++; 93 yytext[yypos++] = c; 94 yylast = yypos; 95 yytext[yypos] = '\0'; 96 97 return c; 98 } 99 100 101 static void yyunputc(c) 102 int c; 103 { 104 if (c == '\n') 105 yylineNum--; 106 yytext[--yypos] = c; 107 } 108 109 110 static int yyswallow(last) 111 int last; 112 { 113 int c; 114 115 while (((c = yygetc(0)) > '\0') && (c != last)) 116 ; 117 118 if (c != EOF) 119 yyunputc(c); 120 if (c == last) 121 return 0; 122 return -1; 123 } 124 125 126 static char *yytexttochar() 127 { 128 int i; 129 130 for (i = 0; i < yypos; i++) 131 yychars[i] = (char)(yytext[i] & 0xff); 132 yychars[i] = '\0'; 133 return yychars; 134 } 135 136 137 static void yystrtotext(str) 138 char *str; 139 { 140 int len; 141 char *s; 142 143 len = strlen(str); 144 if (len > YYBUFSIZ) 145 len = YYBUFSIZ; 146 147 for (s = str; *s != '\0' && len > 0; s++, len--) 148 yytext[yylast++] = *s; 149 yytext[yylast] = '\0'; 150 } 151 152 153 static char *yytexttostr(offset, max) 154 int offset, max; 155 { 156 char *str; 157 int i; 158 159 if ((yytext[offset] == '\'' || yytext[offset] == '"') && 160 (yytext[offset] == yytext[offset + max - 1])) { 161 offset++; 162 max--; 163 } 164 165 if (max > yylast) 166 max = yylast; 167 str = malloc(max + 1); 168 if (str != NULL) { 169 for (i = offset; i < max; i++) 170 str[i - offset] = (char)(yytext[i] & 0xff); 171 str[i - offset] = '\0'; 172 } 173 return str; 174 } 175 176 177 int yylex() 178 { 179 #ifdef USE_INET6 180 static int prior = 0; 181 static int priornum = 0; 182 #endif 183 int c, n, isbuilding, rval, lnext, nokey = 0; 184 char *name; 185 int triedv6 = 0; 186 187 isbuilding = 0; 188 lnext = 0; 189 rval = 0; 190 191 if (yystr != NULL) { 192 free(yystr); 193 yystr = NULL; 194 } 195 196 nextchar: 197 c = yygetc(0); 198 if (yydebug > 1) 199 printf("yygetc = (%x) %c [%*.*s]\n", 200 c, c, yypos, yypos, yytexttochar()); 201 202 switch (c) 203 { 204 case '\n' : 205 lnext = 0; 206 nokey = 0; 207 case '\t' : 208 case '\r' : 209 case ' ' : 210 if (isbuilding == 1) { 211 yyunputc(c); 212 goto done; 213 } 214 if (yylast > yypos) { 215 bcopy(yytext + yypos, yytext, 216 sizeof(yytext[0]) * (yylast - yypos + 1)); 217 } 218 yylast -= yypos; 219 if (yyexpectaddr == 2) 220 yyexpectaddr = 0; 221 yypos = 0; 222 lnext = 0; 223 nokey = 0; 224 goto nextchar; 225 226 case '\\' : 227 if (lnext == 0) { 228 lnext = 1; 229 if (yylast == yypos) { 230 yylast--; 231 yypos--; 232 } else 233 yypos--; 234 if (yypos == 0) 235 nokey = 1; 236 goto nextchar; 237 } 238 break; 239 } 240 241 if (lnext == 1) { 242 lnext = 0; 243 if ((isbuilding == 0) && !ISALNUM(c)) { 244 #ifdef USE_INET6 245 prior = c; 246 #endif 247 return c; 248 } 249 goto nextchar; 250 } 251 252 switch (c) 253 { 254 case '#' : 255 if (isbuilding == 1) { 256 yyunputc(c); 257 goto done; 258 } 259 yyswallow('\n'); 260 rval = YY_COMMENT; 261 goto done; 262 263 case '$' : 264 if (isbuilding == 1) { 265 yyunputc(c); 266 goto done; 267 } 268 n = yygetc(0); 269 if (n == '{') { 270 if (yyswallow('}') == -1) { 271 rval = -2; 272 goto done; 273 } 274 (void) yygetc(0); 275 } else { 276 if (!ISALPHA(n)) { 277 yyunputc(n); 278 break; 279 } 280 do { 281 n = yygetc(1); 282 } while (ISALPHA(n) || ISDIGIT(n) || n == '_'); 283 yyunputc(n); 284 } 285 286 name = yytexttostr(1, yypos); /* skip $ */ 287 288 if (name != NULL) { 289 string_val = get_variable(name, NULL, yylineNum); 290 free(name); 291 if (string_val != NULL) { 292 name = yytexttostr(yypos, yylast); 293 if (name != NULL) { 294 yypos = 0; 295 yylast = 0; 296 yystrtotext(string_val); 297 yystrtotext(name); 298 free(string_val); 299 free(name); 300 goto nextchar; 301 } 302 free(string_val); 303 } 304 } 305 break; 306 307 case '\'': 308 case '"' : 309 if (isbuilding == 1) { 310 goto done; 311 } 312 do { 313 n = yygetc(1); 314 if (n == EOF || n == TOOLONG) { 315 rval = -2; 316 goto done; 317 } 318 if (n == '\n') { 319 yyunputc(' '); 320 yypos++; 321 } 322 } while (n != c); 323 rval = YY_STR; 324 goto done; 325 /* NOTREACHED */ 326 327 case EOF : 328 yylineNum = 1; 329 yypos = 0; 330 yylast = -1; 331 yyexpectaddr = 0; 332 yybreakondot = 0; 333 yyvarnext = 0; 334 yytokentype = 0; 335 if (yydebug) 336 fprintf(stderr, "reset at EOF\n"); 337 #ifdef USE_INET6 338 prior = 0; 339 #endif 340 return 0; 341 } 342 343 if (strchr("=,/;{}()@", c) != NULL) { 344 if (isbuilding == 1) { 345 yyunputc(c); 346 goto done; 347 } 348 rval = c; 349 goto done; 350 } else if (c == '.') { 351 if (isbuilding == 0) { 352 rval = c; 353 goto done; 354 } 355 if (yybreakondot != 0) { 356 yyunputc(c); 357 goto done; 358 } 359 } 360 361 switch (c) 362 { 363 case '-' : 364 n = yygetc(0); 365 if (n == '>') { 366 isbuilding = 1; 367 goto done; 368 } 369 yyunputc(n); 370 if (yyexpectaddr) { 371 if (isbuilding == 1) 372 yyunputc(c); 373 else 374 rval = '-'; 375 goto done; 376 } 377 if (isbuilding == 1) 378 break; 379 rval = '-'; 380 goto done; 381 382 case '!' : 383 if (isbuilding == 1) { 384 yyunputc(c); 385 goto done; 386 } 387 n = yygetc(0); 388 if (n == '=') { 389 rval = YY_CMP_NE; 390 goto done; 391 } 392 yyunputc(n); 393 rval = '!'; 394 goto done; 395 396 case '<' : 397 if (yyexpectaddr) 398 break; 399 if (isbuilding == 1) { 400 yyunputc(c); 401 goto done; 402 } 403 n = yygetc(0); 404 if (n == '=') { 405 rval = YY_CMP_LE; 406 goto done; 407 } 408 if (n == '>') { 409 rval = YY_RANGE_OUT; 410 goto done; 411 } 412 yyunputc(n); 413 rval = YY_CMP_LT; 414 goto done; 415 416 case '>' : 417 if (yyexpectaddr) 418 break; 419 if (isbuilding == 1) { 420 yyunputc(c); 421 goto done; 422 } 423 n = yygetc(0); 424 if (n == '=') { 425 rval = YY_CMP_GE; 426 goto done; 427 } 428 if (n == '<') { 429 rval = YY_RANGE_IN; 430 goto done; 431 } 432 yyunputc(n); 433 rval = YY_CMP_GT; 434 goto done; 435 } 436 437 /* 438 * Now for the reason this is here...IPv6 address parsing. 439 * The longest string we can expect is of this form: 440 * 0000:0000:0000:0000:0000:0000:000.000.000.000 441 * not: 442 * 0000:0000:0000:0000:0000:0000:0000:0000 443 */ 444 #ifdef USE_INET6 445 if (yyexpectaddr != 0 && isbuilding == 0 && 446 (ishex(c) || isdigit(c) || c == ':')) { 447 char ipv6buf[45 + 1], *s, oc; 448 int start; 449 450 buildipv6: 451 start = yypos; 452 s = ipv6buf; 453 oc = c; 454 455 if (prior == YY_NUMBER && c == ':') { 456 sprintf(s, "%d", priornum); 457 s += strlen(s); 458 } 459 460 /* 461 * Perhaps we should implement stricter controls on what we 462 * swallow up here, but surely it would just be duplicating 463 * the code in inet_pton() anyway. 464 */ 465 do { 466 *s++ = c; 467 c = yygetc(1); 468 } while ((ishex(c) || c == ':' || c == '.') && 469 (s - ipv6buf < 46)); 470 yyunputc(c); 471 *s = '\0'; 472 473 if (inet_pton(AF_INET6, ipv6buf, &yylval.ip6) == 1) { 474 rval = YY_IPV6; 475 yyexpectaddr = 0; 476 goto done; 477 } 478 yypos = start; 479 c = oc; 480 } 481 #endif 482 483 if ((c == ':') && (rval != YY_IPV6) && (triedv6 == 0)) { 484 #ifdef USE_INET6 485 yystr = yytexttostr(0, yypos - 1); 486 if (yystr != NULL) { 487 char *s; 488 489 for (s = yystr; *s && ishex(*s); s++) 490 ; 491 if (!*s && *yystr) { 492 isbuilding = 0; 493 c = *yystr; 494 free(yystr); 495 triedv6 = 1; 496 yypos = 1; 497 goto buildipv6; 498 } 499 free(yystr); 500 } 501 #endif 502 if (isbuilding == 1) { 503 yyunputc(c); 504 goto done; 505 } 506 rval = ':'; 507 goto done; 508 } 509 510 if (isbuilding == 0 && c == '0') { 511 n = yygetc(0); 512 if (n == 'x') { 513 do { 514 n = yygetc(1); 515 } while (ishex(n)); 516 yyunputc(n); 517 rval = YY_HEX; 518 goto done; 519 } 520 yyunputc(n); 521 } 522 523 /* 524 * No negative numbers with leading - sign.. 525 */ 526 if (isbuilding == 0 && ISDIGIT(c)) { 527 do { 528 n = yygetc(1); 529 } while (ISDIGIT(n)); 530 yyunputc(n); 531 rval = YY_NUMBER; 532 goto done; 533 } 534 535 isbuilding = 1; 536 goto nextchar; 537 538 done: 539 yystr = yytexttostr(0, yypos); 540 541 if (yydebug) 542 printf("isbuilding %d yyvarnext %d nokey %d fixed %d addr %d\n", 543 isbuilding, yyvarnext, nokey, yydictfixed, yyexpectaddr); 544 if (isbuilding == 1) { 545 wordtab_t *w; 546 547 w = NULL; 548 isbuilding = 0; 549 550 if ((yyvarnext == 0) && (nokey == 0)) { 551 w = yyfindkey(yystr); 552 if (w == NULL && yywordtab != NULL && !yydictfixed) { 553 yyresetdict(); 554 w = yyfindkey(yystr); 555 } 556 } else 557 yyvarnext = 0; 558 if (w != NULL) 559 rval = w->w_value; 560 else 561 rval = YY_STR; 562 } 563 564 if (rval == YY_STR) { 565 if (yysavedepth > 0 && !yydictfixed) 566 yyresetdict(); 567 if (yyexpectaddr != 0) 568 yyexpectaddr = 0; 569 } 570 571 yytokentype = rval; 572 573 if (yydebug) 574 printf("lexed(%s) %d,%d,%d [%d,%d,%d] => %d @%d\n", 575 yystr, isbuilding, yyexpectaddr, yysavedepth, 576 string_start, string_end, pos, rval, yysavedepth); 577 578 switch (rval) 579 { 580 case YY_NUMBER : 581 sscanf(yystr, "%u", &yylval.num); 582 break; 583 584 case YY_HEX : 585 sscanf(yystr, "0x%x", (u_int *)&yylval.num); 586 break; 587 588 case YY_STR : 589 yylval.str = strdup(yystr); 590 break; 591 592 default : 593 break; 594 } 595 596 if (yylast > 0) { 597 bcopy(yytext + yypos, yytext, 598 sizeof(yytext[0]) * (yylast - yypos + 1)); 599 yylast -= yypos; 600 yypos = 0; 601 } 602 603 #ifdef USE_INET6 604 if (rval == YY_NUMBER) 605 priornum = yylval.num; 606 prior = rval; 607 #endif 608 return rval; 609 } 610 611 612 static wordtab_t *yyfindkey(key) 613 char *key; 614 { 615 wordtab_t *w; 616 617 if (yywordtab == NULL) 618 return NULL; 619 620 for (w = yywordtab; w->w_word != 0; w++) 621 if (strcasecmp(key, w->w_word) == 0) 622 return w; 623 return NULL; 624 } 625 626 627 char *yykeytostr(num) 628 int num; 629 { 630 wordtab_t *w; 631 632 if (yywordtab == NULL) 633 return "<unknown>"; 634 635 for (w = yywordtab; w->w_word; w++) 636 if (w->w_value == num) 637 return w->w_word; 638 return "<unknown>"; 639 } 640 641 642 wordtab_t *yysettab(words) 643 wordtab_t *words; 644 { 645 wordtab_t *save; 646 647 save = yywordtab; 648 yywordtab = words; 649 return save; 650 } 651 652 653 void yyerror(msg) 654 char *msg; 655 { 656 char *txt, letter[2]; 657 int freetxt = 0; 658 659 if (yytokentype < 256) { 660 letter[0] = yytokentype; 661 letter[1] = '\0'; 662 txt = letter; 663 } else if (yytokentype == YY_STR || yytokentype == YY_HEX || 664 yytokentype == YY_NUMBER) { 665 if (yystr == NULL) { 666 txt = yytexttostr(yypos, YYBUFSIZ); 667 freetxt = 1; 668 } else 669 txt = yystr; 670 } else { 671 txt = yykeytostr(yytokentype); 672 } 673 fprintf(stderr, "%s error at \"%s\", line %d\n", msg, txt, yylineNum); 674 if (freetxt == 1) 675 free(txt); 676 exit(1); 677 } 678 679 680 void yysetfixeddict(newdict) 681 wordtab_t *newdict; 682 { 683 if (yydebug) 684 printf("yysetfixeddict(%lx)\n", (u_long)newdict); 685 686 if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) { 687 fprintf(stderr, "%d: at maximum dictionary depth\n", 688 yylineNum); 689 return; 690 } 691 692 yysavewords[yysavedepth++] = yysettab(newdict); 693 if (yydebug) 694 printf("yysavedepth++ => %d\n", yysavedepth); 695 yydictfixed = 1; 696 } 697 698 699 void yysetdict(newdict) 700 wordtab_t *newdict; 701 { 702 if (yydebug) 703 printf("yysetdict(%lx)\n", (u_long)newdict); 704 705 if (yysavedepth == sizeof(yysavewords)/sizeof(yysavewords[0])) { 706 fprintf(stderr, "%d: at maximum dictionary depth\n", 707 yylineNum); 708 return; 709 } 710 711 yysavewords[yysavedepth++] = yysettab(newdict); 712 if (yydebug) 713 printf("yysavedepth++ => %d\n", yysavedepth); 714 } 715 716 void yyresetdict() 717 { 718 if (yydebug) 719 printf("yyresetdict(%d)\n", yysavedepth); 720 if (yysavedepth > 0) { 721 yysettab(yysavewords[--yysavedepth]); 722 if (yydebug) 723 printf("yysavedepth-- => %d\n", yysavedepth); 724 } 725 yydictfixed = 0; 726 } 727 728 729 730 #ifdef TEST_LEXER 731 int main(argc, argv) 732 int argc; 733 char *argv[]; 734 { 735 int n; 736 737 yyin = stdin; 738 739 while ((n = yylex()) != 0) 740 printf("%d.n = %d [%s] %d %d\n", 741 yylineNum, n, yystr, yypos, yylast); 742 } 743 #endif 744