/*	$NetBSD: parse.y,v 1.2 2016/01/09 17:38:57 christos Exp $	*/

/* parse.y - parser for flex input */

%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the
 * precedence of the repeat operator, {}, below that of concatenation.
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
 * Regular Expression (ERE) precedence that has the repeat operator
 * higher than concatenation.  This causes ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified. Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX


%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */
#include "flexdef.h"
__RCSID("$NetBSD: parse.y,v 1.2 2016/01/09 17:38:57 christos Exp $");

#include "tables.h"

/* Parser-wide scratch state shared by the grammar actions below.  Note
 * that 'i' is a file-scope loop counter used directly by many actions.
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start-condition numbers.  Entries live in
 * scon_stk[1..scon_stk_ptr]; it is allocated in the sect1end action.
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;	/* whether we've made the '.' character class */
static int ccldot, cclany;
int previous_continued_action;	/* whether the previous rule's action was '|' */

/* Format a warning taking two arguments into a MAXLINE-sized buffer and
 * report it through lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do{ \
	char fw3_msg[MAXLINE];\
	snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
	lwarn( fw3_msg );\
	}while(0)

/* Expand a POSIX character class expression: add to currccl every
 * character c in [0, csize) for which isascii(c) and func(c) both hold.
 */
#define CCL_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( isascii(c) && func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* Negated class: add to currccl every character c in [0, csize) for which
 * func(c) is false.  Unlike CCL_EXPR, no isascii() test is applied.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')

/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int

%}

%%
/* goal - the complete input: section 1, the section separator and the
 * rules of section 2.  Once everything is parsed, the action appends the
 * default rule and branches it into scset[] for every start condition.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			/* The default action is either a fatal error or ECHO,
			 * selected by spprdflt.
			 */
			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;

/* initlex - empty rule whose action runs before section 1 is parsed. */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;

/* sect1 - any mix of start-condition declarations and option lists. */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;

/* sect1end - end of section 1: validate the options and allocate the
 * start-condition stack with room for lastsc + 1 entries.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;

/* startconddecl - records in xcluflg which declaration keyword was seen;
 * the flag is then passed to scinstal() for each name in namelist1.
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;

/* namelist1 - the names being declared as start conditions. */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;

options		:  OPTION_OP optionlist
		;

optionlist	:  optionlist option
		|
		;

/* option - "name = value" options; each stores a copy of the value
 * string (nmstr) in the corresponding global.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}
		|  OPT_EXTRA_TYPE '=' NAME
			{ extra_type = copy_string( nmstr ); }
		|  OPT_PREFIX '=' NAME
			{ prefix = copy_string( nmstr ); }
		|  OPT_YYCLASS '=' NAME
			{ yyclass = copy_string( nmstr ); }
		|  OPT_HEADER '=' NAME
			{ headerfilename = copy_string( nmstr ); }
		|  OPT_TABLES '=' NAME
			{ tablesext = true; tablesfilename = copy_string( nmstr ); }
		;

/* sect2 - a sequence of rules, each optionally preceded by a
 * start-condition list; a start-condition list followed by '{' ... '}'
 * applies to a nested group of rules.  In both cases scon_stk_ptr is
 * restored afterwards to the value that scon saved ($2).
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;

/* initforrule - empty rule that resets the per-rule state and calls
 * new_rule() before a rule's pattern is parsed.
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;

/* flexrule - one complete rule.  A pattern introduced by '^' is branched
 * into scbol[], a plain pattern into scset[]; in both cases either for
 * the start conditions on scon_stk or, when that stack is empty, for
 * every start condition whose scxclu[] entry is not set.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			/* Only report the performance note the first time
			 * a '^' rule is seen.
			 */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					lwarn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;

/* scon_stk_ptr - empty rule whose value is the current stack pointer,
 * captured before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;

/* scon - optional start-condition prefix of a rule.  Its value is the
 * stack pointer as it was before this prefix pushed any entries.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition in 1..lastsc that is
			 * not already on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;

namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;

/* sconname - one start-condition name inside '<' ... '>'.  An unknown
 * name is reported, a name already on the stack produces a warning, and
 * anything else is pushed onto scon_stk.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* The loop ran to completion, so scnum
				 * was not found on the stack.
				 */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;

/* rule - a pattern with its optional trailing context: "re2 re" is the
 * '/' form and "re '$'" links a '\n' state after the pattern.
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;


/* re - alternation of series; any use of '|' marks the pattern as
 * variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;


/* re2 - the head of a rule up to and including the '/' operator. */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;

/* series - concatenation of singletons, plus the repeat operator in its
 * BEGIN_REPEAT_POSIX/END_REPEAT_POSIX form, which applies to the whole
 * series parsed so far.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,n} is built as an
						 * optional {1,n}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				  synerr( _("iteration value must be positive")
					  );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;

/* singleton - a single pattern element with optional postfix operators;
 * the BEGIN_REPEAT_FLEX/END_REPEAT_FLEX repeat forms apply to the
 * singleton only.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,n} is built as an
						 * optional {1,n}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both classes are built once, on the first '.'
			 * encountered, and reused afterwards.
			 */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				$$ = mkstate( -cclany );
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
/* fullccl - bracketed character classes combined with the set
 * difference and union operators.
 */
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;

/* braceccl - one bracketed class; a leading '^' negates it. */
braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;

/* ccl - the contents of a bracketed class: ranges, single characters and
 * POSIX class expressions.  cclsorted and lastchar track whether the
 * characters have been added in increasing order.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			/* Start a new, empty class and make it the
			 * target of the CCL_EXPR macros.
			 */
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;

/* ccl_expr - POSIX class expressions, expanded into currccl.  In a
 * case-insensitive scanner the lower and upper classes each add both
 * cases, and their negated forms only produce a warning.
 */
ccl_expr:
		CCE_ALNUM	{ CCL_EXPR(isalnum); }
	|	CCE_ALPHA	{ CCL_EXPR(isalpha); }
	|	CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
	|	CCE_CNTRL	{ CCL_EXPR(iscntrl); }
	|	CCE_DIGIT	{ CCL_EXPR(isdigit); }
	|	CCE_GRAPH	{ CCL_EXPR(isgraph); }
	|	CCE_LOWER	{
				CCL_EXPR(islower);
				if (sf_case_ins())
					CCL_EXPR(isupper);
			}
	|	CCE_PRINT	{ CCL_EXPR(isprint); }
	|	CCE_PUNCT	{ CCL_EXPR(ispunct); }
	|	CCE_SPACE	{ CCL_EXPR(isspace); }
	|	CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
	|	CCE_UPPER	{
				CCL_EXPR(isupper);
				if (sf_case_ins())
					CCL_EXPR(islower);
			}

	|	CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
	|	CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
	|	CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
	|	CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
	|	CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
	|	CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
	|	CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
	|	CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
	|	CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
	|	CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
	|	CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
			}
	|	CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
			}
		;

/* string - the characters of a quoted string, linked one state at a
 * time; an empty string is a single epsilon state.
 */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;

%%


/* build_eof_action - build the "<<EOF>>" action for the active start
 *                    conditions
 */

void build_eof_action() 958 { 959 int i; 960 char action_text[MAXLINE]; 961 962 for ( i = 1; i <= scon_stk_ptr; ++i ) 963 { 964 if ( sceof[scon_stk[i]] ) 965 format_pinpoint_message( 966 "multiple <<EOF>> rules for start condition %s", 967 scname[scon_stk[i]] ); 968 969 else 970 { 971 sceof[scon_stk[i]] = true; 972 973 if (previous_continued_action /* && previous action was regular */) 974 add_action("YY_RULE_SETUP\n"); 975 976 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n", 977 scname[scon_stk[i]] ); 978 add_action( action_text ); 979 } 980 } 981 982 line_directive_out( (FILE *) 0, 1 ); 983 984 /* This isn't a normal rule after all - don't count it as 985 * such, so we don't have any holes in the rule numbering 986 * (which make generating "rule can never match" warnings 987 * more difficult. 988 */ 989 --num_rules; 990 ++num_eof_rules; 991 } 992 993 994 /* format_synerr - write out formatted syntax error */ 995 996 void format_synerr( msg, arg ) 997 const char *msg, arg[]; 998 { 999 char errmsg[MAXLINE]; 1000 1001 (void) snprintf( errmsg, sizeof(errmsg), msg, arg ); 1002 synerr( errmsg ); 1003 } 1004 1005 1006 /* synerr - report a syntax error */ 1007 1008 void synerr( str ) 1009 const char *str; 1010 { 1011 syntaxerror = true; 1012 pinpoint_message( str ); 1013 } 1014 1015 1016 /* format_warn - write out formatted warning */ 1017 1018 void format_warn( msg, arg ) 1019 const char *msg, arg[]; 1020 { 1021 char warn_msg[MAXLINE]; 1022 1023 snprintf( warn_msg, sizeof(warn_msg), msg, arg ); 1024 lwarn( warn_msg ); 1025 } 1026 1027 1028 /* lwarn - report a warning, unless -w was given */ 1029 1030 void lwarn( str ) 1031 const char *str; 1032 { 1033 line_warning( str, linenum ); 1034 } 1035 1036 /* format_pinpoint_message - write out a message formatted with one string, 1037 * pinpointing its location 1038 */ 1039 1040 void format_pinpoint_message( msg, arg ) 1041 const char *msg, arg[]; 1042 { 1043 char errmsg[MAXLINE]; 1044 1045 snprintf( 
errmsg, sizeof(errmsg), msg, arg ); 1046 pinpoint_message( errmsg ); 1047 } 1048 1049 1050 /* pinpoint_message - write out a message, pinpointing its location */ 1051 1052 void pinpoint_message( str ) 1053 const char *str; 1054 { 1055 line_pinpoint( str, linenum ); 1056 } 1057 1058 1059 /* line_warning - report a warning at a given line, unless -w was given */ 1060 1061 void line_warning( str, line ) 1062 const char *str; 1063 int line; 1064 { 1065 char warning[MAXLINE]; 1066 1067 if ( ! nowarn ) 1068 { 1069 snprintf( warning, sizeof(warning), "warning, %s", str ); 1070 line_pinpoint( warning, line ); 1071 } 1072 } 1073 1074 1075 /* line_pinpoint - write out a message, pinpointing it at the given line */ 1076 1077 void line_pinpoint( str, line ) 1078 const char *str; 1079 int line; 1080 { 1081 fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); 1082 } 1083 1084 1085 /* yyerror - eat up an error message from the parser; 1086 * currently, messages are ignore 1087 */ 1088 1089 void yyerror( msg ) 1090 const char *msg; 1091 { 1092 (void)msg; 1093 } 1094