/* $NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $ */

/* parse.y - parser for flex input */

%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
%token TOK_TABLES_FILE

%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the
 * precedence of the repeat operator, {}, below that of concatenation.
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
 * Regular Expression (ERE) precedence that has the repeat operator
 * higher than concatenation.  This causes ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified. Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX


%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */
#include "flexdef.h"
__RCSID("$NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $");

#include "tables.h"

/* Scratch state shared between the grammar actions below.  Note that `i'
 * is a file-scope loop counter used by many actions.
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start-condition numbers that apply to the rule(s) currently
 * being parsed; entries 1..scon_stk_ptr are live (slot 0 is never used
 * by the actions in this file).
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;	/* whether we've made the '.' character class */
static int ccldot, cclany;
int previous_continued_action;	/* whether the previous rule's action was '|' */

/* Format a warning that takes two arguments and report it via lwarn(). */
#define format_warn3(fmt, a1, a2) \
	do{ \
		char fw3_msg[MAXLINE];\
		snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
		lwarn( fw3_msg );\
	}while(0)

/* Expand a POSIX character class expression. */
#define CCL_EXPR(func) \
	do{ \
		int c; \
		for ( c = 0; c < csize; ++c ) \
			if ( isascii(c) && func(c) ) \
				ccladd( currccl, c ); \
	}while(0)

/* negated class */
#define CCL_NEG_EXPR(func) \
	do{ \
		int c; \
		for ( c = 0; c < csize; ++c ) \
			if ( !func(c) ) \
				ccladd( currccl, c ); \
	}while(0)

/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')

/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int

%}

%%
/* goal - the whole input.  Once every user rule has been parsed, the
 * action appends the default rule (built from a negated, empty character
 * class), branches it into every start condition's rule set and emits its
 * action: a fatal error if spprdflt is set, otherwise ECHO.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;

initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;

sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;

/* End of section 1: validate the options and size the start-condition
 * stack to hold lastsc + 1 entries.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;

startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;

namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;

options		:  TOK_OPTION optionlist
		;

optionlist	:  optionlist option
		|
		;

option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup(nmstr);
			did_outfilename = 1;
			}
		|  TOK_EXTRA_TYPE '=' NAME
			{ extra_type = xstrdup(nmstr); }
		|  TOK_PREFIX '=' NAME
			{
			prefix = xstrdup(nmstr);
			if (strchr(prefix, '[') || strchr(prefix, ']'))
				flexerror(_("Prefix must not contain [ or ]"));
			}
		|  TOK_YYCLASS '=' NAME
			{ yyclass = xstrdup(nmstr); }
		|  TOK_HEADER_FILE '=' NAME
			{ headerfilename = xstrdup(nmstr); }
		|  TOK_TABLES_FILE '=' NAME
			{ tablesext = true; tablesfilename = xstrdup(nmstr); }
		;

/* sect2 - the rules section.  The value of `scon' is the stack pointer
 * that was current before the start-condition prefix was parsed; it is
 * restored here once the rule (or braced group of rules) is complete.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;

initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;

flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					lwarn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;

/* Empty marker production: captures the stack pointer as it was before
 * the names inside '<' ... '>' are pushed.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;

scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not already
			 * on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;

namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;

sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				/* Warn about, and do not push, a start
				 * condition that is already on the stack.
				 */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;

rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;


re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;


re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;

series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;

singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both classes are built once, on first use, and
			 * then shared by every later '.'.
			 */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				$$ = mkstate( -cclany );
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
			/* Sort characters for fast searching.
			 */
			qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;

braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;

ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;

/* ccl_expr - a POSIX class expression inside a bracket expression.  Each
 * action adds the matching characters to the class numbered currccl.
 */
ccl_expr:
		CCE_ALNUM	{ CCL_EXPR(isalnum); }
	|	CCE_ALPHA	{ CCL_EXPR(isalpha); }
	|	CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
	|	CCE_CNTRL	{ CCL_EXPR(iscntrl); }
	|	CCE_DIGIT	{ CCL_EXPR(isdigit); }
	|	CCE_GRAPH	{ CCL_EXPR(isgraph); }
	|	CCE_LOWER	{
				CCL_EXPR(islower);
				if (sf_case_ins())
					CCL_EXPR(isupper);
				}
	|	CCE_PRINT	{ CCL_EXPR(isprint); }
	|	CCE_PUNCT	{ CCL_EXPR(ispunct); }
	|	CCE_SPACE	{ CCL_EXPR(isspace); }
	|	CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
	|	CCE_UPPER	{
				CCL_EXPR(isupper);
				if (sf_case_ins())
					CCL_EXPR(islower);
				}

	|	CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
	|	CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
	|	CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
	|	CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
	|	CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
	|	CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
	|	CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
	|	CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
	|	CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
	|	CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
	|	CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
	|	CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
	;

string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;

%%


/* build_eof_action - build the "<<EOF>>" action for the active start
 *                    conditions
 */

void build_eof_action(void) 960 { 961 int i; 962 char action_text[MAXLINE]; 963 964 for ( i = 1; i <= scon_stk_ptr; ++i ) 965 { 966 if ( sceof[scon_stk[i]] ) 967 format_pinpoint_message( 968 "multiple <<EOF>> rules for start condition %s", 969 scname[scon_stk[i]] ); 970 971 else 972 { 973 sceof[scon_stk[i]] = true; 974 975 if (previous_continued_action /* && previous action was regular */) 976 add_action("YY_RULE_SETUP\n"); 977 978 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n", 979 scname[scon_stk[i]] ); 980 add_action( action_text ); 981 } 982 } 983 984 line_directive_out(NULL, 1); 985 add_action("[["); 986 987 /* This isn't a normal rule after all - don't count it as 988 * such, so we don't have any holes in the rule numbering 989 * (which make generating "rule can never match" warnings 990 * more difficult. 991 */ 992 --num_rules; 993 ++num_eof_rules; 994 } 995 996 997 /* format_synerr - write out formatted syntax error */ 998 999 void format_synerr( const char *msg, const char arg[] ) 1000 { 1001 char errmsg[MAXLINE]; 1002 1003 (void) snprintf( errmsg, sizeof(errmsg), msg, arg ); 1004 synerr( errmsg ); 1005 } 1006 1007 1008 /* synerr - report a syntax error */ 1009 1010 void synerr( const char *str ) 1011 { 1012 syntaxerror = true; 1013 pinpoint_message( str ); 1014 } 1015 1016 1017 /* format_warn - write out formatted warning */ 1018 1019 void format_warn( const char *msg, const char arg[] ) 1020 { 1021 char warn_msg[MAXLINE]; 1022 1023 snprintf( warn_msg, sizeof(warn_msg), msg, arg ); 1024 lwarn( warn_msg ); 1025 } 1026 1027 1028 /* lwarn - report a warning, unless -w was given */ 1029 1030 void lwarn( const char *str ) 1031 { 1032 line_warning( str, linenum ); 1033 } 1034 1035 /* format_pinpoint_message - write out a message formatted with one string, 1036 * pinpointing its location 1037 */ 1038 1039 void format_pinpoint_message( const char *msg, const char arg[] ) 1040 { 1041 char errmsg[MAXLINE]; 1042 1043 snprintf( errmsg, 
sizeof(errmsg), msg, arg ); 1044 pinpoint_message( errmsg ); 1045 } 1046 1047 1048 /* pinpoint_message - write out a message, pinpointing its location */ 1049 1050 void pinpoint_message( const char *str ) 1051 { 1052 line_pinpoint( str, linenum ); 1053 } 1054 1055 1056 /* line_warning - report a warning at a given line, unless -w was given */ 1057 1058 void line_warning( const char *str, int line ) 1059 { 1060 char warning[MAXLINE]; 1061 1062 if ( ! nowarn ) 1063 { 1064 snprintf( warning, sizeof(warning), "warning, %s", str ); 1065 line_pinpoint( warning, line ); 1066 } 1067 } 1068 1069 1070 /* line_pinpoint - write out a message, pinpointing it at the given line */ 1071 1072 void line_pinpoint( const char *str, int line ) 1073 { 1074 fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); 1075 } 1076 1077 1078 /* yyerror - eat up an error message from the parser; 1079 * currently, messages are ignore 1080 */ 1081 1082 void yyerror( const char *msg ) 1083 { 1084 (void)msg; 1085 } 1086