/*	$NetBSD: scan.l,v 1.6 2018/12/23 16:27:17 christos Exp $	*/

/* scan.l - scanner for flex input -*-C-*- */

%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */
#include "flexdef.h"
__RCSID("$NetBSD: scan.l,v 1.6 2018/12/23 16:27:17 christos Exp $");

#include "parse.h"
extern bool tablesverify, tablesext;
extern int trlcontxt; /* Set in parse.y for each rule. */
extern const char *escaped_qstart, *escaped_qend;
extern int yylval;	/* XXX: for bootstrap */

/* The two-character quote delimiters, each spelled as two adjacent string
 * literals (presumably so the delimiter pair never appears verbatim in this
 * file -- TODO confirm).
 */
#define M4QSTART "[""["
#define M4QEND "]""]"

/* Text passed to add_action() in place of a quote delimiter that was found
 * in the scanned input (see ACTION_ECHO_QSTART / ACTION_ECHO_QEND below).
 */
#define ESCAPED_QSTART "[" M4QEND M4QSTART "[" M4QEND M4QSTART
#define ESCAPED_QEND M4QEND "]" M4QSTART M4QEND "]" M4QSTART

/* Hand the text of the current token to add_action(). */
#define ACTION_ECHO add_action( yytext )

/* Call action_define( def, 1 ) when should_define is non-zero. */
#define ACTION_IFDEF(def, should_define) \
	{ \
	if ( should_define ) \
		action_define( def, 1 ); \
	}

#define ACTION_ECHO_QSTART add_action (ESCAPED_QSTART)
#define ACTION_ECHO_QEND   add_action (ESCAPED_QEND)

/* Define def in m4defs_buf when should_define is non-zero, otherwise
 * undefine it there.
 */
#define ACTION_M4_IFDEF(def, should_define) \
    do{ \
        if ( should_define ) \
            buf_m4_define( &m4defs_buf, def, NULL);\
        else \
            buf_m4_undefine( &m4defs_buf, def);\
    } while(0)

#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanner function is named flexscan() and takes no
 * arguments.
 */
#define YY_DECL \
	int flexscan(void)

/* Return the first character of the token (as an unsigned char value in
 * yylval) as a CHAR token.
 */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Copy the token into nmstr and return NAME.  A token of MAXLINE or more
 * characters is reported with synerr() and followed by FLEX_EXIT().
 */
#define RETURNNAME \
	if(yyleng < MAXLINE) \
         { \
	strlcpy( nmstr, yytext, sizeof(nmstr) ); \
		 return NAME; \
	 } \
	else \
	 do { \
	   synerr(_("Input line too long\n")); \
	   FLEX_EXIT(EXIT_FAILURE); \
	 } while (0)

/* unput() the characters of str from its last one down to index start, so
 * that they are rescanned in their original order.
 */
#define PUT_BACK_STRING(str, start) \
	{ size_t i = strlen( str );	\
	  while ( i > start )		\
	    unput((str)[--i]);		\
	}

/* The scanner is built caseless, so the patterns "reject" and "yymore"
 * match any capitalisation; these checks set the flag only for the
 * all-upper-case (respectively all-lower-case) spelling.
 */
#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;

/* Turn on posix_compat when POSIXLY_CORRECT is set in the environment. */
#define YY_USER_INIT \
	if ( getenv("POSIXLY_CORRECT") ) \
		posix_compat = true;

/* Enter CODEBLOCK.  x becomes the new value of indented_code; when it is
 * non-zero the current token is echoed as part of the code.
 */
#define START_CODEBLOCK(x) do { \
    /* Emit the needed line directive... */\
    if (indented_code == false) { \
        linenum++; \
        line_directive_out(NULL, 1); \
    } \
    add_action(M4QSTART); \
    yy_push_state(CODEBLOCK); \
    if ((indented_code = x)) ACTION_ECHO; \
} while(0)

/* Leave CODEBLOCK and close the quoting opened by START_CODEBLOCK. */
#define END_CODEBLOCK do { \
    yy_pop_state();\
    add_action(M4QEND); \
    if (!indented_code) line_directive_out(NULL, 0);\
} while (0)

%}

%option caseless nodefault noreject stack noyy_top_state
%option nostdinit

/* Exclusive start conditions.  INITIAL scans section 1; the first "%%"
 * switches to SECT2PROLOG, which hands over to SECT2, and the second "%%"
 * switches to SECT3 or SECT3_NOESCAPE.  The other conditions are sub-states
 * entered from those.
 */
%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
%x GROUP_WITH_PARAMS
%x GROUP_MINUS_PARAMS
%x EXTENDED_COMMENT
%x COMMENT_DISCARD CODE_COMMENT
%x SECT3_NOESCAPE
%x CHARACTER_CONSTANT

WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\r\n]

NL		\r?\n

NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"^?[[:alpha:]]+":]")

LEXOPT		[aceknopr]

M4QSTART    "[""["
M4QEND      "]""]"

%%
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	int doing_codeblock = false;
	int brace_depth=0, brace_start_line=0;
	char nmdef[MAXLINE];


<INITIAL>{
	^{WS}		START_CODEBLOCK(true);
	^"/*"		add_action("/*[""["); yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	START_CODEBLOCK(false);
    ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}    {
                brace_start_line = linenum;
                ++linenum;
                buf_linedir( &top_buf, infilename?infilename:"<stdin>", linenum);
                brace_depth = 1;
                yy_push_state(CODEBLOCK_MATCH_BRACE);
            }

    ^"%top".*   synerr( _("malformed '%top' directive") );

	{WS}		/* discard */

	^"%%".*		{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out(NULL, 1);
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return TOK_OPTION;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	/* xgettext: no-c-format */
	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			if(yyleng < MAXLINE)
        		 {
				strlcpy( nmstr, yytext, sizeof(nmstr) );
			 }
			else
			 {
			   synerr( _("Definition name too long\n"));
			   FLEX_EXIT(EXIT_FAILURE);
			 }

			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


<COMMENT,CODE_COMMENT>{ /* */
        [^\[\]\*\n]*  ACTION_ECHO;
        .           ACTION_ECHO;

	{NL}	    ++linenum; ACTION_ECHO;
}
<COMMENT>{
        "*/"	    add_action("*/]""]"); yy_pop_state();
}
<CODE_COMMENT>{
        "*/" 	    ACTION_ECHO; yy_pop_state();
}

<COMMENT_DISCARD>{
    /* This is the same as COMMENT, but is discarded rather than output. */
	"*/"		yy_pop_state();
    "*"         ;
	[^*\n]      ;
	{NL}	    ++linenum;
}

<EXTENDED_COMMENT>{
    ")"         yy_pop_state();
    [^\n\)]+      ;
    {NL}        ++linenum;
}

<LINEDIR>{
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			free(infilename);
			infilename = xstrdup(yytext + 1);
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}
<ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,CHARACTER_CONSTANT,COMMENT,CODE_COMMENT>{
   {M4QSTART}	ACTION_ECHO_QSTART;
   {M4QEND}	ACTION_ECHO_QEND;
}

<CODEBLOCK>{
	^"%}".*{NL}	++linenum; END_CODEBLOCK;
	[^\n%\[\]]*         ACTION_ECHO;
        .		ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code ) END_CODEBLOCK;
			}
}

<CODEBLOCK_MATCH_BRACE>{
    "}"     {
                if( --brace_depth == 0){
                    /* TODO: Matched. */
                    yy_pop_state();
                }else
                    buf_strnappend(&top_buf, yytext, yyleng);
            }

    "{"     {
                brace_depth++;
                buf_strnappend(&top_buf, yytext, yyleng);
            }

    {NL}    {
                ++linenum;
                buf_strnappend(&top_buf, yytext, yyleng);
            }

    {M4QSTART}  buf_strnappend(&top_buf, escaped_qstart, (int) strlen(escaped_qstart));
    {M4QEND}    buf_strnappend(&top_buf, escaped_qend, (int) strlen(escaped_qend));
    ([^{}\r\n\[\]]+)|[^{}\r\n]  {
       buf_strnappend(&top_buf, yytext, yyleng);
    }

    <<EOF>>     {
                linenum = brace_start_line;
                synerr(_("Unmatched '{'"));
                yyterminate();
                }
}


<PICKUPDEF>{
	{WS}		/* separates name and definition */

	{NOT_WS}[^\r\n]*	{
 		if(yyleng < MAXLINE)
 		 {
			strlcpy( nmdef, yytext, sizeof(nmdef) );
 		 }
 		else
 		 {
 		   format_synerr( _("Definition value for {%s} too long\n"), nmstr);
 		   FLEX_EXIT(EXIT_FAILURE);
		 }
		/* Skip trailing whitespace. */
		{
		    size_t i = strlen( nmdef );
		    while (i > 0 && (nmdef[i-1] == ' ' || nmdef[i-1] == '\t'))
		       --i;
		    nmdef[i] = '\0';
		}

		ndinstal( nmstr, nmdef );
		didadef = true;
		}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


<OPTION>{
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			ACTION_M4_IFDEF( "M4""_YY_ALWAYS_INTERACTIVE", option_sense );
            interactive = option_sense;
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
    bison-bridge     bison_bridge_lval = option_sense;
    bison-locations  { if((bison_bridge_lloc = option_sense))
                            bison_bridge_lval = true;
                     }
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		sf_set_case_ins(!option_sense);
	caseless|case-insensitive	sf_set_case_ins(option_sense);
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	posix-compat	posix_compat = option_sense;
	line		gen_line_dirs = option_sense;
	main		{
			ACTION_M4_IFDEF( "M4""_YY_MAIN", option_sense);
            /* Override yywrap */
            if( option_sense == true )
                do_yywrap = false;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			ACTION_M4_IFDEF( "M4""_YY_NEVER_INTERACTIVE", option_sense );
            interactive = !option_sense;
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
    reentrant   reentrant = option_sense;
	reject		reject_really_used = option_sense;
	stack		ACTION_M4_IFDEF( "M4""_YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
    unistd      ACTION_IFDEF("YY_NO_UNISTD_H", ! option_sense);
	unput		ACTION_M4_IFDEF("M4""_YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense; ACTION_M4_IFDEF("M4""_YY_USE_LINENO", option_sense);
	yymore		yymore_really_used = option_sense;
	yywrap      do_yywrap = option_sense;

	yy_push_state	ACTION_M4_IFDEF("M4""_YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_M4_IFDEF("M4""_YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_M4_IFDEF("M4""_YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_STRING", ! option_sense);

    yyalloc         ACTION_M4_IFDEF("M4""_YY_NO_FLEX_ALLOC", ! option_sense);
    yyrealloc       ACTION_M4_IFDEF("M4""_YY_NO_FLEX_REALLOC", ! option_sense);
    yyfree          ACTION_M4_IFDEF("M4""_YY_NO_FLEX_FREE", ! option_sense);

    yyget_debug     ACTION_M4_IFDEF("M4""_YY_NO_GET_DEBUG", ! option_sense);
    yyset_debug     ACTION_M4_IFDEF("M4""_YY_NO_SET_DEBUG", ! option_sense);
    yyget_extra     ACTION_M4_IFDEF("M4""_YY_NO_GET_EXTRA", ! option_sense);
    yyset_extra     ACTION_M4_IFDEF("M4""_YY_NO_SET_EXTRA", ! option_sense);
    yyget_leng      ACTION_M4_IFDEF("M4""_YY_NO_GET_LENG", ! option_sense);
    yyget_text      ACTION_M4_IFDEF("M4""_YY_NO_GET_TEXT", ! option_sense);
    yyget_lineno    ACTION_M4_IFDEF("M4""_YY_NO_GET_LINENO", ! option_sense);
    yyset_lineno    ACTION_M4_IFDEF("M4""_YY_NO_SET_LINENO", ! option_sense);
    yyget_in        ACTION_M4_IFDEF("M4""_YY_NO_GET_IN", ! option_sense);
    yyset_in        ACTION_M4_IFDEF("M4""_YY_NO_SET_IN", ! option_sense);
    yyget_out       ACTION_M4_IFDEF("M4""_YY_NO_GET_OUT", ! option_sense);
    yyset_out       ACTION_M4_IFDEF("M4""_YY_NO_SET_OUT", ! option_sense);
    yyget_lval      ACTION_M4_IFDEF("M4""_YY_NO_GET_LVAL", ! option_sense);
    yyset_lval      ACTION_M4_IFDEF("M4""_YY_NO_SET_LVAL", ! option_sense);
    yyget_lloc      ACTION_M4_IFDEF("M4""_YY_NO_GET_LLOC", ! option_sense);
    yyset_lloc      ACTION_M4_IFDEF("M4""_YY_NO_SET_LLOC", ! option_sense);

	extra-type	return TOK_EXTRA_TYPE;
	outfile		return TOK_OUTFILE;
	prefix		return TOK_PREFIX;
	yyclass		return TOK_YYCLASS;
	header(-file)?      return TOK_HEADER_FILE;
	tables-file         return TOK_TABLES_FILE;
	tables-verify   {
                    tablesverify = option_sense;
                    if(!tablesext && option_sense)
                        tablesext = true;
                    }


	\"[^"\n]*\"	{
			if(yyleng-1 < MAXLINE)
        		 {
				strlcpy( nmstr, yytext + 1, sizeof(nmstr) );
			 }
			else
			 {
			   synerr( _("Option line too long\n"));
			   FLEX_EXIT(EXIT_FAILURE);
			 }
			nmstr[strlen( nmstr ) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


<SECT2PROLOG>{
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS} START_CODEBLOCK(true); /* indented code in prolog */

	^{NOT_WS}.*	{
        /* non-indented code */
		if ( bracelevel <= 0 ) {
            /* not in %{ ... %} */
            yyless( 0 );	/* put it all back */
            yy_set_bol( 1 );
            mark_prolog();
            BEGIN(SECT2);
        } else {
            START_CODEBLOCK(true);
        }
    }

	.		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

<SECT2>{
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	    {
                        /* Allow "<" to appear in (?x) patterns. */
                        if (!sf_skip_ws())
                            BEGIN(SC);
                        return '<';
                    }
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	{
			BEGIN(NUM);
			if ( lex_compat || posix_compat )
				return BEGIN_REPEAT_POSIX;
			else
				return BEGIN_REPEAT_FLEX;
			}
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	{
                        if (sf_skip_ws()){
                            /* We're in the middle of a (?x: ) pattern. */
                            /* Push back everything starting at the "|" */
                            int amt = (int) (strchr (yytext, '|') - yytext);
                            yyless(amt);
                        }
                        else {
                            add_action("]""]");
                            continued_action = true;
                            ++linenum;
                            return '\n';
                        }
                    }

	^{WS}"/*"	{

                if (sf_skip_ws()){
                    /* We're in the middle of a (?x: ) pattern. */
                    yy_push_state(COMMENT_DISCARD);
                }
                else{
                    yyless( yyleng - 2 );	/* put back '/', '*' */
                    bracelevel = 0;
                    continued_action = false;
                    BEGIN(ACTION);
                }
			}

	^{WS}		/* allow indented rules */ ;

	{WS}		{
            if (sf_skip_ws()){
                /* We're in the middle of a (?x: ) pattern. */
            }
            else{
                /* This rule is separate from the one below because
                 * otherwise we get variable trailing context, so
                 * we can't build the scanner using -{f,F}.
                 */
                bracelevel = 0;
                continued_action = false;
                BEGIN(ACTION);

                if ( in_rule )
                    {
                    doing_rule_action = true;
                    in_rule = false;
                    return '\n';
                    }
            }
			}

	{OPTWS}{NL}	{
            if (sf_skip_ws()){
                /* We're in the middle of a (?x: ) pattern. */
                ++linenum;
            }
            else{
                bracelevel = 0;
                continued_action = false;
                BEGIN(ACTION);
                unput( '\n' );	/* so <ACTION> sees it */

                if ( in_rule )
                    {
                    doing_rule_action = true;
                    in_rule = false;
                    return '\n';
                    }
            }
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(no_section3_escape ? SECT3_NOESCAPE : SECT3);
			outn("/* Begin user sect3 */");
			yyterminate(); /* to stop the parser */

			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			if(yyleng < MAXLINE)
        		 {
				strlcpy( nmstr, yytext, sizeof(nmstr) );
			 }
			else
			 {
			   synerr( _("Input line too long\n"));
			   FLEX_EXIT(EXIT_FAILURE);
			 }

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if (0 /* <--- This "0" effectively disables the reuse of a
                   * character class (purely based on its source text).
                   * The reason it was disabled is so yacc/bison can parse
                   * ccl operations, such as ccl difference and union.
                   */
                &&  (cclval = ccllookup( nmstr )) != 0 )
				{
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}
    "{-}"       return CCL_OP_DIFF;
    "{+}"       return CCL_OP_UNION;


    /* Check for :space: at the end of the rule so we don't
     * wrap the expanded regex in '(' ')' -- breaking trailing
     * context.
     */
	"{"{NAME}"}"[[:space:]]?	 {
			char *nmdefptr;
            int end_is_ws, end_ch;

            end_ch = yytext[yyleng-1];
            end_is_ws = end_ch != '}' ? 1 : 0;

 			if(yyleng-1 < MAXLINE)
         		 {
				strlcpy( nmstr, yytext + 1, sizeof(nmstr) );
 			 }
 			else
 			 {
 			   synerr( _("Input line too long\n"));
 			   FLEX_EXIT(EXIT_FAILURE);
 			 }
nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				size_t len = strlen( nmdefptr );
                if (end_is_ws)
                    unput(end_ch);

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$')
                     || (end_is_ws && trlcontxt && !sf_skip_ws()))
					{ /* don't use ()'s after all */
					PUT_BACK_STRING(nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING(nmdefptr, 0);
					unput('(');
					}
				}
			}

    "/*"        {
                    if (sf_skip_ws())
                        yy_push_state(COMMENT_DISCARD);
                    else{
                        /* Push back the "*" and return "/" as usual. */
                        yyless(1);
                        return '/';
                    }
                }

    "(?#"       {
                    if (lex_compat || posix_compat){
                        /* Push back the "?#" and treat it like a normal parens. */
                        yyless(1);
                        sf_push();
                        return '(';
                    }
                    else
                        yy_push_state(EXTENDED_COMMENT);
                }
    "(?"        {
                    sf_push();
                    if (lex_compat || posix_compat)
                        /* Push back the "?" and treat it like a normal parens. */
                        yyless(1);
                    else
                        BEGIN(GROUP_WITH_PARAMS);
                    return '(';
                }
    "("         sf_push(); return '(';
    ")"         {
                    if (_sf_top_ix > 0) {
                        sf_pop();
                        return ')';
                    } else
                        synerr(_("unbalanced parenthesis"));
                }

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


<SC>{
	{OPTWS}{NL}{OPTWS}	++linenum;	/* Allow blank lines & continuations */
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


<QUOTE>{
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}

<GROUP_WITH_PARAMS>{
    ":"     BEGIN(SECT2);
    "-"     BEGIN(GROUP_MINUS_PARAMS);
    i       sf_set_case_ins(1);
    s       sf_set_dot_all(1);
    x       sf_set_skip_ws(1);
}
<GROUP_MINUS_PARAMS>{
    ":"     BEGIN(SECT2);
    i       sf_set_case_ins(0);
    s       sf_set_dot_all(0);
    x       sf_set_skip_ws(0);
}

<FIRSTCCL>{
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;

	"[:^alnum:]"	BEGIN(CCL); return CCE_NEG_ALNUM;
	"[:^alpha:]"	BEGIN(CCL); return CCE_NEG_ALPHA;
	"[:^blank:]"	BEGIN(CCL); return CCE_NEG_BLANK;
	"[:^cntrl:]"	BEGIN(CCL); return CCE_NEG_CNTRL;
	"[:^digit:]"	BEGIN(CCL); return CCE_NEG_DIGIT;
	"[:^graph:]"	BEGIN(CCL); return CCE_NEG_GRAPH;
	"[:^lower:]"	BEGIN(CCL); return CCE_NEG_LOWER;
	"[:^print:]"	BEGIN(CCL); return CCE_NEG_PRINT;
	"[:^punct:]"	BEGIN(CCL); return CCE_NEG_PUNCT;
	"[:^space:]"	BEGIN(CCL); return CCE_NEG_SPACE;
	"[:^upper:]"	BEGIN(CCL); return CCE_NEG_UPPER;
	"[:^xdigit:]"	BEGIN(CCL); return CCE_NEG_XDIGIT;
	{CCL_EXPR}	{
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

<NUM>{
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		{
			BEGIN(SECT2);
			if ( lex_compat || posix_compat )
				return END_REPEAT_POSIX;
			else
				return END_REPEAT_FLEX;
			}

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( CODE_COMMENT );

	<CODEBLOCK,ACTION>{
		"reject" {
            ACTION_ECHO;
            CHECK_REJECT(yytext);
        }
		"yymore" {
            ACTION_ECHO;
            CHECK_YYMORE(yytext);
        }
	}

    .           ACTION_ECHO;
	{NL}	{
		++linenum;
		ACTION_ECHO;
		if (bracelevel <= 0 || (doing_codeblock && indented_code)) {
            if ( doing_rule_action )
                add_action( "\tYY_BREAK]""]\n" );

            doing_rule_action = doing_codeblock = false;
            BEGIN(SECT2);
        }
    }
}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	[^[:alpha:]_{}\"'/\n\[\]]+	ACTION_ECHO;
        {NAME}		ACTION_ECHO;
        "'"([^\'\\\n]|\\.)"'" ACTION_ECHO; /* character constant */
        "'"             ACTION_ECHO; BEGIN(CHARACTER_CONSTANT);
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL} {
                ++linenum;
                ACTION_ECHO;
                if (bracelevel <= 0) {
                   if ( doing_rule_action )
                      add_action( "\tYY_BREAK]""]\n" );

                   doing_rule_action = false;
                   BEGIN(SECT2);
                }
             }
        .      ACTION_ECHO;
}

<ACTION_STRING>{
	[^\[\]\"\\\n]+	ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
}
<CHARACTER_CONSTANT>{
	[^\[\]\'\\\n]+  ACTION_ECHO;
        \'              ACTION_ECHO; BEGIN(ACTION);
}
<ACTION_STRING,CHARACTER_CONSTANT>{
        (\\\n)*         ACTION_ECHO;
	\\(\\\n)*.	ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); } else { BEGIN(ACTION); }
        .	ACTION_ECHO;
}

<COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>>	{
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}

<EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>>	{
			synerr( _( "EOF encountered inside pattern" ) );
			yyterminate();
			}

<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			yylval = myesc( (unsigned char *) yytext );

			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}

<SECT3>{
    {M4QSTART}   fputs(escaped_qstart, yyout);
    {M4QEND}     fputs(escaped_qend, yyout);
    [^\[\]]*     ECHO;
    [][]         ECHO;
    <<EOF>>      {
        sectnum = 0;
        yyterminate();
    }
}
<SECT3_NOESCAPE>{
    {M4QSTART}  fprintf(yyout, "[""[%s]""]", escaped_qstart);
    {M4QEND}    fprintf(yyout, "[""[%s]""]", escaped_qend);
    [^][]*      ECHO;
    [][]        ECHO;
    <<EOF>>		{
       sectnum = 0;
       yyterminate();
    }
}
<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


/* yywrap - called at end of an input file; advance to the next one
 *
 * Decrements num_input_files.  If files remain, the next entry of
 * input_files is opened with set_input_file() and 0 is returned so that
 * scanning continues; otherwise 1 is returned to signal end of input.
 */

int yywrap(void)
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL or "-", stdin) for scanning
 *
 * Points yyin at the new stream, stores a freshly allocated copy of the
 * file name ("<stdin>" for standard input) in infilename and resets
 * linenum to 1.  A file that cannot be opened is reported through lerr().
 *
 * The previous infilename string is released once the new one is in
 * place.  It is owned by the heap (the LINEDIR rule above frees and
 * replaces it the same way), so overwriting the pointer without freeing
 * leaked one string for every additional input file.
 */

void set_input_file( char *file )
	{
	/* Remember the old name; it is freed only after the new copy has
	 * been made, so this stays safe even if file aliases it.
	 */
	char *old_name = infilename;

	if ( file && strcmp( file, "-" ) )
		{
		infilename = xstrdup(file);
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerr( _( "can't open %s" ), file );
		}

	else
		{
		yyin = stdin;
		infilename = xstrdup("<stdin>");
		}

	free( old_name );	/* free(NULL) is a no-op */
	linenum = 1;
	}