1 /* $NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $ */
2
3 /* parse.y - parser for flex input */
4
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
7 %token TOK_TABLES_FILE
8
9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11
12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
14
15 %left CCL_OP_DIFF CCL_OP_UNION
16
17 /*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.
20 * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
21 * Regular Expression (ERE) precedence that has the repeat operator
22 * higher than concatenation. This causes ab{3} to yield abbb.
23 *
24 * In order to support the POSIX and AT&T precedence and the flex
25 * precedence we define two token sets for the begin and end tokens of
26 * the repeat operator, '{' and '}'. The lexical scanner chooses
27 * which tokens to return based on whether posix_compat or lex_compat
28 * are specified. Specifying either posix_compat or lex_compat will
29 * cause flex to parse scanner files as per the AT&T and
30 * POSIX-mandated behavior.
31 */
32
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34
35
36 %{
37 /* Copyright (c) 1990 The Regents of the University of California. */
38 /* All rights reserved. */
39
40 /* This code is derived from software contributed to Berkeley by */
41 /* Vern Paxson. */
42
43 /* The United States Government has rights in this work pursuant */
44 /* to contract no. DE-AC03-76SF00098 between the United States */
45 /* Department of Energy and the University of California. */
46
47 /* This file is part of flex. */
48
49 /* Redistribution and use in source and binary forms, with or without */
50 /* modification, are permitted provided that the following conditions */
51 /* are met: */
52
53 /* 1. Redistributions of source code must retain the above copyright */
54 /* notice, this list of conditions and the following disclaimer. */
55 /* 2. Redistributions in binary form must reproduce the above copyright */
56 /* notice, this list of conditions and the following disclaimer in the */
57 /* documentation and/or other materials provided with the distribution. */
58
59 /* Neither the name of the University nor the names of its contributors */
60 /* may be used to endorse or promote products derived from this software */
61 /* without specific prior written permission. */
62
63 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66 /* PURPOSE. */
67 #include "flexdef.h"
68 __RCSID("$NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $");
69
70 #include "tables.h"
71
/* Scratch state shared by the grammar actions below. */
int pat;			/* machine of the rule currently being built */
int scnum;			/* start condition number returned by sclookup() */
int eps;			/* epsilon state used when building '$' context */
int headcnt;			/* head length handed to finish_rule() */
int trailcnt;			/* trail length handed to finish_rule() */
int lastchar;			/* last character added to the current ccl */
int i;				/* general-purpose loop index */
int rulelen;			/* running length of the current rule */
int trlcontxt;			/* true once trailing context has been seen */
int xcluflg;			/* true while declaring exclusive start conditions */
int currccl;			/* character class currently being filled in */
int cclsorted;			/* true while the current ccl stays in order */
int varlength;			/* true if the pattern seen so far is variable-length */
int variable_trail_rule;	/* true if the rule needs variable trailing context */

/* Stack of active start conditions; entries 1..scon_stk_ptr are in use. */
int *scon_stk;
int scon_stk_ptr;

/* Whether the two '.' character classes (ccldot, cclany) exist yet. */
static int madeany = false;
static int ccldot, cclany;

/* Whether the previous rule's action was '|'. */
int previous_continued_action;
81
/* Issue a warning whose text is built from a format and two arguments.
 * The expanded message is limited to MAXLINE bytes and handed to lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_buf[MAXLINE]; \
		snprintf( fw3_buf, sizeof(fw3_buf), (fmt), (a1), (a2) ); \
		lwarn( fw3_buf ); \
	} while (0)
88
/* Expand a POSIX character class expression: add to currccl every ASCII
 * character below csize for which the classification function is true.
 */
#define CCL_EXPR(func) \
	do { \
		int cce_ch; \
		for ( cce_ch = 0; cce_ch < csize; ++cce_ch ) { \
			if ( isascii(cce_ch) && func(cce_ch) ) { \
				ccladd( currccl, cce_ch ); \
			} \
		} \
	} while (0)
97
/* Negated class expression: add to currccl every character below csize
 * for which the classification function is false.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int cce_ch; \
		for ( cce_ch = 0; cce_ch < csize; ++cce_ch ) { \
			if ( !func(cce_ch) ) { \
				ccladd( currccl, cce_ch ); \
			} \
		} \
	} while (0)
106
/* isblank() is POSIX but not ANSI C, so test for space and tab by hand. */
#define IS_BLANK(c) ( ' ' == (c) || '\t' == (c) )
109
110 /* On some over-ambitious machines, such as DEC Alpha's, the default
111 * token type is "long" instead of "int"; this leads to problems with
112 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
113 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
114 * following should ensure that the default token type is "int".
115 */
116 #define YYSTYPE int
117
118 %}
119
120 %%
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			/* Every user rule has been read; append the default rule. */
			int def_rule;

			/* The default pattern is a negated, empty character class. */
			pat = cclinit();
			cclnegate( pat );
			def_rule = mkstate( -pat );

			/* Record which rule is the default one so that no
			 * "can't match" warning is generated for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0 );

			/* Branch every start condition to the default rule. */
			for ( i = 1; i <= lastsc; ++i ) {
				scset[i] = mkbranch( scset[i], def_rule );
			}

			/* The default action is chosen by spprdflt. */
			add_action( spprdflt
				? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
				: "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;
149
initlex		:
			{
			/* Before any rule is processed, install the default
			 * start condition for the DFA.
			 */
			scinstal( "INITIAL", false );
			}
		;
157
/* Section 1: any mix of start condition declarations and %option lines. */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|  /* empty */
		|  error
			{
			synerr( _("unknown error processing section 1") );
			}
		;
164
sect1end	:  SECTEND
			{
			/* Section 1 is complete: validate the options, then
			 * create an empty start condition stack with room for
			 * lastsc + 1 entries.
			 */
			check_options();

			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
172
/* Remember whether the names that follow are declared exclusive. */
startconddecl	:  SCDECL
			{
			xcluflg = false;
			}

		|  XSCDECL
			{
			xcluflg = true;
			}
		;
179
/* Each name in a declaration list is installed as a start condition,
 * using the exclusivity recorded in xcluflg.
 */
namelist1	:  namelist1 NAME
			{
			scinstal( nmstr, xcluflg );
			}

		|  NAME
			{
			scinstal( nmstr, xcluflg );
			}

		|  error
			{
			synerr( _("bad start condition list") );
			}
		;
189
/* A %option line: the keyword followed by zero or more option settings. */
options		:  TOK_OPTION optionlist
		;

optionlist	:  optionlist option
		|  /* empty */
		;
196
/* Options of the form keyword=NAME; each one stores a private copy of
 * the name text.
 */
option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup(nmstr);
			did_outfilename = 1;
			}

		|  TOK_EXTRA_TYPE '=' NAME
			{
			extra_type = xstrdup(nmstr);
			}

		|  TOK_PREFIX '=' NAME
			{
			prefix = xstrdup(nmstr);

			/* Square brackets are not allowed in a prefix. */
			if ( strchr(prefix, '[') || strchr(prefix, ']') ) {
				flexerror(_("Prefix must not contain [ or ]"));
			}
			}

		|  TOK_YYCLASS '=' NAME
			{
			yyclass = xstrdup(nmstr);
			}

		|  TOK_HEADER_FILE '=' NAME
			{
			headerfilename = xstrdup(nmstr);
			}

		|  TOK_TABLES_FILE '=' NAME
			{
			tablesext = true;
			tablesfilename = xstrdup(nmstr);
			}
		;
215
/* Section 2: a list of rules, or of braced groups of rules sharing a
 * start condition prefix.  After each one, the start condition stack is
 * cut back to the depth that scon saved.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{
			scon_stk_ptr = $2;
			}

		|  sect2 scon '{' sect2 '}'
			{
			scon_stk_ptr = $2;
			}

		|  /* empty */
		;
222
initforrule	:
			{
			/* Reset the per-rule state before one rule is parsed. */
			trlcontxt = false;
			variable_trail_rule = false;
			varlength = false;

			trailcnt = 0;
			headcnt = 0;
			rulelen = 0;

			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
235
flexrule	:  '^' rule
			{
			/* Rule anchored at the beginning of a line. */
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 ) {
				/* Add only to the start conditions on the stack. */
				for ( i = 1; i <= scon_stk_ptr; ++i ) {
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]], pat );
				}
			} else {
				/* Add to every non-exclusive start condition. */
				for ( i = 1; i <= lastsc; ++i ) {
					if ( ! scxclu[i] ) {
						scbol[i] = mkbranch( scbol[i], pat );
					}
				}
			}

			/* The first '^' rule switches on beginning-of-line
			 * handling, which may be worth a performance note.
			 */
			if ( ! bol_needed ) {
				bol_needed = true;

				if ( performance_report > 1 ) {
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}
			}

		|  rule
			{
			/* Ordinary, unanchored rule. */
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 ) {
				for ( i = 1; i <= scon_stk_ptr; ++i ) {
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]], pat );
				}
			} else {
				for ( i = 1; i <= lastsc; ++i ) {
					if ( ! scxclu[i] ) {
						scset[i] = mkbranch( scset[i], pat );
					}
				}
			}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr <= 0 ) {
				/* No start conditions were given, so this EOF rule
				 * applies to every start condition that does not
				 * have an EOF action yet.
				 */
				for ( i = 1; i <= lastsc; ++i ) {
					if ( ! sceof[i] ) {
						scon_stk[++scon_stk_ptr] = i;
					}
				}
			}

			if ( scon_stk_ptr == 0 ) {
				lwarn(
			"all start conditions already have <<EOF>> rules" );
			} else {
				build_eof_action();
			}
			}

		|  error
			{
			synerr( _("unrecognized rule") );
			}
		;
322
/* Empty marker rule whose value is the current depth of the start
 * condition stack.
 */
scon_stk_ptr	:
			{
			$$ = scon_stk_ptr;
			}
		;
326
/* Optional start condition prefix of a rule.  The value of scon is the
 * stack depth from before the prefix pushed anything.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{
			$$ = $2;
			}

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition not already on the stack. */
			for ( i = 1; i <= lastsc; ++i ) {
				int j = 1;

				while ( j <= scon_stk_ptr && scon_stk[j] != i ) {
					++j;
				}

				if ( j > scon_stk_ptr ) {
					scon_stk[++scon_stk_ptr] = i;
				}
			}
			}

		|  /* empty */
			{
			$$ = scon_stk_ptr;
			}
		;
350
/* Comma-separated start condition names inside a rule's prefix. */
namelist2	:  namelist2 ',' sconname
		|  sconname
		|  error
			{
			synerr( _("bad start condition list") );
			}
		;
358
sconname	:  NAME
			{
			/* Look the name up; sclookup() returning 0 means it was
			 * never declared.
			 */
			scnum = sclookup( nmstr );

			if ( scnum == 0 ) {
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			} else {
				/* Search the stack for an earlier mention. */
				i = 1;
				while ( i <= scon_stk_ptr && scon_stk[i] != scnum ) {
					++i;
				}

				if ( i <= scon_stk_ptr ) {
					format_warn(
						"<%s> specified twice",
						scname[scnum] );
				} else {
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
			}
		;
381
rule		:  re2 re
			{
			/* Head followed by explicit trailing context. */
			if ( transchar[lastst[$2]] != SYM_EPSILON ) {
				/* Supply the final transition right away so that
				 * it gets marked as a trailing context state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );
			}

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action ) {
				/* Treat this as variable trailing context so
				 * that the backup happens before the action
				 * switch statement rather than inside the
				 * action.  Otherwise the rules "falling into"
				 * this action would erroneously perform the
				 * backup as well.
				 */
				if ( ! varlength || headcnt != 0 ) {
					lwarn(
		"trailing context made variable due to preceding '|' action" );
				}

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;
			}

			if ( lex_compat || (varlength && headcnt == 0) ) {
				/* Variable trailing context rule: mark the
				 * first part as the accepting "head".
				 *
				 * This could not be done at the start of the
				 * production: current_state_type was then set
				 * up for a trail rule, and add_accept() can
				 * create a new state.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
			} else {
				trailcnt = rulelen;
			}

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{
			synerr( _("trailing context used twice") );
			}

		|  re '$'
			{
			/* '$' is trailing context of exactly one newline. */
			headcnt = 0;
			trailcnt = rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt ) {
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
			} else if ( previous_continued_action ) {
				/* Same reasoning as in the "re2 re" production
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
			}

			if ( lex_compat || varlength ) {
				/* Again, see the "re2 re" production above. */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
			}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt ) {
				if ( lex_compat || (varlength && headcnt == 0) ) {
					/* Head and trail are both
					 * variable-length.
					 */
					variable_trail_rule = true;
				} else {
					trailcnt = rulelen;
				}
			}
			}
		;
500
501
/* Alternation: any '|' makes the pattern variable-length. */
re		:  re '|' series
			{
			varlength = true;

			$$ = mkor( $1, $3 );
			}

		|  series
			{
			$$ = $1;
			}
		;
511
512
re2		:  re '/'
			{
			/* Kept as a production of its own so that this
			 * reduction happens before the trailing series is
			 * parsed.
			 */
			if ( trlcontxt ) {
				synerr( _("trailing context used twice") );
			} else {
				trlcontxt = true;
			}

			if ( varlength ) {
				/* The head is variable-length; hope that the
				 * trailing context is fixed-length.
				 */
				varlength = false;
			} else {
				headcnt = rulelen;
			}

			/* Start counting afresh for the trailing part. */
			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
539
series		:  series singleton
			{
			/* Adjacent patterns are concatenated here. */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{
			$$ = $1;
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			/* Bounded repeat of the whole series. */
			varlength = true;

			if ( $3 > $5 || $3 < 0 ) {
				synerr( _("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 && $5 <= 0 ) {
				synerr(
					_("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 ) {
				/* Lower bound 0: optional 1-to-max repeat. */
				$$ = mkopt(
					mkrep( $1, 1, $5 ) );
			} else {
				$$ = mkrep( $1, $3, $5 );
			}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			/* Repeat with a lower bound only. */
			varlength = true;

			if ( $3 <= 0 ) {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			} else {
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series may be something like "(foo)", whose
			 * length is unknown here, so simply treat the result
			 * as variable-length.
			 */
			varlength = true;

			if ( $3 <= 0 ) {
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
			} else {
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}
			}

		;
614
singleton	:  singleton '*'
			{
			varlength = true;
			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			/* Bounded repeat of a single item. */
			varlength = true;

			if ( $3 > $5 || $3 < 0 ) {
				synerr( _("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 && $5 <= 0 ) {
				synerr(
					_("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 ) {
				/* Lower bound 0: optional 1-to-max repeat. */
				$$ = mkopt(
					mkrep( $1, 1, $5 ) );
			} else {
				$$ = mkrep( $1, $3, $5 );
			}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			/* Repeat with a lower bound only. */
			varlength = true;

			if ( $3 <= 0 ) {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			} else {
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton may be something like "(foo)", whose
			 * length is unknown here, so simply treat the result
			 * as variable-length.
			 */
			varlength = true;

			if ( $3 <= 0 ) {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			} else {
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}
			}

		|  '.'
			{
			if ( ! madeany ) {
				/* First use of '.': build both classes once. */

				/* The '.' class: everything except newline. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs ) {
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );
				}

				/* The (?s:'.') class: negated empty class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs ) {
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );
				}

				madeany = true;
			}

			++rulelen;

			if ( sf_dot_all() ) {
				$$ = mkstate( -cclany );
			} else {
				$$ = mkstate( -ccldot );
			}
			}

		|  fullccl
			{
			/* Sort the class members for fast searching. */
			qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs ) {
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );
			}

			++rulelen;

			if ( ccl_has_nl[$1] ) {
				rule_has_nl[num_rules] = true;
			}

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if ( ccl_has_nl[$1] ) {
				rule_has_nl[num_rules] = true;
			}

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{
			$$ = $2;
			}

		|  '(' re ')'
			{
			$$ = $2;
			}

		|  CHAR
			{
			++rulelen;

			if ( $1 == nlch ) {
				rule_has_nl[num_rules] = true;
			}

			if ( sf_case_ins() && has_case($1) ) {
				/* Build an alternation of both cases, as in (a|A). */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			} else {
				$$ = mkstate( $1 );
			}
			}
		;
/* Bracketed character classes combined by the set-difference and
 * set-union operators.
 */
fullccl		:  fullccl CCL_OP_DIFF braceccl
			{
			$$ = ccl_set_diff ($1, $3);
			}

		|  fullccl CCL_OP_UNION braceccl
			{
			$$ = ccl_set_union ($1, $3);
			}

		|  braceccl
		;
782
/* One bracketed character class, optionally negated by a leading '^'. */
braceccl	:  '[' ccl ']'
			{
			$$ = $2;
			}

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
793
ccl		:  ccl CHAR '-' CHAR
			{
			if ( sf_case_ins() ) {
				/* Warn when the intended range is unclear:
				 *
				 *  - one end has case and the other does not, or
				 *    the two ends differ in case, e.g. [@-z] or
				 *    [S-t];
				 *
				 *  - neither end has case and the range spans
				 *    uppercase characters but not lowercase (or
				 *    vice-versa), e.g. [@-_] spans [a-z] but not
				 *    [A-Z].
				 */
				if ( has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4)))
				     || (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4)) ) {
					format_warn3 (
			_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
						$2, $4);
				}
			}

			if ( $2 > $4 ) {
				synerr( _("negative range in character class") );
			} else {
				for ( i = $2; i <= $4; ++i ) {
					ccladd( $1, i );
				}

				/* Track whether this ccl is still in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Repeat for the opposite case. */
				if ( sf_case_ins() && has_case($2) && has_case($4) ) {
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i ) {
						ccladd( $1, i );
					}

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}
			}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Repeat for the opposite case. */
			if ( sf_case_ins() && has_case($2) ) {
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Keeping cclsorted accurate here is too hard. */
			cclsorted = false;
			$$ = $1;
			}

		|  /* empty */
			{
			/* Start a new, empty character class. */
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
886
/* POSIX character class expressions; each one adds its members to the
 * class currently being built (currccl).
 */
ccl_expr	:  CCE_ALNUM
			{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA
			{ CCL_EXPR(isalpha); }
		|  CCE_BLANK
			{ CCL_EXPR(IS_BLANK); }
		|  CCE_CNTRL
			{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT
			{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH
			{ CCL_EXPR(isgraph); }
		|  CCE_LOWER
			{
			CCL_EXPR(islower);

			/* A case-insensitive scanner takes both cases. */
			if ( sf_case_ins() ) {
				CCL_EXPR(isupper);
			}
			}
		|  CCE_PRINT
			{ CCL_EXPR(isprint); }
		|  CCE_PUNCT
			{ CCL_EXPR(ispunct); }
		|  CCE_SPACE
			{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT
			{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER
			{
			CCL_EXPR(isupper);

			/* A case-insensitive scanner takes both cases. */
			if ( sf_case_ins() ) {
				CCL_EXPR(islower);
			}
			}

		|  CCE_NEG_ALNUM
			{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA
			{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK
			{ CCL_NEG_EXPR(IS_BLANK); }
		|  CCE_NEG_CNTRL
			{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT
			{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH
			{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT
			{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT
			{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE
			{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT
			{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued and nothing is added.
			 */
			if ( sf_case_ins() ) {
				lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
			} else {
				CCL_NEG_EXPR(islower);
			}
			}
		|  CCE_NEG_UPPER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued and nothing is added.
			 */
			if ( sf_case_ins() ) {
				lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
			} else {
				CCL_NEG_EXPR(isupper);
			}
			}
		;
932
/* The contents of a quoted string, built up one character at a time. */
string		:  string CHAR
			{
			int ch_mach;

			if ( $2 == nlch ) {
				rule_has_nl[num_rules] = true;
			}

			++rulelen;

			/* A cased character in a case-insensitive scanner
			 * matches either case.
			 */
			if ( sf_case_ins() && has_case($2) ) {
				ch_mach = mkor (mkstate($2), mkstate(reverse_case($2)));
			} else {
				ch_mach = mkstate ($2);
			}

			$$ = link_machines( $1, ch_mach );
			}

		|  /* empty */
			{
			$$ = mkstate( SYM_EPSILON );
			}
		;
951
952 %%
953
954
955 /* build_eof_action - build the "<<EOF>>" action for the active start
956 * conditions
957 */
958
959 void build_eof_action(void)
960 {
961 int i;
962 char action_text[MAXLINE];
963
964 for ( i = 1; i <= scon_stk_ptr; ++i )
965 {
966 if ( sceof[scon_stk[i]] )
967 format_pinpoint_message(
968 "multiple <<EOF>> rules for start condition %s",
969 scname[scon_stk[i]] );
970
971 else
972 {
973 sceof[scon_stk[i]] = true;
974
975 if (previous_continued_action /* && previous action was regular */)
976 add_action("YY_RULE_SETUP\n");
977
978 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
979 scname[scon_stk[i]] );
980 add_action( action_text );
981 }
982 }
983
984 line_directive_out(NULL, 1);
985 add_action("[[");
986
987 /* This isn't a normal rule after all - don't count it as
988 * such, so we don't have any holes in the rule numbering
989 * (which make generating "rule can never match" warnings
990 * more difficult.
991 */
992 --num_rules;
993 ++num_eof_rules;
994 }
995
996
997 /* format_synerr - write out formatted syntax error */
998
format_synerr(const char * msg,const char arg[])999 void format_synerr( const char *msg, const char arg[] )
1000 {
1001 char errmsg[MAXLINE];
1002
1003 (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1004 synerr( errmsg );
1005 }
1006
1007
1008 /* synerr - report a syntax error */
1009
synerr(const char * str)1010 void synerr( const char *str )
1011 {
1012 syntaxerror = true;
1013 pinpoint_message( str );
1014 }
1015
1016
1017 /* format_warn - write out formatted warning */
1018
format_warn(const char * msg,const char arg[])1019 void format_warn( const char *msg, const char arg[] )
1020 {
1021 char warn_msg[MAXLINE];
1022
1023 snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1024 lwarn( warn_msg );
1025 }
1026
1027
1028 /* lwarn - report a warning, unless -w was given */
1029
lwarn(const char * str)1030 void lwarn( const char *str )
1031 {
1032 line_warning( str, linenum );
1033 }
1034
1035 /* format_pinpoint_message - write out a message formatted with one string,
1036 * pinpointing its location
1037 */
1038
format_pinpoint_message(const char * msg,const char arg[])1039 void format_pinpoint_message( const char *msg, const char arg[] )
1040 {
1041 char errmsg[MAXLINE];
1042
1043 snprintf( errmsg, sizeof(errmsg), msg, arg );
1044 pinpoint_message( errmsg );
1045 }
1046
1047
1048 /* pinpoint_message - write out a message, pinpointing its location */
1049
pinpoint_message(const char * str)1050 void pinpoint_message( const char *str )
1051 {
1052 line_pinpoint( str, linenum );
1053 }
1054
1055
1056 /* line_warning - report a warning at a given line, unless -w was given */
1057
line_warning(const char * str,int line)1058 void line_warning( const char *str, int line )
1059 {
1060 char warning[MAXLINE];
1061
1062 if ( ! nowarn )
1063 {
1064 snprintf( warning, sizeof(warning), "warning, %s", str );
1065 line_pinpoint( warning, line );
1066 }
1067 }
1068
1069
1070 /* line_pinpoint - write out a message, pinpointing it at the given line */
1071
line_pinpoint(const char * str,int line)1072 void line_pinpoint( const char *str, int line )
1073 {
1074 fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1075 }
1076
1077
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( const char *msg )
{
	/* The parameter is deliberately unused. */
	(void)msg;
}
1086