1 /* $NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $ */
2
3 /* parse.y - parser for flex input */
4
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
7 %token OPT_TABLES
8
9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11
12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
14
15 %left CCL_OP_DIFF CCL_OP_UNION
16
17 /*
18 *POSIX and AT&T lex place the
19 * precedence of the repeat operator, {}, below that of concatenation.
20 * Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
21 * Regular Expression (ERE) precedence that has the repeat operator
22 * higher than concatenation. This causes ab{3} to yield abbb.
23 *
24 * In order to support the POSIX and AT&T precedence and the flex
25 * precedence we define two token sets for the begin and end tokens of
26 * the repeat operator, '{' and '}'. The lexical scanner chooses
27 * which tokens to return based on whether posix_compat or lex_compat
28 * are specified. Specifying either posix_compat or lex_compat will
29 * cause flex to parse scanner files as per the AT&T and
30 * POSIX-mandated behavior.
31 */
32
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34
35
36 %{
37 /* Copyright (c) 1990 The Regents of the University of California. */
38 /* All rights reserved. */
39
40 /* This code is derived from software contributed to Berkeley by */
41 /* Vern Paxson. */
42
43 /* The United States Government has rights in this work pursuant */
44 /* to contract no. DE-AC03-76SF00098 between the United States */
45 /* Department of Energy and the University of California. */
46
47 /* This file is part of flex. */
48
49 /* Redistribution and use in source and binary forms, with or without */
50 /* modification, are permitted provided that the following conditions */
51 /* are met: */
52
53 /* 1. Redistributions of source code must retain the above copyright */
54 /* notice, this list of conditions and the following disclaimer. */
55 /* 2. Redistributions in binary form must reproduce the above copyright */
56 /* notice, this list of conditions and the following disclaimer in the */
57 /* documentation and/or other materials provided with the distribution. */
58
59 /* Neither the name of the University nor the names of its contributors */
60 /* may be used to endorse or promote products derived from this software */
61 /* without specific prior written permission. */
62
63 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66 /* PURPOSE. */
67 #include "flexdef.h"
68 __RCSID("$NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $");
69
70 #include "tables.h"
71
72 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
73 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
74
75 int *scon_stk;
76 int scon_stk_ptr;
77
78 static int madeany = false; /* whether we've made the '.' character class */
79 static int ccldot, cclany;
80 int previous_continued_action; /* whether the previous rule's action was '|' */
81
/* format_warn3 - format a warning from a printf-style format and two
 * arguments into a MAXLINE-sized scratch buffer, then pass it to lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf( fw3_msg, sizeof(fw3_msg), (fmt), (a1), (a2) ); \
		lwarn( fw3_msg ); \
	} while (0)
88
/* Expand a POSIX character class expression: every code c in [0, csize)
 * that is ASCII and accepted by the predicate func is added to currccl.
 * func is only evaluated for codes that pass isascii().
 */
#define CCL_EXPR(func) \
	do { \
		int c; \
		for ( c = 0; c < csize; ++c ) \
			{ \
			if ( ! isascii(c) || ! func(c) ) \
				continue; \
			ccladd( currccl, c ); \
			} \
	} while (0)
97
/* Negated class: every code c in [0, csize) that the predicate func
 * rejects is added to currccl.  Unlike CCL_EXPR there is no isascii()
 * filter here.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int c; \
		for ( c = 0; c < csize; ++c ) \
			{ \
			if ( func(c) ) \
				continue; \
			ccladd( currccl, c ); \
			} \
	} while (0)
106
/* POSIX provides isblank(), but ANSI C (C89) does not, so test for the
 * two blank characters (tab and space) by hand.  The argument is
 * evaluated more than once.
 */
#define IS_BLANK(c) ((c) == '\t' || (c) == ' ')
109
110 /* On some over-ambitious machines, such as DEC Alpha's, the default
111 * token type is "long" instead of "int"; this leads to problems with
112 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
113 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
114 * following should ensure that the default token type is "int".
115 */
116 #define YYSTYPE int
117
118 %}
119
120 %%
/* goal - start symbol: section 1, the section separator, then section 2.
 * After the final initforrule has opened one more rule, the action turns
 * that rule into the default rule: a state on a freshly created, negated
 * character class, branched into scset[] of every start condition, whose
 * action is ECHO (or a fatal "scanner jammed" error when spprdflt is set).
 */
121 goal : initlex sect1 sect1end sect2 initforrule
122 { /* add default rule */
123 int def_rule;
124
125 pat = cclinit();
126 cclnegate( pat );
127
128 def_rule = mkstate( -pat );
129
130 /* Remember the number of the default rule so we
131 * don't generate "can't match" warnings for it.
132 */
133 default_rule = num_rules;
134
135 finish_rule( def_rule, false, 0, 0, 0);
136
137 for ( i = 1; i <= lastsc; ++i )
138 scset[i] = mkbranch( scset[i], def_rule );
139
140 if ( spprdflt )
141 add_action(
142 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
143 else
144 add_action( "ECHO" );
145
146 add_action( ";\n\tYY_BREAK\n" );
147 }
148 ;
149
/* initlex - empty production reduced before section 1 is parsed; its
 * action installs the "INITIAL" start condition with scinstal(), passing
 * false in the slot where namelist1 passes xcluflg.
 */
150 initlex :
151 { /* initialize for processing rules */
152
153 /* Create default DFA start condition. */
154 scinstal( "INITIAL", false );
155 }
156 ;
157
/* sect1 - possibly empty sequence of start-condition declarations
 * (startconddecl followed by a name list) and option lines.  A parse
 * error in this section is reported through synerr().
 */
158 sect1 : sect1 startconddecl namelist1
159 | sect1 options
160 |
161 | error
162 { synerr( _("unknown error processing section 1") ); }
163 ;
164
/* sect1end - the SECTEND token closing section 1.  Runs check_options(),
 * then allocates scon_stk with lastsc + 1 integers (the grammar indexes it
 * from 1, so that is room for one entry per start condition) and empties
 * the stack by zeroing scon_stk_ptr.
 */
165 sect1end : SECTEND
166 {
167 check_options();
168 scon_stk = allocate_integer_array( lastsc + 1 );
169 scon_stk_ptr = 0;
170 }
171 ;
172
/* startconddecl - keyword introducing a start-condition declaration.
 * Records in xcluflg which keyword was seen (false for SCDECL, true for
 * XSCDECL); namelist1 passes that flag on to scinstal() for each name.
 */
173 startconddecl : SCDECL
174 { xcluflg = false; }
175
176 | XSCDECL
177 { xcluflg = true; }
178 ;
179
/* namelist1 - one or more NAME tokens in a start-condition declaration.
 * Each name (taken from nmstr) is installed with scinstal() using the
 * xcluflg value set by the preceding startconddecl.
 */
180 namelist1 : namelist1 NAME
181 { scinstal( nmstr, xcluflg ); }
182
183 | NAME
184 { scinstal( nmstr, xcluflg ); }
185
186 | error
187 { synerr( _("bad start condition list") ); }
188 ;
189
/* options - an OPTION_OP token followed by a (possibly empty) list of
 * option settings.  No action of its own.
 */
190 options : OPTION_OP optionlist
191 ;
192
/* optionlist - zero or more option settings, left-recursive. */
193 optionlist : optionlist option
194 |
195 ;
196
/* option - a single "<option token> = NAME" setting.  Each alternative
 * stores a copy (copy_string) of nmstr in the matching global.
 * OPT_OUTFILE additionally sets did_outfilename, and OPT_TABLES
 * additionally sets tablesext.
 */
197 option : OPT_OUTFILE '=' NAME
198 {
199 outfilename = copy_string( nmstr );
200 did_outfilename = 1;
201 }
202 | OPT_EXTRA_TYPE '=' NAME
203 { extra_type = copy_string( nmstr ); }
204 | OPT_PREFIX '=' NAME
205 { prefix = copy_string( nmstr ); }
206 | OPT_YYCLASS '=' NAME
207 { yyclass = copy_string( nmstr ); }
208 | OPT_HEADER '=' NAME
209 { headerfilename = copy_string( nmstr ); }
210 | OPT_TABLES '=' NAME
211 { tablesext = true; tablesfilename = copy_string( nmstr ); }
212 ;
213
/* sect2 - zero or more rules.  Each entry is either a single rule with an
 * optional start-condition prefix (scon), or a brace-enclosed group of
 * rules sharing one prefix.  $2 is the value of scon, which is the
 * scon_stk_ptr in effect before scon pushed anything; assigning it back
 * pops the start conditions that applied only to this rule or group.
 */
214 sect2 : sect2 scon initforrule flexrule '\n'
215 { scon_stk_ptr = $2; }
216 | sect2 scon '{' sect2 '}'
217 { scon_stk_ptr = $2; }
218 |
219 ;
220
/* initforrule - empty production reduced just before a rule is parsed.
 * Resets the per-rule trailing-context flags and length counters, saves
 * continued_action into previous_continued_action, sets in_rule, and
 * calls new_rule().
 */
221 initforrule :
222 {
223 /* Initialize for a parse of one rule. */
224 trlcontxt = variable_trail_rule = varlength = false;
225 trailcnt = headcnt = rulelen = 0;
226 current_state_type = STATE_NORMAL;
227 previous_continued_action = continued_action;
228 in_rule = true;
229
230 new_rule();
231 }
232 ;
233
/* flexrule - one complete rule of section 2.
 *
 *   '^' rule  finish the rule and branch its machine into scbol[] (the
 *             beginning-of-line table) of the selected start conditions;
 *             also sets bol_needed the first time this form is seen.
 *   rule      same, but branches into scset[].
 *   EOF_OP    an <<EOF>> rule, handled by build_eof_action().
 *
 * "Selected" means the entries scon_stk[1..scon_stk_ptr] when the stack is
 * non-empty, otherwise every start condition i with scxclu[i] clear.
 */
234 flexrule : '^' rule
235 {
236 pat = $2;
237 finish_rule( pat, variable_trail_rule,
238 headcnt, trailcnt , previous_continued_action);
239
240 if ( scon_stk_ptr > 0 )
241 {
242 for ( i = 1; i <= scon_stk_ptr; ++i )
243 scbol[scon_stk[i]] =
244 mkbranch( scbol[scon_stk[i]],
245 pat );
246 }
247
248 else
249 {
250 /* Add to all non-exclusive start conditions,
251 * including the default (0) start condition.
252 */
253
254 for ( i = 1; i <= lastsc; ++i )
255 if ( ! scxclu[i] )
256 scbol[i] = mkbranch( scbol[i],
257 pat );
258 }
259
260 if ( ! bol_needed )
261 {
262 bol_needed = true;
263
264 if ( performance_report > 1 )
265 pinpoint_message(
266 "'^' operator results in sub-optimal performance" );
267 }
268 }
269
270 | rule
271 {
272 pat = $1;
273 finish_rule( pat, variable_trail_rule,
274 headcnt, trailcnt , previous_continued_action);
275
276 if ( scon_stk_ptr > 0 )
277 {
278 for ( i = 1; i <= scon_stk_ptr; ++i )
279 scset[scon_stk[i]] =
280 mkbranch( scset[scon_stk[i]],
281 pat );
282 }
283
284 else
285 {
286 for ( i = 1; i <= lastsc; ++i )
287 if ( ! scxclu[i] )
288 scset[i] =
289 mkbranch( scset[i],
290 pat );
291 }
292 }
293
294 | EOF_OP
295 {
296 if ( scon_stk_ptr > 0 )
297 build_eof_action();
298
299 else
300 {
301 /* This EOF applies to all start conditions
302 * which don't already have EOF actions.
303 */
304 for ( i = 1; i <= lastsc; ++i )
305 if ( ! sceof[i] )
306 scon_stk[++scon_stk_ptr] = i;
307
308 if ( scon_stk_ptr == 0 )
309 lwarn(
310 "all start conditions already have <<EOF>> rules" );
311
312 else
313 build_eof_action();
314 }
315 }
316
317 | error
318 { synerr( _("unrecognized rule") ); }
319 ;
320
/* scon_stk_ptr - empty marker production whose value is the current
 * scon_stk_ptr.  scon places it right after '<' so the stack depth is
 * captured before namelist2 pushes any start condition.
 */
321 scon_stk_ptr :
322 { $$ = scon_stk_ptr; }
323 ;
324
/* scon - optional start-condition prefix of a rule.  In every alternative
 * the value is the scon_stk_ptr from before this prefix pushed anything,
 * so that sect2 can restore it afterwards.
 *
 *   '<' names '>'  namelist2 pushes the named start conditions.
 *   '<' '*' '>'    pushes every start condition 1..lastsc that is not
 *                  already on the stack.
 *   (empty)        pushes nothing.
 */
325 scon : '<' scon_stk_ptr namelist2 '>'
326 { $$ = $2; }
327
328 | '<' '*' '>'
329 {
330 $$ = scon_stk_ptr;
331
332 for ( i = 1; i <= lastsc; ++i )
333 {
334 int j;
335
336 for ( j = 1; j <= scon_stk_ptr; ++j )
337 if ( scon_stk[j] == i )
338 break;
339
340 if ( j > scon_stk_ptr )
341 scon_stk[++scon_stk_ptr] = i;
342 }
343 }
344
345 |
346 { $$ = scon_stk_ptr; }
347 ;
348
/* namelist2 - comma-separated list of start-condition names inside
 * '<' ... '>'.  The work is done by sconname; a parse error is reported
 * through synerr().
 */
349 namelist2 : namelist2 ',' sconname
350
351 | sconname
352
353 | error
354 { synerr( _("bad start condition list") ); }
355 ;
356
/* sconname - a single start-condition name in a rule prefix.  Looks the
 * name up with sclookup(); a result of 0 is reported as an undeclared
 * start condition.  Otherwise the number is pushed on scon_stk unless it
 * is already there, in which case a "specified twice" warning is issued.
 */
357 sconname : NAME
358 {
359 if ( (scnum = sclookup( nmstr )) == 0 )
360 format_pinpoint_message(
361 "undeclared start condition %s",
362 nmstr );
363 else
364 {
365 for ( i = 1; i <= scon_stk_ptr; ++i )
366 if ( scon_stk[i] == scnum )
367 {
368 format_warn(
369 "<%s> specified twice",
370 scname[scnum] );
371 break;
372 }
373
374 if ( i > scon_stk_ptr )
375 scon_stk[++scon_stk_ptr] = scnum;
376 }
377 }
378 ;
379
/* rule - a pattern, possibly with trailing context.
 *
 *   re2 re      head (re2, i.e. "re /") followed by the trailing part.
 *   re2 re '$'  error: two kinds of trailing context at once.
 *   re '$'      end-of-line trailing context, built as the pattern
 *               followed by an epsilon state and a '\n' state.
 *   re          plain pattern.
 *
 * The actions decide between fixed-length trailing context (trailcnt or
 * headcnt recorded) and variable trailing context (variable_trail_rule
 * set and the head marked with add_accept()).
 */
380 rule : re2 re
381 {
382 if ( transchar[lastst[$2]] != SYM_EPSILON )
383 /* Provide final transition \now/ so it
384 * will be marked as a trailing context
385 * state.
386 */
387 $2 = link_machines( $2,
388 mkstate( SYM_EPSILON ) );
389
390 mark_beginning_as_normal( $2 );
391 current_state_type = STATE_NORMAL;
392
393 if ( previous_continued_action )
394 {
395 /* We need to treat this as variable trailing
396 * context so that the backup does not happen
397 * in the action but before the action switch
398 * statement. If the backup happens in the
399 * action, then the rules "falling into" this
400 * one's action will *also* do the backup,
401 * erroneously.
402 */
403 if ( ! varlength || headcnt != 0 )
404 lwarn(
405 "trailing context made variable due to preceding '|' action" );
406
407 /* Mark as variable. */
408 varlength = true;
409 headcnt = 0;
410
411 }
412
413 if ( lex_compat || (varlength && headcnt == 0) )
414 { /* variable trailing context rule */
415 /* Mark the first part of the rule as the
416 * accepting "head" part of a trailing
417 * context rule.
418 *
419 * By the way, we didn't do this at the
420 * beginning of this production because back
421 * then current_state_type was set up for a
422 * trail rule, and add_accept() can create
423 * a new state ...
424 */
425 add_accept( $1,
426 num_rules | YY_TRAILING_HEAD_MASK );
427 variable_trail_rule = true;
428 }
429
430 else
431 trailcnt = rulelen;
432
433 $$ = link_machines( $1, $2 );
434 }
435
436 | re2 re '$'
437 { synerr( _("trailing context used twice") ); }
438
439 | re '$'
440 {
441 headcnt = 0;
442 trailcnt = 1;
443 rulelen = 1;
444 varlength = false;
445
446 current_state_type = STATE_TRAILING_CONTEXT;
447
448 if ( trlcontxt )
449 {
450 synerr( _("trailing context used twice") );
451 $$ = mkstate( SYM_EPSILON );
452 }
453
454 else if ( previous_continued_action )
455 {
456 /* See the comment in the rule for "re2 re"
457 * above.
458 */
459 lwarn(
460 "trailing context made variable due to preceding '|' action" );
461
462 varlength = true;
463 }
464
465 if ( lex_compat || varlength )
466 {
467 /* Again, see the comment in the rule for
468 * "re2 re" above.
469 */
470 add_accept( $1,
471 num_rules | YY_TRAILING_HEAD_MASK );
472 variable_trail_rule = true;
473 }
474
475 trlcontxt = true;
476
477 eps = mkstate( SYM_EPSILON );
478 $$ = link_machines( $1,
479 link_machines( eps, mkstate( '\n' ) ) );
480 }
481
482 | re
483 {
484 $$ = $1;
485
486 if ( trlcontxt )
487 {
488 if ( lex_compat || (varlength && headcnt == 0) )
489 /* Both head and trail are
490 * variable-length.
491 */
492 variable_trail_rule = true;
493 else
494 trailcnt = rulelen;
495 }
496 }
497 ;
498
499
/* re - alternation of one or more series.  Joining two alternatives with
 * mkor() marks the pattern as variable-length (varlength = true).
 */
500 re : re '|' series
501 {
502 varlength = true;
503 $$ = mkor( $1, $3 );
504 }
505
506 | series
507 { $$ = $1; }
508 ;
509
510
/* re2 - the head of a trailing-context pattern: an re followed by '/'.
 * Reports a second use of trailing context, otherwise sets trlcontxt.
 * If the head is fixed-length its length (rulelen) is saved in headcnt;
 * if it is variable-length, varlength is cleared so that it now tracks
 * the trailing part.  rulelen restarts at 0 and states created from here
 * on are typed STATE_TRAILING_CONTEXT.
 */
511 re2 : re '/'
512 {
513 /* This rule is written separately so the
514 * reduction will occur before the trailing
515 * series is parsed.
516 */
517
518 if ( trlcontxt )
519 synerr( _("trailing context used twice") );
520 else
521 trlcontxt = true;
522
523 if ( varlength )
524 /* We hope the trailing context is
525 * fixed-length.
526 */
527 varlength = false;
528 else
529 headcnt = rulelen;
530
531 rulelen = 0;
532
533 current_state_type = STATE_TRAILING_CONTEXT;
534 $$ = $1;
535 }
536 ;
537
/* series - concatenation of singletons, plus the POSIX-token forms of the
 * repeat operator.  With BEGIN_REPEAT_POSIX/END_REPEAT_POSIX the repeat
 * count applies to the whole series parsed so far ($1), not just to the
 * last singleton.
 *
 *   {m,n}  m > n or m < 0 is an error; m == 0 requires n > 0 and yields
 *          an optional 1..n repetition; otherwise mkrep( $1, m, n ).
 *   {m,}   m must be positive; repeats m..INFINITE_REPEAT times.
 *   {m}    m must be positive; $1 followed by m - 1 copies of itself.
 *
 * On an error the series is returned unchanged.
 */
538 series : series singleton
539 {
540 /* This is where concatenation of adjacent patterns
541 * gets done.
542 */
543 $$ = link_machines( $1, $2 );
544 }
545
546 | singleton
547 { $$ = $1; }
548
549 | series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
550 {
551 varlength = true;
552
553 if ( $3 > $5 || $3 < 0 )
554 {
555 synerr( _("bad iteration values") );
556 $$ = $1;
557 }
558 else
559 {
560 if ( $3 == 0 )
561 {
562 if ( $5 <= 0 )
563 {
564 synerr(
565 _("bad iteration values") );
566 $$ = $1;
567 }
568 else
569 $$ = mkopt(
570 mkrep( $1, 1, $5 ) );
571 }
572 else
573 $$ = mkrep( $1, $3, $5 );
574 }
575 }
576
577 | series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
578 {
579 varlength = true;
580
581 if ( $3 <= 0 )
582 {
583 synerr( _("iteration value must be positive") );
584 $$ = $1;
585 }
586
587 else
588 $$ = mkrep( $1, $3, INFINITE_REPEAT );
589 }
590
591 | series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
592 {
593 /* The series could be something like "(foo)",
594 * in which case we have no idea what its length
595 * is, so we punt here.
596 */
597 varlength = true;
598
599 if ( $3 <= 0 )
600 {
601 synerr( _("iteration value must be positive")
602 );
603 $$ = $1;
604 }
605
606 else
607 $$ = link_machines( $1,
608 copysingl( $1, $3 - 1 ) );
609 }
610
611 ;
612
/* singleton - the smallest pattern unit, with optional postfix operators.
 *
 *   s*  s+  s?        closure, positive closure, optional; all set
 *                     varlength.
 *   s{m,n} s{m,} s{m} flex-token repeat forms, applied to the singleton
 *                     only; same value checks as the POSIX forms in
 *                     "series".
 *   '.'               the lazily created class ccldot (everything but
 *                     '\n'), or cclany (everything) when sf_dot_all().
 *   fullccl, PREVCCL  a character class; records rule_has_nl when the
 *                     class is flagged in ccl_has_nl[].
 *   '"' string '"'    quoted literal.
 *   '(' re ')'        grouping.
 *   CHAR              one character; when sf_case_ins() and the character
 *                     has case, an alternation of both cases.
 *
 * Alternatives that consume exactly one input character bump rulelen.
 */
613 singleton : singleton '*'
614 {
615 varlength = true;
616
617 $$ = mkclos( $1 );
618 }
619
620 | singleton '+'
621 {
622 varlength = true;
623 $$ = mkposcl( $1 );
624 }
625
626 | singleton '?'
627 {
628 varlength = true;
629 $$ = mkopt( $1 );
630 }
631
632 | singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
633 {
634 varlength = true;
635
636 if ( $3 > $5 || $3 < 0 )
637 {
638 synerr( _("bad iteration values") );
639 $$ = $1;
640 }
641 else
642 {
643 if ( $3 == 0 )
644 {
645 if ( $5 <= 0 )
646 {
647 synerr(
648 _("bad iteration values") );
649 $$ = $1;
650 }
651 else
652 $$ = mkopt(
653 mkrep( $1, 1, $5 ) );
654 }
655 else
656 $$ = mkrep( $1, $3, $5 );
657 }
658 }
659
660 | singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
661 {
662 varlength = true;
663
664 if ( $3 <= 0 )
665 {
666 synerr( _("iteration value must be positive") );
667 $$ = $1;
668 }
669
670 else
671 $$ = mkrep( $1, $3, INFINITE_REPEAT );
672 }
673
674 | singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
675 {
676 /* The singleton could be something like "(foo)",
677 * in which case we have no idea what its length
678 * is, so we punt here.
679 */
680 varlength = true;
681
682 if ( $3 <= 0 )
683 {
684 synerr( _("iteration value must be positive") );
685 $$ = $1;
686 }
687
688 else
689 $$ = link_machines( $1,
690 copysingl( $1, $3 - 1 ) );
691 }
692
693 | '.'
694 {
695 if ( ! madeany )
696 {
697 /* Create the '.' character class. */
698 ccldot = cclinit();
699 ccladd( ccldot, '\n' );
700 cclnegate( ccldot );
701
702 if ( useecs )
703 mkeccl( ccltbl + cclmap[ccldot],
704 ccllen[ccldot], nextecm,
705 ecgroup, csize, csize );
706
707 /* Create the (?s:'.') character class. */
708 cclany = cclinit();
709 cclnegate( cclany );
710
711 if ( useecs )
712 mkeccl( ccltbl + cclmap[cclany],
713 ccllen[cclany], nextecm,
714 ecgroup, csize, csize );
715
716 madeany = true;
717 }
718
719 ++rulelen;
720
721 if (sf_dot_all())
722 $$ = mkstate( -cclany );
723 else
724 $$ = mkstate( -ccldot );
725 }
726
727 | fullccl
728 {
729 /* Sort characters for fast searching.
730 */
731 qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
732
733 if ( useecs )
734 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
735 nextecm, ecgroup, csize, csize );
736
737 ++rulelen;
738
739 if (ccl_has_nl[$1])
740 rule_has_nl[num_rules] = true;
741
742 $$ = mkstate( -$1 );
743 }
744
745 | PREVCCL
746 {
747 ++rulelen;
748
749 if (ccl_has_nl[$1])
750 rule_has_nl[num_rules] = true;
751
752 $$ = mkstate( -$1 );
753 }
754
755 | '"' string '"'
756 { $$ = $2; }
757
758 | '(' re ')'
759 { $$ = $2; }
760
761 | CHAR
762 {
763 ++rulelen;
764
765 if ($1 == nlch)
766 rule_has_nl[num_rules] = true;
767
768 if (sf_case_ins() && has_case($1))
769 /* create an alternation, as in (a|A) */
770 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
771 else
772 $$ = mkstate( $1 );
773 }
774 ;
/* fullccl - one bracketed character class, or several combined left to
 * right with the CCL_OP_DIFF / CCL_OP_UNION operators via ccl_set_diff()
 * and ccl_set_union().  The single-class alternative has no action, so
 * yacc's default ($$ = $1) applies.
 */
775 fullccl:
776 fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
777 | fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); }
778 | braceccl
779 ;
780
/* braceccl - a character class in square brackets.  With a leading '^'
 * the class built by ccl is negated with cclnegate() before being
 * returned.
 */
781 braceccl:
782
783 '[' ccl ']' { $$ = $2; }
784
785 | '[' '^' ccl ']'
786 {
787 cclnegate( $3 );
788 $$ = $3;
789 }
790 ;
791
/* ccl - the contents of a bracketed character class, built left to right.
 * The empty alternative creates the class with cclinit() and makes it the
 * current class (currccl); each further element adds to it:
 *
 *   CHAR '-' CHAR  a range; a descending range is a syntax error.  In a
 *                  case-insensitive scanner suspicious ranges draw a
 *                  warning, and when both ends have case the range with
 *                  reversed case is added as well.
 *   CHAR           a single character (plus its other case when
 *                  case-insensitive).
 *   ccl_expr       a named class such as [:alpha:].
 *
 * cclsorted / lastchar track whether the members were added in ascending
 * order.
 */
792 ccl : ccl CHAR '-' CHAR
793 {
794
795 if (sf_case_ins())
796 {
797
798 /* If one end of the range has case and the other
799 * does not, or the cases are different, then we're not
800 * sure what range the user is trying to express.
801 * Examples: [@-z] or [S-t]
802 */
803 if (has_case ($2) != has_case ($4)
804 || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
805 || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
806 format_warn3 (
807 _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
808 $2, $4);
809
810 /* If the range spans uppercase characters but not
811 * lowercase (or vice-versa), then should we automatically
812 * include lowercase characters in the range?
813 * Example: [@-_] spans [a-z] but not [A-Z]
814 */
815 else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
816 format_warn3 (
817 _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
818 $2, $4);
819 }
820
821 if ( $2 > $4 )
822 synerr( _("negative range in character class") );
823
824 else
825 {
826 for ( i = $2; i <= $4; ++i )
827 ccladd( $1, i );
828
829 /* Keep track if this ccl is staying in
830 * alphabetical order.
831 */
832 cclsorted = cclsorted && ($2 > lastchar);
833 lastchar = $4;
834
835 /* Do it again for upper/lowercase */
836 if (sf_case_ins() && has_case($2) && has_case($4)){
837 $2 = reverse_case ($2);
838 $4 = reverse_case ($4);
839
840 for ( i = $2; i <= $4; ++i )
841 ccladd( $1, i );
842
843 cclsorted = cclsorted && ($2 > lastchar);
844 lastchar = $4;
845 }
846
847 }
848
849 $$ = $1;
850 }
851
852 | ccl CHAR
853 {
854 ccladd( $1, $2 );
855 cclsorted = cclsorted && ($2 > lastchar);
856 lastchar = $2;
857
858 /* Do it again for upper/lowercase */
859 if (sf_case_ins() && has_case($2)){
860 $2 = reverse_case ($2);
861 ccladd ($1, $2);
862
863 cclsorted = cclsorted && ($2 > lastchar);
864 lastchar = $2;
865 }
866
867 $$ = $1;
868 }
869
870 | ccl ccl_expr
871 {
872 /* Too hard to properly maintain cclsorted. */
873 cclsorted = false;
874 $$ = $1;
875 }
876
877 |
878 {
879 cclsorted = true;
880 lastchar = 0;
881 currccl = $$ = cclinit();
882 }
883 ;
884
/* ccl_expr - a named character-class expression inside brackets.  Each
 * CCE_* token adds to currccl the characters accepted by the matching
 * predicate (CCL_EXPR); each CCE_NEG_* token adds the characters the
 * predicate rejects (CCL_NEG_EXPR).
 *
 * Case-insensitive scanners (sf_case_ins()) are special-cased: the lower
 * and upper classes each add both cases, while the negated lower and
 * upper classes only issue a warning and add nothing.
 */
885 ccl_expr:
886 CCE_ALNUM { CCL_EXPR(isalnum); }
887 | CCE_ALPHA { CCL_EXPR(isalpha); }
888 | CCE_BLANK { CCL_EXPR(IS_BLANK); }
889 | CCE_CNTRL { CCL_EXPR(iscntrl); }
890 | CCE_DIGIT { CCL_EXPR(isdigit); }
891 | CCE_GRAPH { CCL_EXPR(isgraph); }
892 | CCE_LOWER {
893 CCL_EXPR(islower);
894 if (sf_case_ins())
895 CCL_EXPR(isupper);
896 }
897 | CCE_PRINT { CCL_EXPR(isprint); }
898 | CCE_PUNCT { CCL_EXPR(ispunct); }
899 | CCE_SPACE { CCL_EXPR(isspace); }
900 | CCE_XDIGIT { CCL_EXPR(isxdigit); }
901 | CCE_UPPER {
902 CCL_EXPR(isupper);
903 if (sf_case_ins())
904 CCL_EXPR(islower);
905 }
906
907 | CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); }
908 | CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); }
909 | CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); }
910 | CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); }
911 | CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); }
912 | CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); }
913 | CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); }
914 | CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); }
915 | CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); }
916 | CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); }
917 | CCE_NEG_LOWER {
918 if ( sf_case_ins() )
919 lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
920 else
921 CCL_NEG_EXPR(islower);
922 }
923 | CCE_NEG_UPPER {
924 if ( sf_case_ins() )
925 lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
926 else
927 CCL_NEG_EXPR(isupper);
928 }
929 ;
930
/* string - the characters of a quoted literal, built left to right.  The
 * empty alternative starts from an epsilon state; each CHAR appends one
 * state (or, when sf_case_ins() and the character has case, an
 * alternation of both cases), bumps rulelen, and records rule_has_nl if
 * the character equals nlch.
 */
931 string : string CHAR
932 {
933 if ( $2 == nlch )
934 rule_has_nl[num_rules] = true;
935
936 ++rulelen;
937
938 if (sf_case_ins() && has_case($2))
939 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
940 else
941 $$ = mkstate ($2);
942
943 $$ = link_machines( $1, $$);
944 }
945
946 |
947 { $$ = mkstate( SYM_EPSILON ); }
948 ;
949
950 %%
951
952
953 /* build_eof_action - build the "<<EOF>>" action for the active start
954 * conditions
955 */
956
957 void build_eof_action()
958 {
959 register int i;
960 char action_text[MAXLINE];
961
962 for ( i = 1; i <= scon_stk_ptr; ++i )
963 {
964 if ( sceof[scon_stk[i]] )
965 format_pinpoint_message(
966 "multiple <<EOF>> rules for start condition %s",
967 scname[scon_stk[i]] );
968
969 else
970 {
971 sceof[scon_stk[i]] = true;
972
973 if (previous_continued_action /* && previous action was regular */)
974 add_action("YY_RULE_SETUP\n");
975
976 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
977 scname[scon_stk[i]] );
978 add_action( action_text );
979 }
980 }
981
982 line_directive_out( (FILE *) 0, 1 );
983
984 /* This isn't a normal rule after all - don't count it as
985 * such, so we don't have any holes in the rule numbering
986 * (which make generating "rule can never match" warnings
987 * more difficult.
988 */
989 --num_rules;
990 ++num_eof_rules;
991 }
992
993
994 /* format_synerr - write out formatted syntax error */
995
format_synerr(msg,arg)996 void format_synerr( msg, arg )
997 const char *msg, arg[];
998 {
999 char errmsg[MAXLINE];
1000
1001 (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1002 synerr( errmsg );
1003 }
1004
1005
1006 /* synerr - report a syntax error */
1007
synerr(str)1008 void synerr( str )
1009 const char *str;
1010 {
1011 syntaxerror = true;
1012 pinpoint_message( str );
1013 }
1014
1015
1016 /* format_warn - write out formatted warning */
1017
format_warn(msg,arg)1018 void format_warn( msg, arg )
1019 const char *msg, arg[];
1020 {
1021 char warn_msg[MAXLINE];
1022
1023 snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1024 lwarn( warn_msg );
1025 }
1026
1027
1028 /* lwarn - report a warning, unless -w was given */
1029
lwarn(str)1030 void lwarn( str )
1031 const char *str;
1032 {
1033 line_warning( str, linenum );
1034 }
1035
1036 /* format_pinpoint_message - write out a message formatted with one string,
1037 * pinpointing its location
1038 */
1039
format_pinpoint_message(msg,arg)1040 void format_pinpoint_message( msg, arg )
1041 const char *msg, arg[];
1042 {
1043 char errmsg[MAXLINE];
1044
1045 snprintf( errmsg, sizeof(errmsg), msg, arg );
1046 pinpoint_message( errmsg );
1047 }
1048
1049
1050 /* pinpoint_message - write out a message, pinpointing its location */
1051
pinpoint_message(str)1052 void pinpoint_message( str )
1053 const char *str;
1054 {
1055 line_pinpoint( str, linenum );
1056 }
1057
1058
1059 /* line_warning - report a warning at a given line, unless -w was given */
1060
line_warning(str,line)1061 void line_warning( str, line )
1062 const char *str;
1063 int line;
1064 {
1065 char warning[MAXLINE];
1066
1067 if ( ! nowarn )
1068 {
1069 snprintf( warning, sizeof(warning), "warning, %s", str );
1070 line_pinpoint( warning, line );
1071 }
1072 }
1073
1074
1075 /* line_pinpoint - write out a message, pinpointing it at the given line */
1076
line_pinpoint(str,line)1077 void line_pinpoint( str, line )
1078 const char *str;
1079 int line;
1080 {
1081 fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1082 }
1083
1084
/* yyerror - eat up an error message from the parser;
 * currently, messages are ignored
 *
 * The grammar reports syntax errors itself from its "error" productions,
 * so the parser-supplied text in msg is deliberately discarded.
 */

void yyerror( const char *msg )
{
	(void) msg;	/* intentionally unused */
}
1093