xref: /netbsd-src/external/bsd/flex/dist/src/parse.y (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
1 /*	$NetBSD: parse.y,v 1.2 2016/01/09 17:38:57 christos Exp $	*/
2 
3 /* parse.y - parser for flex input */
4 
/* Basic token kinds of the flex input language. */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
/* Option directive introducer and the options that take "= NAME"
 * (see the "option" rule).
 */
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES

/* POSIX character class expressions (see the "ccl_expr" rule). */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

/* Negated character class expressions, e.g. [:^lower:]. */
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

/* Set operators on bracketed character classes (see "fullccl");
 * both are left-associative and share one precedence level.
 */
%left CCL_OP_DIFF CCL_OP_UNION
16 
17 /*
18  *POSIX and AT&T lex place the
19  * precedence of the repeat operator, {}, below that of concatenation.
20  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
21  * Regular Expression (ERE) precedence that has the repeat operator
22  * higher than concatenation.  This causes ab{3} to yield abbb.
23  *
24  * In order to support the POSIX and AT&T precedence and the flex
25  * precedence we define two token sets for the begin and end tokens of
26  * the repeat operator, '{' and '}'.  The lexical scanner chooses
27  * which tokens to return based on whether posix_compat or lex_compat
28  * are specified. Specifying either posix_compat or lex_compat will
29  * cause flex to parse scanner files as per the AT&T and
30  * POSIX-mandated behavior.
31  */
32 
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34 
35 
36 %{
37 /*  Copyright (c) 1990 The Regents of the University of California. */
38 /*  All rights reserved. */
39 
40 /*  This code is derived from software contributed to Berkeley by */
41 /*  Vern Paxson. */
42 
43 /*  The United States Government has rights in this work pursuant */
44 /*  to contract no. DE-AC03-76SF00098 between the United States */
45 /*  Department of Energy and the University of California. */
46 
47 /*  This file is part of flex. */
48 
49 /*  Redistribution and use in source and binary forms, with or without */
50 /*  modification, are permitted provided that the following conditions */
51 /*  are met: */
52 
53 /*  1. Redistributions of source code must retain the above copyright */
54 /*     notice, this list of conditions and the following disclaimer. */
55 /*  2. Redistributions in binary form must reproduce the above copyright */
56 /*     notice, this list of conditions and the following disclaimer in the */
57 /*     documentation and/or other materials provided with the distribution. */
58 
59 /*  Neither the name of the University nor the names of its contributors */
60 /*  may be used to endorse or promote products derived from this software */
61 /*  without specific prior written permission. */
62 
63 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66 /*  PURPOSE. */
67 #include "flexdef.h"
68 __RCSID("$NetBSD: parse.y,v 1.2 2016/01/09 17:38:57 christos Exp $");
69 
70 #include "tables.h"
71 
/* Working state shared by the rule actions below.  Note that "i" is a
 * file-scope loop counter used by many actions.
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start-condition numbers that apply to the rule(s) being
 * parsed.  It is allocated in "sect1end" with lastsc + 1 entries; live
 * entries occupy indices 1..scon_stk_ptr, so scon_stk_ptr == 0 means
 * the stack is empty.
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;  /* whether we've made the '.' character class */
/* Character class numbers created on first use of '.': ccldot excludes
 * '\n', cclany does not (used when sf_dot_all() is true).
 */
static int ccldot, cclany;
int previous_continued_action;	/* whether the previous rule's action was '|' */
81 
/* Issue a warning via lwarn() whose text is built from a printf-style
 * format taking exactly two arguments.  The text is truncated to fit a
 * MAXLINE-byte buffer.
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_text[MAXLINE]; \
		snprintf( fw3_text, sizeof(fw3_text), (fmt), (a1), (a2) ); \
		lwarn( fw3_text ); \
	} while (0)
88 
/* Expand a POSIX character class expression: add to the class currccl
 * every character code in [0, csize) that is ASCII and satisfies the
 * classification predicate "func".
 */
#define CCL_EXPR(func) \
	do { \
		int cce_c; \
		for ( cce_c = 0; cce_c < csize; ++cce_c ) \
			{ \
			if ( ! isascii(cce_c) ) \
				continue; \
			if ( func(cce_c) ) \
				ccladd( currccl, cce_c ); \
			} \
	} while (0)
97 
/* Expand a negated POSIX character class expression: add to the class
 * currccl every character code in [0, csize) that CCL_EXPR(func) would
 * NOT add.
 *
 * CCL_EXPR only accepts a character when it is ASCII *and* func() holds,
 * so the complement must apply the same isascii() guard.  Without it, a
 * code >= 0x80 for which func() happens to return true (possible when
 * the active locale classifies high bytes) would belong to neither the
 * class nor its negation.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !(isascii(c) && func(c)) ) \
			ccladd( currccl, c ); \
	}while(0)
106 
/* isblank() is defined by POSIX but is not ANSI C, so test for a space
 * or a horizontal tab by hand.
 */
#define IS_BLANK(c) (' ' == (c) || '\t' == (c))
109 
/* On some over-ambitious machines, such as DEC Alphas, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
 * wrap their definitions of YYSTYPE in "#ifndef YYSTYPE", so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int
117 
118 %}
119 
120 %%
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			/* All user rules have been parsed; append the
			 * default rule, built from a negated empty
			 * character class.
			 */
			int default_nfa;

			pat = cclinit();
			cclnegate( pat );
			default_nfa = mkstate( -pat );

			/* Record the default rule's number so that no
			 * "can't match" warning is generated for it.
			 */
			default_rule = num_rules;

			finish_rule( default_nfa, false, 0, 0, 0 );

			/* Offer the default rule as an alternative in
			 * every start condition.
			 */
			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], default_nfa );

			/* The default action depends on spprdflt: a fatal
			 * error when it is set, ECHO otherwise.
			 */
			add_action( spprdflt ?
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" :
				"ECHO" );

			add_action( ";\n\tYY_BREAK\n" );
			}
		;
149 
/* Empty rule reduced once, before section 1 is parsed (it is the first
 * symbol of "goal").
 */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			/* The second argument is the "exclusive" flag, as in
			 * the scinstal( nmstr, xcluflg ) calls of "namelist1".
			 */
			scinstal( "INITIAL", false );
			}
		;
157 
/* Section 1: a possibly empty sequence of start-condition declarations
 * and option directives.  A parse error here is reported through
 * synerr().
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;
164 
/* End of section 1, reduced before any rule of section 2 is parsed. */
sect1end	:  SECTEND
			{
			check_options();
			/* Allocate the start-condition stack with lastsc + 1
			 * entries (entries are stored from index 1 upward)
			 * and mark it empty.
			 */
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
172 
/* Keyword opening a start-condition declaration.  It records in xcluflg
 * whether the names that follow are declared exclusive (XSCDECL) or not
 * (SCDECL); "namelist1" passes the flag on to scinstal().
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
179 
/* One or more start-condition names being declared.  Each NAME (its
 * text is in nmstr) is installed with the exclusivity chosen by the
 * preceding "startconddecl".
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;
189 
/* An option directive: OPTION_OP followed by zero or more options. */
options		:  OPTION_OP optionlist
		;

/* Possibly empty, left-recursive list of options. */
optionlist	:  optionlist option
		|
		;
196 
/* A single "OPTION = NAME" setting.  The NAME text is taken from nmstr
 * and stored as a copy in the corresponding global.
 */
option		:  OPT_OUTFILE '=' NAME
			{
			outfilename = copy_string( nmstr );
			did_outfilename = 1;
			}

		|  OPT_EXTRA_TYPE '=' NAME
			{
			extra_type = copy_string( nmstr );
			}

		|  OPT_PREFIX '=' NAME
			{
			prefix = copy_string( nmstr );
			}

		|  OPT_YYCLASS '=' NAME
			{
			yyclass = copy_string( nmstr );
			}

		|  OPT_HEADER '=' NAME
			{
			headerfilename = copy_string( nmstr );
			}

		|  OPT_TABLES '=' NAME
			{
			tablesext = true;
			tablesfilename = copy_string( nmstr );
			}
		;
213 
/* Section 2: a possibly empty sequence of rules, each optionally
 * prefixed by a start-condition list, and of "<scon>{ ... }" groups.
 * The value of "scon" ($2) is the stack pointer saved before the prefix
 * pushed its start conditions; restoring it pops them again once the
 * rule or group is complete.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;
220 
/* Empty rule reduced just before each rule is parsed (and once more
 * before the default rule is added in "goal").
 */
initforrule	:
			{
			/* Reset the per-rule flags ... */
			varlength = false;
			variable_trail_rule = false;
			trlcontxt = false;

			/* ... and the per-rule length counters. */
			rulelen = 0;
			headcnt = 0;
			trailcnt = 0;

			current_state_type = STATE_NORMAL;

			/* Remember whether the rule before this one ended
			 * in a '|' action.
			 */
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
233 
/* One complete rule: a pattern anchored with '^', an unanchored
 * pattern, or an <<EOF>> rule.  A finished pattern is added to the
 * start conditions currently on scon_stk or, when the stack is empty,
 * to every non-exclusive start condition.  Anchored patterns go into
 * scbol[], unanchored ones into scset[].
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				/* Explicit start conditions were given. */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			/* Note the first use of '^' only once. */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				/* Explicit start conditions were given. */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				/* No start conditions given: add to every
				 * non-exclusive one.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				/* Nothing was pushed: every start condition
				 * already has an EOF rule.
				 */
				if ( scon_stk_ptr == 0 )
					lwarn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;
320 
/* Empty marker rule whose value is the current stack pointer.  It is
 * placed after '<' in "scon" so the value is captured before
 * "namelist2" pushes any start condition.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
324 
/* Optional start-condition prefix of a rule or rule group.  In every
 * alternative the semantic value is the value scon_stk_ptr had before
 * the prefix pushed anything; "sect2" uses it to restore the stack.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition 1..lastsc that is not
			 * already on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				/* The search ran off the end: i is not yet
				 * on the stack.
				 */
				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
348 
/* Comma-separated list of start-condition names inside '<' ... '>'.
 * The work of pushing each name is done by "sconname".
 */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;
356 
/* One start-condition name in a rule prefix.  The name (in nmstr) is
 * looked up; an unknown name is reported, a name already on the stack
 * draws a warning and is not pushed again, and any other name is pushed
 * on scon_stk.
 */
sconname	:  NAME
			{
			/* sclookup() returning 0 means the name is unknown. */
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* The loop finished without a match, so this
				 * is a new entry.
				 */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
379 
/* The pattern of one rule, with or without trailing context.  The
 * alternatives are: head '/' trail ("re2 re"), the erroneous
 * head '/' trail '$', a pattern followed by '$', and a plain pattern.
 * The value is the machine for the whole pattern; headcnt, trailcnt and
 * variable_trail_rule are set up for the finish_rule() call made in
 * "flexrule".
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				/* Fixed-length trail: "re2" reset rulelen to
				 * 0, so it now holds the trail's length.
				 */
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			/* '$' is handled as one character of trailing
			 * context: a '\n' state is appended below.
			 */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;
498 
499 
/* A regular expression: one or more "series" separated by '|'.  Any
 * alternation marks the pattern as variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
509 
510 
/* The head of a trailing-context pattern, up to and including the '/'.
 * The value is the head's machine.  On reduction the head's length is
 * recorded in headcnt (when it is fixed-length) and rulelen is reset so
 * that it counts the trailing part only.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
537 
/* A concatenation of singletons, optionally followed by a POSIX-style
 * repeat operator, which here applies to the whole series parsed so
 * far.  Every repeat form marks the pattern as variable-length.
 */
series		:  series singleton
			{
			/* Adjacent patterns are concatenated here. */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			/* {min,max} */
			varlength = true;

			if ( $3 < 0 || $3 > $5 || ($3 == 0 && $5 <= 0) )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}

			else if ( $3 == 0 )
				/* {0,max}: an optional {1,max}. */
				$$ = mkopt( mkrep( $1, 1, $5 ) );

			else
				$$ = mkrep( $1, $3, $5 );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			/* {min,} */
			varlength = true;

			if ( $3 > 0 )
				$$ = mkrep( $1, $3, INFINITE_REPEAT );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* {count}.  The series may be something like
			 * "(foo)" whose length we do not know, so the
			 * pattern is simply treated as variable-length.
			 */
			varlength = true;

			if ( $3 > 0 )
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );

			else
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}
			}

		;
612 
/* The smallest pattern unit: a character, '.', a character class, a
 * quoted string or a parenthesized expression, optionally followed by
 * '*', '+', '?' or a flex-style repeat operator.  Every postfix
 * operator marks the pattern as variable-length; the single-character
 * forms each add one to rulelen.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					/* Only {0,0} can reach this test
					 * with $5 <= 0.
					 */
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both classes are built once, on the first '.'. */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
                    ccldot = cclinit();
                    ccladd( ccldot, '\n' );
                    cclnegate( ccldot );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[ccldot],
                            ccllen[ccldot], nextecm,
                            ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
                    cclany = cclinit();
                    cclnegate( cclany );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[cclany],
                            ccllen[cclany], nextecm,
                            ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

            /* Pick the class that matches the dot-all setting. */
            if (sf_dot_all())
                $$ = mkstate( -cclany );
            else
                $$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			/* A state for a character class is created by
			 * passing the negated class number.
			 */
			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

            if (sf_case_ins() && has_case($1))
                /* create an alternation, as in (a|A) */
                $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
            else
                $$ = mkstate( $1 );
			}
		;
/* A bracketed character class, or several combined with the set
 * difference and union operators.  The value is a character class
 * number; the last alternative has no action and so relies on yacc's
 * default of $$ = $1.
 */
fullccl:
        fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
    |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
    |   braceccl
    ;
780 
/* One bracketed character class, "[...]" or the negated "[^...]".  The
 * value is the class number produced by "ccl".
 */
braceccl:

            '[' ccl ']' { $$ = $2; }

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;
791 
/* The contents of a bracketed character class: any mix of ranges,
 * single characters and character class expressions.  The empty
 * alternative is reduced first; it creates the class with cclinit() and
 * makes it the current class (currccl).  The value of every alternative
 * is that class number.  cclsorted and lastchar track whether the
 * characters have been added in ascending order.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [a-z] but not [A-Z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

                /* Do it again for upper/lowercase */
                if (sf_case_ins() && has_case($2) && has_case($4)){
                    /* $2 and $4 are overwritten with the
                     * opposite-case endpoints.
                     */
                    $2 = reverse_case ($2);
                    $4 = reverse_case ($4);

                    for ( i = $2; i <= $4; ++i )
                        ccladd( $1, i );

                    cclsorted = cclsorted && ($2 > lastchar);
                    lastchar = $4;
                }

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

            /* Do it again for upper/lowercase */
            if (sf_case_ins() && has_case($2)){
                $2 = reverse_case ($2);
                ccladd ($1, $2);

                cclsorted = cclsorted && ($2 > lastchar);
                lastchar = $2;
            }

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			/* Start of a new class. */
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
884 
/* A character class expression inside a bracketed class.  Each
 * alternative adds characters to the current class (currccl) through
 * CCL_EXPR or CCL_NEG_EXPR with the matching classification predicate.
 *
 * In a case-insensitive scanner (sf_case_ins()), the lower and upper
 * expressions each add both cases, while their negated forms only
 * produce a warning and add nothing.
 */
ccl_expr:
           CCE_ALNUM	{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
		|  CCE_LOWER	{
                          CCL_EXPR(islower);
                          if (sf_case_ins())
                              CCL_EXPR(isupper);
                        }
		|  CCE_PRINT	{ CCL_EXPR(isprint); }
		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
		|  CCE_SPACE	{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER	{
                    CCL_EXPR(isupper);
                    if (sf_case_ins())
                        CCL_EXPR(islower);
				}

        |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
		|  CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
		;
930 
/* The characters between double quotes.  The empty alternative yields
 * an epsilon state; each further CHAR appends one state (or, in a
 * case-insensitive scanner, an alternation of both cases) and adds one
 * to rulelen.
 */
string		:  string CHAR
			{
			int ch_mach;

			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if ( sf_case_ins() && has_case( $2 ) )
				ch_mach = mkor( mkstate( $2 ),
						mkstate( reverse_case( $2 ) ) );
			else
				ch_mach = mkstate( $2 );

			$$ = link_machines( $1, ch_mach );
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
949 
950 %%
951 
952 
953 /* build_eof_action - build the "<<EOF>>" action for the active start
954  *                    conditions
955  */
956 
957 void build_eof_action()
958 	{
959 	int i;
960 	char action_text[MAXLINE];
961 
962 	for ( i = 1; i <= scon_stk_ptr; ++i )
963 		{
964 		if ( sceof[scon_stk[i]] )
965 			format_pinpoint_message(
966 				"multiple <<EOF>> rules for start condition %s",
967 				scname[scon_stk[i]] );
968 
969 		else
970 			{
971 			sceof[scon_stk[i]] = true;
972 
973 			if (previous_continued_action /* && previous action was regular */)
974 				add_action("YY_RULE_SETUP\n");
975 
976 			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
977 				scname[scon_stk[i]] );
978 			add_action( action_text );
979 			}
980 		}
981 
982 	line_directive_out( (FILE *) 0, 1 );
983 
984 	/* This isn't a normal rule after all - don't count it as
985 	 * such, so we don't have any holes in the rule numbering
986 	 * (which make generating "rule can never match" warnings
987 	 * more difficult.
988 	 */
989 	--num_rules;
990 	++num_eof_rules;
991 	}
992 
993 
994 /* format_synerr - write out formatted syntax error */
995 
996 void format_synerr( msg, arg )
997 const char *msg, arg[];
998 	{
999 	char errmsg[MAXLINE];
1000 
1001 	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1002 	synerr( errmsg );
1003 	}
1004 
1005 
1006 /* synerr - report a syntax error */
1007 
1008 void synerr( str )
1009 const char *str;
1010 	{
1011 	syntaxerror = true;
1012 	pinpoint_message( str );
1013 	}
1014 
1015 
1016 /* format_warn - write out formatted warning */
1017 
1018 void format_warn( msg, arg )
1019 const char *msg, arg[];
1020 	{
1021 	char warn_msg[MAXLINE];
1022 
1023 	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1024 	lwarn( warn_msg );
1025 	}
1026 
1027 
1028 /* lwarn - report a warning, unless -w was given */
1029 
1030 void lwarn( str )
1031 const char *str;
1032 	{
1033 	line_warning( str, linenum );
1034 	}
1035 
1036 /* format_pinpoint_message - write out a message formatted with one string,
1037  *			     pinpointing its location
1038  */
1039 
1040 void format_pinpoint_message( msg, arg )
1041 const char *msg, arg[];
1042 	{
1043 	char errmsg[MAXLINE];
1044 
1045 	snprintf( errmsg, sizeof(errmsg), msg, arg );
1046 	pinpoint_message( errmsg );
1047 	}
1048 
1049 
1050 /* pinpoint_message - write out a message, pinpointing its location */
1051 
1052 void pinpoint_message( str )
1053 const char *str;
1054 	{
1055 	line_pinpoint( str, linenum );
1056 	}
1057 
1058 
1059 /* line_warning - report a warning at a given line, unless -w was given */
1060 
1061 void line_warning( str, line )
1062 const char *str;
1063 int line;
1064 	{
1065 	char warning[MAXLINE];
1066 
1067 	if ( ! nowarn )
1068 		{
1069 		snprintf( warning, sizeof(warning), "warning, %s", str );
1070 		line_pinpoint( warning, line );
1071 		}
1072 	}
1073 
1074 
1075 /* line_pinpoint - write out a message, pinpointing it at the given line */
1076 
1077 void line_pinpoint( str, line )
1078 const char *str;
1079 int line;
1080 	{
1081 	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1082 	}
1083 
1084 
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 *
 * Syntax errors are instead reported by the "error" alternatives of the
 * grammar through synerr().
 */

void yyerror( const char *msg )
	{
	(void) msg;
	}
1094