xref: /netbsd-src/external/bsd/flex/dist/src/parse.y (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $	*/
2 
3 /* parse.y - parser for flex input */
4 
5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
6 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
7 %token TOK_TABLES_FILE
8 
9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
11 
12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
14 
15 %left CCL_OP_DIFF CCL_OP_UNION
16 
17 /*
18  *POSIX and AT&T lex place the
19  * precedence of the repeat operator, {}, below that of concatenation.
20  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
21  * Regular Expression (ERE) precedence that has the repeat operator
22  * higher than concatenation.  This causes ab{3} to yield abbb.
23  *
24  * In order to support the POSIX and AT&T precedence and the flex
25  * precedence we define two token sets for the begin and end tokens of
26  * the repeat operator, '{' and '}'.  The lexical scanner chooses
27  * which tokens to return based on whether posix_compat or lex_compat
28  * are specified. Specifying either posix_compat or lex_compat will
29  * cause flex to parse scanner files as per the AT&T and
30  * POSIX-mandated behavior.
31  */
32 
33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
34 
35 
36 %{
37 /*  Copyright (c) 1990 The Regents of the University of California. */
38 /*  All rights reserved. */
39 
40 /*  This code is derived from software contributed to Berkeley by */
41 /*  Vern Paxson. */
42 
43 /*  The United States Government has rights in this work pursuant */
44 /*  to contract no. DE-AC03-76SF00098 between the United States */
45 /*  Department of Energy and the University of California. */
46 
47 /*  This file is part of flex. */
48 
49 /*  Redistribution and use in source and binary forms, with or without */
50 /*  modification, are permitted provided that the following conditions */
51 /*  are met: */
52 
53 /*  1. Redistributions of source code must retain the above copyright */
54 /*     notice, this list of conditions and the following disclaimer. */
55 /*  2. Redistributions in binary form must reproduce the above copyright */
56 /*     notice, this list of conditions and the following disclaimer in the */
57 /*     documentation and/or other materials provided with the distribution. */
58 
59 /*  Neither the name of the University nor the names of its contributors */
60 /*  may be used to endorse or promote products derived from this software */
61 /*  without specific prior written permission. */
62 
63 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
64 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
65 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
66 /*  PURPOSE. */
67 #include "flexdef.h"
68 __RCSID("$NetBSD: parse.y,v 1.3 2017/01/02 17:45:27 christos Exp $");
69 
70 #include "tables.h"
71 
72 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
73 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
74 
75 int *scon_stk;
76 int scon_stk_ptr;
77 
78 static int madeany = false;  /* whether we've made the '.' character class */
79 static int ccldot, cclany;
80 int previous_continued_action;	/* whether the previous rule's action was '|' */
81 
/* Build a warning from a printf-style format taking two arguments and
 * hand the resulting text to lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_text[MAXLINE]; \
		snprintf( fw3_text, sizeof(fw3_text), (fmt), (a1), (a2) ); \
		lwarn( fw3_text ); \
	} while (0)
88 
/* Expand a POSIX character class expression: every ASCII character below
 * csize for which func() holds is added to the class currccl.
 */
#define CCL_EXPR(func) \
	do { \
		int ch; \
		for ( ch = 0; ch < csize; ++ch ) \
			{ \
			if ( ! isascii(ch) ) \
				continue; \
			if ( func(ch) ) \
				ccladd( currccl, ch ); \
			} \
	} while (0)
97 
/* Expand a negated POSIX character class expression.
 *
 * This is the exact complement of CCL_EXPR(func): a character is added to
 * currccl unless CCL_EXPR would have added it, i.e. unless it is ASCII and
 * func() holds for it.  Testing func() alone would leave a non-ASCII
 * character that the locale classifies as a member out of both the class
 * and its negation.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !(isascii(c) && func(c)) ) \
			ccladd( currccl, c ); \
	}while(0)
106 
107 /* While POSIX defines isblank(), it's not ANSI C. */
108 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
109 
110 /* On some over-ambitious machines, such as DEC Alpha's, the default
111  * token type is "long" instead of "int"; this leads to problems with
112  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
113  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
114  * following should ensure that the default token type is "int".
115  */
116 #define YYSTYPE int
117 
118 %}
119 
120 %%
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			/* Everything has been parsed: append the default
			 * rule, built from a negated empty character class.
			 */
			int dflt_state;

			pat = cclinit();
			cclnegate( pat );
			dflt_state = mkstate( -pat );

			/* Record the default rule's number so that no
			 * "can't match" warning is generated for it.
			 */
			default_rule = num_rules;

			finish_rule( dflt_state, false, 0, 0, 0 );

			/* Branch every start condition to the default rule. */
			i = 1;
			while ( i <= lastsc )
				{
				scset[i] = mkbranch( scset[i], dflt_state );
				++i;
				}

			add_action( spprdflt
				? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
				: "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;
149 
150 initlex		:
151 			{ /* initialize for processing rules */
152 
153 			/* Create default DFA start condition. */
154 			scinstal( "INITIAL", false );
155 			}
156 		;
157 
158 sect1		:  sect1 startconddecl namelist1
159 		|  sect1 options
160 		|
161 		|  error
162 			{ synerr( _("unknown error processing section 1") ); }
163 		;
164 
165 sect1end	:  SECTEND
166 			{
167 			check_options();
168 			scon_stk = allocate_integer_array( lastsc + 1 );
169 			scon_stk_ptr = 0;
170 			}
171 		;
172 
173 startconddecl	:  SCDECL
174 			{ xcluflg = false; }
175 
176 		|  XSCDECL
177 			{ xcluflg = true; }
178 		;
179 
180 namelist1	:  namelist1 NAME
181 			{ scinstal( nmstr, xcluflg ); }
182 
183 		|  NAME
184 			{ scinstal( nmstr, xcluflg ); }
185 
186 		|  error
187 			{ synerr( _("bad start condition list") ); }
188 		;
189 
190 options		:  TOK_OPTION optionlist
191 		;
192 
193 optionlist	:  optionlist option
194 		|
195 		;
196 
197 option		:  TOK_OUTFILE '=' NAME
198 			{
199 			outfilename = xstrdup(nmstr);
200 			did_outfilename = 1;
201 			}
202 		|  TOK_EXTRA_TYPE '=' NAME
203 			{ extra_type = xstrdup(nmstr); }
204 		|  TOK_PREFIX '=' NAME
205 			{ prefix = xstrdup(nmstr);
206                           if (strchr(prefix, '[') || strchr(prefix, ']'))
207                               flexerror(_("Prefix must not contain [ or ]")); }
208 		|  TOK_YYCLASS '=' NAME
209 			{ yyclass = xstrdup(nmstr); }
210 		|  TOK_HEADER_FILE '=' NAME
211 			{ headerfilename = xstrdup(nmstr); }
212 	    |  TOK_TABLES_FILE '=' NAME
213             { tablesext = true; tablesfilename = xstrdup(nmstr); }
214 		;
215 
216 sect2		:  sect2 scon initforrule flexrule '\n'
217 			{ scon_stk_ptr = $2; }
218 		|  sect2 scon '{' sect2 '}'
219 			{ scon_stk_ptr = $2; }
220 		|
221 		;
222 
223 initforrule	:
224 			{
225 			/* Initialize for a parse of one rule. */
226 			trlcontxt = variable_trail_rule = varlength = false;
227 			trailcnt = headcnt = rulelen = 0;
228 			current_state_type = STATE_NORMAL;
229 			previous_continued_action = continued_action;
230 			in_rule = true;
231 
232 			new_rule();
233 			}
234 		;
235 
236 flexrule	:  '^' rule
237 			{
238 			pat = $2;
239 			finish_rule( pat, variable_trail_rule,
240 				headcnt, trailcnt , previous_continued_action);
241 
242 			if ( scon_stk_ptr > 0 )
243 				{
244 				for ( i = 1; i <= scon_stk_ptr; ++i )
245 					scbol[scon_stk[i]] =
246 						mkbranch( scbol[scon_stk[i]],
247 								pat );
248 				}
249 
250 			else
251 				{
252 				/* Add to all non-exclusive start conditions,
253 				 * including the default (0) start condition.
254 				 */
255 
256 				for ( i = 1; i <= lastsc; ++i )
257 					if ( ! scxclu[i] )
258 						scbol[i] = mkbranch( scbol[i],
259 									pat );
260 				}
261 
262 			if ( ! bol_needed )
263 				{
264 				bol_needed = true;
265 
266 				if ( performance_report > 1 )
267 					pinpoint_message(
268 			"'^' operator results in sub-optimal performance" );
269 				}
270 			}
271 
272 		|  rule
273 			{
274 			pat = $1;
275 			finish_rule( pat, variable_trail_rule,
276 				headcnt, trailcnt , previous_continued_action);
277 
278 			if ( scon_stk_ptr > 0 )
279 				{
280 				for ( i = 1; i <= scon_stk_ptr; ++i )
281 					scset[scon_stk[i]] =
282 						mkbranch( scset[scon_stk[i]],
283 								pat );
284 				}
285 
286 			else
287 				{
288 				for ( i = 1; i <= lastsc; ++i )
289 					if ( ! scxclu[i] )
290 						scset[i] =
291 							mkbranch( scset[i],
292 								pat );
293 				}
294 			}
295 
296 		|  EOF_OP
297 			{
298 			if ( scon_stk_ptr > 0 )
299 				build_eof_action();
300 
301 			else
302 				{
303 				/* This EOF applies to all start conditions
304 				 * which don't already have EOF actions.
305 				 */
306 				for ( i = 1; i <= lastsc; ++i )
307 					if ( ! sceof[i] )
308 						scon_stk[++scon_stk_ptr] = i;
309 
310 				if ( scon_stk_ptr == 0 )
311 					lwarn(
312 			"all start conditions already have <<EOF>> rules" );
313 
314 				else
315 					build_eof_action();
316 				}
317 			}
318 
319 		|  error
320 			{ synerr( _("unrecognized rule") ); }
321 		;
322 
323 scon_stk_ptr	:
324 			{ $$ = scon_stk_ptr; }
325 		;
326 
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			/* The value is the stack depth on entry.  Every
			 * start condition that is not already on the stack
			 * is then pushed.
			 */
			$$ = scon_stk_ptr;

			for ( i = 1; i <= lastsc; ++i )
				{
				int k = 1;

				while ( k <= scon_stk_ptr && scon_stk[k] != i )
					++k;

				if ( k > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
350 
351 namelist2	:  namelist2 ',' sconname
352 
353 		|  sconname
354 
355 		|  error
356 			{ synerr( _("bad start condition list") ); }
357 		;
358 
sconname	:  NAME
			{
			/* Look the name up; 0 means it was never declared. */
			scnum = sclookup( nmstr );

			if ( scnum == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				/* Search the stack for this start condition. */
				i = 1;
				while ( i <= scon_stk_ptr &&
					scon_stk[i] != scnum )
					++i;

				if ( i <= scon_stk_ptr )
					format_warn(
						"<%s> specified twice",
						scname[scnum] );
				else
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
381 
382 rule		:  re2 re
383 			{
384 			if ( transchar[lastst[$2]] != SYM_EPSILON )
385 				/* Provide final transition \now/ so it
386 				 * will be marked as a trailing context
387 				 * state.
388 				 */
389 				$2 = link_machines( $2,
390 						mkstate( SYM_EPSILON ) );
391 
392 			mark_beginning_as_normal( $2 );
393 			current_state_type = STATE_NORMAL;
394 
395 			if ( previous_continued_action )
396 				{
397 				/* We need to treat this as variable trailing
398 				 * context so that the backup does not happen
399 				 * in the action but before the action switch
400 				 * statement.  If the backup happens in the
401 				 * action, then the rules "falling into" this
402 				 * one's action will *also* do the backup,
403 				 * erroneously.
404 				 */
405 				if ( ! varlength || headcnt != 0 )
406 					lwarn(
407 		"trailing context made variable due to preceding '|' action" );
408 
409 				/* Mark as variable. */
410 				varlength = true;
411 				headcnt = 0;
412 
413 				}
414 
415 			if ( lex_compat || (varlength && headcnt == 0) )
416 				{ /* variable trailing context rule */
417 				/* Mark the first part of the rule as the
418 				 * accepting "head" part of a trailing
419 				 * context rule.
420 				 *
421 				 * By the way, we didn't do this at the
422 				 * beginning of this production because back
423 				 * then current_state_type was set up for a
424 				 * trail rule, and add_accept() can create
425 				 * a new state ...
426 				 */
427 				add_accept( $1,
428 					num_rules | YY_TRAILING_HEAD_MASK );
429 				variable_trail_rule = true;
430 				}
431 
432 			else
433 				trailcnt = rulelen;
434 
435 			$$ = link_machines( $1, $2 );
436 			}
437 
438 		|  re2 re '$'
439 			{ synerr( _("trailing context used twice") ); }
440 
441 		|  re '$'
442 			{
443 			headcnt = 0;
444 			trailcnt = 1;
445 			rulelen = 1;
446 			varlength = false;
447 
448 			current_state_type = STATE_TRAILING_CONTEXT;
449 
450 			if ( trlcontxt )
451 				{
452 				synerr( _("trailing context used twice") );
453 				$$ = mkstate( SYM_EPSILON );
454 				}
455 
456 			else if ( previous_continued_action )
457 				{
458 				/* See the comment in the rule for "re2 re"
459 				 * above.
460 				 */
461 				lwarn(
462 		"trailing context made variable due to preceding '|' action" );
463 
464 				varlength = true;
465 				}
466 
467 			if ( lex_compat || varlength )
468 				{
469 				/* Again, see the comment in the rule for
470 				 * "re2 re" above.
471 				 */
472 				add_accept( $1,
473 					num_rules | YY_TRAILING_HEAD_MASK );
474 				variable_trail_rule = true;
475 				}
476 
477 			trlcontxt = true;
478 
479 			eps = mkstate( SYM_EPSILON );
480 			$$ = link_machines( $1,
481 				link_machines( eps, mkstate( '\n' ) ) );
482 			}
483 
484 		|  re
485 			{
486 			$$ = $1;
487 
488 			if ( trlcontxt )
489 				{
490 				if ( lex_compat || (varlength && headcnt == 0) )
491 					/* Both head and trail are
492 					 * variable-length.
493 					 */
494 					variable_trail_rule = true;
495 				else
496 					trailcnt = rulelen;
497 				}
498 			}
499 		;
500 
501 
502 re		:  re '|' series
503 			{
504 			varlength = true;
505 			$$ = mkor( $1, $3 );
506 			}
507 
508 		|  series
509 			{ $$ = $1; }
510 		;
511 
512 
re2		:  re '/'
			{
			/* Kept as a separate production so that this
			 * reduction takes place before the trailing
			 * series is parsed.
			 */

			if ( ! trlcontxt )
				trlcontxt = true;
			else
				synerr( _("trailing context used twice") );

			if ( ! varlength )
				headcnt = rulelen;
			else
				/* The head is variable-length; we hope the
				 * trailing context is fixed-length.
				 */
				varlength = false;

			rulelen = 0;
			current_state_type = STATE_TRAILING_CONTEXT;

			$$ = $1;
			}
		;
539 
540 series		:  series singleton
541 			{
542 			/* This is where concatenation of adjacent patterns
543 			 * gets done.
544 			 */
545 			$$ = link_machines( $1, $2 );
546 			}
547 
548 		|  singleton
549 			{ $$ = $1; }
550 
551 		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
552 			{
553 			varlength = true;
554 
555 			if ( $3 > $5 || $3 < 0 )
556 				{
557 				synerr( _("bad iteration values") );
558 				$$ = $1;
559 				}
560 			else
561 				{
562 				if ( $3 == 0 )
563 					{
564 					if ( $5 <= 0 )
565 						{
566 						synerr(
567 						_("bad iteration values") );
568 						$$ = $1;
569 						}
570 					else
571 						$$ = mkopt(
572 							mkrep( $1, 1, $5 ) );
573 					}
574 				else
575 					$$ = mkrep( $1, $3, $5 );
576 				}
577 			}
578 
579 		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
580 			{
581 			varlength = true;
582 
583 			if ( $3 <= 0 )
584 				{
585 				synerr( _("iteration value must be positive") );
586 				$$ = $1;
587 				}
588 
589 			else
590 				$$ = mkrep( $1, $3, INFINITE_REPEAT );
591 			}
592 
593 		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
594 			{
595 			/* The series could be something like "(foo)",
596 			 * in which case we have no idea what its length
597 			 * is, so we punt here.
598 			 */
599 			varlength = true;
600 
601 			if ( $3 <= 0 )
602 				{
603 				  synerr( _("iteration value must be positive")
604 					  );
605 				$$ = $1;
606 				}
607 
608 			else
609 				$$ = link_machines( $1,
610 						copysingl( $1, $3 - 1 ) );
611 			}
612 
613 		;
614 
615 singleton	:  singleton '*'
616 			{
617 			varlength = true;
618 
619 			$$ = mkclos( $1 );
620 			}
621 
622 		|  singleton '+'
623 			{
624 			varlength = true;
625 			$$ = mkposcl( $1 );
626 			}
627 
628 		|  singleton '?'
629 			{
630 			varlength = true;
631 			$$ = mkopt( $1 );
632 			}
633 
634 		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
635 			{
636 			varlength = true;
637 
638 			if ( $3 > $5 || $3 < 0 )
639 				{
640 				synerr( _("bad iteration values") );
641 				$$ = $1;
642 				}
643 			else
644 				{
645 				if ( $3 == 0 )
646 					{
647 					if ( $5 <= 0 )
648 						{
649 						synerr(
650 						_("bad iteration values") );
651 						$$ = $1;
652 						}
653 					else
654 						$$ = mkopt(
655 							mkrep( $1, 1, $5 ) );
656 					}
657 				else
658 					$$ = mkrep( $1, $3, $5 );
659 				}
660 			}
661 
662 		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
663 			{
664 			varlength = true;
665 
666 			if ( $3 <= 0 )
667 				{
668 				synerr( _("iteration value must be positive") );
669 				$$ = $1;
670 				}
671 
672 			else
673 				$$ = mkrep( $1, $3, INFINITE_REPEAT );
674 			}
675 
676 		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
677 			{
678 			/* The singleton could be something like "(foo)",
679 			 * in which case we have no idea what its length
680 			 * is, so we punt here.
681 			 */
682 			varlength = true;
683 
684 			if ( $3 <= 0 )
685 				{
686 				synerr( _("iteration value must be positive") );
687 				$$ = $1;
688 				}
689 
690 			else
691 				$$ = link_machines( $1,
692 						copysingl( $1, $3 - 1 ) );
693 			}
694 
695 		|  '.'
696 			{
697 			if ( ! madeany )
698 				{
699 				/* Create the '.' character class. */
700                     ccldot = cclinit();
701                     ccladd( ccldot, '\n' );
702                     cclnegate( ccldot );
703 
704                     if ( useecs )
705                         mkeccl( ccltbl + cclmap[ccldot],
706                             ccllen[ccldot], nextecm,
707                             ecgroup, csize, csize );
708 
709 				/* Create the (?s:'.') character class. */
710                     cclany = cclinit();
711                     cclnegate( cclany );
712 
713                     if ( useecs )
714                         mkeccl( ccltbl + cclmap[cclany],
715                             ccllen[cclany], nextecm,
716                             ecgroup, csize, csize );
717 
718 				madeany = true;
719 				}
720 
721 			++rulelen;
722 
723             if (sf_dot_all())
724                 $$ = mkstate( -cclany );
725             else
726                 $$ = mkstate( -ccldot );
727 			}
728 
729 		|  fullccl
730 			{
731 				/* Sort characters for fast searching.
732 				 */
733 				qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );
734 
735 			if ( useecs )
736 				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
737 					nextecm, ecgroup, csize, csize );
738 
739 			++rulelen;
740 
741 			if (ccl_has_nl[$1])
742 				rule_has_nl[num_rules] = true;
743 
744 			$$ = mkstate( -$1 );
745 			}
746 
747 		|  PREVCCL
748 			{
749 			++rulelen;
750 
751 			if (ccl_has_nl[$1])
752 				rule_has_nl[num_rules] = true;
753 
754 			$$ = mkstate( -$1 );
755 			}
756 
757 		|  '"' string '"'
758 			{ $$ = $2; }
759 
760 		|  '(' re ')'
761 			{ $$ = $2; }
762 
763 		|  CHAR
764 			{
765 			++rulelen;
766 
767 			if ($1 == nlch)
768 				rule_has_nl[num_rules] = true;
769 
770             if (sf_case_ins() && has_case($1))
771                 /* create an alternation, as in (a|A) */
772                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
773             else
774                 $$ = mkstate( $1 );
775 			}
776 		;
777 fullccl:
778         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
779     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
780     |   braceccl
781     ;
782 
783 braceccl:
784 
785             '[' ccl ']' { $$ = $2; }
786 
787 		|  '[' '^' ccl ']'
788 			{
789 			cclnegate( $3 );
790 			$$ = $3;
791 			}
792 		;
793 
794 ccl		:  ccl CHAR '-' CHAR
795 			{
796 
797 			if (sf_case_ins())
798 			  {
799 
800 			    /* If one end of the range has case and the other
801 			     * does not, or the cases are different, then we're not
802 			     * sure what range the user is trying to express.
803 			     * Examples: [@-z] or [S-t]
804 			     */
805 			    if (has_case ($2) != has_case ($4)
806 				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
807 				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
808 			      format_warn3 (
809 			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
810 					    $2, $4);
811 
812 			    /* If the range spans uppercase characters but not
813 			     * lowercase (or vice-versa), then should we automatically
814 			     * include lowercase characters in the range?
815 			     * Example: [@-_] spans [a-z] but not [A-Z]
816 			     */
817 			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
818 			      format_warn3 (
819 			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
820 					    $2, $4);
821 			  }
822 
823 			if ( $2 > $4 )
824 				synerr( _("negative range in character class") );
825 
826 			else
827 				{
828 				for ( i = $2; i <= $4; ++i )
829 					ccladd( $1, i );
830 
831 				/* Keep track if this ccl is staying in
832 				 * alphabetical order.
833 				 */
834 				cclsorted = cclsorted && ($2 > lastchar);
835 				lastchar = $4;
836 
837                 /* Do it again for upper/lowercase */
838                 if (sf_case_ins() && has_case($2) && has_case($4)){
839                     $2 = reverse_case ($2);
840                     $4 = reverse_case ($4);
841 
842                     for ( i = $2; i <= $4; ++i )
843                         ccladd( $1, i );
844 
845                     cclsorted = cclsorted && ($2 > lastchar);
846                     lastchar = $4;
847                 }
848 
849 				}
850 
851 			$$ = $1;
852 			}
853 
854 		|  ccl CHAR
855 			{
856 			ccladd( $1, $2 );
857 			cclsorted = cclsorted && ($2 > lastchar);
858 			lastchar = $2;
859 
860             /* Do it again for upper/lowercase */
861             if (sf_case_ins() && has_case($2)){
862                 $2 = reverse_case ($2);
863                 ccladd ($1, $2);
864 
865                 cclsorted = cclsorted && ($2 > lastchar);
866                 lastchar = $2;
867             }
868 
869 			$$ = $1;
870 			}
871 
872 		|  ccl ccl_expr
873 			{
874 			/* Too hard to properly maintain cclsorted. */
875 			cclsorted = false;
876 			$$ = $1;
877 			}
878 
879 		|
880 			{
881 			cclsorted = true;
882 			lastchar = 0;
883 			currccl = $$ = cclinit();
884 			}
885 		;
886 
887 ccl_expr:
888            CCE_ALNUM	{ CCL_EXPR(isalnum); }
889 		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
890 		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
891 		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
892 		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
893 		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
894 		|  CCE_LOWER	{
895                           CCL_EXPR(islower);
896                           if (sf_case_ins())
897                               CCL_EXPR(isupper);
898                         }
899 		|  CCE_PRINT	{ CCL_EXPR(isprint); }
900 		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
901 		|  CCE_SPACE	{ CCL_EXPR(isspace); }
902 		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
903 		|  CCE_UPPER	{
904                     CCL_EXPR(isupper);
905                     if (sf_case_ins())
906                         CCL_EXPR(islower);
907 				}
908 
909         |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
910 		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
911 		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
912 		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
913 		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
914 		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
915 		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
916 		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
917 		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
918 		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
919 		|  CCE_NEG_LOWER	{
920 				if ( sf_case_ins() )
921 					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
922 				else
923 					CCL_NEG_EXPR(islower);
924 				}
925 		|  CCE_NEG_UPPER	{
926 				if ( sf_case_ins() )
927 					lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
928 				else
929 					CCL_NEG_EXPR(isupper);
930 				}
931 		;
932 
string		:  string CHAR
			{
			/* Machine for the character just read. */
			int piece;

			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if ( sf_case_ins() && has_case( $2 ) )
				/* Alternation of both cases, as in (a|A). */
				piece = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				piece = mkstate( $2 );

			/* Append it to the string built so far. */
			$$ = link_machines( $1, piece );
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
951 
952 %%
953 
954 
955 /* build_eof_action - build the "<<EOF>>" action for the active start
956  *                    conditions
957  */
958 
959 void build_eof_action(void)
960 	{
961 	int i;
962 	char action_text[MAXLINE];
963 
964 	for ( i = 1; i <= scon_stk_ptr; ++i )
965 		{
966 		if ( sceof[scon_stk[i]] )
967 			format_pinpoint_message(
968 				"multiple <<EOF>> rules for start condition %s",
969 				scname[scon_stk[i]] );
970 
971 		else
972 			{
973 			sceof[scon_stk[i]] = true;
974 
975 			if (previous_continued_action /* && previous action was regular */)
976 				add_action("YY_RULE_SETUP\n");
977 
978 			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
979 				scname[scon_stk[i]] );
980 			add_action( action_text );
981 			}
982 		}
983 
984 	line_directive_out(NULL, 1);
985         add_action("[[");
986 
987 	/* This isn't a normal rule after all - don't count it as
988 	 * such, so we don't have any holes in the rule numbering
989 	 * (which make generating "rule can never match" warnings
990 	 * more difficult.
991 	 */
992 	--num_rules;
993 	++num_eof_rules;
994 	}
995 
996 
997 /* format_synerr - write out formatted syntax error */
998 
999 void format_synerr( const char *msg, const char arg[] )
1000 	{
1001 	char errmsg[MAXLINE];
1002 
1003 	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1004 	synerr( errmsg );
1005 	}
1006 
1007 
1008 /* synerr - report a syntax error */
1009 
1010 void synerr( const char *str )
1011 	{
1012 	syntaxerror = true;
1013 	pinpoint_message( str );
1014 	}
1015 
1016 
1017 /* format_warn - write out formatted warning */
1018 
1019 void format_warn( const char *msg, const char arg[] )
1020 	{
1021 	char warn_msg[MAXLINE];
1022 
1023 	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1024 	lwarn( warn_msg );
1025 	}
1026 
1027 
1028 /* lwarn - report a warning, unless -w was given */
1029 
1030 void lwarn( const char *str )
1031 	{
1032 	line_warning( str, linenum );
1033 	}
1034 
1035 /* format_pinpoint_message - write out a message formatted with one string,
1036  *			     pinpointing its location
1037  */
1038 
1039 void format_pinpoint_message( const char *msg, const char arg[] )
1040 	{
1041 	char errmsg[MAXLINE];
1042 
1043 	snprintf( errmsg, sizeof(errmsg), msg, arg );
1044 	pinpoint_message( errmsg );
1045 	}
1046 
1047 
1048 /* pinpoint_message - write out a message, pinpointing its location */
1049 
1050 void pinpoint_message( const char *str )
1051 	{
1052 	line_pinpoint( str, linenum );
1053 	}
1054 
1055 
1056 /* line_warning - report a warning at a given line, unless -w was given */
1057 
1058 void line_warning( const char *str, int line )
1059 	{
1060 	char warning[MAXLINE];
1061 
1062 	if ( ! nowarn )
1063 		{
1064 		snprintf( warning, sizeof(warning), "warning, %s", str );
1065 		line_pinpoint( warning, line );
1066 		}
1067 	}
1068 
1069 
1070 /* line_pinpoint - write out a message, pinpointing it at the given line */
1071 
1072 void line_pinpoint( const char *str, int line )
1073 	{
1074 	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1075 	}
1076 
1077 
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( const char *msg )
	{
	/* Deliberately unused. */
	(void) msg;
	}
1086