xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_compiler.h (revision c29d51755812ace2e87aeefdb06cb2b4dac7087a)
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_compiler.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37   /**
38    * @addtogroup regex-detail
39    * @{
40    */
41 
42   /// Base class for scanner.
43   struct _Scanner_base
44   {
45     typedef unsigned int _StateT;
46 
47     static constexpr _StateT _S_state_at_start    = 1 << 0;
48     static constexpr _StateT _S_state_in_brace    = 1 << 2;
49     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
50 
51     virtual ~_Scanner_base() { };
52   };
53 
54   /**
55    * @brief struct _Scanner. Scans an input range for regex tokens.
56    *
57    * The %_Scanner class interprets the regular expression pattern in
58    * the input range passed to its constructor as a sequence of parse
59    * tokens passed to the regular expression compiler.  The sequence
60    * of tokens provided depends on the flag settings passed to the
61    * constructor: different regular expression grammars will interpret
62    * the same input pattern in syntactically different ways.
63    */
64   template<typename _InputIterator>
65     class _Scanner: public _Scanner_base
66     {
67     public:
68       typedef _InputIterator                                        _IteratorT;
69       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
70       typedef std::basic_string<_CharT>                             _StringT;
71       typedef regex_constants::syntax_option_type                   _FlagT;
72       typedef const std::ctype<_CharT>                              _CtypeT;
73 
74       /// Token types returned from the scanner.
75       enum _TokenT
76       {
77 	_S_token_anychar,
78 	_S_token_backref,
79 	_S_token_bracket_begin,
80 	_S_token_bracket_end,
81 	_S_token_inverse_class,
82 	_S_token_char_class_name,
83 	_S_token_closure0,
84 	_S_token_closure1,
85 	_S_token_collelem_multi,
86 	_S_token_collelem_single,
87 	_S_token_collsymbol,
88 	_S_token_comma,
89 	_S_token_dash,
90 	_S_token_dup_count,
91 	_S_token_eof,
92 	_S_token_equiv_class_name,
93 	_S_token_interval_begin,
94 	_S_token_interval_end,
95 	_S_token_line_begin,
96 	_S_token_line_end,
97 	_S_token_opt,
98 	_S_token_or,
99 	_S_token_ord_char,
100 	_S_token_quoted_char,
101 	_S_token_subexpr_begin,
102 	_S_token_subexpr_end,
103 	_S_token_word_begin,
104 	_S_token_word_end,
105 	_S_token_unknown
106       };
107 
108       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
109 	       std::locale __loc)
110       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
111         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
112       { _M_advance(); }
113 
114       void
115       _M_advance();
116 
117       _TokenT
118       _M_token() const
119       { return _M_curToken; }
120 
121       const _StringT&
122       _M_value() const
123       { return _M_curValue; }
124 
125 #ifdef _GLIBCXX_DEBUG
126       std::ostream&
127       _M_print(std::ostream&);
128 #endif
129 
130     private:
131       void
132       _M_eat_escape();
133 
134       void
135       _M_scan_in_brace();
136 
137       void
138       _M_scan_in_bracket();
139 
140       void
141       _M_eat_charclass();
142 
143       void
144       _M_eat_equivclass();
145 
146       void
147       _M_eat_collsymbol();
148 
149       _IteratorT  _M_current;
150       _IteratorT  _M_end;
151       _FlagT      _M_flags;
152       _CtypeT&    _M_ctype;
153       _TokenT     _M_curToken;
154       _StringT    _M_curValue;
155       _StateT     _M_state;
156     };
157 
158   template<typename _InputIterator>
159     void
160     _Scanner<_InputIterator>::
161     _M_advance()
162     {
163       if (_M_current == _M_end)
164 	{
165 	  _M_curToken = _S_token_eof;
166 	  return;
167 	}
168 
169       _CharT __c = *_M_current;
170       if (_M_state & _S_state_in_bracket)
171 	{
172 	  _M_scan_in_bracket();
173 	  return;
174 	}
175       if (_M_state & _S_state_in_brace)
176 	{
177 	  _M_scan_in_brace();
178 	  return;
179 	}
180 #if 0
181       // TODO: re-enable line anchors when _M_assertion is implemented.
182       // See PR libstdc++/47724
183       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
184 	{
185 	  _M_curToken = _S_token_line_begin;
186 	  ++_M_current;
187 	  return;
188 	}
189       else if (__c == _M_ctype.widen('$'))
190 	{
191 	  _M_curToken = _S_token_line_end;
192 	  ++_M_current;
193 	  return;
194 	}
195 #endif
196       else if (__c == _M_ctype.widen('.'))
197 	{
198 	  _M_curToken = _S_token_anychar;
199 	  ++_M_current;
200 	  return;
201 	}
202       else if (__c == _M_ctype.widen('*'))
203 	{
204 	  _M_curToken = _S_token_closure0;
205 	  ++_M_current;
206 	  return;
207 	}
208       else if (__c == _M_ctype.widen('+'))
209 	{
210 	  _M_curToken = _S_token_closure1;
211 	  ++_M_current;
212 	  return;
213 	}
214       else if (__c == _M_ctype.widen('|'))
215 	{
216 	  _M_curToken = _S_token_or;
217 	  ++_M_current;
218 	  return;
219 	}
220       else if (__c == _M_ctype.widen('['))
221 	{
222 	  _M_curToken = _S_token_bracket_begin;
223 	  _M_state |= (_S_state_in_bracket | _S_state_at_start);
224 	  ++_M_current;
225 	  return;
226 	}
227       else if (__c == _M_ctype.widen('\\'))
228 	{
229 	  _M_eat_escape();
230 	  return;
231 	}
232       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
233 	{
234 	  if (__c == _M_ctype.widen('('))
235 	    {
236 	      _M_curToken = _S_token_subexpr_begin;
237 	      ++_M_current;
238 	      return;
239 	    }
240 	  else if (__c == _M_ctype.widen(')'))
241 	    {
242 	      _M_curToken = _S_token_subexpr_end;
243 	      ++_M_current;
244 	      return;
245 	    }
246 	  else if (__c == _M_ctype.widen('{'))
247 	    {
248 	      _M_curToken = _S_token_interval_begin;
249 	      _M_state |= _S_state_in_brace;
250 	      ++_M_current;
251 	      return;
252 	    }
253 	}
254 
255       _M_curToken = _S_token_ord_char;
256       _M_curValue.assign(1, __c);
257       ++_M_current;
258     }
259 
260 
261   template<typename _InputIterator>
262     void
263     _Scanner<_InputIterator>::
264     _M_scan_in_brace()
265     {
266       if (_M_ctype.is(_CtypeT::digit, *_M_current))
267 	{
268 	  _M_curToken = _S_token_dup_count;
269 	  _M_curValue.assign(1, *_M_current);
270 	  ++_M_current;
271 	  while (_M_current != _M_end
272 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
273 	    {
274 	      _M_curValue += *_M_current;
275 	      ++_M_current;
276 	    }
277 	  return;
278 	}
279       else if (*_M_current == _M_ctype.widen(','))
280 	{
281 	  _M_curToken = _S_token_comma;
282 	  ++_M_current;
283 	  return;
284 	}
285       if (_M_flags & (regex_constants::basic | regex_constants::grep))
286 	{
287 	  if (*_M_current == _M_ctype.widen('\\'))
288 	    _M_eat_escape();
289 	}
290       else
291 	{
292 	  if (*_M_current == _M_ctype.widen('}'))
293 	    {
294 	      _M_curToken = _S_token_interval_end;
295 	      _M_state &= ~_S_state_in_brace;
296 	      ++_M_current;
297 	      return;
298 	    }
299 	}
300     }
301 
302   template<typename _InputIterator>
303     void
304     _Scanner<_InputIterator>::
305     _M_scan_in_bracket()
306     {
307       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
308 	{
309 	  _M_curToken = _S_token_inverse_class;
310 	  _M_state &= ~_S_state_at_start;
311 	  ++_M_current;
312 	  return;
313 	}
314       else if (*_M_current == _M_ctype.widen('['))
315 	{
316 	  ++_M_current;
317 	  if (_M_current == _M_end)
318 	    {
319 	      _M_curToken = _S_token_eof;
320 	      return;
321 	    }
322 
323 	  if (*_M_current == _M_ctype.widen('.'))
324 	    {
325 	      _M_curToken = _S_token_collsymbol;
326 	      _M_eat_collsymbol();
327 	      return;
328 	    }
329 	  else if (*_M_current == _M_ctype.widen(':'))
330 	    {
331 	      _M_curToken = _S_token_char_class_name;
332 	      _M_eat_charclass();
333 	      return;
334 	    }
335 	  else if (*_M_current == _M_ctype.widen('='))
336 	    {
337 	      _M_curToken = _S_token_equiv_class_name;
338 	      _M_eat_equivclass();
339 	      return;
340 	    }
341 	}
342       else if (*_M_current == _M_ctype.widen('-'))
343 	{
344 	  _M_curToken = _S_token_dash;
345 	  ++_M_current;
346 	  return;
347 	}
348       else if (*_M_current == _M_ctype.widen(']'))
349 	{
350 	  if (!(_M_flags & regex_constants::ECMAScript)
351 	      || !(_M_state & _S_state_at_start))
352 	    {
353 	      // special case: only if  _not_ chr first after
354 	      // '[' or '[^' and if not ECMAscript
355 	      _M_curToken = _S_token_bracket_end;
356 	      ++_M_current;
357 	      return;
358 	    }
359 	}
360       _M_curToken = _S_token_collelem_single;
361       _M_curValue.assign(1, *_M_current);
362       ++_M_current;
363     }
364 
365   template<typename _InputIterator>
366     void
367     _Scanner<_InputIterator>::
368     _M_eat_escape()
369     {
370       ++_M_current;
371       if (_M_current == _M_end)
372 	{
373 	  _M_curToken = _S_token_eof;
374 	  return;
375 	}
376       _CharT __c = *_M_current;
377       ++_M_current;
378 
379       if (__c == _M_ctype.widen('('))
380 	{
381 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
382 	    {
383 	      _M_curToken = _S_token_ord_char;
384 	      _M_curValue.assign(1, __c);
385 	    }
386 	  else
387 	    _M_curToken = _S_token_subexpr_begin;
388 	}
389       else if (__c == _M_ctype.widen(')'))
390 	{
391 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
392 	    {
393 	      _M_curToken = _S_token_ord_char;
394 	      _M_curValue.assign(1, __c);
395 	    }
396 	  else
397 	    _M_curToken = _S_token_subexpr_end;
398 	}
399       else if (__c == _M_ctype.widen('{'))
400 	{
401 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
402 	    {
403 	      _M_curToken = _S_token_ord_char;
404 	      _M_curValue.assign(1, __c);
405 	    }
406 	  else
407 	    {
408 	      _M_curToken = _S_token_interval_begin;
409 	      _M_state |= _S_state_in_brace;
410 	    }
411 	}
412       else if (__c == _M_ctype.widen('}'))
413 	{
414 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
415 	    {
416 	      _M_curToken = _S_token_ord_char;
417 	      _M_curValue.assign(1, __c);
418 	    }
419 	  else
420 	    {
421 	      if (!(_M_state && _S_state_in_brace))
422 		__throw_regex_error(regex_constants::error_badbrace);
423 	      _M_state &= ~_S_state_in_brace;
424 	      _M_curToken = _S_token_interval_end;
425 	    }
426 	}
427       else if (__c == _M_ctype.widen('x'))
428 	{
429 	  ++_M_current;
430 	  if (_M_current == _M_end)
431 	    {
432 	      _M_curToken = _S_token_eof;
433 	      return;
434 	    }
435 	  if (_M_ctype.is(_CtypeT::digit, *_M_current))
436 	    {
437 	      _M_curValue.assign(1, *_M_current);
438 	      ++_M_current;
439 	      if (_M_current == _M_end)
440 		{
441 		  _M_curToken = _S_token_eof;
442 		  return;
443 		}
444 	      if (_M_ctype.is(_CtypeT::digit, *_M_current))
445 		{
446 		  _M_curValue += *_M_current;
447 		  ++_M_current;
448 		  return;
449 		}
450 	    }
451 	}
452       else if (__c == _M_ctype.widen('^')
453 	       || __c == _M_ctype.widen('.')
454 	       || __c == _M_ctype.widen('*')
455 	       || __c == _M_ctype.widen('$')
456 	       || __c == _M_ctype.widen('\\'))
457 	{
458 	  _M_curToken = _S_token_ord_char;
459 	  _M_curValue.assign(1, __c);
460 	}
461       else if (_M_ctype.is(_CtypeT::digit, __c))
462 	{
463 	  _M_curToken = _S_token_backref;
464 	  _M_curValue.assign(1, __c);
465 	}
466       else
467 	__throw_regex_error(regex_constants::error_escape);
468     }
469 
470 
471   // Eats a character class or throwns an exception.
472   // current point to ':' delimiter on entry, char after ']' on return
473   template<typename _InputIterator>
474     void
475     _Scanner<_InputIterator>::
476     _M_eat_charclass()
477     {
478       ++_M_current; // skip ':'
479       if (_M_current == _M_end)
480 	__throw_regex_error(regex_constants::error_ctype);
481       for (_M_curValue.clear();
482 	   _M_current != _M_end && *_M_current != _M_ctype.widen(':');
483 	   ++_M_current)
484 	_M_curValue += *_M_current;
485       if (_M_current == _M_end)
486 	__throw_regex_error(regex_constants::error_ctype);
487       ++_M_current; // skip ':'
488       if (*_M_current != _M_ctype.widen(']'))
489 	__throw_regex_error(regex_constants::error_ctype);
490       ++_M_current; // skip ']'
491     }
492 
493 
494   template<typename _InputIterator>
495     void
496     _Scanner<_InputIterator>::
497     _M_eat_equivclass()
498     {
499       ++_M_current; // skip '='
500       if (_M_current == _M_end)
501 	__throw_regex_error(regex_constants::error_collate);
502       for (_M_curValue.clear();
503 	   _M_current != _M_end && *_M_current != _M_ctype.widen('=');
504 	   ++_M_current)
505 	_M_curValue += *_M_current;
506       if (_M_current == _M_end)
507 	__throw_regex_error(regex_constants::error_collate);
508       ++_M_current; // skip '='
509       if (*_M_current != _M_ctype.widen(']'))
510 	__throw_regex_error(regex_constants::error_collate);
511       ++_M_current; // skip ']'
512     }
513 
514 
515   template<typename _InputIterator>
516     void
517     _Scanner<_InputIterator>::
518     _M_eat_collsymbol()
519     {
520       ++_M_current; // skip '.'
521       if (_M_current == _M_end)
522 	__throw_regex_error(regex_constants::error_collate);
523       for (_M_curValue.clear();
524 	   _M_current != _M_end && *_M_current != _M_ctype.widen('.');
525 	   ++_M_current)
526 	_M_curValue += *_M_current;
527       if (_M_current == _M_end)
528 	__throw_regex_error(regex_constants::error_collate);
529       ++_M_current; // skip '.'
530       if (*_M_current != _M_ctype.widen(']'))
531 	__throw_regex_error(regex_constants::error_collate);
532       ++_M_current; // skip ']'
533     }
534 
535 #ifdef _GLIBCXX_DEBUG
536   template<typename _InputIterator>
537     std::ostream&
538     _Scanner<_InputIterator>::
539     _M_print(std::ostream& ostr)
540     {
541       switch (_M_curToken)
542       {
543 	case _S_token_anychar:
544 	  ostr << "any-character\n";
545 	  break;
546 	case _S_token_backref:
547 	  ostr << "backref\n";
548 	  break;
549 	case _S_token_bracket_begin:
550 	  ostr << "bracket-begin\n";
551 	  break;
552 	case _S_token_bracket_end:
553 	  ostr << "bracket-end\n";
554 	  break;
555 	case _S_token_char_class_name:
556 	  ostr << "char-class-name \"" << _M_curValue << "\"\n";
557 	  break;
558 	case _S_token_closure0:
559 	  ostr << "closure0\n";
560 	  break;
561 	case _S_token_closure1:
562 	  ostr << "closure1\n";
563 	  break;
564 	case _S_token_collelem_multi:
565 	  ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
566 	  break;
567 	case _S_token_collelem_single:
568 	  ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
569 	  break;
570 	case _S_token_collsymbol:
571 	  ostr << "collsymbol \"" << _M_curValue << "\"\n";
572 	  break;
573 	case _S_token_comma:
574 	  ostr << "comma\n";
575 	  break;
576 	case _S_token_dash:
577 	  ostr << "dash\n";
578 	  break;
579 	case _S_token_dup_count:
580 	  ostr << "dup count: " << _M_curValue << "\n";
581 	  break;
582 	case _S_token_eof:
583 	  ostr << "EOF\n";
584 	  break;
585 	case _S_token_equiv_class_name:
586 	  ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
587 	  break;
588 	case _S_token_interval_begin:
589 	  ostr << "interval begin\n";
590 	  break;
591 	case _S_token_interval_end:
592 	  ostr << "interval end\n";
593 	  break;
594 	case _S_token_line_begin:
595 	  ostr << "line begin\n";
596 	  break;
597 	case _S_token_line_end:
598 	  ostr << "line end\n";
599 	  break;
600 	case _S_token_opt:
601 	  ostr << "opt\n";
602 	  break;
603 	case _S_token_or:
604 	  ostr << "or\n";
605 	  break;
606 	case _S_token_ord_char:
607 	  ostr << "ordinary character: \"" << _M_value() << "\"\n";
608 	  break;
609 	case _S_token_quoted_char:
610 	  ostr << "quoted char\n";
611 	  break;
612 	case _S_token_subexpr_begin:
613 	  ostr << "subexpr begin\n";
614 	  break;
615 	case _S_token_subexpr_end:
616 	  ostr << "subexpr end\n";
617 	  break;
618 	case _S_token_word_begin:
619 	  ostr << "word begin\n";
620 	  break;
621 	case _S_token_word_end:
622 	  ostr << "word end\n";
623 	  break;
624 	case _S_token_unknown:
625 	  ostr << "-- unknown token --\n";
626 	  break;
627       }
628       return ostr;
629     }
630 #endif
631 
632   /// Builds an NFA from an input iterator interval.
633   template<typename _InIter, typename _TraitsT>
634     class _Compiler
635     {
636     public:
637       typedef _InIter                                            _IterT;
638       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
639       typedef std::basic_string<_CharT>                          _StringT;
640       typedef regex_constants::syntax_option_type                _FlagT;
641 
642       _Compiler(const _InIter& __b, const _InIter& __e,
643 		_TraitsT& __traits, _FlagT __flags);
644 
645       const _Nfa&
646       _M_nfa() const
647       { return _M_state_store; }
648 
649     private:
650       typedef _Scanner<_InIter>                              _ScannerT;
651       typedef typename _ScannerT::_TokenT                    _TokenT;
652       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
653       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
654 
655       // accepts a specific token or returns false.
656       bool
657       _M_match_token(_TokenT __token);
658 
659       void
660       _M_disjunction();
661 
662       bool
663       _M_alternative();
664 
665       bool
666       _M_term();
667 
668       bool
669       _M_assertion();
670 
671       bool
672       _M_quantifier();
673 
674       bool
675       _M_atom();
676 
677       bool
678       _M_bracket_expression();
679 
680       bool
681       _M_bracket_list(_RMatcherT& __matcher);
682 
683       bool
684       _M_follow_list(_RMatcherT& __matcher);
685 
686       bool
687       _M_follow_list2(_RMatcherT& __matcher);
688 
689       bool
690       _M_expression_term(_RMatcherT& __matcher);
691 
692       bool
693       _M_range_expression(_RMatcherT& __matcher);
694 
695       bool
696       _M_start_range(_RMatcherT& __matcher);
697 
698       bool
699       _M_collating_symbol(_RMatcherT& __matcher);
700 
701       bool
702       _M_equivalence_class(_RMatcherT& __matcher);
703 
704       bool
705       _M_character_class(_RMatcherT& __matcher);
706 
707       int
708       _M_cur_int_value(int __radix);
709 
710       _TraitsT&      _M_traits;
711       _ScannerT      _M_scanner;
712       _StringT       _M_cur_value;
713       _Nfa           _M_state_store;
714       _StackT        _M_stack;
715     };
716 
717   template<typename _InIter, typename _TraitsT>
718     _Compiler<_InIter, _TraitsT>::
719     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
720 	      _Compiler<_InIter, _TraitsT>::_FlagT __flags)
721     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
722       _M_state_store(__flags)
723     {
724       typedef _StartTagger<_InIter, _TraitsT> _Start;
725       typedef _EndTagger<_InIter, _TraitsT> _End;
726 
727       _StateSeq __r(_M_state_store,
728       		    _M_state_store._M_insert_subexpr_begin(_Start(0)));
729       _M_disjunction();
730       if (!_M_stack.empty())
731 	{
732 	  __r._M_append(_M_stack.top());
733 	  _M_stack.pop();
734 	}
735       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
736       __r._M_append(_M_state_store._M_insert_accept());
737     }
738 
739   template<typename _InIter, typename _TraitsT>
740     bool
741     _Compiler<_InIter, _TraitsT>::
742     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
743     {
744       if (token == _M_scanner._M_token())
745 	{
746 	  _M_cur_value = _M_scanner._M_value();
747 	  _M_scanner._M_advance();
748 	  return true;
749 	}
750       return false;
751     }
752 
753   template<typename _InIter, typename _TraitsT>
754     void
755     _Compiler<_InIter, _TraitsT>::
756     _M_disjunction()
757     {
758       this->_M_alternative();
759       if (_M_match_token(_ScannerT::_S_token_or))
760 	{
761 	  _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
762 	  this->_M_disjunction();
763 	  _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
764 	  _M_stack.push(_StateSeq(__alt1, __alt2));
765 	}
766     }
767 
768   template<typename _InIter, typename _TraitsT>
769     bool
770     _Compiler<_InIter, _TraitsT>::
771     _M_alternative()
772     {
773       if (this->_M_term())
774 	{
775 	  _StateSeq __re = _M_stack.top(); _M_stack.pop();
776 	  this->_M_alternative();
777 	  if (!_M_stack.empty())
778 	    {
779 	      __re._M_append(_M_stack.top());
780 	      _M_stack.pop();
781 	    }
782 	  _M_stack.push(__re);
783 	  return true;
784 	}
785       return false;
786     }
787 
788   template<typename _InIter, typename _TraitsT>
789     bool
790     _Compiler<_InIter, _TraitsT>::
791     _M_term()
792     {
793       if (this->_M_assertion())
794 	return true;
795       if (this->_M_atom())
796 	{
797 	  this->_M_quantifier();
798 	  return true;
799 	}
800       return false;
801     }
802 
803   template<typename _InIter, typename _TraitsT>
804     bool
805     _Compiler<_InIter, _TraitsT>::
806     _M_assertion()
807     {
808       if (_M_match_token(_ScannerT::_S_token_line_begin))
809 	{
810 	  // __m.push(_Matcher::_S_opcode_line_begin);
811 	  return true;
812 	}
813       if (_M_match_token(_ScannerT::_S_token_line_end))
814 	{
815 	  // __m.push(_Matcher::_S_opcode_line_end);
816 	  return true;
817 	}
818       if (_M_match_token(_ScannerT::_S_token_word_begin))
819 	{
820 	  // __m.push(_Matcher::_S_opcode_word_begin);
821 	  return true;
822 	}
823       if (_M_match_token(_ScannerT::_S_token_word_end))
824 	{
825 	  // __m.push(_Matcher::_S_opcode_word_end);
826 	  return true;
827 	}
828       return false;
829     }
830 
831   template<typename _InIter, typename _TraitsT>
832     bool
833     _Compiler<_InIter, _TraitsT>::
834     _M_quantifier()
835     {
836       if (_M_match_token(_ScannerT::_S_token_closure0))
837 	{
838 	  if (_M_stack.empty())
839 	    __throw_regex_error(regex_constants::error_badrepeat);
840 	  _StateSeq __r(_M_stack.top(), -1);
841 	  __r._M_append(__r._M_front());
842 	  _M_stack.pop();
843 	  _M_stack.push(__r);
844 	  return true;
845 	}
846       if (_M_match_token(_ScannerT::_S_token_closure1))
847 	{
848 	  if (_M_stack.empty())
849 	    __throw_regex_error(regex_constants::error_badrepeat);
850 	  _StateSeq __r(_M_state_store,
851 			_M_state_store.
852 			_M_insert_alt(_S_invalid_state_id,
853 				      _M_stack.top()._M_front()));
854 	  _M_stack.top()._M_append(__r);
855 	  return true;
856 	}
857       if (_M_match_token(_ScannerT::_S_token_opt))
858 	{
859 	  if (_M_stack.empty())
860 	  __throw_regex_error(regex_constants::error_badrepeat);
861 	  _StateSeq __r(_M_stack.top(), -1);
862 	  _M_stack.pop();
863 	  _M_stack.push(__r);
864 	  return true;
865 	}
866       if (_M_match_token(_ScannerT::_S_token_interval_begin))
867 	{
868 	  if (_M_stack.empty())
869 	    __throw_regex_error(regex_constants::error_badrepeat);
870 	  if (!_M_match_token(_ScannerT::_S_token_dup_count))
871 	    __throw_regex_error(regex_constants::error_badbrace);
872 	  _StateSeq __r(_M_stack.top());
873 	  int __min_rep = _M_cur_int_value(10);
874 	  for (int __i = 1; __i < __min_rep; ++__i)
875 	    _M_stack.top()._M_append(__r._M_clone());
876 	  if (_M_match_token(_ScannerT::_S_token_comma))
877 	    if (_M_match_token(_ScannerT::_S_token_dup_count))
878 	      {
879 		int __n = _M_cur_int_value(10) - __min_rep;
880 		if (__n < 0)
881 		  __throw_regex_error(regex_constants::error_badbrace);
882 		for (int __i = 0; __i < __n; ++__i)
883 		  {
884 		    _StateSeq __r(_M_state_store,
885 				  _M_state_store.
886 				  _M_insert_alt(_S_invalid_state_id,
887 						_M_stack.top()._M_front()));
888 		    _M_stack.top()._M_append(__r);
889 		  }
890 	      }
891 	    else
892 	      {
893 		_StateSeq __r(_M_stack.top(), -1);
894 		__r._M_push_back(__r._M_front());
895 		_M_stack.pop();
896 		_M_stack.push(__r);
897 	      }
898 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
899 	    __throw_regex_error(regex_constants::error_brace);
900 	  return true;
901 	}
902       return false;
903     }
904 
905   template<typename _InIter, typename _TraitsT>
906     bool
907     _Compiler<_InIter, _TraitsT>::
908     _M_atom()
909     {
910       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
911       typedef _StartTagger<_InIter, _TraitsT> _Start;
912       typedef _EndTagger<_InIter, _TraitsT> _End;
913 
914       if (_M_match_token(_ScannerT::_S_token_anychar))
915 	{
916 	  _M_stack.push(_StateSeq(_M_state_store,
917                                   _M_state_store._M_insert_matcher
918                                   (_AnyMatcher)));
919 	  return true;
920 	}
921       if (_M_match_token(_ScannerT::_S_token_ord_char))
922 	{
923 	  _M_stack.push(_StateSeq(_M_state_store,
924                                   _M_state_store._M_insert_matcher
925                                   (_CMatcher(_M_cur_value[0], _M_traits))));
926 	  return true;
927 	}
928       if (_M_match_token(_ScannerT::_S_token_quoted_char))
929 	{
930 	  // note that in the ECMA grammar, this case covers backrefs.
931 	  _M_stack.push(_StateSeq(_M_state_store,
932 				  _M_state_store._M_insert_matcher
933 				  (_CMatcher(_M_cur_value[0], _M_traits))));
934 	  return true;
935 	}
936       if (_M_match_token(_ScannerT::_S_token_backref))
937 	{
938 	  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
939 	  return true;
940 	}
941       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
942 	{
943 	  int __mark = _M_state_store._M_sub_count();
944 	  _StateSeq __r(_M_state_store,
945 			_M_state_store.
946 			_M_insert_subexpr_begin(_Start(__mark)));
947 	  this->_M_disjunction();
948 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
949 	    __throw_regex_error(regex_constants::error_paren);
950 	  if (!_M_stack.empty())
951 	    {
952 	      __r._M_append(_M_stack.top());
953 	      _M_stack.pop();
954 	    }
955 	  __r._M_append(_M_state_store._M_insert_subexpr_end
956 			(__mark, _End(__mark)));
957 	  _M_stack.push(__r);
958 	  return true;
959 	}
960       return _M_bracket_expression();
961     }
962 
963   template<typename _InIter, typename _TraitsT>
964     bool
965     _Compiler<_InIter, _TraitsT>::
966     _M_bracket_expression()
967     {
968       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
969 	{
970 	  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
971 			       _M_traits);
972 	  if (!_M_bracket_list(__matcher)
973 	      || !_M_match_token(_ScannerT::_S_token_bracket_end))
974 	    __throw_regex_error(regex_constants::error_brack);
975 	  _M_stack.push(_StateSeq(_M_state_store,
976 				  _M_state_store._M_insert_matcher(__matcher)));
977 	  return true;
978 	}
979       return false;
980     }
981 
982   // If the dash is the last character in the bracket expression, it is not
983   // special.
984   template<typename _InIter, typename _TraitsT>
985     bool
986     _Compiler<_InIter, _TraitsT>::
987     _M_bracket_list(_RMatcherT& __matcher)
988     {
989       if (_M_follow_list(__matcher))
990 	{
991 	  if (_M_match_token(_ScannerT::_S_token_dash))
992 	    __matcher._M_add_char(_M_cur_value[0]);
993 	  return true;
994 	}
995       return false;
996     }
997 
998   template<typename _InIter, typename _TraitsT>
999     bool
1000     _Compiler<_InIter, _TraitsT>::
1001     _M_follow_list(_RMatcherT& __matcher)
1002     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1003 
1004   template<typename _InIter, typename _TraitsT>
1005     bool
1006     _Compiler<_InIter, _TraitsT>::
1007     _M_follow_list2(_RMatcherT& __matcher)
1008     {
1009       if (_M_expression_term(__matcher))
1010 	return _M_follow_list2(__matcher);
1011       return true;
1012     }
1013 
1014   template<typename _InIter, typename _TraitsT>
1015     bool
1016     _Compiler<_InIter, _TraitsT>::
1017     _M_expression_term(_RMatcherT& __matcher)
1018     {
1019       return (_M_collating_symbol(__matcher)
1020 	      || _M_character_class(__matcher)
1021 	      || _M_equivalence_class(__matcher)
1022 	      || (_M_start_range(__matcher)
1023 		  && _M_range_expression(__matcher)));
1024     }
1025 
1026   template<typename _InIter, typename _TraitsT>
1027     bool
1028     _Compiler<_InIter, _TraitsT>::
1029     _M_range_expression(_RMatcherT& __matcher)
1030     {
1031       if (!_M_collating_symbol(__matcher))
1032 	if (!_M_match_token(_ScannerT::_S_token_dash))
1033 	  __throw_regex_error(regex_constants::error_range);
1034       __matcher._M_make_range();
1035       return true;
1036     }
1037 
1038   template<typename _InIter, typename _TraitsT>
1039     bool
1040     _Compiler<_InIter, _TraitsT>::
1041     _M_start_range(_RMatcherT& __matcher)
1042     { return _M_match_token(_ScannerT::_S_token_dash); }
1043 
1044   template<typename _InIter, typename _TraitsT>
1045     bool
1046     _Compiler<_InIter, _TraitsT>::
1047     _M_collating_symbol(_RMatcherT& __matcher)
1048     {
1049       if (_M_match_token(_ScannerT::_S_token_collelem_single))
1050 	{
1051 	  __matcher._M_add_char(_M_cur_value[0]);
1052 	  return true;
1053 	}
1054       if (_M_match_token(_ScannerT::_S_token_collsymbol))
1055 	{
1056 	  __matcher._M_add_collating_element(_M_cur_value);
1057 	  return true;
1058 	}
1059       return false;
1060     }
1061 
1062   template<typename _InIter, typename _TraitsT>
1063     bool
1064     _Compiler<_InIter, _TraitsT>::
1065     _M_equivalence_class(_RMatcherT& __matcher)
1066     {
1067       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1068 	{
1069 	  __matcher._M_add_equivalence_class(_M_cur_value);
1070 	  return true;
1071 	}
1072       return false;
1073     }
1074 
1075   template<typename _InIter, typename _TraitsT>
1076     bool
1077     _Compiler<_InIter, _TraitsT>::
1078     _M_character_class(_RMatcherT& __matcher)
1079     {
1080       if (_M_match_token(_ScannerT::_S_token_char_class_name))
1081 	{
1082 	  __matcher._M_add_character_class(_M_cur_value);
1083 	  return true;
1084 	}
1085       return false;
1086     }
1087 
1088   template<typename _InIter, typename _TraitsT>
1089     int
1090     _Compiler<_InIter, _TraitsT>::
1091     _M_cur_int_value(int __radix)
1092     {
1093       int __v = 0;
1094       for (typename _StringT::size_type __i = 0;
1095 	   __i < _M_cur_value.length(); ++__i)
1096 	__v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1097       return __v;
1098     }
1099 
1100   template<typename _InIter, typename _TraitsT>
1101     _AutomatonPtr
1102     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1103 	      regex_constants::syntax_option_type __f)
1104     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1105                                         __f)._M_nfa())); }
1106 
1107  //@} regex-detail
1108 _GLIBCXX_END_NAMESPACE_VERSION
1109 } // namespace __detail
1110 } // namespace std
1111