1*e4b17023SJohn Marino // class template regex -*- C++ -*-
2*e4b17023SJohn Marino
3*e4b17023SJohn Marino // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4*e4b17023SJohn Marino //
5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free
6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the
7*e4b17023SJohn Marino // terms of the GNU General Public License as published by the
8*e4b17023SJohn Marino // Free Software Foundation; either version 3, or (at your option)
9*e4b17023SJohn Marino // any later version.
10*e4b17023SJohn Marino
11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful,
12*e4b17023SJohn Marino // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*e4b17023SJohn Marino // GNU General Public License for more details.
15*e4b17023SJohn Marino
16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional
17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version
18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation.
19*e4b17023SJohn Marino
20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and
21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program;
22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>.
24*e4b17023SJohn Marino
25*e4b17023SJohn Marino /**
26*e4b17023SJohn Marino * @file bits/regex_compiler.h
27*e4b17023SJohn Marino * This is an internal header file, included by other library headers.
28*e4b17023SJohn Marino * Do not attempt to use it directly. @headername{regex}
29*e4b17023SJohn Marino */
30*e4b17023SJohn Marino
_GLIBCXX_VISIBILITY(default)31*e4b17023SJohn Marino namespace std _GLIBCXX_VISIBILITY(default)
32*e4b17023SJohn Marino {
33*e4b17023SJohn Marino namespace __regex
34*e4b17023SJohn Marino {
35*e4b17023SJohn Marino _GLIBCXX_BEGIN_NAMESPACE_VERSION
36*e4b17023SJohn Marino
37*e4b17023SJohn Marino struct _Scanner_base
38*e4b17023SJohn Marino {
39*e4b17023SJohn Marino typedef unsigned int _StateT;
40*e4b17023SJohn Marino
41*e4b17023SJohn Marino static constexpr _StateT _S_state_at_start = 1 << 0;
42*e4b17023SJohn Marino static constexpr _StateT _S_state_in_brace = 1 << 2;
43*e4b17023SJohn Marino static constexpr _StateT _S_state_in_bracket = 1 << 3;
44*e4b17023SJohn Marino
45*e4b17023SJohn Marino virtual ~_Scanner_base() { };
46*e4b17023SJohn Marino };
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino //
49*e4b17023SJohn Marino // @brief Scans an input range for regex tokens.
50*e4b17023SJohn Marino //
51*e4b17023SJohn Marino // The %_Scanner class interprets the regular expression pattern in the input
52*e4b17023SJohn Marino // range passed to its constructor as a sequence of parse tokens passed to
53*e4b17023SJohn Marino // the regular expression compiler. The sequence of tokens provided depends
54*e4b17023SJohn Marino // on the flag settings passed to the constructor: different regular
55*e4b17023SJohn Marino // expression grammars will interpret the same input pattern in
56*e4b17023SJohn Marino // syntactically different ways.
57*e4b17023SJohn Marino //
58*e4b17023SJohn Marino template<typename _InputIterator>
59*e4b17023SJohn Marino class _Scanner: public _Scanner_base
60*e4b17023SJohn Marino {
61*e4b17023SJohn Marino public:
62*e4b17023SJohn Marino typedef _InputIterator _IteratorT;
63*e4b17023SJohn Marino typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
64*e4b17023SJohn Marino typedef std::basic_string<_CharT> _StringT;
65*e4b17023SJohn Marino typedef regex_constants::syntax_option_type _FlagT;
66*e4b17023SJohn Marino typedef const std::ctype<_CharT> _CtypeT;
67*e4b17023SJohn Marino
68*e4b17023SJohn Marino // Token types returned from the scanner.
69*e4b17023SJohn Marino enum _TokenT
70*e4b17023SJohn Marino {
71*e4b17023SJohn Marino _S_token_anychar,
72*e4b17023SJohn Marino _S_token_backref,
73*e4b17023SJohn Marino _S_token_bracket_begin,
74*e4b17023SJohn Marino _S_token_bracket_end,
75*e4b17023SJohn Marino _S_token_inverse_class,
76*e4b17023SJohn Marino _S_token_char_class_name,
77*e4b17023SJohn Marino _S_token_closure0,
78*e4b17023SJohn Marino _S_token_closure1,
79*e4b17023SJohn Marino _S_token_collelem_multi,
80*e4b17023SJohn Marino _S_token_collelem_single,
81*e4b17023SJohn Marino _S_token_collsymbol,
82*e4b17023SJohn Marino _S_token_comma,
83*e4b17023SJohn Marino _S_token_dash,
84*e4b17023SJohn Marino _S_token_dup_count,
85*e4b17023SJohn Marino _S_token_eof,
86*e4b17023SJohn Marino _S_token_equiv_class_name,
87*e4b17023SJohn Marino _S_token_interval_begin,
88*e4b17023SJohn Marino _S_token_interval_end,
89*e4b17023SJohn Marino _S_token_line_begin,
90*e4b17023SJohn Marino _S_token_line_end,
91*e4b17023SJohn Marino _S_token_opt,
92*e4b17023SJohn Marino _S_token_or,
93*e4b17023SJohn Marino _S_token_ord_char,
94*e4b17023SJohn Marino _S_token_quoted_char,
95*e4b17023SJohn Marino _S_token_subexpr_begin,
96*e4b17023SJohn Marino _S_token_subexpr_end,
97*e4b17023SJohn Marino _S_token_word_begin,
98*e4b17023SJohn Marino _S_token_word_end,
99*e4b17023SJohn Marino _S_token_unknown
100*e4b17023SJohn Marino };
101*e4b17023SJohn Marino
102*e4b17023SJohn Marino public:
103*e4b17023SJohn Marino _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
104*e4b17023SJohn Marino std::locale __loc)
105*e4b17023SJohn Marino : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
106*e4b17023SJohn Marino _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
107*e4b17023SJohn Marino { _M_advance(); }
108*e4b17023SJohn Marino
109*e4b17023SJohn Marino void
110*e4b17023SJohn Marino _M_advance();
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino _TokenT
113*e4b17023SJohn Marino _M_token() const
114*e4b17023SJohn Marino { return _M_curToken; }
115*e4b17023SJohn Marino
116*e4b17023SJohn Marino const _StringT&
117*e4b17023SJohn Marino _M_value() const
118*e4b17023SJohn Marino { return _M_curValue; }
119*e4b17023SJohn Marino
120*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG
121*e4b17023SJohn Marino std::ostream&
122*e4b17023SJohn Marino _M_print(std::ostream&);
123*e4b17023SJohn Marino #endif
124*e4b17023SJohn Marino
125*e4b17023SJohn Marino private:
126*e4b17023SJohn Marino void
127*e4b17023SJohn Marino _M_eat_escape();
128*e4b17023SJohn Marino
129*e4b17023SJohn Marino void
130*e4b17023SJohn Marino _M_scan_in_brace();
131*e4b17023SJohn Marino
132*e4b17023SJohn Marino void
133*e4b17023SJohn Marino _M_scan_in_bracket();
134*e4b17023SJohn Marino
135*e4b17023SJohn Marino void
136*e4b17023SJohn Marino _M_eat_charclass();
137*e4b17023SJohn Marino
138*e4b17023SJohn Marino void
139*e4b17023SJohn Marino _M_eat_equivclass();
140*e4b17023SJohn Marino
141*e4b17023SJohn Marino void
142*e4b17023SJohn Marino _M_eat_collsymbol();
143*e4b17023SJohn Marino
144*e4b17023SJohn Marino private:
145*e4b17023SJohn Marino _IteratorT _M_current;
146*e4b17023SJohn Marino _IteratorT _M_end;
147*e4b17023SJohn Marino _FlagT _M_flags;
148*e4b17023SJohn Marino _CtypeT& _M_ctype;
149*e4b17023SJohn Marino _TokenT _M_curToken;
150*e4b17023SJohn Marino _StringT _M_curValue;
151*e4b17023SJohn Marino _StateT _M_state;
152*e4b17023SJohn Marino };
153*e4b17023SJohn Marino
154*e4b17023SJohn Marino template<typename _InputIterator>
155*e4b17023SJohn Marino void
156*e4b17023SJohn Marino _Scanner<_InputIterator>::
157*e4b17023SJohn Marino _M_advance()
158*e4b17023SJohn Marino {
159*e4b17023SJohn Marino if (_M_current == _M_end)
160*e4b17023SJohn Marino {
161*e4b17023SJohn Marino _M_curToken = _S_token_eof;
162*e4b17023SJohn Marino return;
163*e4b17023SJohn Marino }
164*e4b17023SJohn Marino
165*e4b17023SJohn Marino _CharT __c = *_M_current;
166*e4b17023SJohn Marino if (_M_state & _S_state_in_bracket)
167*e4b17023SJohn Marino {
168*e4b17023SJohn Marino _M_scan_in_bracket();
169*e4b17023SJohn Marino return;
170*e4b17023SJohn Marino }
171*e4b17023SJohn Marino if (_M_state & _S_state_in_brace)
172*e4b17023SJohn Marino {
173*e4b17023SJohn Marino _M_scan_in_brace();
174*e4b17023SJohn Marino return;
175*e4b17023SJohn Marino }
176*e4b17023SJohn Marino #if 0
177*e4b17023SJohn Marino // TODO: re-enable line anchors when _M_assertion is implemented.
178*e4b17023SJohn Marino // See PR libstdc++/47724
179*e4b17023SJohn Marino else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
180*e4b17023SJohn Marino {
181*e4b17023SJohn Marino _M_curToken = _S_token_line_begin;
182*e4b17023SJohn Marino ++_M_current;
183*e4b17023SJohn Marino return;
184*e4b17023SJohn Marino }
185*e4b17023SJohn Marino else if (__c == _M_ctype.widen('$'))
186*e4b17023SJohn Marino {
187*e4b17023SJohn Marino _M_curToken = _S_token_line_end;
188*e4b17023SJohn Marino ++_M_current;
189*e4b17023SJohn Marino return;
190*e4b17023SJohn Marino }
191*e4b17023SJohn Marino #endif
192*e4b17023SJohn Marino else if (__c == _M_ctype.widen('.'))
193*e4b17023SJohn Marino {
194*e4b17023SJohn Marino _M_curToken = _S_token_anychar;
195*e4b17023SJohn Marino ++_M_current;
196*e4b17023SJohn Marino return;
197*e4b17023SJohn Marino }
198*e4b17023SJohn Marino else if (__c == _M_ctype.widen('*'))
199*e4b17023SJohn Marino {
200*e4b17023SJohn Marino _M_curToken = _S_token_closure0;
201*e4b17023SJohn Marino ++_M_current;
202*e4b17023SJohn Marino return;
203*e4b17023SJohn Marino }
204*e4b17023SJohn Marino else if (__c == _M_ctype.widen('+'))
205*e4b17023SJohn Marino {
206*e4b17023SJohn Marino _M_curToken = _S_token_closure1;
207*e4b17023SJohn Marino ++_M_current;
208*e4b17023SJohn Marino return;
209*e4b17023SJohn Marino }
210*e4b17023SJohn Marino else if (__c == _M_ctype.widen('|'))
211*e4b17023SJohn Marino {
212*e4b17023SJohn Marino _M_curToken = _S_token_or;
213*e4b17023SJohn Marino ++_M_current;
214*e4b17023SJohn Marino return;
215*e4b17023SJohn Marino }
216*e4b17023SJohn Marino else if (__c == _M_ctype.widen('['))
217*e4b17023SJohn Marino {
218*e4b17023SJohn Marino _M_curToken = _S_token_bracket_begin;
219*e4b17023SJohn Marino _M_state |= (_S_state_in_bracket | _S_state_at_start);
220*e4b17023SJohn Marino ++_M_current;
221*e4b17023SJohn Marino return;
222*e4b17023SJohn Marino }
223*e4b17023SJohn Marino else if (__c == _M_ctype.widen('\\'))
224*e4b17023SJohn Marino {
225*e4b17023SJohn Marino _M_eat_escape();
226*e4b17023SJohn Marino return;
227*e4b17023SJohn Marino }
228*e4b17023SJohn Marino else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
229*e4b17023SJohn Marino {
230*e4b17023SJohn Marino if (__c == _M_ctype.widen('('))
231*e4b17023SJohn Marino {
232*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_begin;
233*e4b17023SJohn Marino ++_M_current;
234*e4b17023SJohn Marino return;
235*e4b17023SJohn Marino }
236*e4b17023SJohn Marino else if (__c == _M_ctype.widen(')'))
237*e4b17023SJohn Marino {
238*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_end;
239*e4b17023SJohn Marino ++_M_current;
240*e4b17023SJohn Marino return;
241*e4b17023SJohn Marino }
242*e4b17023SJohn Marino else if (__c == _M_ctype.widen('{'))
243*e4b17023SJohn Marino {
244*e4b17023SJohn Marino _M_curToken = _S_token_interval_begin;
245*e4b17023SJohn Marino _M_state |= _S_state_in_brace;
246*e4b17023SJohn Marino ++_M_current;
247*e4b17023SJohn Marino return;
248*e4b17023SJohn Marino }
249*e4b17023SJohn Marino }
250*e4b17023SJohn Marino
251*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
252*e4b17023SJohn Marino _M_curValue.assign(1, __c);
253*e4b17023SJohn Marino ++_M_current;
254*e4b17023SJohn Marino }
255*e4b17023SJohn Marino
256*e4b17023SJohn Marino
257*e4b17023SJohn Marino template<typename _InputIterator>
258*e4b17023SJohn Marino void
259*e4b17023SJohn Marino _Scanner<_InputIterator>::
260*e4b17023SJohn Marino _M_scan_in_brace()
261*e4b17023SJohn Marino {
262*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current))
263*e4b17023SJohn Marino {
264*e4b17023SJohn Marino _M_curToken = _S_token_dup_count;
265*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current);
266*e4b17023SJohn Marino ++_M_current;
267*e4b17023SJohn Marino while (_M_current != _M_end
268*e4b17023SJohn Marino && _M_ctype.is(_CtypeT::digit, *_M_current))
269*e4b17023SJohn Marino {
270*e4b17023SJohn Marino _M_curValue += *_M_current;
271*e4b17023SJohn Marino ++_M_current;
272*e4b17023SJohn Marino }
273*e4b17023SJohn Marino return;
274*e4b17023SJohn Marino }
275*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(','))
276*e4b17023SJohn Marino {
277*e4b17023SJohn Marino _M_curToken = _S_token_comma;
278*e4b17023SJohn Marino ++_M_current;
279*e4b17023SJohn Marino return;
280*e4b17023SJohn Marino }
281*e4b17023SJohn Marino if (_M_flags & (regex_constants::basic | regex_constants::grep))
282*e4b17023SJohn Marino {
283*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('\\'))
284*e4b17023SJohn Marino _M_eat_escape();
285*e4b17023SJohn Marino }
286*e4b17023SJohn Marino else
287*e4b17023SJohn Marino {
288*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('}'))
289*e4b17023SJohn Marino {
290*e4b17023SJohn Marino _M_curToken = _S_token_interval_end;
291*e4b17023SJohn Marino _M_state &= ~_S_state_in_brace;
292*e4b17023SJohn Marino ++_M_current;
293*e4b17023SJohn Marino return;
294*e4b17023SJohn Marino }
295*e4b17023SJohn Marino }
296*e4b17023SJohn Marino }
297*e4b17023SJohn Marino
298*e4b17023SJohn Marino template<typename _InputIterator>
299*e4b17023SJohn Marino void
300*e4b17023SJohn Marino _Scanner<_InputIterator>::
301*e4b17023SJohn Marino _M_scan_in_bracket()
302*e4b17023SJohn Marino {
303*e4b17023SJohn Marino if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
304*e4b17023SJohn Marino {
305*e4b17023SJohn Marino _M_curToken = _S_token_inverse_class;
306*e4b17023SJohn Marino _M_state &= ~_S_state_at_start;
307*e4b17023SJohn Marino ++_M_current;
308*e4b17023SJohn Marino return;
309*e4b17023SJohn Marino }
310*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('['))
311*e4b17023SJohn Marino {
312*e4b17023SJohn Marino ++_M_current;
313*e4b17023SJohn Marino if (_M_current == _M_end)
314*e4b17023SJohn Marino {
315*e4b17023SJohn Marino _M_curToken = _S_token_eof;
316*e4b17023SJohn Marino return;
317*e4b17023SJohn Marino }
318*e4b17023SJohn Marino
319*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('.'))
320*e4b17023SJohn Marino {
321*e4b17023SJohn Marino _M_curToken = _S_token_collsymbol;
322*e4b17023SJohn Marino _M_eat_collsymbol();
323*e4b17023SJohn Marino return;
324*e4b17023SJohn Marino }
325*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(':'))
326*e4b17023SJohn Marino {
327*e4b17023SJohn Marino _M_curToken = _S_token_char_class_name;
328*e4b17023SJohn Marino _M_eat_charclass();
329*e4b17023SJohn Marino return;
330*e4b17023SJohn Marino }
331*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('='))
332*e4b17023SJohn Marino {
333*e4b17023SJohn Marino _M_curToken = _S_token_equiv_class_name;
334*e4b17023SJohn Marino _M_eat_equivclass();
335*e4b17023SJohn Marino return;
336*e4b17023SJohn Marino }
337*e4b17023SJohn Marino }
338*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('-'))
339*e4b17023SJohn Marino {
340*e4b17023SJohn Marino _M_curToken = _S_token_dash;
341*e4b17023SJohn Marino ++_M_current;
342*e4b17023SJohn Marino return;
343*e4b17023SJohn Marino }
344*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(']'))
345*e4b17023SJohn Marino {
346*e4b17023SJohn Marino if (!(_M_flags & regex_constants::ECMAScript)
347*e4b17023SJohn Marino || !(_M_state & _S_state_at_start))
348*e4b17023SJohn Marino {
349*e4b17023SJohn Marino // special case: only if _not_ chr first after
350*e4b17023SJohn Marino // '[' or '[^' and if not ECMAscript
351*e4b17023SJohn Marino _M_curToken = _S_token_bracket_end;
352*e4b17023SJohn Marino ++_M_current;
353*e4b17023SJohn Marino return;
354*e4b17023SJohn Marino }
355*e4b17023SJohn Marino }
356*e4b17023SJohn Marino _M_curToken = _S_token_collelem_single;
357*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current);
358*e4b17023SJohn Marino ++_M_current;
359*e4b17023SJohn Marino }
360*e4b17023SJohn Marino
361*e4b17023SJohn Marino template<typename _InputIterator>
362*e4b17023SJohn Marino void
363*e4b17023SJohn Marino _Scanner<_InputIterator>::
364*e4b17023SJohn Marino _M_eat_escape()
365*e4b17023SJohn Marino {
366*e4b17023SJohn Marino ++_M_current;
367*e4b17023SJohn Marino if (_M_current == _M_end)
368*e4b17023SJohn Marino {
369*e4b17023SJohn Marino _M_curToken = _S_token_eof;
370*e4b17023SJohn Marino return;
371*e4b17023SJohn Marino }
372*e4b17023SJohn Marino _CharT __c = *_M_current;
373*e4b17023SJohn Marino ++_M_current;
374*e4b17023SJohn Marino
375*e4b17023SJohn Marino if (__c == _M_ctype.widen('('))
376*e4b17023SJohn Marino {
377*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
378*e4b17023SJohn Marino {
379*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
380*e4b17023SJohn Marino _M_curValue.assign(1, __c);
381*e4b17023SJohn Marino }
382*e4b17023SJohn Marino else
383*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_begin;
384*e4b17023SJohn Marino }
385*e4b17023SJohn Marino else if (__c == _M_ctype.widen(')'))
386*e4b17023SJohn Marino {
387*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
388*e4b17023SJohn Marino {
389*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
390*e4b17023SJohn Marino _M_curValue.assign(1, __c);
391*e4b17023SJohn Marino }
392*e4b17023SJohn Marino else
393*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_end;
394*e4b17023SJohn Marino }
395*e4b17023SJohn Marino else if (__c == _M_ctype.widen('{'))
396*e4b17023SJohn Marino {
397*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
398*e4b17023SJohn Marino {
399*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
400*e4b17023SJohn Marino _M_curValue.assign(1, __c);
401*e4b17023SJohn Marino }
402*e4b17023SJohn Marino else
403*e4b17023SJohn Marino {
404*e4b17023SJohn Marino _M_curToken = _S_token_interval_begin;
405*e4b17023SJohn Marino _M_state |= _S_state_in_brace;
406*e4b17023SJohn Marino }
407*e4b17023SJohn Marino }
408*e4b17023SJohn Marino else if (__c == _M_ctype.widen('}'))
409*e4b17023SJohn Marino {
410*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
411*e4b17023SJohn Marino {
412*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
413*e4b17023SJohn Marino _M_curValue.assign(1, __c);
414*e4b17023SJohn Marino }
415*e4b17023SJohn Marino else
416*e4b17023SJohn Marino {
417*e4b17023SJohn Marino if (!(_M_state && _S_state_in_brace))
418*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace);
419*e4b17023SJohn Marino _M_state &= ~_S_state_in_brace;
420*e4b17023SJohn Marino _M_curToken = _S_token_interval_end;
421*e4b17023SJohn Marino }
422*e4b17023SJohn Marino }
423*e4b17023SJohn Marino else if (__c == _M_ctype.widen('x'))
424*e4b17023SJohn Marino {
425*e4b17023SJohn Marino ++_M_current;
426*e4b17023SJohn Marino if (_M_current == _M_end)
427*e4b17023SJohn Marino {
428*e4b17023SJohn Marino _M_curToken = _S_token_eof;
429*e4b17023SJohn Marino return;
430*e4b17023SJohn Marino }
431*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current))
432*e4b17023SJohn Marino {
433*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current);
434*e4b17023SJohn Marino ++_M_current;
435*e4b17023SJohn Marino if (_M_current == _M_end)
436*e4b17023SJohn Marino {
437*e4b17023SJohn Marino _M_curToken = _S_token_eof;
438*e4b17023SJohn Marino return;
439*e4b17023SJohn Marino }
440*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current))
441*e4b17023SJohn Marino {
442*e4b17023SJohn Marino _M_curValue += *_M_current;
443*e4b17023SJohn Marino ++_M_current;
444*e4b17023SJohn Marino return;
445*e4b17023SJohn Marino }
446*e4b17023SJohn Marino }
447*e4b17023SJohn Marino }
448*e4b17023SJohn Marino else if (__c == _M_ctype.widen('^')
449*e4b17023SJohn Marino || __c == _M_ctype.widen('.')
450*e4b17023SJohn Marino || __c == _M_ctype.widen('*')
451*e4b17023SJohn Marino || __c == _M_ctype.widen('$')
452*e4b17023SJohn Marino || __c == _M_ctype.widen('\\'))
453*e4b17023SJohn Marino {
454*e4b17023SJohn Marino _M_curToken = _S_token_ord_char;
455*e4b17023SJohn Marino _M_curValue.assign(1, __c);
456*e4b17023SJohn Marino }
457*e4b17023SJohn Marino else if (_M_ctype.is(_CtypeT::digit, __c))
458*e4b17023SJohn Marino {
459*e4b17023SJohn Marino _M_curToken = _S_token_backref;
460*e4b17023SJohn Marino _M_curValue.assign(1, __c);
461*e4b17023SJohn Marino }
462*e4b17023SJohn Marino else
463*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_escape);
464*e4b17023SJohn Marino }
465*e4b17023SJohn Marino
466*e4b17023SJohn Marino
467*e4b17023SJohn Marino // Eats a character class or throwns an exception.
468*e4b17023SJohn Marino // current point to ':' delimiter on entry, char after ']' on return
469*e4b17023SJohn Marino template<typename _InputIterator>
470*e4b17023SJohn Marino void
471*e4b17023SJohn Marino _Scanner<_InputIterator>::
472*e4b17023SJohn Marino _M_eat_charclass()
473*e4b17023SJohn Marino {
474*e4b17023SJohn Marino ++_M_current; // skip ':'
475*e4b17023SJohn Marino if (_M_current == _M_end)
476*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype);
477*e4b17023SJohn Marino for (_M_curValue.clear();
478*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen(':');
479*e4b17023SJohn Marino ++_M_current)
480*e4b17023SJohn Marino _M_curValue += *_M_current;
481*e4b17023SJohn Marino if (_M_current == _M_end)
482*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype);
483*e4b17023SJohn Marino ++_M_current; // skip ':'
484*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']'))
485*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype);
486*e4b17023SJohn Marino ++_M_current; // skip ']'
487*e4b17023SJohn Marino }
488*e4b17023SJohn Marino
489*e4b17023SJohn Marino
490*e4b17023SJohn Marino template<typename _InputIterator>
491*e4b17023SJohn Marino void
492*e4b17023SJohn Marino _Scanner<_InputIterator>::
493*e4b17023SJohn Marino _M_eat_equivclass()
494*e4b17023SJohn Marino {
495*e4b17023SJohn Marino ++_M_current; // skip '='
496*e4b17023SJohn Marino if (_M_current == _M_end)
497*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
498*e4b17023SJohn Marino for (_M_curValue.clear();
499*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen('=');
500*e4b17023SJohn Marino ++_M_current)
501*e4b17023SJohn Marino _M_curValue += *_M_current;
502*e4b17023SJohn Marino if (_M_current == _M_end)
503*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
504*e4b17023SJohn Marino ++_M_current; // skip '='
505*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']'))
506*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
507*e4b17023SJohn Marino ++_M_current; // skip ']'
508*e4b17023SJohn Marino }
509*e4b17023SJohn Marino
510*e4b17023SJohn Marino
511*e4b17023SJohn Marino template<typename _InputIterator>
512*e4b17023SJohn Marino void
513*e4b17023SJohn Marino _Scanner<_InputIterator>::
514*e4b17023SJohn Marino _M_eat_collsymbol()
515*e4b17023SJohn Marino {
516*e4b17023SJohn Marino ++_M_current; // skip '.'
517*e4b17023SJohn Marino if (_M_current == _M_end)
518*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
519*e4b17023SJohn Marino for (_M_curValue.clear();
520*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen('.');
521*e4b17023SJohn Marino ++_M_current)
522*e4b17023SJohn Marino _M_curValue += *_M_current;
523*e4b17023SJohn Marino if (_M_current == _M_end)
524*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
525*e4b17023SJohn Marino ++_M_current; // skip '.'
526*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']'))
527*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate);
528*e4b17023SJohn Marino ++_M_current; // skip ']'
529*e4b17023SJohn Marino }
530*e4b17023SJohn Marino
531*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG
532*e4b17023SJohn Marino template<typename _InputIterator>
533*e4b17023SJohn Marino std::ostream&
534*e4b17023SJohn Marino _Scanner<_InputIterator>::
535*e4b17023SJohn Marino _M_print(std::ostream& ostr)
536*e4b17023SJohn Marino {
537*e4b17023SJohn Marino switch (_M_curToken)
538*e4b17023SJohn Marino {
539*e4b17023SJohn Marino case _S_token_anychar:
540*e4b17023SJohn Marino ostr << "any-character\n";
541*e4b17023SJohn Marino break;
542*e4b17023SJohn Marino case _S_token_backref:
543*e4b17023SJohn Marino ostr << "backref\n";
544*e4b17023SJohn Marino break;
545*e4b17023SJohn Marino case _S_token_bracket_begin:
546*e4b17023SJohn Marino ostr << "bracket-begin\n";
547*e4b17023SJohn Marino break;
548*e4b17023SJohn Marino case _S_token_bracket_end:
549*e4b17023SJohn Marino ostr << "bracket-end\n";
550*e4b17023SJohn Marino break;
551*e4b17023SJohn Marino case _S_token_char_class_name:
552*e4b17023SJohn Marino ostr << "char-class-name \"" << _M_curValue << "\"\n";
553*e4b17023SJohn Marino break;
554*e4b17023SJohn Marino case _S_token_closure0:
555*e4b17023SJohn Marino ostr << "closure0\n";
556*e4b17023SJohn Marino break;
557*e4b17023SJohn Marino case _S_token_closure1:
558*e4b17023SJohn Marino ostr << "closure1\n";
559*e4b17023SJohn Marino break;
560*e4b17023SJohn Marino case _S_token_collelem_multi:
561*e4b17023SJohn Marino ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
562*e4b17023SJohn Marino break;
563*e4b17023SJohn Marino case _S_token_collelem_single:
564*e4b17023SJohn Marino ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
565*e4b17023SJohn Marino break;
566*e4b17023SJohn Marino case _S_token_collsymbol:
567*e4b17023SJohn Marino ostr << "collsymbol \"" << _M_curValue << "\"\n";
568*e4b17023SJohn Marino break;
569*e4b17023SJohn Marino case _S_token_comma:
570*e4b17023SJohn Marino ostr << "comma\n";
571*e4b17023SJohn Marino break;
572*e4b17023SJohn Marino case _S_token_dash:
573*e4b17023SJohn Marino ostr << "dash\n";
574*e4b17023SJohn Marino break;
575*e4b17023SJohn Marino case _S_token_dup_count:
576*e4b17023SJohn Marino ostr << "dup count: " << _M_curValue << "\n";
577*e4b17023SJohn Marino break;
578*e4b17023SJohn Marino case _S_token_eof:
579*e4b17023SJohn Marino ostr << "EOF\n";
580*e4b17023SJohn Marino break;
581*e4b17023SJohn Marino case _S_token_equiv_class_name:
582*e4b17023SJohn Marino ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
583*e4b17023SJohn Marino break;
584*e4b17023SJohn Marino case _S_token_interval_begin:
585*e4b17023SJohn Marino ostr << "interval begin\n";
586*e4b17023SJohn Marino break;
587*e4b17023SJohn Marino case _S_token_interval_end:
588*e4b17023SJohn Marino ostr << "interval end\n";
589*e4b17023SJohn Marino break;
590*e4b17023SJohn Marino case _S_token_line_begin:
591*e4b17023SJohn Marino ostr << "line begin\n";
592*e4b17023SJohn Marino break;
593*e4b17023SJohn Marino case _S_token_line_end:
594*e4b17023SJohn Marino ostr << "line end\n";
595*e4b17023SJohn Marino break;
596*e4b17023SJohn Marino case _S_token_opt:
597*e4b17023SJohn Marino ostr << "opt\n";
598*e4b17023SJohn Marino break;
599*e4b17023SJohn Marino case _S_token_or:
600*e4b17023SJohn Marino ostr << "or\n";
601*e4b17023SJohn Marino break;
602*e4b17023SJohn Marino case _S_token_ord_char:
603*e4b17023SJohn Marino ostr << "ordinary character: \"" << _M_value() << "\"\n";
604*e4b17023SJohn Marino break;
605*e4b17023SJohn Marino case _S_token_quoted_char:
606*e4b17023SJohn Marino ostr << "quoted char\n";
607*e4b17023SJohn Marino break;
608*e4b17023SJohn Marino case _S_token_subexpr_begin:
609*e4b17023SJohn Marino ostr << "subexpr begin\n";
610*e4b17023SJohn Marino break;
611*e4b17023SJohn Marino case _S_token_subexpr_end:
612*e4b17023SJohn Marino ostr << "subexpr end\n";
613*e4b17023SJohn Marino break;
614*e4b17023SJohn Marino case _S_token_word_begin:
615*e4b17023SJohn Marino ostr << "word begin\n";
616*e4b17023SJohn Marino break;
617*e4b17023SJohn Marino case _S_token_word_end:
618*e4b17023SJohn Marino ostr << "word end\n";
619*e4b17023SJohn Marino break;
620*e4b17023SJohn Marino case _S_token_unknown:
621*e4b17023SJohn Marino ostr << "-- unknown token --\n";
622*e4b17023SJohn Marino break;
623*e4b17023SJohn Marino }
624*e4b17023SJohn Marino return ostr;
625*e4b17023SJohn Marino }
626*e4b17023SJohn Marino #endif
627*e4b17023SJohn Marino
628*e4b17023SJohn Marino // Builds an NFA from an input iterator interval.
629*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
630*e4b17023SJohn Marino class _Compiler
631*e4b17023SJohn Marino {
632*e4b17023SJohn Marino public:
633*e4b17023SJohn Marino typedef _InIter _IterT;
634*e4b17023SJohn Marino typedef typename std::iterator_traits<_InIter>::value_type _CharT;
635*e4b17023SJohn Marino typedef std::basic_string<_CharT> _StringT;
636*e4b17023SJohn Marino typedef regex_constants::syntax_option_type _FlagT;
637*e4b17023SJohn Marino
638*e4b17023SJohn Marino public:
639*e4b17023SJohn Marino _Compiler(const _InIter& __b, const _InIter& __e,
640*e4b17023SJohn Marino _TraitsT& __traits, _FlagT __flags);
641*e4b17023SJohn Marino
642*e4b17023SJohn Marino const _Nfa&
643*e4b17023SJohn Marino _M_nfa() const
644*e4b17023SJohn Marino { return _M_state_store; }
645*e4b17023SJohn Marino
646*e4b17023SJohn Marino private:
647*e4b17023SJohn Marino typedef _Scanner<_InIter> _ScannerT;
648*e4b17023SJohn Marino typedef typename _ScannerT::_TokenT _TokenT;
649*e4b17023SJohn Marino typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
650*e4b17023SJohn Marino typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
651*e4b17023SJohn Marino
652*e4b17023SJohn Marino // accepts a specific token or returns false.
653*e4b17023SJohn Marino bool
654*e4b17023SJohn Marino _M_match_token(_TokenT __token);
655*e4b17023SJohn Marino
656*e4b17023SJohn Marino void
657*e4b17023SJohn Marino _M_disjunction();
658*e4b17023SJohn Marino
659*e4b17023SJohn Marino bool
660*e4b17023SJohn Marino _M_alternative();
661*e4b17023SJohn Marino
662*e4b17023SJohn Marino bool
663*e4b17023SJohn Marino _M_term();
664*e4b17023SJohn Marino
665*e4b17023SJohn Marino bool
666*e4b17023SJohn Marino _M_assertion();
667*e4b17023SJohn Marino
668*e4b17023SJohn Marino bool
669*e4b17023SJohn Marino _M_quantifier();
670*e4b17023SJohn Marino
671*e4b17023SJohn Marino bool
672*e4b17023SJohn Marino _M_atom();
673*e4b17023SJohn Marino
674*e4b17023SJohn Marino bool
675*e4b17023SJohn Marino _M_bracket_expression();
676*e4b17023SJohn Marino
677*e4b17023SJohn Marino bool
678*e4b17023SJohn Marino _M_bracket_list(_RMatcherT& __matcher);
679*e4b17023SJohn Marino
680*e4b17023SJohn Marino bool
681*e4b17023SJohn Marino _M_follow_list(_RMatcherT& __matcher);
682*e4b17023SJohn Marino
683*e4b17023SJohn Marino bool
684*e4b17023SJohn Marino _M_follow_list2(_RMatcherT& __matcher);
685*e4b17023SJohn Marino
686*e4b17023SJohn Marino bool
687*e4b17023SJohn Marino _M_expression_term(_RMatcherT& __matcher);
688*e4b17023SJohn Marino
689*e4b17023SJohn Marino bool
690*e4b17023SJohn Marino _M_range_expression(_RMatcherT& __matcher);
691*e4b17023SJohn Marino
692*e4b17023SJohn Marino bool
693*e4b17023SJohn Marino _M_start_range(_RMatcherT& __matcher);
694*e4b17023SJohn Marino
695*e4b17023SJohn Marino bool
696*e4b17023SJohn Marino _M_collating_symbol(_RMatcherT& __matcher);
697*e4b17023SJohn Marino
698*e4b17023SJohn Marino bool
699*e4b17023SJohn Marino _M_equivalence_class(_RMatcherT& __matcher);
700*e4b17023SJohn Marino
701*e4b17023SJohn Marino bool
702*e4b17023SJohn Marino _M_character_class(_RMatcherT& __matcher);
703*e4b17023SJohn Marino
704*e4b17023SJohn Marino int
705*e4b17023SJohn Marino _M_cur_int_value(int __radix);
706*e4b17023SJohn Marino
707*e4b17023SJohn Marino private:
708*e4b17023SJohn Marino _TraitsT& _M_traits;
709*e4b17023SJohn Marino _ScannerT _M_scanner;
710*e4b17023SJohn Marino _StringT _M_cur_value;
711*e4b17023SJohn Marino _Nfa _M_state_store;
712*e4b17023SJohn Marino _StackT _M_stack;
713*e4b17023SJohn Marino };
714*e4b17023SJohn Marino
715*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
716*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
717*e4b17023SJohn Marino _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
718*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::_FlagT __flags)
719*e4b17023SJohn Marino : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
720*e4b17023SJohn Marino _M_state_store(__flags)
721*e4b17023SJohn Marino {
722*e4b17023SJohn Marino typedef _StartTagger<_InIter, _TraitsT> _Start;
723*e4b17023SJohn Marino typedef _EndTagger<_InIter, _TraitsT> _End;
724*e4b17023SJohn Marino
725*e4b17023SJohn Marino _StateSeq __r(_M_state_store,
726*e4b17023SJohn Marino _M_state_store._M_insert_subexpr_begin(_Start(0)));
727*e4b17023SJohn Marino _M_disjunction();
728*e4b17023SJohn Marino if (!_M_stack.empty())
729*e4b17023SJohn Marino {
730*e4b17023SJohn Marino __r._M_append(_M_stack.top());
731*e4b17023SJohn Marino _M_stack.pop();
732*e4b17023SJohn Marino }
733*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
734*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_accept());
735*e4b17023SJohn Marino }
736*e4b17023SJohn Marino
737*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
738*e4b17023SJohn Marino bool
739*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
740*e4b17023SJohn Marino _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
741*e4b17023SJohn Marino {
742*e4b17023SJohn Marino if (token == _M_scanner._M_token())
743*e4b17023SJohn Marino {
744*e4b17023SJohn Marino _M_cur_value = _M_scanner._M_value();
745*e4b17023SJohn Marino _M_scanner._M_advance();
746*e4b17023SJohn Marino return true;
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino return false;
749*e4b17023SJohn Marino }
750*e4b17023SJohn Marino
751*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
752*e4b17023SJohn Marino void
753*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
754*e4b17023SJohn Marino _M_disjunction()
755*e4b17023SJohn Marino {
756*e4b17023SJohn Marino this->_M_alternative();
757*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_or))
758*e4b17023SJohn Marino {
759*e4b17023SJohn Marino _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
760*e4b17023SJohn Marino this->_M_disjunction();
761*e4b17023SJohn Marino _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
762*e4b17023SJohn Marino _M_stack.push(_StateSeq(__alt1, __alt2));
763*e4b17023SJohn Marino }
764*e4b17023SJohn Marino }
765*e4b17023SJohn Marino
766*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
767*e4b17023SJohn Marino bool
768*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
769*e4b17023SJohn Marino _M_alternative()
770*e4b17023SJohn Marino {
771*e4b17023SJohn Marino if (this->_M_term())
772*e4b17023SJohn Marino {
773*e4b17023SJohn Marino _StateSeq __re = _M_stack.top(); _M_stack.pop();
774*e4b17023SJohn Marino this->_M_alternative();
775*e4b17023SJohn Marino if (!_M_stack.empty())
776*e4b17023SJohn Marino {
777*e4b17023SJohn Marino __re._M_append(_M_stack.top());
778*e4b17023SJohn Marino _M_stack.pop();
779*e4b17023SJohn Marino }
780*e4b17023SJohn Marino _M_stack.push(__re);
781*e4b17023SJohn Marino return true;
782*e4b17023SJohn Marino }
783*e4b17023SJohn Marino return false;
784*e4b17023SJohn Marino }
785*e4b17023SJohn Marino
786*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
787*e4b17023SJohn Marino bool
788*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
789*e4b17023SJohn Marino _M_term()
790*e4b17023SJohn Marino {
791*e4b17023SJohn Marino if (this->_M_assertion())
792*e4b17023SJohn Marino return true;
793*e4b17023SJohn Marino if (this->_M_atom())
794*e4b17023SJohn Marino {
795*e4b17023SJohn Marino this->_M_quantifier();
796*e4b17023SJohn Marino return true;
797*e4b17023SJohn Marino }
798*e4b17023SJohn Marino return false;
799*e4b17023SJohn Marino }
800*e4b17023SJohn Marino
801*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
802*e4b17023SJohn Marino bool
803*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
804*e4b17023SJohn Marino _M_assertion()
805*e4b17023SJohn Marino {
806*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_line_begin))
807*e4b17023SJohn Marino {
808*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_line_begin);
809*e4b17023SJohn Marino return true;
810*e4b17023SJohn Marino }
811*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_line_end))
812*e4b17023SJohn Marino {
813*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_line_end);
814*e4b17023SJohn Marino return true;
815*e4b17023SJohn Marino }
816*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_word_begin))
817*e4b17023SJohn Marino {
818*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_word_begin);
819*e4b17023SJohn Marino return true;
820*e4b17023SJohn Marino }
821*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_word_end))
822*e4b17023SJohn Marino {
823*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_word_end);
824*e4b17023SJohn Marino return true;
825*e4b17023SJohn Marino }
826*e4b17023SJohn Marino return false;
827*e4b17023SJohn Marino }
828*e4b17023SJohn Marino
829*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
830*e4b17023SJohn Marino bool
831*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
832*e4b17023SJohn Marino _M_quantifier()
833*e4b17023SJohn Marino {
834*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_closure0))
835*e4b17023SJohn Marino {
836*e4b17023SJohn Marino if (_M_stack.empty())
837*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat);
838*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1);
839*e4b17023SJohn Marino __r._M_append(__r._M_front());
840*e4b17023SJohn Marino _M_stack.pop();
841*e4b17023SJohn Marino _M_stack.push(__r);
842*e4b17023SJohn Marino return true;
843*e4b17023SJohn Marino }
844*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_closure1))
845*e4b17023SJohn Marino {
846*e4b17023SJohn Marino if (_M_stack.empty())
847*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat);
848*e4b17023SJohn Marino _StateSeq __r(_M_state_store,
849*e4b17023SJohn Marino _M_state_store.
850*e4b17023SJohn Marino _M_insert_alt(_S_invalid_state_id,
851*e4b17023SJohn Marino _M_stack.top()._M_front()));
852*e4b17023SJohn Marino _M_stack.top()._M_append(__r);
853*e4b17023SJohn Marino return true;
854*e4b17023SJohn Marino }
855*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_opt))
856*e4b17023SJohn Marino {
857*e4b17023SJohn Marino if (_M_stack.empty())
858*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat);
859*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1);
860*e4b17023SJohn Marino _M_stack.pop();
861*e4b17023SJohn Marino _M_stack.push(__r);
862*e4b17023SJohn Marino return true;
863*e4b17023SJohn Marino }
864*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_interval_begin))
865*e4b17023SJohn Marino {
866*e4b17023SJohn Marino if (_M_stack.empty())
867*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat);
868*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_dup_count))
869*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace);
870*e4b17023SJohn Marino _StateSeq __r(_M_stack.top());
871*e4b17023SJohn Marino int __min_rep = _M_cur_int_value(10);
872*e4b17023SJohn Marino for (int __i = 1; __i < __min_rep; ++__i)
873*e4b17023SJohn Marino _M_stack.top()._M_append(__r._M_clone());
874*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_comma))
875*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_dup_count))
876*e4b17023SJohn Marino {
877*e4b17023SJohn Marino int __n = _M_cur_int_value(10) - __min_rep;
878*e4b17023SJohn Marino if (__n < 0)
879*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace);
880*e4b17023SJohn Marino for (int __i = 0; __i < __n; ++__i)
881*e4b17023SJohn Marino {
882*e4b17023SJohn Marino _StateSeq __r(_M_state_store,
883*e4b17023SJohn Marino _M_state_store.
884*e4b17023SJohn Marino _M_insert_alt(_S_invalid_state_id,
885*e4b17023SJohn Marino _M_stack.top()._M_front()));
886*e4b17023SJohn Marino _M_stack.top()._M_append(__r);
887*e4b17023SJohn Marino }
888*e4b17023SJohn Marino }
889*e4b17023SJohn Marino else
890*e4b17023SJohn Marino {
891*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1);
892*e4b17023SJohn Marino __r._M_push_back(__r._M_front());
893*e4b17023SJohn Marino _M_stack.pop();
894*e4b17023SJohn Marino _M_stack.push(__r);
895*e4b17023SJohn Marino }
896*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_interval_end))
897*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_brace);
898*e4b17023SJohn Marino return true;
899*e4b17023SJohn Marino }
900*e4b17023SJohn Marino return false;
901*e4b17023SJohn Marino }
902*e4b17023SJohn Marino
903*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
904*e4b17023SJohn Marino bool
905*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
906*e4b17023SJohn Marino _M_atom()
907*e4b17023SJohn Marino {
908*e4b17023SJohn Marino typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
909*e4b17023SJohn Marino typedef _StartTagger<_InIter, _TraitsT> _Start;
910*e4b17023SJohn Marino typedef _EndTagger<_InIter, _TraitsT> _End;
911*e4b17023SJohn Marino
912*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_anychar))
913*e4b17023SJohn Marino {
914*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store,
915*e4b17023SJohn Marino _M_state_store._M_insert_matcher
916*e4b17023SJohn Marino (_AnyMatcher)));
917*e4b17023SJohn Marino return true;
918*e4b17023SJohn Marino }
919*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_ord_char))
920*e4b17023SJohn Marino {
921*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store,
922*e4b17023SJohn Marino _M_state_store._M_insert_matcher
923*e4b17023SJohn Marino (_CMatcher(_M_cur_value[0], _M_traits))));
924*e4b17023SJohn Marino return true;
925*e4b17023SJohn Marino }
926*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_quoted_char))
927*e4b17023SJohn Marino {
928*e4b17023SJohn Marino // note that in the ECMA grammar, this case covers backrefs.
929*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store,
930*e4b17023SJohn Marino _M_state_store._M_insert_matcher
931*e4b17023SJohn Marino (_CMatcher(_M_cur_value[0], _M_traits))));
932*e4b17023SJohn Marino return true;
933*e4b17023SJohn Marino }
934*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_backref))
935*e4b17023SJohn Marino {
936*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
937*e4b17023SJohn Marino return true;
938*e4b17023SJohn Marino }
939*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
940*e4b17023SJohn Marino {
941*e4b17023SJohn Marino int __mark = _M_state_store._M_sub_count();
942*e4b17023SJohn Marino _StateSeq __r(_M_state_store,
943*e4b17023SJohn Marino _M_state_store.
944*e4b17023SJohn Marino _M_insert_subexpr_begin(_Start(__mark)));
945*e4b17023SJohn Marino this->_M_disjunction();
946*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
947*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_paren);
948*e4b17023SJohn Marino if (!_M_stack.empty())
949*e4b17023SJohn Marino {
950*e4b17023SJohn Marino __r._M_append(_M_stack.top());
951*e4b17023SJohn Marino _M_stack.pop();
952*e4b17023SJohn Marino }
953*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_subexpr_end
954*e4b17023SJohn Marino (__mark, _End(__mark)));
955*e4b17023SJohn Marino _M_stack.push(__r);
956*e4b17023SJohn Marino return true;
957*e4b17023SJohn Marino }
958*e4b17023SJohn Marino return _M_bracket_expression();
959*e4b17023SJohn Marino }
960*e4b17023SJohn Marino
961*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
962*e4b17023SJohn Marino bool
963*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
964*e4b17023SJohn Marino _M_bracket_expression()
965*e4b17023SJohn Marino {
966*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_bracket_begin))
967*e4b17023SJohn Marino {
968*e4b17023SJohn Marino _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
969*e4b17023SJohn Marino _M_traits);
970*e4b17023SJohn Marino if (!_M_bracket_list(__matcher)
971*e4b17023SJohn Marino || !_M_match_token(_ScannerT::_S_token_bracket_end))
972*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_brack);
973*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store,
974*e4b17023SJohn Marino _M_state_store._M_insert_matcher(__matcher)));
975*e4b17023SJohn Marino return true;
976*e4b17023SJohn Marino }
977*e4b17023SJohn Marino return false;
978*e4b17023SJohn Marino }
979*e4b17023SJohn Marino
980*e4b17023SJohn Marino // If the dash is the last character in the bracket expression, it is not
981*e4b17023SJohn Marino // special.
982*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
983*e4b17023SJohn Marino bool
984*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
985*e4b17023SJohn Marino _M_bracket_list(_RMatcherT& __matcher)
986*e4b17023SJohn Marino {
987*e4b17023SJohn Marino if (_M_follow_list(__matcher))
988*e4b17023SJohn Marino {
989*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_dash))
990*e4b17023SJohn Marino __matcher._M_add_char(_M_cur_value[0]);
991*e4b17023SJohn Marino return true;
992*e4b17023SJohn Marino }
993*e4b17023SJohn Marino return false;
994*e4b17023SJohn Marino }
995*e4b17023SJohn Marino
996*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
997*e4b17023SJohn Marino bool
998*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
999*e4b17023SJohn Marino _M_follow_list(_RMatcherT& __matcher)
1000*e4b17023SJohn Marino { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1001*e4b17023SJohn Marino
1002*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1003*e4b17023SJohn Marino bool
1004*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1005*e4b17023SJohn Marino _M_follow_list2(_RMatcherT& __matcher)
1006*e4b17023SJohn Marino {
1007*e4b17023SJohn Marino if (_M_expression_term(__matcher))
1008*e4b17023SJohn Marino return _M_follow_list2(__matcher);
1009*e4b17023SJohn Marino return true;
1010*e4b17023SJohn Marino }
1011*e4b17023SJohn Marino
1012*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1013*e4b17023SJohn Marino bool
1014*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1015*e4b17023SJohn Marino _M_expression_term(_RMatcherT& __matcher)
1016*e4b17023SJohn Marino {
1017*e4b17023SJohn Marino return (_M_collating_symbol(__matcher)
1018*e4b17023SJohn Marino || _M_character_class(__matcher)
1019*e4b17023SJohn Marino || _M_equivalence_class(__matcher)
1020*e4b17023SJohn Marino || (_M_start_range(__matcher)
1021*e4b17023SJohn Marino && _M_range_expression(__matcher)));
1022*e4b17023SJohn Marino }
1023*e4b17023SJohn Marino
1024*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1025*e4b17023SJohn Marino bool
1026*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1027*e4b17023SJohn Marino _M_range_expression(_RMatcherT& __matcher)
1028*e4b17023SJohn Marino {
1029*e4b17023SJohn Marino if (!_M_collating_symbol(__matcher))
1030*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_dash))
1031*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_range);
1032*e4b17023SJohn Marino __matcher._M_make_range();
1033*e4b17023SJohn Marino return true;
1034*e4b17023SJohn Marino }
1035*e4b17023SJohn Marino
1036*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1037*e4b17023SJohn Marino bool
1038*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1039*e4b17023SJohn Marino _M_start_range(_RMatcherT& __matcher)
1040*e4b17023SJohn Marino { return _M_match_token(_ScannerT::_S_token_dash); }
1041*e4b17023SJohn Marino
1042*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1043*e4b17023SJohn Marino bool
1044*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1045*e4b17023SJohn Marino _M_collating_symbol(_RMatcherT& __matcher)
1046*e4b17023SJohn Marino {
1047*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_collelem_single))
1048*e4b17023SJohn Marino {
1049*e4b17023SJohn Marino __matcher._M_add_char(_M_cur_value[0]);
1050*e4b17023SJohn Marino return true;
1051*e4b17023SJohn Marino }
1052*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_collsymbol))
1053*e4b17023SJohn Marino {
1054*e4b17023SJohn Marino __matcher._M_add_collating_element(_M_cur_value);
1055*e4b17023SJohn Marino return true;
1056*e4b17023SJohn Marino }
1057*e4b17023SJohn Marino return false;
1058*e4b17023SJohn Marino }
1059*e4b17023SJohn Marino
1060*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1061*e4b17023SJohn Marino bool
1062*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1063*e4b17023SJohn Marino _M_equivalence_class(_RMatcherT& __matcher)
1064*e4b17023SJohn Marino {
1065*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1066*e4b17023SJohn Marino {
1067*e4b17023SJohn Marino __matcher._M_add_equivalence_class(_M_cur_value);
1068*e4b17023SJohn Marino return true;
1069*e4b17023SJohn Marino }
1070*e4b17023SJohn Marino return false;
1071*e4b17023SJohn Marino }
1072*e4b17023SJohn Marino
1073*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1074*e4b17023SJohn Marino bool
1075*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1076*e4b17023SJohn Marino _M_character_class(_RMatcherT& __matcher)
1077*e4b17023SJohn Marino {
1078*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_char_class_name))
1079*e4b17023SJohn Marino {
1080*e4b17023SJohn Marino __matcher._M_add_character_class(_M_cur_value);
1081*e4b17023SJohn Marino return true;
1082*e4b17023SJohn Marino }
1083*e4b17023SJohn Marino return false;
1084*e4b17023SJohn Marino }
1085*e4b17023SJohn Marino
1086*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1087*e4b17023SJohn Marino int
1088*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::
1089*e4b17023SJohn Marino _M_cur_int_value(int __radix)
1090*e4b17023SJohn Marino {
1091*e4b17023SJohn Marino int __v = 0;
1092*e4b17023SJohn Marino for (typename _StringT::size_type __i = 0;
1093*e4b17023SJohn Marino __i < _M_cur_value.length(); ++__i)
1094*e4b17023SJohn Marino __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1095*e4b17023SJohn Marino return __v;
1096*e4b17023SJohn Marino }
1097*e4b17023SJohn Marino
1098*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT>
1099*e4b17023SJohn Marino _AutomatonPtr
1100*e4b17023SJohn Marino __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1101*e4b17023SJohn Marino regex_constants::syntax_option_type __f)
1102*e4b17023SJohn Marino { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1103*e4b17023SJohn Marino __f)._M_nfa())); }
1104*e4b17023SJohn Marino
1105*e4b17023SJohn Marino _GLIBCXX_END_NAMESPACE_VERSION
1106*e4b17023SJohn Marino } // namespace __regex
1107*e4b17023SJohn Marino } // namespace std
1108*e4b17023SJohn Marino
1109*e4b17023SJohn Marino /* vim: set ts=8 sw=2 sts=2: */
1110