1*e4b17023SJohn Marino // class template regex -*- C++ -*- 2*e4b17023SJohn Marino 3*e4b17023SJohn Marino // Copyright (C) 2010, 2011 Free Software Foundation, Inc. 4*e4b17023SJohn Marino // 5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free 6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the 7*e4b17023SJohn Marino // terms of the GNU General Public License as published by the 8*e4b17023SJohn Marino // Free Software Foundation; either version 3, or (at your option) 9*e4b17023SJohn Marino // any later version. 10*e4b17023SJohn Marino 11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful, 12*e4b17023SJohn Marino // but WITHOUT ANY WARRANTY; without even the implied warranty of 13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14*e4b17023SJohn Marino // GNU General Public License for more details. 15*e4b17023SJohn Marino 16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional 17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version 18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation. 19*e4b17023SJohn Marino 20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and 21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program; 22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>. 24*e4b17023SJohn Marino 25*e4b17023SJohn Marino /** 26*e4b17023SJohn Marino * @file bits/regex_compiler.h 27*e4b17023SJohn Marino * This is an internal header file, included by other library headers. 28*e4b17023SJohn Marino * Do not attempt to use it directly. @headername{regex} 29*e4b17023SJohn Marino */ 30*e4b17023SJohn Marino 31*e4b17023SJohn Marino namespace std _GLIBCXX_VISIBILITY(default) 32*e4b17023SJohn Marino { 33*e4b17023SJohn Marino namespace __regex 34*e4b17023SJohn Marino { 35*e4b17023SJohn Marino _GLIBCXX_BEGIN_NAMESPACE_VERSION 36*e4b17023SJohn Marino 37*e4b17023SJohn Marino struct _Scanner_base 38*e4b17023SJohn Marino { 39*e4b17023SJohn Marino typedef unsigned int _StateT; 40*e4b17023SJohn Marino 41*e4b17023SJohn Marino static constexpr _StateT _S_state_at_start = 1 << 0; 42*e4b17023SJohn Marino static constexpr _StateT _S_state_in_brace = 1 << 2; 43*e4b17023SJohn Marino static constexpr _StateT _S_state_in_bracket = 1 << 3; 44*e4b17023SJohn Marino 45*e4b17023SJohn Marino virtual ~_Scanner_base() { }; 46*e4b17023SJohn Marino }; 47*e4b17023SJohn Marino 48*e4b17023SJohn Marino // 49*e4b17023SJohn Marino // @brief Scans an input range for regex tokens. 50*e4b17023SJohn Marino // 51*e4b17023SJohn Marino // The %_Scanner class interprets the regular expression pattern in the input 52*e4b17023SJohn Marino // range passed to its constructor as a sequence of parse tokens passed to 53*e4b17023SJohn Marino // the regular expression compiler. The sequence of tokens provided depends 54*e4b17023SJohn Marino // on the flag settings passed to the constructor: different regular 55*e4b17023SJohn Marino // expression grammars will interpret the same input pattern in 56*e4b17023SJohn Marino // syntactically different ways. 57*e4b17023SJohn Marino // 58*e4b17023SJohn Marino template<typename _InputIterator> 59*e4b17023SJohn Marino class _Scanner: public _Scanner_base 60*e4b17023SJohn Marino { 61*e4b17023SJohn Marino public: 62*e4b17023SJohn Marino typedef _InputIterator _IteratorT; 63*e4b17023SJohn Marino typedef typename std::iterator_traits<_IteratorT>::value_type _CharT; 64*e4b17023SJohn Marino typedef std::basic_string<_CharT> _StringT; 65*e4b17023SJohn Marino typedef regex_constants::syntax_option_type _FlagT; 66*e4b17023SJohn Marino typedef const std::ctype<_CharT> _CtypeT; 67*e4b17023SJohn Marino 68*e4b17023SJohn Marino // Token types returned from the scanner. 69*e4b17023SJohn Marino enum _TokenT 70*e4b17023SJohn Marino { 71*e4b17023SJohn Marino _S_token_anychar, 72*e4b17023SJohn Marino _S_token_backref, 73*e4b17023SJohn Marino _S_token_bracket_begin, 74*e4b17023SJohn Marino _S_token_bracket_end, 75*e4b17023SJohn Marino _S_token_inverse_class, 76*e4b17023SJohn Marino _S_token_char_class_name, 77*e4b17023SJohn Marino _S_token_closure0, 78*e4b17023SJohn Marino _S_token_closure1, 79*e4b17023SJohn Marino _S_token_collelem_multi, 80*e4b17023SJohn Marino _S_token_collelem_single, 81*e4b17023SJohn Marino _S_token_collsymbol, 82*e4b17023SJohn Marino _S_token_comma, 83*e4b17023SJohn Marino _S_token_dash, 84*e4b17023SJohn Marino _S_token_dup_count, 85*e4b17023SJohn Marino _S_token_eof, 86*e4b17023SJohn Marino _S_token_equiv_class_name, 87*e4b17023SJohn Marino _S_token_interval_begin, 88*e4b17023SJohn Marino _S_token_interval_end, 89*e4b17023SJohn Marino _S_token_line_begin, 90*e4b17023SJohn Marino _S_token_line_end, 91*e4b17023SJohn Marino _S_token_opt, 92*e4b17023SJohn Marino _S_token_or, 93*e4b17023SJohn Marino _S_token_ord_char, 94*e4b17023SJohn Marino _S_token_quoted_char, 95*e4b17023SJohn Marino _S_token_subexpr_begin, 96*e4b17023SJohn Marino _S_token_subexpr_end, 97*e4b17023SJohn Marino _S_token_word_begin, 98*e4b17023SJohn Marino _S_token_word_end, 99*e4b17023SJohn Marino _S_token_unknown 100*e4b17023SJohn Marino }; 101*e4b17023SJohn Marino 102*e4b17023SJohn Marino public: 103*e4b17023SJohn Marino _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, 104*e4b17023SJohn Marino std::locale __loc) 105*e4b17023SJohn Marino : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), 106*e4b17023SJohn Marino _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start) 107*e4b17023SJohn Marino { _M_advance(); } 108*e4b17023SJohn Marino 109*e4b17023SJohn Marino void 110*e4b17023SJohn Marino _M_advance(); 111*e4b17023SJohn Marino 112*e4b17023SJohn Marino _TokenT 113*e4b17023SJohn Marino _M_token() const 114*e4b17023SJohn Marino { return _M_curToken; } 115*e4b17023SJohn Marino 116*e4b17023SJohn Marino const _StringT& 117*e4b17023SJohn Marino _M_value() const 118*e4b17023SJohn Marino { return _M_curValue; } 119*e4b17023SJohn Marino 120*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG 121*e4b17023SJohn Marino std::ostream& 122*e4b17023SJohn Marino _M_print(std::ostream&); 123*e4b17023SJohn Marino #endif 124*e4b17023SJohn Marino 125*e4b17023SJohn Marino private: 126*e4b17023SJohn Marino void 127*e4b17023SJohn Marino _M_eat_escape(); 128*e4b17023SJohn Marino 129*e4b17023SJohn Marino void 130*e4b17023SJohn Marino _M_scan_in_brace(); 131*e4b17023SJohn Marino 132*e4b17023SJohn Marino void 133*e4b17023SJohn Marino _M_scan_in_bracket(); 134*e4b17023SJohn Marino 135*e4b17023SJohn Marino void 136*e4b17023SJohn Marino _M_eat_charclass(); 137*e4b17023SJohn Marino 138*e4b17023SJohn Marino void 139*e4b17023SJohn Marino _M_eat_equivclass(); 140*e4b17023SJohn Marino 141*e4b17023SJohn Marino void 142*e4b17023SJohn Marino _M_eat_collsymbol(); 143*e4b17023SJohn Marino 144*e4b17023SJohn Marino private: 145*e4b17023SJohn Marino _IteratorT _M_current; 146*e4b17023SJohn Marino _IteratorT _M_end; 147*e4b17023SJohn Marino _FlagT _M_flags; 148*e4b17023SJohn Marino _CtypeT& _M_ctype; 149*e4b17023SJohn Marino _TokenT _M_curToken; 150*e4b17023SJohn Marino _StringT _M_curValue; 151*e4b17023SJohn Marino _StateT _M_state; 152*e4b17023SJohn Marino }; 153*e4b17023SJohn Marino 154*e4b17023SJohn Marino template<typename _InputIterator> 155*e4b17023SJohn Marino void 156*e4b17023SJohn Marino _Scanner<_InputIterator>:: 157*e4b17023SJohn Marino _M_advance() 158*e4b17023SJohn Marino { 159*e4b17023SJohn Marino if (_M_current == _M_end) 160*e4b17023SJohn Marino { 161*e4b17023SJohn Marino _M_curToken = _S_token_eof; 162*e4b17023SJohn Marino return; 163*e4b17023SJohn Marino } 164*e4b17023SJohn Marino 165*e4b17023SJohn Marino _CharT __c = *_M_current; 166*e4b17023SJohn Marino if (_M_state & _S_state_in_bracket) 167*e4b17023SJohn Marino { 168*e4b17023SJohn Marino _M_scan_in_bracket(); 169*e4b17023SJohn Marino return; 170*e4b17023SJohn Marino } 171*e4b17023SJohn Marino if (_M_state & _S_state_in_brace) 172*e4b17023SJohn Marino { 173*e4b17023SJohn Marino _M_scan_in_brace(); 174*e4b17023SJohn Marino return; 175*e4b17023SJohn Marino } 176*e4b17023SJohn Marino #if 0 177*e4b17023SJohn Marino // TODO: re-enable line anchors when _M_assertion is implemented. 178*e4b17023SJohn Marino // See PR libstdc++/47724 179*e4b17023SJohn Marino else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^')) 180*e4b17023SJohn Marino { 181*e4b17023SJohn Marino _M_curToken = _S_token_line_begin; 182*e4b17023SJohn Marino ++_M_current; 183*e4b17023SJohn Marino return; 184*e4b17023SJohn Marino } 185*e4b17023SJohn Marino else if (__c == _M_ctype.widen('$')) 186*e4b17023SJohn Marino { 187*e4b17023SJohn Marino _M_curToken = _S_token_line_end; 188*e4b17023SJohn Marino ++_M_current; 189*e4b17023SJohn Marino return; 190*e4b17023SJohn Marino } 191*e4b17023SJohn Marino #endif 192*e4b17023SJohn Marino else if (__c == _M_ctype.widen('.')) 193*e4b17023SJohn Marino { 194*e4b17023SJohn Marino _M_curToken = _S_token_anychar; 195*e4b17023SJohn Marino ++_M_current; 196*e4b17023SJohn Marino return; 197*e4b17023SJohn Marino } 198*e4b17023SJohn Marino else if (__c == _M_ctype.widen('*')) 199*e4b17023SJohn Marino { 200*e4b17023SJohn Marino _M_curToken = _S_token_closure0; 201*e4b17023SJohn Marino ++_M_current; 202*e4b17023SJohn Marino return; 203*e4b17023SJohn Marino } 204*e4b17023SJohn Marino else if (__c == _M_ctype.widen('+')) 205*e4b17023SJohn Marino { 206*e4b17023SJohn Marino _M_curToken = _S_token_closure1; 207*e4b17023SJohn Marino ++_M_current; 208*e4b17023SJohn Marino return; 209*e4b17023SJohn Marino } 210*e4b17023SJohn Marino else if (__c == _M_ctype.widen('|')) 211*e4b17023SJohn Marino { 212*e4b17023SJohn Marino _M_curToken = _S_token_or; 213*e4b17023SJohn Marino ++_M_current; 214*e4b17023SJohn Marino return; 215*e4b17023SJohn Marino } 216*e4b17023SJohn Marino else if (__c == _M_ctype.widen('[')) 217*e4b17023SJohn Marino { 218*e4b17023SJohn Marino _M_curToken = _S_token_bracket_begin; 219*e4b17023SJohn Marino _M_state |= (_S_state_in_bracket | _S_state_at_start); 220*e4b17023SJohn Marino ++_M_current; 221*e4b17023SJohn Marino return; 222*e4b17023SJohn Marino } 223*e4b17023SJohn Marino else if (__c == _M_ctype.widen('\\')) 224*e4b17023SJohn Marino { 225*e4b17023SJohn Marino _M_eat_escape(); 226*e4b17023SJohn Marino return; 227*e4b17023SJohn Marino } 228*e4b17023SJohn Marino else if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 229*e4b17023SJohn Marino { 230*e4b17023SJohn Marino if (__c == _M_ctype.widen('(')) 231*e4b17023SJohn Marino { 232*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_begin; 233*e4b17023SJohn Marino ++_M_current; 234*e4b17023SJohn Marino return; 235*e4b17023SJohn Marino } 236*e4b17023SJohn Marino else if (__c == _M_ctype.widen(')')) 237*e4b17023SJohn Marino { 238*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_end; 239*e4b17023SJohn Marino ++_M_current; 240*e4b17023SJohn Marino return; 241*e4b17023SJohn Marino } 242*e4b17023SJohn Marino else if (__c == _M_ctype.widen('{')) 243*e4b17023SJohn Marino { 244*e4b17023SJohn Marino _M_curToken = _S_token_interval_begin; 245*e4b17023SJohn Marino _M_state |= _S_state_in_brace; 246*e4b17023SJohn Marino ++_M_current; 247*e4b17023SJohn Marino return; 248*e4b17023SJohn Marino } 249*e4b17023SJohn Marino } 250*e4b17023SJohn Marino 251*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 252*e4b17023SJohn Marino _M_curValue.assign(1, __c); 253*e4b17023SJohn Marino ++_M_current; 254*e4b17023SJohn Marino } 255*e4b17023SJohn Marino 256*e4b17023SJohn Marino 257*e4b17023SJohn Marino template<typename _InputIterator> 258*e4b17023SJohn Marino void 259*e4b17023SJohn Marino _Scanner<_InputIterator>:: 260*e4b17023SJohn Marino _M_scan_in_brace() 261*e4b17023SJohn Marino { 262*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current)) 263*e4b17023SJohn Marino { 264*e4b17023SJohn Marino _M_curToken = _S_token_dup_count; 265*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current); 266*e4b17023SJohn Marino ++_M_current; 267*e4b17023SJohn Marino while (_M_current != _M_end 268*e4b17023SJohn Marino && _M_ctype.is(_CtypeT::digit, *_M_current)) 269*e4b17023SJohn Marino { 270*e4b17023SJohn Marino _M_curValue += *_M_current; 271*e4b17023SJohn Marino ++_M_current; 272*e4b17023SJohn Marino } 273*e4b17023SJohn Marino return; 274*e4b17023SJohn Marino } 275*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(',')) 276*e4b17023SJohn Marino { 277*e4b17023SJohn Marino _M_curToken = _S_token_comma; 278*e4b17023SJohn Marino ++_M_current; 279*e4b17023SJohn Marino return; 280*e4b17023SJohn Marino } 281*e4b17023SJohn Marino if (_M_flags & (regex_constants::basic | regex_constants::grep)) 282*e4b17023SJohn Marino { 283*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('\\')) 284*e4b17023SJohn Marino _M_eat_escape(); 285*e4b17023SJohn Marino } 286*e4b17023SJohn Marino else 287*e4b17023SJohn Marino { 288*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('}')) 289*e4b17023SJohn Marino { 290*e4b17023SJohn Marino _M_curToken = _S_token_interval_end; 291*e4b17023SJohn Marino _M_state &= ~_S_state_in_brace; 292*e4b17023SJohn Marino ++_M_current; 293*e4b17023SJohn Marino return; 294*e4b17023SJohn Marino } 295*e4b17023SJohn Marino } 296*e4b17023SJohn Marino } 297*e4b17023SJohn Marino 298*e4b17023SJohn Marino template<typename _InputIterator> 299*e4b17023SJohn Marino void 300*e4b17023SJohn Marino _Scanner<_InputIterator>:: 301*e4b17023SJohn Marino _M_scan_in_bracket() 302*e4b17023SJohn Marino { 303*e4b17023SJohn Marino if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^')) 304*e4b17023SJohn Marino { 305*e4b17023SJohn Marino _M_curToken = _S_token_inverse_class; 306*e4b17023SJohn Marino _M_state &= ~_S_state_at_start; 307*e4b17023SJohn Marino ++_M_current; 308*e4b17023SJohn Marino return; 309*e4b17023SJohn Marino } 310*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('[')) 311*e4b17023SJohn Marino { 312*e4b17023SJohn Marino ++_M_current; 313*e4b17023SJohn Marino if (_M_current == _M_end) 314*e4b17023SJohn Marino { 315*e4b17023SJohn Marino _M_curToken = _S_token_eof; 316*e4b17023SJohn Marino return; 317*e4b17023SJohn Marino } 318*e4b17023SJohn Marino 319*e4b17023SJohn Marino if (*_M_current == _M_ctype.widen('.')) 320*e4b17023SJohn Marino { 321*e4b17023SJohn Marino _M_curToken = _S_token_collsymbol; 322*e4b17023SJohn Marino _M_eat_collsymbol(); 323*e4b17023SJohn Marino return; 324*e4b17023SJohn Marino } 325*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(':')) 326*e4b17023SJohn Marino { 327*e4b17023SJohn Marino _M_curToken = _S_token_char_class_name; 328*e4b17023SJohn Marino _M_eat_charclass(); 329*e4b17023SJohn Marino return; 330*e4b17023SJohn Marino } 331*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('=')) 332*e4b17023SJohn Marino { 333*e4b17023SJohn Marino _M_curToken = _S_token_equiv_class_name; 334*e4b17023SJohn Marino _M_eat_equivclass(); 335*e4b17023SJohn Marino return; 336*e4b17023SJohn Marino } 337*e4b17023SJohn Marino } 338*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen('-')) 339*e4b17023SJohn Marino { 340*e4b17023SJohn Marino _M_curToken = _S_token_dash; 341*e4b17023SJohn Marino ++_M_current; 342*e4b17023SJohn Marino return; 343*e4b17023SJohn Marino } 344*e4b17023SJohn Marino else if (*_M_current == _M_ctype.widen(']')) 345*e4b17023SJohn Marino { 346*e4b17023SJohn Marino if (!(_M_flags & regex_constants::ECMAScript) 347*e4b17023SJohn Marino || !(_M_state & _S_state_at_start)) 348*e4b17023SJohn Marino { 349*e4b17023SJohn Marino // special case: only if _not_ chr first after 350*e4b17023SJohn Marino // '[' or '[^' and if not ECMAscript 351*e4b17023SJohn Marino _M_curToken = _S_token_bracket_end; 352*e4b17023SJohn Marino ++_M_current; 353*e4b17023SJohn Marino return; 354*e4b17023SJohn Marino } 355*e4b17023SJohn Marino } 356*e4b17023SJohn Marino _M_curToken = _S_token_collelem_single; 357*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current); 358*e4b17023SJohn Marino ++_M_current; 359*e4b17023SJohn Marino } 360*e4b17023SJohn Marino 361*e4b17023SJohn Marino template<typename _InputIterator> 362*e4b17023SJohn Marino void 363*e4b17023SJohn Marino _Scanner<_InputIterator>:: 364*e4b17023SJohn Marino _M_eat_escape() 365*e4b17023SJohn Marino { 366*e4b17023SJohn Marino ++_M_current; 367*e4b17023SJohn Marino if (_M_current == _M_end) 368*e4b17023SJohn Marino { 369*e4b17023SJohn Marino _M_curToken = _S_token_eof; 370*e4b17023SJohn Marino return; 371*e4b17023SJohn Marino } 372*e4b17023SJohn Marino _CharT __c = *_M_current; 373*e4b17023SJohn Marino ++_M_current; 374*e4b17023SJohn Marino 375*e4b17023SJohn Marino if (__c == _M_ctype.widen('(')) 376*e4b17023SJohn Marino { 377*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 378*e4b17023SJohn Marino { 379*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 380*e4b17023SJohn Marino _M_curValue.assign(1, __c); 381*e4b17023SJohn Marino } 382*e4b17023SJohn Marino else 383*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_begin; 384*e4b17023SJohn Marino } 385*e4b17023SJohn Marino else if (__c == _M_ctype.widen(')')) 386*e4b17023SJohn Marino { 387*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 388*e4b17023SJohn Marino { 389*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 390*e4b17023SJohn Marino _M_curValue.assign(1, __c); 391*e4b17023SJohn Marino } 392*e4b17023SJohn Marino else 393*e4b17023SJohn Marino _M_curToken = _S_token_subexpr_end; 394*e4b17023SJohn Marino } 395*e4b17023SJohn Marino else if (__c == _M_ctype.widen('{')) 396*e4b17023SJohn Marino { 397*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 398*e4b17023SJohn Marino { 399*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 400*e4b17023SJohn Marino _M_curValue.assign(1, __c); 401*e4b17023SJohn Marino } 402*e4b17023SJohn Marino else 403*e4b17023SJohn Marino { 404*e4b17023SJohn Marino _M_curToken = _S_token_interval_begin; 405*e4b17023SJohn Marino _M_state |= _S_state_in_brace; 406*e4b17023SJohn Marino } 407*e4b17023SJohn Marino } 408*e4b17023SJohn Marino else if (__c == _M_ctype.widen('}')) 409*e4b17023SJohn Marino { 410*e4b17023SJohn Marino if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 411*e4b17023SJohn Marino { 412*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 413*e4b17023SJohn Marino _M_curValue.assign(1, __c); 414*e4b17023SJohn Marino } 415*e4b17023SJohn Marino else 416*e4b17023SJohn Marino { 417*e4b17023SJohn Marino if (!(_M_state && _S_state_in_brace)) 418*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace); 419*e4b17023SJohn Marino _M_state &= ~_S_state_in_brace; 420*e4b17023SJohn Marino _M_curToken = _S_token_interval_end; 421*e4b17023SJohn Marino } 422*e4b17023SJohn Marino } 423*e4b17023SJohn Marino else if (__c == _M_ctype.widen('x')) 424*e4b17023SJohn Marino { 425*e4b17023SJohn Marino ++_M_current; 426*e4b17023SJohn Marino if (_M_current == _M_end) 427*e4b17023SJohn Marino { 428*e4b17023SJohn Marino _M_curToken = _S_token_eof; 429*e4b17023SJohn Marino return; 430*e4b17023SJohn Marino } 431*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current)) 432*e4b17023SJohn Marino { 433*e4b17023SJohn Marino _M_curValue.assign(1, *_M_current); 434*e4b17023SJohn Marino ++_M_current; 435*e4b17023SJohn Marino if (_M_current == _M_end) 436*e4b17023SJohn Marino { 437*e4b17023SJohn Marino _M_curToken = _S_token_eof; 438*e4b17023SJohn Marino return; 439*e4b17023SJohn Marino } 440*e4b17023SJohn Marino if (_M_ctype.is(_CtypeT::digit, *_M_current)) 441*e4b17023SJohn Marino { 442*e4b17023SJohn Marino _M_curValue += *_M_current; 443*e4b17023SJohn Marino ++_M_current; 444*e4b17023SJohn Marino return; 445*e4b17023SJohn Marino } 446*e4b17023SJohn Marino } 447*e4b17023SJohn Marino } 448*e4b17023SJohn Marino else if (__c == _M_ctype.widen('^') 449*e4b17023SJohn Marino || __c == _M_ctype.widen('.') 450*e4b17023SJohn Marino || __c == _M_ctype.widen('*') 451*e4b17023SJohn Marino || __c == _M_ctype.widen('$') 452*e4b17023SJohn Marino || __c == _M_ctype.widen('\\')) 453*e4b17023SJohn Marino { 454*e4b17023SJohn Marino _M_curToken = _S_token_ord_char; 455*e4b17023SJohn Marino _M_curValue.assign(1, __c); 456*e4b17023SJohn Marino } 457*e4b17023SJohn Marino else if (_M_ctype.is(_CtypeT::digit, __c)) 458*e4b17023SJohn Marino { 459*e4b17023SJohn Marino _M_curToken = _S_token_backref; 460*e4b17023SJohn Marino _M_curValue.assign(1, __c); 461*e4b17023SJohn Marino } 462*e4b17023SJohn Marino else 463*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_escape); 464*e4b17023SJohn Marino } 465*e4b17023SJohn Marino 466*e4b17023SJohn Marino 467*e4b17023SJohn Marino // Eats a character class or throwns an exception. 468*e4b17023SJohn Marino // current point to ':' delimiter on entry, char after ']' on return 469*e4b17023SJohn Marino template<typename _InputIterator> 470*e4b17023SJohn Marino void 471*e4b17023SJohn Marino _Scanner<_InputIterator>:: 472*e4b17023SJohn Marino _M_eat_charclass() 473*e4b17023SJohn Marino { 474*e4b17023SJohn Marino ++_M_current; // skip ':' 475*e4b17023SJohn Marino if (_M_current == _M_end) 476*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype); 477*e4b17023SJohn Marino for (_M_curValue.clear(); 478*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen(':'); 479*e4b17023SJohn Marino ++_M_current) 480*e4b17023SJohn Marino _M_curValue += *_M_current; 481*e4b17023SJohn Marino if (_M_current == _M_end) 482*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype); 483*e4b17023SJohn Marino ++_M_current; // skip ':' 484*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']')) 485*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_ctype); 486*e4b17023SJohn Marino ++_M_current; // skip ']' 487*e4b17023SJohn Marino } 488*e4b17023SJohn Marino 489*e4b17023SJohn Marino 490*e4b17023SJohn Marino template<typename _InputIterator> 491*e4b17023SJohn Marino void 492*e4b17023SJohn Marino _Scanner<_InputIterator>:: 493*e4b17023SJohn Marino _M_eat_equivclass() 494*e4b17023SJohn Marino { 495*e4b17023SJohn Marino ++_M_current; // skip '=' 496*e4b17023SJohn Marino if (_M_current == _M_end) 497*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 498*e4b17023SJohn Marino for (_M_curValue.clear(); 499*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen('='); 500*e4b17023SJohn Marino ++_M_current) 501*e4b17023SJohn Marino _M_curValue += *_M_current; 502*e4b17023SJohn Marino if (_M_current == _M_end) 503*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 504*e4b17023SJohn Marino ++_M_current; // skip '=' 505*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']')) 506*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 507*e4b17023SJohn Marino ++_M_current; // skip ']' 508*e4b17023SJohn Marino } 509*e4b17023SJohn Marino 510*e4b17023SJohn Marino 511*e4b17023SJohn Marino template<typename _InputIterator> 512*e4b17023SJohn Marino void 513*e4b17023SJohn Marino _Scanner<_InputIterator>:: 514*e4b17023SJohn Marino _M_eat_collsymbol() 515*e4b17023SJohn Marino { 516*e4b17023SJohn Marino ++_M_current; // skip '.' 517*e4b17023SJohn Marino if (_M_current == _M_end) 518*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 519*e4b17023SJohn Marino for (_M_curValue.clear(); 520*e4b17023SJohn Marino _M_current != _M_end && *_M_current != _M_ctype.widen('.'); 521*e4b17023SJohn Marino ++_M_current) 522*e4b17023SJohn Marino _M_curValue += *_M_current; 523*e4b17023SJohn Marino if (_M_current == _M_end) 524*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 525*e4b17023SJohn Marino ++_M_current; // skip '.' 526*e4b17023SJohn Marino if (*_M_current != _M_ctype.widen(']')) 527*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_collate); 528*e4b17023SJohn Marino ++_M_current; // skip ']' 529*e4b17023SJohn Marino } 530*e4b17023SJohn Marino 531*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG 532*e4b17023SJohn Marino template<typename _InputIterator> 533*e4b17023SJohn Marino std::ostream& 534*e4b17023SJohn Marino _Scanner<_InputIterator>:: 535*e4b17023SJohn Marino _M_print(std::ostream& ostr) 536*e4b17023SJohn Marino { 537*e4b17023SJohn Marino switch (_M_curToken) 538*e4b17023SJohn Marino { 539*e4b17023SJohn Marino case _S_token_anychar: 540*e4b17023SJohn Marino ostr << "any-character\n"; 541*e4b17023SJohn Marino break; 542*e4b17023SJohn Marino case _S_token_backref: 543*e4b17023SJohn Marino ostr << "backref\n"; 544*e4b17023SJohn Marino break; 545*e4b17023SJohn Marino case _S_token_bracket_begin: 546*e4b17023SJohn Marino ostr << "bracket-begin\n"; 547*e4b17023SJohn Marino break; 548*e4b17023SJohn Marino case _S_token_bracket_end: 549*e4b17023SJohn Marino ostr << "bracket-end\n"; 550*e4b17023SJohn Marino break; 551*e4b17023SJohn Marino case _S_token_char_class_name: 552*e4b17023SJohn Marino ostr << "char-class-name \"" << _M_curValue << "\"\n"; 553*e4b17023SJohn Marino break; 554*e4b17023SJohn Marino case _S_token_closure0: 555*e4b17023SJohn Marino ostr << "closure0\n"; 556*e4b17023SJohn Marino break; 557*e4b17023SJohn Marino case _S_token_closure1: 558*e4b17023SJohn Marino ostr << "closure1\n"; 559*e4b17023SJohn Marino break; 560*e4b17023SJohn Marino case _S_token_collelem_multi: 561*e4b17023SJohn Marino ostr << "coll-elem-multi \"" << _M_curValue << "\"\n"; 562*e4b17023SJohn Marino break; 563*e4b17023SJohn Marino case _S_token_collelem_single: 564*e4b17023SJohn Marino ostr << "coll-elem-single \"" << _M_curValue << "\"\n"; 565*e4b17023SJohn Marino break; 566*e4b17023SJohn Marino case _S_token_collsymbol: 567*e4b17023SJohn Marino ostr << "collsymbol \"" << _M_curValue << "\"\n"; 568*e4b17023SJohn Marino break; 569*e4b17023SJohn Marino case _S_token_comma: 570*e4b17023SJohn Marino ostr << "comma\n"; 571*e4b17023SJohn Marino break; 572*e4b17023SJohn Marino case _S_token_dash: 573*e4b17023SJohn Marino ostr << "dash\n"; 574*e4b17023SJohn Marino break; 575*e4b17023SJohn Marino case _S_token_dup_count: 576*e4b17023SJohn Marino ostr << "dup count: " << _M_curValue << "\n"; 577*e4b17023SJohn Marino break; 578*e4b17023SJohn Marino case _S_token_eof: 579*e4b17023SJohn Marino ostr << "EOF\n"; 580*e4b17023SJohn Marino break; 581*e4b17023SJohn Marino case _S_token_equiv_class_name: 582*e4b17023SJohn Marino ostr << "equiv-class-name \"" << _M_curValue << "\"\n"; 583*e4b17023SJohn Marino break; 584*e4b17023SJohn Marino case _S_token_interval_begin: 585*e4b17023SJohn Marino ostr << "interval begin\n"; 586*e4b17023SJohn Marino break; 587*e4b17023SJohn Marino case _S_token_interval_end: 588*e4b17023SJohn Marino ostr << "interval end\n"; 589*e4b17023SJohn Marino break; 590*e4b17023SJohn Marino case _S_token_line_begin: 591*e4b17023SJohn Marino ostr << "line begin\n"; 592*e4b17023SJohn Marino break; 593*e4b17023SJohn Marino case _S_token_line_end: 594*e4b17023SJohn Marino ostr << "line end\n"; 595*e4b17023SJohn Marino break; 596*e4b17023SJohn Marino case _S_token_opt: 597*e4b17023SJohn Marino ostr << "opt\n"; 598*e4b17023SJohn Marino break; 599*e4b17023SJohn Marino case _S_token_or: 600*e4b17023SJohn Marino ostr << "or\n"; 601*e4b17023SJohn Marino break; 602*e4b17023SJohn Marino case _S_token_ord_char: 603*e4b17023SJohn Marino ostr << "ordinary character: \"" << _M_value() << "\"\n"; 604*e4b17023SJohn Marino break; 605*e4b17023SJohn Marino case _S_token_quoted_char: 606*e4b17023SJohn Marino ostr << "quoted char\n"; 607*e4b17023SJohn Marino break; 608*e4b17023SJohn Marino case _S_token_subexpr_begin: 609*e4b17023SJohn Marino ostr << "subexpr begin\n"; 610*e4b17023SJohn Marino break; 611*e4b17023SJohn Marino case _S_token_subexpr_end: 612*e4b17023SJohn Marino ostr << "subexpr end\n"; 613*e4b17023SJohn Marino break; 614*e4b17023SJohn Marino case _S_token_word_begin: 615*e4b17023SJohn Marino ostr << "word begin\n"; 616*e4b17023SJohn Marino break; 617*e4b17023SJohn Marino case _S_token_word_end: 618*e4b17023SJohn Marino ostr << "word end\n"; 619*e4b17023SJohn Marino break; 620*e4b17023SJohn Marino case _S_token_unknown: 621*e4b17023SJohn Marino ostr << "-- unknown token --\n"; 622*e4b17023SJohn Marino break; 623*e4b17023SJohn Marino } 624*e4b17023SJohn Marino return ostr; 625*e4b17023SJohn Marino } 626*e4b17023SJohn Marino #endif 627*e4b17023SJohn Marino 628*e4b17023SJohn Marino // Builds an NFA from an input iterator interval. 629*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 630*e4b17023SJohn Marino class _Compiler 631*e4b17023SJohn Marino { 632*e4b17023SJohn Marino public: 633*e4b17023SJohn Marino typedef _InIter _IterT; 634*e4b17023SJohn Marino typedef typename std::iterator_traits<_InIter>::value_type _CharT; 635*e4b17023SJohn Marino typedef std::basic_string<_CharT> _StringT; 636*e4b17023SJohn Marino typedef regex_constants::syntax_option_type _FlagT; 637*e4b17023SJohn Marino 638*e4b17023SJohn Marino public: 639*e4b17023SJohn Marino _Compiler(const _InIter& __b, const _InIter& __e, 640*e4b17023SJohn Marino _TraitsT& __traits, _FlagT __flags); 641*e4b17023SJohn Marino 642*e4b17023SJohn Marino const _Nfa& 643*e4b17023SJohn Marino _M_nfa() const 644*e4b17023SJohn Marino { return _M_state_store; } 645*e4b17023SJohn Marino 646*e4b17023SJohn Marino private: 647*e4b17023SJohn Marino typedef _Scanner<_InIter> _ScannerT; 648*e4b17023SJohn Marino typedef typename _ScannerT::_TokenT _TokenT; 649*e4b17023SJohn Marino typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; 650*e4b17023SJohn Marino typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT; 651*e4b17023SJohn Marino 652*e4b17023SJohn Marino // accepts a specific token or returns false. 653*e4b17023SJohn Marino bool 654*e4b17023SJohn Marino _M_match_token(_TokenT __token); 655*e4b17023SJohn Marino 656*e4b17023SJohn Marino void 657*e4b17023SJohn Marino _M_disjunction(); 658*e4b17023SJohn Marino 659*e4b17023SJohn Marino bool 660*e4b17023SJohn Marino _M_alternative(); 661*e4b17023SJohn Marino 662*e4b17023SJohn Marino bool 663*e4b17023SJohn Marino _M_term(); 664*e4b17023SJohn Marino 665*e4b17023SJohn Marino bool 666*e4b17023SJohn Marino _M_assertion(); 667*e4b17023SJohn Marino 668*e4b17023SJohn Marino bool 669*e4b17023SJohn Marino _M_quantifier(); 670*e4b17023SJohn Marino 671*e4b17023SJohn Marino bool 672*e4b17023SJohn Marino _M_atom(); 673*e4b17023SJohn Marino 674*e4b17023SJohn Marino bool 675*e4b17023SJohn Marino _M_bracket_expression(); 676*e4b17023SJohn Marino 677*e4b17023SJohn Marino bool 678*e4b17023SJohn Marino _M_bracket_list(_RMatcherT& __matcher); 679*e4b17023SJohn Marino 680*e4b17023SJohn Marino bool 681*e4b17023SJohn Marino _M_follow_list(_RMatcherT& __matcher); 682*e4b17023SJohn Marino 683*e4b17023SJohn Marino bool 684*e4b17023SJohn Marino _M_follow_list2(_RMatcherT& __matcher); 685*e4b17023SJohn Marino 686*e4b17023SJohn Marino bool 687*e4b17023SJohn Marino _M_expression_term(_RMatcherT& __matcher); 688*e4b17023SJohn Marino 689*e4b17023SJohn Marino bool 690*e4b17023SJohn Marino _M_range_expression(_RMatcherT& __matcher); 691*e4b17023SJohn Marino 692*e4b17023SJohn Marino bool 693*e4b17023SJohn Marino _M_start_range(_RMatcherT& __matcher); 694*e4b17023SJohn Marino 695*e4b17023SJohn Marino bool 696*e4b17023SJohn Marino _M_collating_symbol(_RMatcherT& __matcher); 697*e4b17023SJohn Marino 698*e4b17023SJohn Marino bool 699*e4b17023SJohn Marino _M_equivalence_class(_RMatcherT& __matcher); 700*e4b17023SJohn Marino 701*e4b17023SJohn Marino bool 702*e4b17023SJohn Marino _M_character_class(_RMatcherT& __matcher); 703*e4b17023SJohn Marino 704*e4b17023SJohn Marino int 705*e4b17023SJohn Marino _M_cur_int_value(int __radix); 706*e4b17023SJohn Marino 707*e4b17023SJohn Marino private: 708*e4b17023SJohn Marino _TraitsT& _M_traits; 709*e4b17023SJohn Marino _ScannerT _M_scanner; 710*e4b17023SJohn Marino _StringT _M_cur_value; 711*e4b17023SJohn Marino _Nfa _M_state_store; 712*e4b17023SJohn Marino _StackT _M_stack; 713*e4b17023SJohn Marino }; 714*e4b17023SJohn Marino 715*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 716*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 717*e4b17023SJohn Marino _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, 718*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>::_FlagT __flags) 719*e4b17023SJohn Marino : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), 720*e4b17023SJohn Marino _M_state_store(__flags) 721*e4b17023SJohn Marino { 722*e4b17023SJohn Marino typedef _StartTagger<_InIter, _TraitsT> _Start; 723*e4b17023SJohn Marino typedef _EndTagger<_InIter, _TraitsT> _End; 724*e4b17023SJohn Marino 725*e4b17023SJohn Marino _StateSeq __r(_M_state_store, 726*e4b17023SJohn Marino _M_state_store._M_insert_subexpr_begin(_Start(0))); 727*e4b17023SJohn Marino _M_disjunction(); 728*e4b17023SJohn Marino if (!_M_stack.empty()) 729*e4b17023SJohn Marino { 730*e4b17023SJohn Marino __r._M_append(_M_stack.top()); 731*e4b17023SJohn Marino _M_stack.pop(); 732*e4b17023SJohn Marino } 733*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0))); 734*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_accept()); 735*e4b17023SJohn Marino } 736*e4b17023SJohn Marino 737*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 738*e4b17023SJohn Marino bool 739*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 740*e4b17023SJohn Marino _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token) 741*e4b17023SJohn Marino { 742*e4b17023SJohn Marino if (token == _M_scanner._M_token()) 743*e4b17023SJohn Marino { 744*e4b17023SJohn Marino _M_cur_value = _M_scanner._M_value(); 745*e4b17023SJohn Marino _M_scanner._M_advance(); 746*e4b17023SJohn Marino return true; 747*e4b17023SJohn Marino } 748*e4b17023SJohn Marino return false; 749*e4b17023SJohn Marino } 750*e4b17023SJohn Marino 751*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 752*e4b17023SJohn Marino void 753*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 754*e4b17023SJohn Marino _M_disjunction() 755*e4b17023SJohn Marino { 756*e4b17023SJohn Marino this->_M_alternative(); 757*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_or)) 758*e4b17023SJohn Marino { 759*e4b17023SJohn Marino _StateSeq __alt1 = _M_stack.top(); _M_stack.pop(); 760*e4b17023SJohn Marino this->_M_disjunction(); 761*e4b17023SJohn Marino _StateSeq __alt2 = _M_stack.top(); _M_stack.pop(); 762*e4b17023SJohn Marino _M_stack.push(_StateSeq(__alt1, __alt2)); 763*e4b17023SJohn Marino } 764*e4b17023SJohn Marino } 765*e4b17023SJohn Marino 766*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 767*e4b17023SJohn Marino bool 768*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 769*e4b17023SJohn Marino _M_alternative() 770*e4b17023SJohn Marino { 771*e4b17023SJohn Marino if (this->_M_term()) 772*e4b17023SJohn Marino { 773*e4b17023SJohn Marino _StateSeq __re = _M_stack.top(); _M_stack.pop(); 774*e4b17023SJohn Marino this->_M_alternative(); 775*e4b17023SJohn Marino if (!_M_stack.empty()) 776*e4b17023SJohn Marino { 777*e4b17023SJohn Marino __re._M_append(_M_stack.top()); 778*e4b17023SJohn Marino _M_stack.pop(); 779*e4b17023SJohn Marino } 780*e4b17023SJohn Marino _M_stack.push(__re); 781*e4b17023SJohn Marino return true; 782*e4b17023SJohn Marino } 783*e4b17023SJohn Marino return false; 784*e4b17023SJohn Marino } 785*e4b17023SJohn Marino 786*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 787*e4b17023SJohn Marino bool 788*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 789*e4b17023SJohn Marino _M_term() 790*e4b17023SJohn Marino { 791*e4b17023SJohn Marino if (this->_M_assertion()) 792*e4b17023SJohn Marino return true; 793*e4b17023SJohn Marino if (this->_M_atom()) 794*e4b17023SJohn Marino { 795*e4b17023SJohn Marino this->_M_quantifier(); 796*e4b17023SJohn Marino return true; 797*e4b17023SJohn Marino } 798*e4b17023SJohn Marino return false; 799*e4b17023SJohn Marino } 800*e4b17023SJohn Marino 801*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 802*e4b17023SJohn Marino bool 803*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 804*e4b17023SJohn Marino _M_assertion() 805*e4b17023SJohn Marino { 806*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_line_begin)) 807*e4b17023SJohn Marino { 808*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_line_begin); 809*e4b17023SJohn Marino return true; 810*e4b17023SJohn Marino } 811*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_line_end)) 812*e4b17023SJohn Marino { 813*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_line_end); 814*e4b17023SJohn Marino return true; 815*e4b17023SJohn Marino } 816*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_word_begin)) 817*e4b17023SJohn Marino { 818*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_word_begin); 819*e4b17023SJohn Marino return true; 820*e4b17023SJohn Marino } 821*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_word_end)) 822*e4b17023SJohn Marino { 823*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_word_end); 824*e4b17023SJohn Marino return true; 825*e4b17023SJohn Marino } 826*e4b17023SJohn Marino return false; 827*e4b17023SJohn Marino } 828*e4b17023SJohn Marino 829*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 830*e4b17023SJohn Marino bool 831*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 832*e4b17023SJohn Marino _M_quantifier() 833*e4b17023SJohn Marino { 834*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_closure0)) 835*e4b17023SJohn Marino { 836*e4b17023SJohn Marino if (_M_stack.empty()) 837*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat); 838*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1); 839*e4b17023SJohn Marino __r._M_append(__r._M_front()); 840*e4b17023SJohn Marino _M_stack.pop(); 841*e4b17023SJohn Marino _M_stack.push(__r); 842*e4b17023SJohn Marino return true; 843*e4b17023SJohn Marino } 844*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_closure1)) 845*e4b17023SJohn Marino { 846*e4b17023SJohn Marino if (_M_stack.empty()) 847*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat); 848*e4b17023SJohn Marino _StateSeq __r(_M_state_store, 849*e4b17023SJohn Marino _M_state_store. 850*e4b17023SJohn Marino _M_insert_alt(_S_invalid_state_id, 851*e4b17023SJohn Marino _M_stack.top()._M_front())); 852*e4b17023SJohn Marino _M_stack.top()._M_append(__r); 853*e4b17023SJohn Marino return true; 854*e4b17023SJohn Marino } 855*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_opt)) 856*e4b17023SJohn Marino { 857*e4b17023SJohn Marino if (_M_stack.empty()) 858*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat); 859*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1); 860*e4b17023SJohn Marino _M_stack.pop(); 861*e4b17023SJohn Marino _M_stack.push(__r); 862*e4b17023SJohn Marino return true; 863*e4b17023SJohn Marino } 864*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_interval_begin)) 865*e4b17023SJohn Marino { 866*e4b17023SJohn Marino if (_M_stack.empty()) 867*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badrepeat); 868*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_dup_count)) 869*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace); 870*e4b17023SJohn Marino _StateSeq __r(_M_stack.top()); 871*e4b17023SJohn Marino int __min_rep = _M_cur_int_value(10); 872*e4b17023SJohn Marino for (int __i = 1; __i < __min_rep; ++__i) 873*e4b17023SJohn Marino _M_stack.top()._M_append(__r._M_clone()); 874*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_comma)) 875*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_dup_count)) 876*e4b17023SJohn Marino { 877*e4b17023SJohn Marino int __n = _M_cur_int_value(10) - __min_rep; 878*e4b17023SJohn Marino if (__n < 0) 879*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_badbrace); 880*e4b17023SJohn Marino for (int __i = 0; __i < __n; ++__i) 881*e4b17023SJohn Marino { 882*e4b17023SJohn Marino _StateSeq __r(_M_state_store, 883*e4b17023SJohn Marino _M_state_store. 884*e4b17023SJohn Marino _M_insert_alt(_S_invalid_state_id, 885*e4b17023SJohn Marino _M_stack.top()._M_front())); 886*e4b17023SJohn Marino _M_stack.top()._M_append(__r); 887*e4b17023SJohn Marino } 888*e4b17023SJohn Marino } 889*e4b17023SJohn Marino else 890*e4b17023SJohn Marino { 891*e4b17023SJohn Marino _StateSeq __r(_M_stack.top(), -1); 892*e4b17023SJohn Marino __r._M_push_back(__r._M_front()); 893*e4b17023SJohn Marino _M_stack.pop(); 894*e4b17023SJohn Marino _M_stack.push(__r); 895*e4b17023SJohn Marino } 896*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_interval_end)) 897*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_brace); 898*e4b17023SJohn Marino return true; 899*e4b17023SJohn Marino } 900*e4b17023SJohn Marino return false; 901*e4b17023SJohn Marino } 902*e4b17023SJohn Marino 903*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 904*e4b17023SJohn Marino bool 905*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 906*e4b17023SJohn Marino _M_atom() 907*e4b17023SJohn Marino { 908*e4b17023SJohn Marino typedef _CharMatcher<_InIter, _TraitsT> _CMatcher; 909*e4b17023SJohn Marino typedef _StartTagger<_InIter, _TraitsT> _Start; 910*e4b17023SJohn Marino typedef _EndTagger<_InIter, _TraitsT> _End; 911*e4b17023SJohn Marino 912*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_anychar)) 913*e4b17023SJohn Marino { 914*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store, 915*e4b17023SJohn Marino _M_state_store._M_insert_matcher 916*e4b17023SJohn Marino (_AnyMatcher))); 917*e4b17023SJohn Marino return true; 918*e4b17023SJohn Marino } 919*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_ord_char)) 920*e4b17023SJohn Marino { 921*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store, 922*e4b17023SJohn Marino _M_state_store._M_insert_matcher 923*e4b17023SJohn Marino (_CMatcher(_M_cur_value[0], _M_traits)))); 924*e4b17023SJohn Marino return true; 925*e4b17023SJohn Marino } 926*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_quoted_char)) 927*e4b17023SJohn Marino { 928*e4b17023SJohn Marino // note that in the ECMA grammar, this case covers backrefs. 929*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store, 930*e4b17023SJohn Marino _M_state_store._M_insert_matcher 931*e4b17023SJohn Marino (_CMatcher(_M_cur_value[0], _M_traits)))); 932*e4b17023SJohn Marino return true; 933*e4b17023SJohn Marino } 934*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_backref)) 935*e4b17023SJohn Marino { 936*e4b17023SJohn Marino // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); 937*e4b17023SJohn Marino return true; 938*e4b17023SJohn Marino } 939*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) 940*e4b17023SJohn Marino { 941*e4b17023SJohn Marino int __mark = _M_state_store._M_sub_count(); 942*e4b17023SJohn Marino _StateSeq __r(_M_state_store, 943*e4b17023SJohn Marino _M_state_store. 944*e4b17023SJohn Marino _M_insert_subexpr_begin(_Start(__mark))); 945*e4b17023SJohn Marino this->_M_disjunction(); 946*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) 947*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_paren); 948*e4b17023SJohn Marino if (!_M_stack.empty()) 949*e4b17023SJohn Marino { 950*e4b17023SJohn Marino __r._M_append(_M_stack.top()); 951*e4b17023SJohn Marino _M_stack.pop(); 952*e4b17023SJohn Marino } 953*e4b17023SJohn Marino __r._M_append(_M_state_store._M_insert_subexpr_end 954*e4b17023SJohn Marino (__mark, _End(__mark))); 955*e4b17023SJohn Marino _M_stack.push(__r); 956*e4b17023SJohn Marino return true; 957*e4b17023SJohn Marino } 958*e4b17023SJohn Marino return _M_bracket_expression(); 959*e4b17023SJohn Marino } 960*e4b17023SJohn Marino 961*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 962*e4b17023SJohn Marino bool 963*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 964*e4b17023SJohn Marino _M_bracket_expression() 965*e4b17023SJohn Marino { 966*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_bracket_begin)) 967*e4b17023SJohn Marino { 968*e4b17023SJohn Marino _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin), 969*e4b17023SJohn Marino _M_traits); 970*e4b17023SJohn Marino if (!_M_bracket_list(__matcher) 971*e4b17023SJohn Marino || !_M_match_token(_ScannerT::_S_token_bracket_end)) 972*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_brack); 973*e4b17023SJohn Marino _M_stack.push(_StateSeq(_M_state_store, 974*e4b17023SJohn Marino _M_state_store._M_insert_matcher(__matcher))); 975*e4b17023SJohn Marino return true; 976*e4b17023SJohn Marino } 977*e4b17023SJohn Marino return false; 978*e4b17023SJohn Marino } 979*e4b17023SJohn Marino 980*e4b17023SJohn Marino // If the dash is the last character in the bracket expression, it is not 981*e4b17023SJohn Marino // special. 982*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 983*e4b17023SJohn Marino bool 984*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 985*e4b17023SJohn Marino _M_bracket_list(_RMatcherT& __matcher) 986*e4b17023SJohn Marino { 987*e4b17023SJohn Marino if (_M_follow_list(__matcher)) 988*e4b17023SJohn Marino { 989*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_dash)) 990*e4b17023SJohn Marino __matcher._M_add_char(_M_cur_value[0]); 991*e4b17023SJohn Marino return true; 992*e4b17023SJohn Marino } 993*e4b17023SJohn Marino return false; 994*e4b17023SJohn Marino } 995*e4b17023SJohn Marino 996*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 997*e4b17023SJohn Marino bool 998*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 999*e4b17023SJohn Marino _M_follow_list(_RMatcherT& __matcher) 1000*e4b17023SJohn Marino { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); } 1001*e4b17023SJohn Marino 1002*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1003*e4b17023SJohn Marino bool 1004*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1005*e4b17023SJohn Marino _M_follow_list2(_RMatcherT& __matcher) 1006*e4b17023SJohn Marino { 1007*e4b17023SJohn Marino if (_M_expression_term(__matcher)) 1008*e4b17023SJohn Marino return _M_follow_list2(__matcher); 1009*e4b17023SJohn Marino return true; 1010*e4b17023SJohn Marino } 1011*e4b17023SJohn Marino 1012*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1013*e4b17023SJohn Marino bool 1014*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1015*e4b17023SJohn Marino _M_expression_term(_RMatcherT& __matcher) 1016*e4b17023SJohn Marino { 1017*e4b17023SJohn Marino return (_M_collating_symbol(__matcher) 1018*e4b17023SJohn Marino || _M_character_class(__matcher) 1019*e4b17023SJohn Marino || _M_equivalence_class(__matcher) 1020*e4b17023SJohn Marino || (_M_start_range(__matcher) 1021*e4b17023SJohn Marino && _M_range_expression(__matcher))); 1022*e4b17023SJohn Marino } 1023*e4b17023SJohn Marino 1024*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1025*e4b17023SJohn Marino bool 1026*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1027*e4b17023SJohn Marino _M_range_expression(_RMatcherT& __matcher) 1028*e4b17023SJohn Marino { 1029*e4b17023SJohn Marino if (!_M_collating_symbol(__matcher)) 1030*e4b17023SJohn Marino if (!_M_match_token(_ScannerT::_S_token_dash)) 1031*e4b17023SJohn Marino __throw_regex_error(regex_constants::error_range); 1032*e4b17023SJohn Marino __matcher._M_make_range(); 1033*e4b17023SJohn Marino return true; 1034*e4b17023SJohn Marino } 1035*e4b17023SJohn Marino 1036*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1037*e4b17023SJohn Marino bool 1038*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1039*e4b17023SJohn Marino _M_start_range(_RMatcherT& __matcher) 1040*e4b17023SJohn Marino { return _M_match_token(_ScannerT::_S_token_dash); } 1041*e4b17023SJohn Marino 1042*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1043*e4b17023SJohn Marino bool 1044*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1045*e4b17023SJohn Marino _M_collating_symbol(_RMatcherT& __matcher) 1046*e4b17023SJohn Marino { 1047*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_collelem_single)) 1048*e4b17023SJohn Marino { 1049*e4b17023SJohn Marino __matcher._M_add_char(_M_cur_value[0]); 1050*e4b17023SJohn Marino return true; 1051*e4b17023SJohn Marino } 1052*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_collsymbol)) 1053*e4b17023SJohn Marino { 1054*e4b17023SJohn Marino __matcher._M_add_collating_element(_M_cur_value); 1055*e4b17023SJohn Marino return true; 1056*e4b17023SJohn Marino } 1057*e4b17023SJohn Marino return false; 1058*e4b17023SJohn Marino } 1059*e4b17023SJohn Marino 1060*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1061*e4b17023SJohn Marino bool 1062*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1063*e4b17023SJohn Marino _M_equivalence_class(_RMatcherT& __matcher) 1064*e4b17023SJohn Marino { 1065*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) 1066*e4b17023SJohn Marino { 1067*e4b17023SJohn Marino __matcher._M_add_equivalence_class(_M_cur_value); 1068*e4b17023SJohn Marino return true; 1069*e4b17023SJohn Marino } 1070*e4b17023SJohn Marino return false; 1071*e4b17023SJohn Marino } 1072*e4b17023SJohn Marino 1073*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1074*e4b17023SJohn Marino bool 1075*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1076*e4b17023SJohn Marino _M_character_class(_RMatcherT& __matcher) 1077*e4b17023SJohn Marino { 1078*e4b17023SJohn Marino if (_M_match_token(_ScannerT::_S_token_char_class_name)) 1079*e4b17023SJohn Marino { 1080*e4b17023SJohn Marino __matcher._M_add_character_class(_M_cur_value); 1081*e4b17023SJohn Marino return true; 1082*e4b17023SJohn Marino } 1083*e4b17023SJohn Marino return false; 1084*e4b17023SJohn Marino } 1085*e4b17023SJohn Marino 1086*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1087*e4b17023SJohn Marino int 1088*e4b17023SJohn Marino _Compiler<_InIter, _TraitsT>:: 1089*e4b17023SJohn Marino _M_cur_int_value(int __radix) 1090*e4b17023SJohn Marino { 1091*e4b17023SJohn Marino int __v = 0; 1092*e4b17023SJohn Marino for (typename _StringT::size_type __i = 0; 1093*e4b17023SJohn Marino __i < _M_cur_value.length(); ++__i) 1094*e4b17023SJohn Marino __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix); 1095*e4b17023SJohn Marino return __v; 1096*e4b17023SJohn Marino } 1097*e4b17023SJohn Marino 1098*e4b17023SJohn Marino template<typename _InIter, typename _TraitsT> 1099*e4b17023SJohn Marino _AutomatonPtr 1100*e4b17023SJohn Marino __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t, 1101*e4b17023SJohn Marino regex_constants::syntax_option_type __f) 1102*e4b17023SJohn Marino { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t, 1103*e4b17023SJohn Marino __f)._M_nfa())); } 1104*e4b17023SJohn Marino 1105*e4b17023SJohn Marino _GLIBCXX_END_NAMESPACE_VERSION 1106*e4b17023SJohn Marino } // namespace __regex 1107*e4b17023SJohn Marino } // namespace std 1108*e4b17023SJohn Marino 1109*e4b17023SJohn Marino /* vim: set ts=8 sw=2 sts=2: */ 1110