// class template regex -*- C++ -*-

// Copyright (C) 2013-2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */

// FIXME make comments doxygen format.
321debfc3dSmrg 331debfc3dSmrg // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep 341debfc3dSmrg // and awk 351debfc3dSmrg // 1) grep is basic except '\n' is treated as '|' 361debfc3dSmrg // 2) egrep is extended except '\n' is treated as '|' 371debfc3dSmrg // 3) awk is extended except special escaping rules, and there's no 381debfc3dSmrg // back-reference. 391debfc3dSmrg // 401debfc3dSmrg // References: 411debfc3dSmrg // 421debfc3dSmrg // ECMAScript: ECMA-262 15.10 431debfc3dSmrg // 441debfc3dSmrg // basic, extended: 451debfc3dSmrg // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html 461debfc3dSmrg // 471debfc3dSmrg // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html 481debfc3dSmrg 491debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default) 501debfc3dSmrg { 511debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 521debfc3dSmrg 53a2dc1f3fSmrg namespace __detail 54a2dc1f3fSmrg { 551debfc3dSmrg template<typename _CharT> 561debfc3dSmrg _Scanner<_CharT>:: _Scanner(typename _Scanner::_IterT __begin,typename _Scanner::_IterT __end,_FlagT __flags,std::locale __loc)571debfc3dSmrg _Scanner(typename _Scanner::_IterT __begin, 581debfc3dSmrg typename _Scanner::_IterT __end, 591debfc3dSmrg _FlagT __flags, std::locale __loc) 601debfc3dSmrg : _ScannerBase(__flags), 611debfc3dSmrg _M_current(__begin), _M_end(__end), 621debfc3dSmrg _M_ctype(std::use_facet<_CtypeT>(__loc)), 631debfc3dSmrg _M_eat_escape(_M_is_ecma() 641debfc3dSmrg ? 
&_Scanner::_M_eat_escape_ecma 651debfc3dSmrg : &_Scanner::_M_eat_escape_posix) 661debfc3dSmrg { _M_advance(); } 671debfc3dSmrg 681debfc3dSmrg template<typename _CharT> 691debfc3dSmrg void 701debfc3dSmrg _Scanner<_CharT>:: _M_advance()711debfc3dSmrg _M_advance() 721debfc3dSmrg { 731debfc3dSmrg if (_M_current == _M_end) 741debfc3dSmrg { 751debfc3dSmrg _M_token = _S_token_eof; 761debfc3dSmrg return; 771debfc3dSmrg } 781debfc3dSmrg 791debfc3dSmrg if (_M_state == _S_state_normal) 801debfc3dSmrg _M_scan_normal(); 811debfc3dSmrg else if (_M_state == _S_state_in_bracket) 821debfc3dSmrg _M_scan_in_bracket(); 831debfc3dSmrg else if (_M_state == _S_state_in_brace) 841debfc3dSmrg _M_scan_in_brace(); 851debfc3dSmrg else 861debfc3dSmrg { 871debfc3dSmrg __glibcxx_assert(false); 881debfc3dSmrg } 891debfc3dSmrg } 901debfc3dSmrg 911debfc3dSmrg // Differences between styles: 921debfc3dSmrg // 1) "\(", "\)", "\{" in basic. It's not escaping. 931debfc3dSmrg // 2) "(?:", "(?=", "(?!" in ECMAScript. 941debfc3dSmrg template<typename _CharT> 951debfc3dSmrg void 961debfc3dSmrg _Scanner<_CharT>:: _M_scan_normal()971debfc3dSmrg _M_scan_normal() 981debfc3dSmrg { 991debfc3dSmrg auto __c = *_M_current++; 1001debfc3dSmrg 1011debfc3dSmrg if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) 1021debfc3dSmrg { 1031debfc3dSmrg _M_token = _S_token_ord_char; 1041debfc3dSmrg _M_value.assign(1, __c); 1051debfc3dSmrg return; 1061debfc3dSmrg } 1071debfc3dSmrg if (__c == '\\') 1081debfc3dSmrg { 1091debfc3dSmrg if (_M_current == _M_end) 1101debfc3dSmrg __throw_regex_error( 1111debfc3dSmrg regex_constants::error_escape, 1121debfc3dSmrg "Unexpected end of regex when escaping."); 1131debfc3dSmrg 1141debfc3dSmrg if (!_M_is_basic() 1151debfc3dSmrg || (*_M_current != '(' 1161debfc3dSmrg && *_M_current != ')' 1171debfc3dSmrg && *_M_current != '{')) 1181debfc3dSmrg { 1191debfc3dSmrg (this->*_M_eat_escape)(); 1201debfc3dSmrg return; 1211debfc3dSmrg } 1221debfc3dSmrg __c = *_M_current++; 1231debfc3dSmrg 
} 1241debfc3dSmrg if (__c == '(') 1251debfc3dSmrg { 1261debfc3dSmrg if (_M_is_ecma() && *_M_current == '?') 1271debfc3dSmrg { 1281debfc3dSmrg if (++_M_current == _M_end) 1291debfc3dSmrg __throw_regex_error( 1301debfc3dSmrg regex_constants::error_paren, 1311debfc3dSmrg "Unexpected end of regex when in an open parenthesis."); 1321debfc3dSmrg 1331debfc3dSmrg if (*_M_current == ':') 1341debfc3dSmrg { 1351debfc3dSmrg ++_M_current; 1361debfc3dSmrg _M_token = _S_token_subexpr_no_group_begin; 1371debfc3dSmrg } 1381debfc3dSmrg else if (*_M_current == '=') 1391debfc3dSmrg { 1401debfc3dSmrg ++_M_current; 1411debfc3dSmrg _M_token = _S_token_subexpr_lookahead_begin; 1421debfc3dSmrg _M_value.assign(1, 'p'); 1431debfc3dSmrg } 1441debfc3dSmrg else if (*_M_current == '!') 1451debfc3dSmrg { 1461debfc3dSmrg ++_M_current; 1471debfc3dSmrg _M_token = _S_token_subexpr_lookahead_begin; 1481debfc3dSmrg _M_value.assign(1, 'n'); 1491debfc3dSmrg } 1501debfc3dSmrg else 1511debfc3dSmrg __throw_regex_error( 1521debfc3dSmrg regex_constants::error_paren, 1531debfc3dSmrg "Invalid special open parenthesis."); 1541debfc3dSmrg } 1551debfc3dSmrg else if (_M_flags & regex_constants::nosubs) 1561debfc3dSmrg _M_token = _S_token_subexpr_no_group_begin; 1571debfc3dSmrg else 1581debfc3dSmrg _M_token = _S_token_subexpr_begin; 1591debfc3dSmrg } 1601debfc3dSmrg else if (__c == ')') 1611debfc3dSmrg _M_token = _S_token_subexpr_end; 1621debfc3dSmrg else if (__c == '[') 1631debfc3dSmrg { 1641debfc3dSmrg _M_state = _S_state_in_bracket; 1651debfc3dSmrg _M_at_bracket_start = true; 1661debfc3dSmrg if (_M_current != _M_end && *_M_current == '^') 1671debfc3dSmrg { 1681debfc3dSmrg _M_token = _S_token_bracket_neg_begin; 1691debfc3dSmrg ++_M_current; 1701debfc3dSmrg } 1711debfc3dSmrg else 1721debfc3dSmrg _M_token = _S_token_bracket_begin; 1731debfc3dSmrg } 1741debfc3dSmrg else if (__c == '{') 1751debfc3dSmrg { 1761debfc3dSmrg _M_state = _S_state_in_brace; 1771debfc3dSmrg _M_token = _S_token_interval_begin; 1781debfc3dSmrg } 
179*23f5f463Smrg else if (__builtin_expect(__c == _CharT(0), false)) 180*23f5f463Smrg { 181*23f5f463Smrg if (!_M_is_ecma()) 182*23f5f463Smrg { 183*23f5f463Smrg __throw_regex_error(regex_constants::_S_null, 184*23f5f463Smrg "Unexpected null character in regular expression"); 185*23f5f463Smrg } 186*23f5f463Smrg _M_token = _S_token_ord_char; 187*23f5f463Smrg _M_value.assign(1, __c); 188*23f5f463Smrg } 1891debfc3dSmrg else if (__c != ']' && __c != '}') 1901debfc3dSmrg { 1911debfc3dSmrg auto __it = _M_token_tbl; 1921debfc3dSmrg auto __narrowc = _M_ctype.narrow(__c, '\0'); 1931debfc3dSmrg for (; __it->first != '\0'; ++__it) 1941debfc3dSmrg if (__it->first == __narrowc) 1951debfc3dSmrg { 1961debfc3dSmrg _M_token = __it->second; 1971debfc3dSmrg return; 1981debfc3dSmrg } 1991debfc3dSmrg __glibcxx_assert(false); 2001debfc3dSmrg } 2011debfc3dSmrg else 2021debfc3dSmrg { 2031debfc3dSmrg _M_token = _S_token_ord_char; 2041debfc3dSmrg _M_value.assign(1, __c); 2051debfc3dSmrg } 2061debfc3dSmrg } 2071debfc3dSmrg 2081debfc3dSmrg // Differences between styles: 2091debfc3dSmrg // 1) different semantics of "[]" and "[^]". 2101debfc3dSmrg // 2) Escaping in bracket expr. 
2111debfc3dSmrg template<typename _CharT> 2121debfc3dSmrg void 2131debfc3dSmrg _Scanner<_CharT>:: _M_scan_in_bracket()2141debfc3dSmrg _M_scan_in_bracket() 2151debfc3dSmrg { 2161debfc3dSmrg if (_M_current == _M_end) 2171debfc3dSmrg __throw_regex_error( 2181debfc3dSmrg regex_constants::error_brack, 2191debfc3dSmrg "Unexpected end of regex when in bracket expression."); 2201debfc3dSmrg 2211debfc3dSmrg auto __c = *_M_current++; 2221debfc3dSmrg 2231debfc3dSmrg if (__c == '-') 2241debfc3dSmrg _M_token = _S_token_bracket_dash; 2251debfc3dSmrg else if (__c == '[') 2261debfc3dSmrg { 2271debfc3dSmrg if (_M_current == _M_end) 2281debfc3dSmrg __throw_regex_error(regex_constants::error_brack, 2291debfc3dSmrg "Unexpected character class open bracket."); 2301debfc3dSmrg 2311debfc3dSmrg if (*_M_current == '.') 2321debfc3dSmrg { 2331debfc3dSmrg _M_token = _S_token_collsymbol; 2341debfc3dSmrg _M_eat_class(*_M_current++); 2351debfc3dSmrg } 2361debfc3dSmrg else if (*_M_current == ':') 2371debfc3dSmrg { 2381debfc3dSmrg _M_token = _S_token_char_class_name; 2391debfc3dSmrg _M_eat_class(*_M_current++); 2401debfc3dSmrg } 2411debfc3dSmrg else if (*_M_current == '=') 2421debfc3dSmrg { 2431debfc3dSmrg _M_token = _S_token_equiv_class_name; 2441debfc3dSmrg _M_eat_class(*_M_current++); 2451debfc3dSmrg } 2461debfc3dSmrg else 2471debfc3dSmrg { 2481debfc3dSmrg _M_token = _S_token_ord_char; 2491debfc3dSmrg _M_value.assign(1, __c); 2501debfc3dSmrg } 2511debfc3dSmrg } 2521debfc3dSmrg // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted 2531debfc3dSmrg // literally. So "[]]" and "[^]]" are valid regexes. See the testcases 2541debfc3dSmrg // `*/empty_range.cc`. 2551debfc3dSmrg else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) 2561debfc3dSmrg { 2571debfc3dSmrg _M_token = _S_token_bracket_end; 2581debfc3dSmrg _M_state = _S_state_normal; 2591debfc3dSmrg } 2601debfc3dSmrg // ECMAScript and awk permits escaping in bracket. 
2611debfc3dSmrg else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) 2621debfc3dSmrg (this->*_M_eat_escape)(); 2631debfc3dSmrg else 2641debfc3dSmrg { 2651debfc3dSmrg _M_token = _S_token_ord_char; 2661debfc3dSmrg _M_value.assign(1, __c); 2671debfc3dSmrg } 2681debfc3dSmrg _M_at_bracket_start = false; 2691debfc3dSmrg } 2701debfc3dSmrg 2711debfc3dSmrg // Differences between styles: 2721debfc3dSmrg // 1) "\}" in basic style. 2731debfc3dSmrg template<typename _CharT> 2741debfc3dSmrg void 2751debfc3dSmrg _Scanner<_CharT>:: _M_scan_in_brace()2761debfc3dSmrg _M_scan_in_brace() 2771debfc3dSmrg { 2781debfc3dSmrg if (_M_current == _M_end) 2791debfc3dSmrg __throw_regex_error( 2801debfc3dSmrg regex_constants::error_brace, 2811debfc3dSmrg "Unexpected end of regex when in brace expression."); 2821debfc3dSmrg 2831debfc3dSmrg auto __c = *_M_current++; 2841debfc3dSmrg 2851debfc3dSmrg if (_M_ctype.is(_CtypeT::digit, __c)) 2861debfc3dSmrg { 2871debfc3dSmrg _M_token = _S_token_dup_count; 2881debfc3dSmrg _M_value.assign(1, __c); 2891debfc3dSmrg while (_M_current != _M_end 2901debfc3dSmrg && _M_ctype.is(_CtypeT::digit, *_M_current)) 2911debfc3dSmrg _M_value += *_M_current++; 2921debfc3dSmrg } 2931debfc3dSmrg else if (__c == ',') 2941debfc3dSmrg _M_token = _S_token_comma; 2951debfc3dSmrg // basic use \}. 
2961debfc3dSmrg else if (_M_is_basic()) 2971debfc3dSmrg { 2981debfc3dSmrg if (__c == '\\' && _M_current != _M_end && *_M_current == '}') 2991debfc3dSmrg { 3001debfc3dSmrg _M_state = _S_state_normal; 3011debfc3dSmrg _M_token = _S_token_interval_end; 3021debfc3dSmrg ++_M_current; 3031debfc3dSmrg } 3041debfc3dSmrg else 3051debfc3dSmrg __throw_regex_error(regex_constants::error_badbrace, 3061debfc3dSmrg "Unexpected character in brace expression."); 3071debfc3dSmrg } 3081debfc3dSmrg else if (__c == '}') 3091debfc3dSmrg { 3101debfc3dSmrg _M_state = _S_state_normal; 3111debfc3dSmrg _M_token = _S_token_interval_end; 3121debfc3dSmrg } 3131debfc3dSmrg else 3141debfc3dSmrg __throw_regex_error(regex_constants::error_badbrace, 3151debfc3dSmrg "Unexpected character in brace expression."); 3161debfc3dSmrg } 3171debfc3dSmrg 3181debfc3dSmrg template<typename _CharT> 3191debfc3dSmrg void 3201debfc3dSmrg _Scanner<_CharT>:: _M_eat_escape_ecma()3211debfc3dSmrg _M_eat_escape_ecma() 3221debfc3dSmrg { 3231debfc3dSmrg if (_M_current == _M_end) 3241debfc3dSmrg __throw_regex_error(regex_constants::error_escape, 3251debfc3dSmrg "Unexpected end of regex when escaping."); 3261debfc3dSmrg 3271debfc3dSmrg auto __c = *_M_current++; 3281debfc3dSmrg auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 3291debfc3dSmrg 3301debfc3dSmrg if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) 3311debfc3dSmrg { 3321debfc3dSmrg _M_token = _S_token_ord_char; 3331debfc3dSmrg _M_value.assign(1, *__pos); 3341debfc3dSmrg } 3351debfc3dSmrg else if (__c == 'b') 3361debfc3dSmrg { 3371debfc3dSmrg _M_token = _S_token_word_bound; 3381debfc3dSmrg _M_value.assign(1, 'p'); 3391debfc3dSmrg } 3401debfc3dSmrg else if (__c == 'B') 3411debfc3dSmrg { 3421debfc3dSmrg _M_token = _S_token_word_bound; 3431debfc3dSmrg _M_value.assign(1, 'n'); 3441debfc3dSmrg } 3451debfc3dSmrg // N3376 28.13 3461debfc3dSmrg else if (__c == 'd' 3471debfc3dSmrg || __c == 'D' 3481debfc3dSmrg || __c == 's' 3491debfc3dSmrg || __c == 
'S' 3501debfc3dSmrg || __c == 'w' 3511debfc3dSmrg || __c == 'W') 3521debfc3dSmrg { 3531debfc3dSmrg _M_token = _S_token_quoted_class; 3541debfc3dSmrg _M_value.assign(1, __c); 3551debfc3dSmrg } 3561debfc3dSmrg else if (__c == 'c') 3571debfc3dSmrg { 3581debfc3dSmrg if (_M_current == _M_end) 3591debfc3dSmrg __throw_regex_error( 3601debfc3dSmrg regex_constants::error_escape, 3611debfc3dSmrg "Unexpected end of regex when reading control code."); 3621debfc3dSmrg _M_token = _S_token_ord_char; 3631debfc3dSmrg _M_value.assign(1, *_M_current++); 3641debfc3dSmrg } 3651debfc3dSmrg else if (__c == 'x' || __c == 'u') 3661debfc3dSmrg { 3671debfc3dSmrg _M_value.erase(); 3681debfc3dSmrg for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) 3691debfc3dSmrg { 3701debfc3dSmrg if (_M_current == _M_end 3711debfc3dSmrg || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) 3721debfc3dSmrg __throw_regex_error( 3731debfc3dSmrg regex_constants::error_escape, 3741debfc3dSmrg "Unexpected end of regex when ascii character."); 3751debfc3dSmrg _M_value += *_M_current++; 3761debfc3dSmrg } 3771debfc3dSmrg _M_token = _S_token_hex_num; 3781debfc3dSmrg } 3791debfc3dSmrg // ECMAScript recognizes multi-digit back-references. 3801debfc3dSmrg else if (_M_ctype.is(_CtypeT::digit, __c)) 3811debfc3dSmrg { 3821debfc3dSmrg _M_value.assign(1, __c); 3831debfc3dSmrg while (_M_current != _M_end 3841debfc3dSmrg && _M_ctype.is(_CtypeT::digit, *_M_current)) 3851debfc3dSmrg _M_value += *_M_current++; 3861debfc3dSmrg _M_token = _S_token_backref; 3871debfc3dSmrg } 3881debfc3dSmrg else 3891debfc3dSmrg { 3901debfc3dSmrg _M_token = _S_token_ord_char; 3911debfc3dSmrg _M_value.assign(1, __c); 3921debfc3dSmrg } 3931debfc3dSmrg } 3941debfc3dSmrg 3951debfc3dSmrg // Differences between styles: 3961debfc3dSmrg // 1) Extended doesn't support backref, but basic does. 
3971debfc3dSmrg template<typename _CharT> 3981debfc3dSmrg void 3991debfc3dSmrg _Scanner<_CharT>:: _M_eat_escape_posix()4001debfc3dSmrg _M_eat_escape_posix() 4011debfc3dSmrg { 4021debfc3dSmrg if (_M_current == _M_end) 4031debfc3dSmrg __throw_regex_error(regex_constants::error_escape, 4041debfc3dSmrg "Unexpected end of regex when escaping."); 4051debfc3dSmrg 4061debfc3dSmrg auto __c = *_M_current; 4071debfc3dSmrg auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); 4081debfc3dSmrg 4091debfc3dSmrg if (__pos != nullptr && *__pos != '\0') 4101debfc3dSmrg { 4111debfc3dSmrg _M_token = _S_token_ord_char; 4121debfc3dSmrg _M_value.assign(1, __c); 4131debfc3dSmrg } 4141debfc3dSmrg // We MUST judge awk before handling backrefs. There's no backref in awk. 4151debfc3dSmrg else if (_M_is_awk()) 4161debfc3dSmrg { 4171debfc3dSmrg _M_eat_escape_awk(); 4181debfc3dSmrg return; 4191debfc3dSmrg } 4201debfc3dSmrg else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') 4211debfc3dSmrg { 4221debfc3dSmrg _M_token = _S_token_backref; 4231debfc3dSmrg _M_value.assign(1, __c); 4241debfc3dSmrg } 4251debfc3dSmrg else 4261debfc3dSmrg { 4271debfc3dSmrg #ifdef __STRICT_ANSI__ 4281debfc3dSmrg // POSIX says it is undefined to escape ordinary characters 4291debfc3dSmrg __throw_regex_error(regex_constants::error_escape, 4301debfc3dSmrg "Unexpected escape character."); 4311debfc3dSmrg #else 4321debfc3dSmrg _M_token = _S_token_ord_char; 4331debfc3dSmrg _M_value.assign(1, __c); 4341debfc3dSmrg #endif 4351debfc3dSmrg } 4361debfc3dSmrg ++_M_current; 4371debfc3dSmrg } 4381debfc3dSmrg 4391debfc3dSmrg template<typename _CharT> 4401debfc3dSmrg void 4411debfc3dSmrg _Scanner<_CharT>:: _M_eat_escape_awk()4421debfc3dSmrg _M_eat_escape_awk() 4431debfc3dSmrg { 4441debfc3dSmrg auto __c = *_M_current++; 4451debfc3dSmrg auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 4461debfc3dSmrg 4471debfc3dSmrg if (__pos != nullptr) 4481debfc3dSmrg { 4491debfc3dSmrg _M_token = 
_S_token_ord_char; 4501debfc3dSmrg _M_value.assign(1, *__pos); 4511debfc3dSmrg } 4521debfc3dSmrg // \ddd for oct representation 4531debfc3dSmrg else if (_M_ctype.is(_CtypeT::digit, __c) 4541debfc3dSmrg && __c != '8' 4551debfc3dSmrg && __c != '9') 4561debfc3dSmrg { 4571debfc3dSmrg _M_value.assign(1, __c); 4581debfc3dSmrg for (int __i = 0; 4591debfc3dSmrg __i < 2 4601debfc3dSmrg && _M_current != _M_end 4611debfc3dSmrg && _M_ctype.is(_CtypeT::digit, *_M_current) 4621debfc3dSmrg && *_M_current != '8' 4631debfc3dSmrg && *_M_current != '9'; 4641debfc3dSmrg __i++) 4651debfc3dSmrg _M_value += *_M_current++; 4661debfc3dSmrg _M_token = _S_token_oct_num; 4671debfc3dSmrg return; 4681debfc3dSmrg } 4691debfc3dSmrg else 4701debfc3dSmrg __throw_regex_error(regex_constants::error_escape, 4711debfc3dSmrg "Unexpected escape character."); 4721debfc3dSmrg } 4731debfc3dSmrg 4741debfc3dSmrg // Eats a character class or throws an exception. 4751debfc3dSmrg // __ch could be ':', '.' or '=', _M_current is the char after ']' when 4761debfc3dSmrg // returning. 
4771debfc3dSmrg template<typename _CharT> 4781debfc3dSmrg void 4791debfc3dSmrg _Scanner<_CharT>:: _M_eat_class(char __ch)4801debfc3dSmrg _M_eat_class(char __ch) 4811debfc3dSmrg { 4821debfc3dSmrg for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) 4831debfc3dSmrg _M_value += *_M_current++; 4841debfc3dSmrg if (_M_current == _M_end 4851debfc3dSmrg || *_M_current++ != __ch 4861debfc3dSmrg || _M_current == _M_end // skip __ch 4871debfc3dSmrg || *_M_current++ != ']') // skip ']' 4881debfc3dSmrg { 4891debfc3dSmrg if (__ch == ':') 4901debfc3dSmrg __throw_regex_error(regex_constants::error_ctype, 4911debfc3dSmrg "Unexpected end of character class."); 4921debfc3dSmrg else 4931debfc3dSmrg __throw_regex_error(regex_constants::error_collate, 4941debfc3dSmrg "Unexpected end of character class."); 4951debfc3dSmrg } 4961debfc3dSmrg } 4971debfc3dSmrg 4981debfc3dSmrg #ifdef _GLIBCXX_DEBUG 4991debfc3dSmrg template<typename _CharT> 5001debfc3dSmrg std::ostream& 5011debfc3dSmrg _Scanner<_CharT>:: _M_print(std::ostream & ostr)5021debfc3dSmrg _M_print(std::ostream& ostr) 5031debfc3dSmrg { 5041debfc3dSmrg switch (_M_token) 5051debfc3dSmrg { 5061debfc3dSmrg case _S_token_anychar: 5071debfc3dSmrg ostr << "any-character\n"; 5081debfc3dSmrg break; 5091debfc3dSmrg case _S_token_backref: 5101debfc3dSmrg ostr << "backref\n"; 5111debfc3dSmrg break; 5121debfc3dSmrg case _S_token_bracket_begin: 5131debfc3dSmrg ostr << "bracket-begin\n"; 5141debfc3dSmrg break; 5151debfc3dSmrg case _S_token_bracket_neg_begin: 5161debfc3dSmrg ostr << "bracket-neg-begin\n"; 5171debfc3dSmrg break; 5181debfc3dSmrg case _S_token_bracket_end: 5191debfc3dSmrg ostr << "bracket-end\n"; 5201debfc3dSmrg break; 5211debfc3dSmrg case _S_token_char_class_name: 5221debfc3dSmrg ostr << "char-class-name \"" << _M_value << "\"\n"; 5231debfc3dSmrg break; 5241debfc3dSmrg case _S_token_closure0: 5251debfc3dSmrg ostr << "closure0\n"; 5261debfc3dSmrg break; 5271debfc3dSmrg case _S_token_closure1: 5281debfc3dSmrg ostr << 
"closure1\n"; 5291debfc3dSmrg break; 5301debfc3dSmrg case _S_token_collsymbol: 5311debfc3dSmrg ostr << "collsymbol \"" << _M_value << "\"\n"; 5321debfc3dSmrg break; 5331debfc3dSmrg case _S_token_comma: 5341debfc3dSmrg ostr << "comma\n"; 5351debfc3dSmrg break; 5361debfc3dSmrg case _S_token_dup_count: 5371debfc3dSmrg ostr << "dup count: " << _M_value << "\n"; 5381debfc3dSmrg break; 5391debfc3dSmrg case _S_token_eof: 5401debfc3dSmrg ostr << "EOF\n"; 5411debfc3dSmrg break; 5421debfc3dSmrg case _S_token_equiv_class_name: 5431debfc3dSmrg ostr << "equiv-class-name \"" << _M_value << "\"\n"; 5441debfc3dSmrg break; 5451debfc3dSmrg case _S_token_interval_begin: 5461debfc3dSmrg ostr << "interval begin\n"; 5471debfc3dSmrg break; 5481debfc3dSmrg case _S_token_interval_end: 5491debfc3dSmrg ostr << "interval end\n"; 5501debfc3dSmrg break; 5511debfc3dSmrg case _S_token_line_begin: 5521debfc3dSmrg ostr << "line begin\n"; 5531debfc3dSmrg break; 5541debfc3dSmrg case _S_token_line_end: 5551debfc3dSmrg ostr << "line end\n"; 5561debfc3dSmrg break; 5571debfc3dSmrg case _S_token_opt: 5581debfc3dSmrg ostr << "opt\n"; 5591debfc3dSmrg break; 5601debfc3dSmrg case _S_token_or: 5611debfc3dSmrg ostr << "or\n"; 5621debfc3dSmrg break; 5631debfc3dSmrg case _S_token_ord_char: 5641debfc3dSmrg ostr << "ordinary character: \"" << _M_value << "\"\n"; 5651debfc3dSmrg break; 5661debfc3dSmrg case _S_token_subexpr_begin: 5671debfc3dSmrg ostr << "subexpr begin\n"; 5681debfc3dSmrg break; 5691debfc3dSmrg case _S_token_subexpr_no_group_begin: 5701debfc3dSmrg ostr << "no grouping subexpr begin\n"; 5711debfc3dSmrg break; 5721debfc3dSmrg case _S_token_subexpr_lookahead_begin: 5731debfc3dSmrg ostr << "lookahead subexpr begin\n"; 5741debfc3dSmrg break; 5751debfc3dSmrg case _S_token_subexpr_end: 5761debfc3dSmrg ostr << "subexpr end\n"; 5771debfc3dSmrg break; 5781debfc3dSmrg case _S_token_unknown: 5791debfc3dSmrg ostr << "-- unknown token --\n"; 5801debfc3dSmrg break; 5811debfc3dSmrg case _S_token_oct_num: 
5821debfc3dSmrg ostr << "oct number " << _M_value << "\n"; 5831debfc3dSmrg break; 5841debfc3dSmrg case _S_token_hex_num: 5851debfc3dSmrg ostr << "hex number " << _M_value << "\n"; 5861debfc3dSmrg break; 5871debfc3dSmrg case _S_token_quoted_class: 5881debfc3dSmrg ostr << "quoted class " << "\\" << _M_value << "\n"; 5891debfc3dSmrg break; 5901debfc3dSmrg default: 5911debfc3dSmrg _GLIBCXX_DEBUG_ASSERT(false); 5921debfc3dSmrg } 5931debfc3dSmrg return ostr; 5941debfc3dSmrg } 5951debfc3dSmrg #endif 5961debfc3dSmrg 5971debfc3dSmrg } // namespace __detail 598a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION 5991debfc3dSmrg } // namespace 600