14d5abbe8Smrg // class template regex -*- C++ -*- 24d5abbe8Smrg 3b1e83836Smrg // Copyright (C) 2013-2022 Free Software Foundation, Inc. 44d5abbe8Smrg // 54d5abbe8Smrg // This file is part of the GNU ISO C++ Library. This library is free 64d5abbe8Smrg // software; you can redistribute it and/or modify it under the 74d5abbe8Smrg // terms of the GNU General Public License as published by the 84d5abbe8Smrg // Free Software Foundation; either version 3, or (at your option) 94d5abbe8Smrg // any later version. 104d5abbe8Smrg 114d5abbe8Smrg // This library is distributed in the hope that it will be useful, 124d5abbe8Smrg // but WITHOUT ANY WARRANTY; without even the implied warranty of 134d5abbe8Smrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 144d5abbe8Smrg // GNU General Public License for more details. 154d5abbe8Smrg 164d5abbe8Smrg // Under Section 7 of GPL version 3, you are granted additional 174d5abbe8Smrg // permissions described in the GCC Runtime Library Exception, version 184d5abbe8Smrg // 3.1, as published by the Free Software Foundation. 194d5abbe8Smrg 204d5abbe8Smrg // You should have received a copy of the GNU General Public License and 214d5abbe8Smrg // a copy of the GCC Runtime Library Exception along with this program; 224d5abbe8Smrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 234d5abbe8Smrg // <http://www.gnu.org/licenses/>. 244d5abbe8Smrg 254d5abbe8Smrg /** 264d5abbe8Smrg * @file bits/regex_scanner.tcc 274d5abbe8Smrg * This is an internal header file, included by other library headers. 284d5abbe8Smrg * Do not attempt to use it directly. @headername{regex} 294d5abbe8Smrg */ 304d5abbe8Smrg 314d5abbe8Smrg // FIXME make comments doxygen format. 324d5abbe8Smrg 334d5abbe8Smrg // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep 344d5abbe8Smrg // and awk 354d5abbe8Smrg // 1) grep is basic except '\n' is treated as '|' 364d5abbe8Smrg // 2) egrep is extended except '\n' is treated as '|' 374d5abbe8Smrg // 3) awk is extended except special escaping rules, and there's no 384d5abbe8Smrg // back-reference. 394d5abbe8Smrg // 404d5abbe8Smrg // References: 414d5abbe8Smrg // 424d5abbe8Smrg // ECMAScript: ECMA-262 15.10 434d5abbe8Smrg // 444d5abbe8Smrg // basic, extended: 454d5abbe8Smrg // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html 464d5abbe8Smrg // 474d5abbe8Smrg // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html 484d5abbe8Smrg 494d5abbe8Smrg namespace std _GLIBCXX_VISIBILITY(default) 504d5abbe8Smrg { 518b6133e5Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 528b6133e5Smrg 53a3e9eb18Smrg namespace __detail 54a3e9eb18Smrg { 554d5abbe8Smrg template<typename _CharT> 564d5abbe8Smrg _Scanner<_CharT>:: _Scanner(const _CharT * __begin,const _CharT * __end,_FlagT __flags,std::locale __loc)57b1e83836Smrg _Scanner(const _CharT* __begin, const _CharT* __end, 584d5abbe8Smrg _FlagT __flags, std::locale __loc) 594d5abbe8Smrg : _ScannerBase(__flags), 604d5abbe8Smrg _M_current(__begin), _M_end(__end), 614d5abbe8Smrg _M_ctype(std::use_facet<_CtypeT>(__loc)), 624d5abbe8Smrg _M_eat_escape(_M_is_ecma() 634d5abbe8Smrg ? &_Scanner::_M_eat_escape_ecma 644d5abbe8Smrg : &_Scanner::_M_eat_escape_posix) 654d5abbe8Smrg { _M_advance(); } 664d5abbe8Smrg 674d5abbe8Smrg template<typename _CharT> 684d5abbe8Smrg void 694d5abbe8Smrg _Scanner<_CharT>:: _M_advance()704d5abbe8Smrg _M_advance() 714d5abbe8Smrg { 724d5abbe8Smrg if (_M_current == _M_end) 734d5abbe8Smrg { 744d5abbe8Smrg _M_token = _S_token_eof; 754d5abbe8Smrg return; 764d5abbe8Smrg } 774d5abbe8Smrg 784d5abbe8Smrg if (_M_state == _S_state_normal) 794d5abbe8Smrg _M_scan_normal(); 804d5abbe8Smrg else if (_M_state == _S_state_in_bracket) 814d5abbe8Smrg _M_scan_in_bracket(); 824d5abbe8Smrg else if (_M_state == _S_state_in_brace) 834d5abbe8Smrg _M_scan_in_brace(); 844d5abbe8Smrg else 854d5abbe8Smrg { 86b1e83836Smrg __glibcxx_assert(!"unexpected state while processing regex"); 874d5abbe8Smrg } 884d5abbe8Smrg } 894d5abbe8Smrg 904d5abbe8Smrg // Differences between styles: 914d5abbe8Smrg // 1) "\(", "\)", "\{" in basic. It's not escaping. 924d5abbe8Smrg // 2) "(?:", "(?=", "(?!" in ECMAScript. 934d5abbe8Smrg template<typename _CharT> 944d5abbe8Smrg void 954d5abbe8Smrg _Scanner<_CharT>:: _M_scan_normal()964d5abbe8Smrg _M_scan_normal() 974d5abbe8Smrg { 984d5abbe8Smrg auto __c = *_M_current++; 994d5abbe8Smrg 100b1e83836Smrg if (__builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) 1014d5abbe8Smrg { 1024d5abbe8Smrg _M_token = _S_token_ord_char; 1034d5abbe8Smrg _M_value.assign(1, __c); 1044d5abbe8Smrg return; 1054d5abbe8Smrg } 1064d5abbe8Smrg if (__c == '\\') 1074d5abbe8Smrg { 1084d5abbe8Smrg if (_M_current == _M_end) 109f9a78e0eSmrg __throw_regex_error( 110f9a78e0eSmrg regex_constants::error_escape, 111b1e83836Smrg "Invalid escape at end of regular expression"); 1124d5abbe8Smrg 1134d5abbe8Smrg if (!_M_is_basic() 1144d5abbe8Smrg || (*_M_current != '(' 1154d5abbe8Smrg && *_M_current != ')' 1164d5abbe8Smrg && *_M_current != '{')) 1174d5abbe8Smrg { 1184d5abbe8Smrg (this->*_M_eat_escape)(); 1194d5abbe8Smrg return; 1204d5abbe8Smrg } 1214d5abbe8Smrg __c = *_M_current++; 1224d5abbe8Smrg } 1234d5abbe8Smrg if (__c == '(') 1244d5abbe8Smrg { 1254d5abbe8Smrg if (_M_is_ecma() && *_M_current == '?') 1264d5abbe8Smrg { 1274d5abbe8Smrg if (++_M_current == _M_end) 128b1e83836Smrg __throw_regex_error(regex_constants::error_paren); 1294d5abbe8Smrg 1304d5abbe8Smrg if (*_M_current == ':') 1314d5abbe8Smrg { 1324d5abbe8Smrg ++_M_current; 1334d5abbe8Smrg _M_token = _S_token_subexpr_no_group_begin; 1344d5abbe8Smrg } 1354d5abbe8Smrg else if (*_M_current == '=') 1364d5abbe8Smrg { 1374d5abbe8Smrg ++_M_current; 1384d5abbe8Smrg _M_token = _S_token_subexpr_lookahead_begin; 1394d5abbe8Smrg _M_value.assign(1, 'p'); 1404d5abbe8Smrg } 1414d5abbe8Smrg else if (*_M_current == '!') 1424d5abbe8Smrg { 1434d5abbe8Smrg ++_M_current; 1444d5abbe8Smrg _M_token = _S_token_subexpr_lookahead_begin; 1454d5abbe8Smrg _M_value.assign(1, 'n'); 1464d5abbe8Smrg } 1474d5abbe8Smrg else 148b1e83836Smrg __throw_regex_error(regex_constants::error_paren, 149b1e83836Smrg "Invalid '(?...)' zero-width assertion " 150b1e83836Smrg "in regular expression"); 1514d5abbe8Smrg } 1524d5abbe8Smrg else if (_M_flags & regex_constants::nosubs) 1534d5abbe8Smrg _M_token = _S_token_subexpr_no_group_begin; 1544d5abbe8Smrg else 1554d5abbe8Smrg _M_token = _S_token_subexpr_begin; 1564d5abbe8Smrg } 1574d5abbe8Smrg else if (__c == ')') 1584d5abbe8Smrg _M_token = _S_token_subexpr_end; 1594d5abbe8Smrg else if (__c == '[') 1604d5abbe8Smrg { 1614d5abbe8Smrg _M_state = _S_state_in_bracket; 1624d5abbe8Smrg _M_at_bracket_start = true; 1634d5abbe8Smrg if (_M_current != _M_end && *_M_current == '^') 1644d5abbe8Smrg { 1654d5abbe8Smrg _M_token = _S_token_bracket_neg_begin; 1664d5abbe8Smrg ++_M_current; 1674d5abbe8Smrg } 1684d5abbe8Smrg else 1694d5abbe8Smrg _M_token = _S_token_bracket_begin; 1704d5abbe8Smrg } 1714d5abbe8Smrg else if (__c == '{') 1724d5abbe8Smrg { 1734d5abbe8Smrg _M_state = _S_state_in_brace; 1744d5abbe8Smrg _M_token = _S_token_interval_begin; 1754d5abbe8Smrg } 1767d4dc15bSmrg else if (__builtin_expect(__c == _CharT(0), false)) 1777d4dc15bSmrg { 1787d4dc15bSmrg if (!_M_is_ecma()) 179b1e83836Smrg __throw_regex_error(regex_constants::_S_null); 1807d4dc15bSmrg _M_token = _S_token_ord_char; 1817d4dc15bSmrg _M_value.assign(1, __c); 1827d4dc15bSmrg } 183f30ff588Smrg else if (__c != ']' && __c != '}') 1844d5abbe8Smrg { 1854d5abbe8Smrg auto __it = _M_token_tbl; 1864d5abbe8Smrg auto __narrowc = _M_ctype.narrow(__c, '\0'); 1874d5abbe8Smrg for (; __it->first != '\0'; ++__it) 1884d5abbe8Smrg if (__it->first == __narrowc) 1894d5abbe8Smrg { 1904d5abbe8Smrg _M_token = __it->second; 1914d5abbe8Smrg return; 1924d5abbe8Smrg } 193b1e83836Smrg __glibcxx_assert(!"unexpected special character in regex"); 1944d5abbe8Smrg } 1954d5abbe8Smrg else 1964d5abbe8Smrg { 1974d5abbe8Smrg _M_token = _S_token_ord_char; 1984d5abbe8Smrg _M_value.assign(1, __c); 1994d5abbe8Smrg } 2004d5abbe8Smrg } 2014d5abbe8Smrg 2024d5abbe8Smrg // Differences between styles: 2034d5abbe8Smrg // 1) different semantics of "[]" and "[^]". 2044d5abbe8Smrg // 2) Escaping in bracket expr. 2054d5abbe8Smrg template<typename _CharT> 2064d5abbe8Smrg void 2074d5abbe8Smrg _Scanner<_CharT>:: _M_scan_in_bracket()2084d5abbe8Smrg _M_scan_in_bracket() 2094d5abbe8Smrg { 2104d5abbe8Smrg if (_M_current == _M_end) 211b1e83836Smrg __throw_regex_error(regex_constants::error_brack); 2124d5abbe8Smrg 2134d5abbe8Smrg auto __c = *_M_current++; 2144d5abbe8Smrg 215b17d1066Smrg if (__c == '-') 216b17d1066Smrg _M_token = _S_token_bracket_dash; 217b17d1066Smrg else if (__c == '[') 2184d5abbe8Smrg { 2194d5abbe8Smrg if (_M_current == _M_end) 220f9a78e0eSmrg __throw_regex_error(regex_constants::error_brack, 221b1e83836Smrg "Incomplete '[[' character class in " 222b1e83836Smrg "regular expression"); 2234d5abbe8Smrg 2244d5abbe8Smrg if (*_M_current == '.') 2254d5abbe8Smrg { 2264d5abbe8Smrg _M_token = _S_token_collsymbol; 2274d5abbe8Smrg _M_eat_class(*_M_current++); 2284d5abbe8Smrg } 2294d5abbe8Smrg else if (*_M_current == ':') 2304d5abbe8Smrg { 2314d5abbe8Smrg _M_token = _S_token_char_class_name; 2324d5abbe8Smrg _M_eat_class(*_M_current++); 2334d5abbe8Smrg } 2344d5abbe8Smrg else if (*_M_current == '=') 2354d5abbe8Smrg { 2364d5abbe8Smrg _M_token = _S_token_equiv_class_name; 2374d5abbe8Smrg _M_eat_class(*_M_current++); 2384d5abbe8Smrg } 2394d5abbe8Smrg else 2404d5abbe8Smrg { 2414d5abbe8Smrg _M_token = _S_token_ord_char; 2424d5abbe8Smrg _M_value.assign(1, __c); 2434d5abbe8Smrg } 2444d5abbe8Smrg } 2454d5abbe8Smrg // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted 2464d5abbe8Smrg // literally. So "[]]" and "[^]]" are valid regexes. See the testcases 247b1e83836Smrg // `.../empty_range.cc`. 2484d5abbe8Smrg else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) 2494d5abbe8Smrg { 2504d5abbe8Smrg _M_token = _S_token_bracket_end; 2514d5abbe8Smrg _M_state = _S_state_normal; 2524d5abbe8Smrg } 2534d5abbe8Smrg // ECMAScript and awk permits escaping in bracket. 2544d5abbe8Smrg else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) 2554d5abbe8Smrg (this->*_M_eat_escape)(); 2564d5abbe8Smrg else 2574d5abbe8Smrg { 2584d5abbe8Smrg _M_token = _S_token_ord_char; 2594d5abbe8Smrg _M_value.assign(1, __c); 2604d5abbe8Smrg } 2614d5abbe8Smrg _M_at_bracket_start = false; 2624d5abbe8Smrg } 2634d5abbe8Smrg 2644d5abbe8Smrg // Differences between styles: 2654d5abbe8Smrg // 1) "\}" in basic style. 2664d5abbe8Smrg template<typename _CharT> 2674d5abbe8Smrg void 2684d5abbe8Smrg _Scanner<_CharT>:: _M_scan_in_brace()2694d5abbe8Smrg _M_scan_in_brace() 2704d5abbe8Smrg { 2714d5abbe8Smrg if (_M_current == _M_end) 272b1e83836Smrg __throw_regex_error(regex_constants::error_brace); 2734d5abbe8Smrg 2744d5abbe8Smrg auto __c = *_M_current++; 2754d5abbe8Smrg 2764d5abbe8Smrg if (_M_ctype.is(_CtypeT::digit, __c)) 2774d5abbe8Smrg { 2784d5abbe8Smrg _M_token = _S_token_dup_count; 2794d5abbe8Smrg _M_value.assign(1, __c); 2804d5abbe8Smrg while (_M_current != _M_end 2814d5abbe8Smrg && _M_ctype.is(_CtypeT::digit, *_M_current)) 2824d5abbe8Smrg _M_value += *_M_current++; 2834d5abbe8Smrg } 2844d5abbe8Smrg else if (__c == ',') 2854d5abbe8Smrg _M_token = _S_token_comma; 2864d5abbe8Smrg // basic use \}. 2874d5abbe8Smrg else if (_M_is_basic()) 2884d5abbe8Smrg { 2894d5abbe8Smrg if (__c == '\\' && _M_current != _M_end && *_M_current == '}') 2904d5abbe8Smrg { 2914d5abbe8Smrg _M_state = _S_state_normal; 2924d5abbe8Smrg _M_token = _S_token_interval_end; 2934d5abbe8Smrg ++_M_current; 2944d5abbe8Smrg } 2954d5abbe8Smrg else 296b1e83836Smrg __throw_regex_error(regex_constants::error_badbrace); 2974d5abbe8Smrg } 2984d5abbe8Smrg else if (__c == '}') 2994d5abbe8Smrg { 3004d5abbe8Smrg _M_state = _S_state_normal; 3014d5abbe8Smrg _M_token = _S_token_interval_end; 3024d5abbe8Smrg } 3034d5abbe8Smrg else 304b1e83836Smrg __throw_regex_error(regex_constants::error_badbrace); 3054d5abbe8Smrg } 3064d5abbe8Smrg 3074d5abbe8Smrg template<typename _CharT> 3084d5abbe8Smrg void 3094d5abbe8Smrg _Scanner<_CharT>:: _M_eat_escape_ecma()3104d5abbe8Smrg _M_eat_escape_ecma() 3114d5abbe8Smrg { 3124d5abbe8Smrg if (_M_current == _M_end) 313b1e83836Smrg __throw_regex_error(regex_constants::error_escape); 3144d5abbe8Smrg 3154d5abbe8Smrg auto __c = *_M_current++; 3164d5abbe8Smrg auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 3174d5abbe8Smrg 3184d5abbe8Smrg if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) 3194d5abbe8Smrg { 3204d5abbe8Smrg _M_token = _S_token_ord_char; 3214d5abbe8Smrg _M_value.assign(1, *__pos); 3224d5abbe8Smrg } 3234d5abbe8Smrg else if (__c == 'b') 3244d5abbe8Smrg { 3254d5abbe8Smrg _M_token = _S_token_word_bound; 3264d5abbe8Smrg _M_value.assign(1, 'p'); 3274d5abbe8Smrg } 3284d5abbe8Smrg else if (__c == 'B') 3294d5abbe8Smrg { 3304d5abbe8Smrg _M_token = _S_token_word_bound; 3314d5abbe8Smrg _M_value.assign(1, 'n'); 3324d5abbe8Smrg } 3334d5abbe8Smrg // N3376 28.13 3344d5abbe8Smrg else if (__c == 'd' 3354d5abbe8Smrg || __c == 'D' 3364d5abbe8Smrg || __c == 's' 3374d5abbe8Smrg || __c == 'S' 3384d5abbe8Smrg || __c == 'w' 3394d5abbe8Smrg || __c == 'W') 3404d5abbe8Smrg { 3414d5abbe8Smrg _M_token = _S_token_quoted_class; 3424d5abbe8Smrg _M_value.assign(1, __c); 3434d5abbe8Smrg } 3444d5abbe8Smrg else if (__c == 'c') 3454d5abbe8Smrg { 3464d5abbe8Smrg if (_M_current == _M_end) 347b1e83836Smrg __throw_regex_error(regex_constants::error_escape, 348b1e83836Smrg "invalid '\\cX' control character in " 349b1e83836Smrg "regular expression"); 3504d5abbe8Smrg _M_token = _S_token_ord_char; 3514d5abbe8Smrg _M_value.assign(1, *_M_current++); 3524d5abbe8Smrg } 3534d5abbe8Smrg else if (__c == 'x' || __c == 'u') 3544d5abbe8Smrg { 355b1e83836Smrg _M_value.clear(); 356b1e83836Smrg const int __n = __c == 'x' ? 2 : 4; 357b1e83836Smrg for (int __i = 0; __i < __n; __i++) 3584d5abbe8Smrg { 3594d5abbe8Smrg if (_M_current == _M_end 3604d5abbe8Smrg || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) 361b1e83836Smrg __throw_regex_error(regex_constants::error_escape, 362b1e83836Smrg __n == 2 363b1e83836Smrg ? "Invalid '\\xNN' control character in " 364b1e83836Smrg "regular expression" 365b1e83836Smrg : "Invalid '\\uNNNN' control character in " 366b1e83836Smrg "regular expression"); 3674d5abbe8Smrg _M_value += *_M_current++; 3684d5abbe8Smrg } 3694d5abbe8Smrg _M_token = _S_token_hex_num; 3704d5abbe8Smrg } 3714d5abbe8Smrg // ECMAScript recognizes multi-digit back-references. 3724d5abbe8Smrg else if (_M_ctype.is(_CtypeT::digit, __c)) 3734d5abbe8Smrg { 3744d5abbe8Smrg _M_value.assign(1, __c); 3754d5abbe8Smrg while (_M_current != _M_end 3764d5abbe8Smrg && _M_ctype.is(_CtypeT::digit, *_M_current)) 3774d5abbe8Smrg _M_value += *_M_current++; 3784d5abbe8Smrg _M_token = _S_token_backref; 3794d5abbe8Smrg } 3804d5abbe8Smrg else 3814d5abbe8Smrg { 3824d5abbe8Smrg _M_token = _S_token_ord_char; 3834d5abbe8Smrg _M_value.assign(1, __c); 3844d5abbe8Smrg } 3854d5abbe8Smrg } 3864d5abbe8Smrg 3874d5abbe8Smrg // Differences between styles: 3884d5abbe8Smrg // 1) Extended doesn't support backref, but basic does. 3894d5abbe8Smrg template<typename _CharT> 3904d5abbe8Smrg void 3914d5abbe8Smrg _Scanner<_CharT>:: _M_eat_escape_posix()3924d5abbe8Smrg _M_eat_escape_posix() 3934d5abbe8Smrg { 3944d5abbe8Smrg if (_M_current == _M_end) 395b1e83836Smrg __throw_regex_error(regex_constants::error_escape); 3964d5abbe8Smrg 3974d5abbe8Smrg auto __c = *_M_current; 398b1e83836Smrg auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); 3994d5abbe8Smrg 4004d5abbe8Smrg if (__pos != nullptr && *__pos != '\0') 4014d5abbe8Smrg { 4024d5abbe8Smrg _M_token = _S_token_ord_char; 4034d5abbe8Smrg _M_value.assign(1, __c); 4044d5abbe8Smrg } 4054d5abbe8Smrg // We MUST judge awk before handling backrefs. There's no backref in awk. 4064d5abbe8Smrg else if (_M_is_awk()) 4074d5abbe8Smrg { 4084d5abbe8Smrg _M_eat_escape_awk(); 4094d5abbe8Smrg return; 4104d5abbe8Smrg } 4114d5abbe8Smrg else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') 4124d5abbe8Smrg { 4134d5abbe8Smrg _M_token = _S_token_backref; 4144d5abbe8Smrg _M_value.assign(1, __c); 4154d5abbe8Smrg } 4164d5abbe8Smrg else 4174d5abbe8Smrg { 4184d5abbe8Smrg #ifdef __STRICT_ANSI__ 4194d5abbe8Smrg // POSIX says it is undefined to escape ordinary characters 420b1e83836Smrg __throw_regex_error(regex_constants::error_escape); 4214d5abbe8Smrg #else 4224d5abbe8Smrg _M_token = _S_token_ord_char; 4234d5abbe8Smrg _M_value.assign(1, __c); 4244d5abbe8Smrg #endif 4254d5abbe8Smrg } 4264d5abbe8Smrg ++_M_current; 4274d5abbe8Smrg } 4284d5abbe8Smrg 4294d5abbe8Smrg template<typename _CharT> 4304d5abbe8Smrg void 4314d5abbe8Smrg _Scanner<_CharT>:: _M_eat_escape_awk()4324d5abbe8Smrg _M_eat_escape_awk() 4334d5abbe8Smrg { 4344d5abbe8Smrg auto __c = *_M_current++; 4354d5abbe8Smrg auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 4364d5abbe8Smrg 4374d5abbe8Smrg if (__pos != nullptr) 4384d5abbe8Smrg { 4394d5abbe8Smrg _M_token = _S_token_ord_char; 4404d5abbe8Smrg _M_value.assign(1, *__pos); 4414d5abbe8Smrg } 4424d5abbe8Smrg // \ddd for oct representation 4434d5abbe8Smrg else if (_M_ctype.is(_CtypeT::digit, __c) 4444d5abbe8Smrg && __c != '8' 4454d5abbe8Smrg && __c != '9') 4464d5abbe8Smrg { 4474d5abbe8Smrg _M_value.assign(1, __c); 4484d5abbe8Smrg for (int __i = 0; 4494d5abbe8Smrg __i < 2 4504d5abbe8Smrg && _M_current != _M_end 4514d5abbe8Smrg && _M_ctype.is(_CtypeT::digit, *_M_current) 4524d5abbe8Smrg && *_M_current != '8' 4534d5abbe8Smrg && *_M_current != '9'; 4544d5abbe8Smrg __i++) 4554d5abbe8Smrg _M_value += *_M_current++; 4564d5abbe8Smrg _M_token = _S_token_oct_num; 4574d5abbe8Smrg return; 4584d5abbe8Smrg } 4594d5abbe8Smrg else 460b1e83836Smrg __throw_regex_error(regex_constants::error_escape); 4614d5abbe8Smrg } 4624d5abbe8Smrg 4634d5abbe8Smrg // Eats a character class or throws an exception. 4644d5abbe8Smrg // __ch could be ':', '.' or '=', _M_current is the char after ']' when 4654d5abbe8Smrg // returning. 4664d5abbe8Smrg template<typename _CharT> 4674d5abbe8Smrg void 4684d5abbe8Smrg _Scanner<_CharT>:: _M_eat_class(char __ch)4694d5abbe8Smrg _M_eat_class(char __ch) 4704d5abbe8Smrg { 4714d5abbe8Smrg for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) 4724d5abbe8Smrg _M_value += *_M_current++; 4734d5abbe8Smrg if (_M_current == _M_end 4744d5abbe8Smrg || *_M_current++ != __ch 4754d5abbe8Smrg || _M_current == _M_end // skip __ch 4764d5abbe8Smrg || *_M_current++ != ']') // skip ']' 4774d5abbe8Smrg { 478b1e83836Smrg __throw_regex_error(__ch == ':' ? regex_constants::error_ctype 479b1e83836Smrg : regex_constants::error_collate); 4804d5abbe8Smrg } 4814d5abbe8Smrg } 4824d5abbe8Smrg 4834d5abbe8Smrg #ifdef _GLIBCXX_DEBUG 4844d5abbe8Smrg template<typename _CharT> 4854d5abbe8Smrg std::ostream& 4864d5abbe8Smrg _Scanner<_CharT>:: _M_print(std::ostream & __ostr)487*0a307195Smrg _M_print(std::ostream& __ostr) 4884d5abbe8Smrg { 4894d5abbe8Smrg switch (_M_token) 4904d5abbe8Smrg { 4914d5abbe8Smrg case _S_token_anychar: 492*0a307195Smrg __ostr << "any-character\n"; 4934d5abbe8Smrg break; 4944d5abbe8Smrg case _S_token_backref: 495*0a307195Smrg __ostr << "backref\n"; 4964d5abbe8Smrg break; 4974d5abbe8Smrg case _S_token_bracket_begin: 498*0a307195Smrg __ostr << "bracket-begin\n"; 4994d5abbe8Smrg break; 5004d5abbe8Smrg case _S_token_bracket_neg_begin: 501*0a307195Smrg __ostr << "bracket-neg-begin\n"; 5024d5abbe8Smrg break; 5034d5abbe8Smrg case _S_token_bracket_end: 504*0a307195Smrg __ostr << "bracket-end\n"; 5054d5abbe8Smrg break; 5064d5abbe8Smrg case _S_token_char_class_name: 507*0a307195Smrg __ostr << "char-class-name \"" << _M_value << "\"\n"; 5084d5abbe8Smrg break; 5094d5abbe8Smrg case _S_token_closure0: 510*0a307195Smrg __ostr << "closure0\n"; 5114d5abbe8Smrg break; 5124d5abbe8Smrg case _S_token_closure1: 513*0a307195Smrg __ostr << "closure1\n"; 5144d5abbe8Smrg break; 5154d5abbe8Smrg case _S_token_collsymbol: 516*0a307195Smrg __ostr << "collsymbol \"" << _M_value << "\"\n"; 5174d5abbe8Smrg break; 5184d5abbe8Smrg case _S_token_comma: 519*0a307195Smrg __ostr << "comma\n"; 5204d5abbe8Smrg break; 5214d5abbe8Smrg case _S_token_dup_count: 522*0a307195Smrg __ostr << "dup count: " << _M_value << "\n"; 5234d5abbe8Smrg break; 5244d5abbe8Smrg case _S_token_eof: 525*0a307195Smrg __ostr << "EOF\n"; 5264d5abbe8Smrg break; 5274d5abbe8Smrg case _S_token_equiv_class_name: 528*0a307195Smrg __ostr << "equiv-class-name \"" << _M_value << "\"\n"; 5294d5abbe8Smrg break; 5304d5abbe8Smrg case _S_token_interval_begin: 531*0a307195Smrg __ostr << "interval begin\n"; 5324d5abbe8Smrg break; 5334d5abbe8Smrg case _S_token_interval_end: 534*0a307195Smrg __ostr << "interval end\n"; 5354d5abbe8Smrg break; 5364d5abbe8Smrg case _S_token_line_begin: 537*0a307195Smrg __ostr << "line begin\n"; 5384d5abbe8Smrg break; 5394d5abbe8Smrg case _S_token_line_end: 540*0a307195Smrg __ostr << "line end\n"; 5414d5abbe8Smrg break; 5424d5abbe8Smrg case _S_token_opt: 543*0a307195Smrg __ostr << "opt\n"; 5444d5abbe8Smrg break; 5454d5abbe8Smrg case _S_token_or: 546*0a307195Smrg __ostr << "or\n"; 5474d5abbe8Smrg break; 5484d5abbe8Smrg case _S_token_ord_char: 549*0a307195Smrg __ostr << "ordinary character: \"" << _M_value << "\"\n"; 5504d5abbe8Smrg break; 5514d5abbe8Smrg case _S_token_subexpr_begin: 552*0a307195Smrg __ostr << "subexpr begin\n"; 5534d5abbe8Smrg break; 5544d5abbe8Smrg case _S_token_subexpr_no_group_begin: 555*0a307195Smrg __ostr << "no grouping subexpr begin\n"; 5564d5abbe8Smrg break; 5574d5abbe8Smrg case _S_token_subexpr_lookahead_begin: 558*0a307195Smrg __ostr << "lookahead subexpr begin\n"; 5594d5abbe8Smrg break; 5604d5abbe8Smrg case _S_token_subexpr_end: 561*0a307195Smrg __ostr << "subexpr end\n"; 5624d5abbe8Smrg break; 5634d5abbe8Smrg case _S_token_unknown: 564*0a307195Smrg __ostr << "-- unknown token --\n"; 5654d5abbe8Smrg break; 5664d5abbe8Smrg case _S_token_oct_num: 567*0a307195Smrg __ostr << "oct number " << _M_value << "\n"; 5684d5abbe8Smrg break; 5694d5abbe8Smrg case _S_token_hex_num: 570*0a307195Smrg __ostr << "hex number " << _M_value << "\n"; 5714d5abbe8Smrg break; 5724d5abbe8Smrg case _S_token_quoted_class: 573*0a307195Smrg __ostr << "quoted class " << "\\" << _M_value << "\n"; 5744d5abbe8Smrg break; 5754d5abbe8Smrg default: 5764d5abbe8Smrg _GLIBCXX_DEBUG_ASSERT(false); 5774d5abbe8Smrg } 578*0a307195Smrg return __ostr; 5794d5abbe8Smrg } 5804d5abbe8Smrg #endif 5814d5abbe8Smrg 5828b6133e5Smrg } // namespace __detail 583a3e9eb18Smrg _GLIBCXX_END_NAMESPACE_VERSION 5844d5abbe8Smrg } // namespace 585