xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/bits/regex_scanner.tcc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
14d5abbe8Smrg // class template regex -*- C++ -*-
24d5abbe8Smrg 
3b1e83836Smrg // Copyright (C) 2013-2022 Free Software Foundation, Inc.
44d5abbe8Smrg //
54d5abbe8Smrg // This file is part of the GNU ISO C++ Library.  This library is free
64d5abbe8Smrg // software; you can redistribute it and/or modify it under the
74d5abbe8Smrg // terms of the GNU General Public License as published by the
84d5abbe8Smrg // Free Software Foundation; either version 3, or (at your option)
94d5abbe8Smrg // any later version.
104d5abbe8Smrg 
114d5abbe8Smrg // This library is distributed in the hope that it will be useful,
124d5abbe8Smrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
134d5abbe8Smrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
144d5abbe8Smrg // GNU General Public License for more details.
154d5abbe8Smrg 
164d5abbe8Smrg // Under Section 7 of GPL version 3, you are granted additional
174d5abbe8Smrg // permissions described in the GCC Runtime Library Exception, version
184d5abbe8Smrg // 3.1, as published by the Free Software Foundation.
194d5abbe8Smrg 
204d5abbe8Smrg // You should have received a copy of the GNU General Public License and
214d5abbe8Smrg // a copy of the GCC Runtime Library Exception along with this program;
224d5abbe8Smrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
234d5abbe8Smrg // <http://www.gnu.org/licenses/>.
244d5abbe8Smrg 
254d5abbe8Smrg /**
264d5abbe8Smrg  *  @file bits/regex_scanner.tcc
274d5abbe8Smrg  *  This is an internal header file, included by other library headers.
284d5abbe8Smrg  *  Do not attempt to use it directly. @headername{regex}
294d5abbe8Smrg  */
304d5abbe8Smrg 
314d5abbe8Smrg // FIXME make comments doxygen format.
324d5abbe8Smrg 
334d5abbe8Smrg // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
344d5abbe8Smrg // and awk
354d5abbe8Smrg // 1) grep is basic except '\n' is treated as '|'
364d5abbe8Smrg // 2) egrep is extended except '\n' is treated as '|'
374d5abbe8Smrg // 3) awk is extended except special escaping rules, and there's no
384d5abbe8Smrg //    back-reference.
394d5abbe8Smrg //
404d5abbe8Smrg // References:
414d5abbe8Smrg //
424d5abbe8Smrg // ECMAScript: ECMA-262 15.10
434d5abbe8Smrg //
444d5abbe8Smrg // basic, extended:
454d5abbe8Smrg // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
464d5abbe8Smrg //
474d5abbe8Smrg // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html
484d5abbe8Smrg 
494d5abbe8Smrg namespace std _GLIBCXX_VISIBILITY(default)
504d5abbe8Smrg {
518b6133e5Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
528b6133e5Smrg 
53a3e9eb18Smrg namespace __detail
54a3e9eb18Smrg {
554d5abbe8Smrg   template<typename _CharT>
564d5abbe8Smrg     _Scanner<_CharT>::
_Scanner(const _CharT * __begin,const _CharT * __end,_FlagT __flags,std::locale __loc)57b1e83836Smrg     _Scanner(const _CharT* __begin, const _CharT* __end,
584d5abbe8Smrg 	     _FlagT __flags, std::locale __loc)
594d5abbe8Smrg     : _ScannerBase(__flags),
604d5abbe8Smrg       _M_current(__begin), _M_end(__end),
614d5abbe8Smrg       _M_ctype(std::use_facet<_CtypeT>(__loc)),
624d5abbe8Smrg       _M_eat_escape(_M_is_ecma()
634d5abbe8Smrg 		    ? &_Scanner::_M_eat_escape_ecma
644d5abbe8Smrg 		    : &_Scanner::_M_eat_escape_posix)
654d5abbe8Smrg     { _M_advance(); }
664d5abbe8Smrg 
674d5abbe8Smrg   template<typename _CharT>
684d5abbe8Smrg     void
694d5abbe8Smrg     _Scanner<_CharT>::
_M_advance()704d5abbe8Smrg     _M_advance()
714d5abbe8Smrg     {
724d5abbe8Smrg       if (_M_current == _M_end)
734d5abbe8Smrg 	{
744d5abbe8Smrg 	  _M_token = _S_token_eof;
754d5abbe8Smrg 	  return;
764d5abbe8Smrg 	}
774d5abbe8Smrg 
784d5abbe8Smrg       if (_M_state == _S_state_normal)
794d5abbe8Smrg 	_M_scan_normal();
804d5abbe8Smrg       else if (_M_state == _S_state_in_bracket)
814d5abbe8Smrg 	_M_scan_in_bracket();
824d5abbe8Smrg       else if (_M_state == _S_state_in_brace)
834d5abbe8Smrg 	_M_scan_in_brace();
844d5abbe8Smrg       else
854d5abbe8Smrg 	{
86b1e83836Smrg 	  __glibcxx_assert(!"unexpected state while processing regex");
874d5abbe8Smrg 	}
884d5abbe8Smrg     }
894d5abbe8Smrg 
904d5abbe8Smrg   // Differences between styles:
914d5abbe8Smrg   // 1) "\(", "\)", "\{" in basic. It's not escaping.
924d5abbe8Smrg   // 2) "(?:", "(?=", "(?!" in ECMAScript.
934d5abbe8Smrg   template<typename _CharT>
944d5abbe8Smrg     void
954d5abbe8Smrg     _Scanner<_CharT>::
_M_scan_normal()964d5abbe8Smrg     _M_scan_normal()
974d5abbe8Smrg     {
984d5abbe8Smrg       auto __c = *_M_current++;
994d5abbe8Smrg 
100b1e83836Smrg       if (__builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
1014d5abbe8Smrg 	{
1024d5abbe8Smrg 	  _M_token = _S_token_ord_char;
1034d5abbe8Smrg 	  _M_value.assign(1, __c);
1044d5abbe8Smrg 	  return;
1054d5abbe8Smrg 	}
1064d5abbe8Smrg       if (__c == '\\')
1074d5abbe8Smrg 	{
1084d5abbe8Smrg 	  if (_M_current == _M_end)
109f9a78e0eSmrg 	    __throw_regex_error(
110f9a78e0eSmrg 	      regex_constants::error_escape,
111b1e83836Smrg 	      "Invalid escape at end of regular expression");
1124d5abbe8Smrg 
1134d5abbe8Smrg 	  if (!_M_is_basic()
1144d5abbe8Smrg 	      || (*_M_current != '('
1154d5abbe8Smrg 		  && *_M_current != ')'
1164d5abbe8Smrg 		  && *_M_current != '{'))
1174d5abbe8Smrg 	    {
1184d5abbe8Smrg 	      (this->*_M_eat_escape)();
1194d5abbe8Smrg 	      return;
1204d5abbe8Smrg 	    }
1214d5abbe8Smrg 	  __c = *_M_current++;
1224d5abbe8Smrg 	}
1234d5abbe8Smrg       if (__c == '(')
1244d5abbe8Smrg 	{
1254d5abbe8Smrg 	  if (_M_is_ecma() && *_M_current == '?')
1264d5abbe8Smrg 	    {
1274d5abbe8Smrg 	      if (++_M_current == _M_end)
128b1e83836Smrg 		__throw_regex_error(regex_constants::error_paren);
1294d5abbe8Smrg 
1304d5abbe8Smrg 	      if (*_M_current == ':')
1314d5abbe8Smrg 		{
1324d5abbe8Smrg 		  ++_M_current;
1334d5abbe8Smrg 		  _M_token = _S_token_subexpr_no_group_begin;
1344d5abbe8Smrg 		}
1354d5abbe8Smrg 	      else if (*_M_current == '=')
1364d5abbe8Smrg 		{
1374d5abbe8Smrg 		  ++_M_current;
1384d5abbe8Smrg 		  _M_token = _S_token_subexpr_lookahead_begin;
1394d5abbe8Smrg 		  _M_value.assign(1, 'p');
1404d5abbe8Smrg 		}
1414d5abbe8Smrg 	      else if (*_M_current == '!')
1424d5abbe8Smrg 		{
1434d5abbe8Smrg 		  ++_M_current;
1444d5abbe8Smrg 		  _M_token = _S_token_subexpr_lookahead_begin;
1454d5abbe8Smrg 		  _M_value.assign(1, 'n');
1464d5abbe8Smrg 		}
1474d5abbe8Smrg 	      else
148b1e83836Smrg 		__throw_regex_error(regex_constants::error_paren,
149b1e83836Smrg 				    "Invalid '(?...)' zero-width assertion "
150b1e83836Smrg 				    "in regular expression");
1514d5abbe8Smrg 	    }
1524d5abbe8Smrg 	  else if (_M_flags & regex_constants::nosubs)
1534d5abbe8Smrg 	    _M_token = _S_token_subexpr_no_group_begin;
1544d5abbe8Smrg 	  else
1554d5abbe8Smrg 	    _M_token = _S_token_subexpr_begin;
1564d5abbe8Smrg 	}
1574d5abbe8Smrg       else if (__c == ')')
1584d5abbe8Smrg 	_M_token = _S_token_subexpr_end;
1594d5abbe8Smrg       else if (__c == '[')
1604d5abbe8Smrg 	{
1614d5abbe8Smrg 	  _M_state = _S_state_in_bracket;
1624d5abbe8Smrg 	  _M_at_bracket_start = true;
1634d5abbe8Smrg 	  if (_M_current != _M_end && *_M_current == '^')
1644d5abbe8Smrg 	    {
1654d5abbe8Smrg 	      _M_token = _S_token_bracket_neg_begin;
1664d5abbe8Smrg 	      ++_M_current;
1674d5abbe8Smrg 	    }
1684d5abbe8Smrg 	  else
1694d5abbe8Smrg 	    _M_token = _S_token_bracket_begin;
1704d5abbe8Smrg 	}
1714d5abbe8Smrg       else if (__c == '{')
1724d5abbe8Smrg 	{
1734d5abbe8Smrg 	  _M_state = _S_state_in_brace;
1744d5abbe8Smrg 	  _M_token = _S_token_interval_begin;
1754d5abbe8Smrg 	}
1767d4dc15bSmrg       else if (__builtin_expect(__c == _CharT(0), false))
1777d4dc15bSmrg 	{
1787d4dc15bSmrg 	  if (!_M_is_ecma())
179b1e83836Smrg 	    __throw_regex_error(regex_constants::_S_null);
1807d4dc15bSmrg 	  _M_token = _S_token_ord_char;
1817d4dc15bSmrg 	  _M_value.assign(1, __c);
1827d4dc15bSmrg 	}
183f30ff588Smrg       else if (__c != ']' && __c != '}')
1844d5abbe8Smrg 	{
1854d5abbe8Smrg 	  auto __it = _M_token_tbl;
1864d5abbe8Smrg 	  auto __narrowc = _M_ctype.narrow(__c, '\0');
1874d5abbe8Smrg 	  for (; __it->first != '\0'; ++__it)
1884d5abbe8Smrg 	    if (__it->first == __narrowc)
1894d5abbe8Smrg 	      {
1904d5abbe8Smrg 		_M_token = __it->second;
1914d5abbe8Smrg 		return;
1924d5abbe8Smrg 	      }
193b1e83836Smrg 	  __glibcxx_assert(!"unexpected special character in regex");
1944d5abbe8Smrg 	}
1954d5abbe8Smrg       else
1964d5abbe8Smrg 	{
1974d5abbe8Smrg 	  _M_token = _S_token_ord_char;
1984d5abbe8Smrg 	  _M_value.assign(1, __c);
1994d5abbe8Smrg 	}
2004d5abbe8Smrg     }
2014d5abbe8Smrg 
2024d5abbe8Smrg   // Differences between styles:
2034d5abbe8Smrg   // 1) different semantics of "[]" and "[^]".
2044d5abbe8Smrg   // 2) Escaping in bracket expr.
2054d5abbe8Smrg   template<typename _CharT>
2064d5abbe8Smrg     void
2074d5abbe8Smrg     _Scanner<_CharT>::
_M_scan_in_bracket()2084d5abbe8Smrg     _M_scan_in_bracket()
2094d5abbe8Smrg     {
2104d5abbe8Smrg       if (_M_current == _M_end)
211b1e83836Smrg 	__throw_regex_error(regex_constants::error_brack);
2124d5abbe8Smrg 
2134d5abbe8Smrg       auto __c = *_M_current++;
2144d5abbe8Smrg 
215b17d1066Smrg       if (__c == '-')
216b17d1066Smrg 	_M_token = _S_token_bracket_dash;
217b17d1066Smrg       else if (__c == '[')
2184d5abbe8Smrg 	{
2194d5abbe8Smrg 	  if (_M_current == _M_end)
220f9a78e0eSmrg 	    __throw_regex_error(regex_constants::error_brack,
221b1e83836Smrg 				"Incomplete '[[' character class in "
222b1e83836Smrg 				"regular expression");
2234d5abbe8Smrg 
2244d5abbe8Smrg 	  if (*_M_current == '.')
2254d5abbe8Smrg 	    {
2264d5abbe8Smrg 	      _M_token = _S_token_collsymbol;
2274d5abbe8Smrg 	      _M_eat_class(*_M_current++);
2284d5abbe8Smrg 	    }
2294d5abbe8Smrg 	  else if (*_M_current == ':')
2304d5abbe8Smrg 	    {
2314d5abbe8Smrg 	      _M_token = _S_token_char_class_name;
2324d5abbe8Smrg 	      _M_eat_class(*_M_current++);
2334d5abbe8Smrg 	    }
2344d5abbe8Smrg 	  else if (*_M_current == '=')
2354d5abbe8Smrg 	    {
2364d5abbe8Smrg 	      _M_token = _S_token_equiv_class_name;
2374d5abbe8Smrg 	      _M_eat_class(*_M_current++);
2384d5abbe8Smrg 	    }
2394d5abbe8Smrg 	  else
2404d5abbe8Smrg 	    {
2414d5abbe8Smrg 	      _M_token = _S_token_ord_char;
2424d5abbe8Smrg 	      _M_value.assign(1, __c);
2434d5abbe8Smrg 	    }
2444d5abbe8Smrg 	}
2454d5abbe8Smrg       // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
2464d5abbe8Smrg       // literally. So "[]]" and "[^]]" are valid regexes. See the testcases
247b1e83836Smrg       // `.../empty_range.cc`.
2484d5abbe8Smrg       else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
2494d5abbe8Smrg 	{
2504d5abbe8Smrg 	  _M_token = _S_token_bracket_end;
2514d5abbe8Smrg 	  _M_state = _S_state_normal;
2524d5abbe8Smrg 	}
2534d5abbe8Smrg       // ECMAScript and awk permits escaping in bracket.
2544d5abbe8Smrg       else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
2554d5abbe8Smrg 	(this->*_M_eat_escape)();
2564d5abbe8Smrg       else
2574d5abbe8Smrg 	{
2584d5abbe8Smrg 	  _M_token = _S_token_ord_char;
2594d5abbe8Smrg 	  _M_value.assign(1, __c);
2604d5abbe8Smrg 	}
2614d5abbe8Smrg       _M_at_bracket_start = false;
2624d5abbe8Smrg     }
2634d5abbe8Smrg 
2644d5abbe8Smrg   // Differences between styles:
2654d5abbe8Smrg   // 1) "\}" in basic style.
2664d5abbe8Smrg   template<typename _CharT>
2674d5abbe8Smrg     void
2684d5abbe8Smrg     _Scanner<_CharT>::
_M_scan_in_brace()2694d5abbe8Smrg     _M_scan_in_brace()
2704d5abbe8Smrg     {
2714d5abbe8Smrg       if (_M_current == _M_end)
272b1e83836Smrg 	__throw_regex_error(regex_constants::error_brace);
2734d5abbe8Smrg 
2744d5abbe8Smrg       auto __c = *_M_current++;
2754d5abbe8Smrg 
2764d5abbe8Smrg       if (_M_ctype.is(_CtypeT::digit, __c))
2774d5abbe8Smrg 	{
2784d5abbe8Smrg 	  _M_token = _S_token_dup_count;
2794d5abbe8Smrg 	  _M_value.assign(1, __c);
2804d5abbe8Smrg 	  while (_M_current != _M_end
2814d5abbe8Smrg 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
2824d5abbe8Smrg 	    _M_value += *_M_current++;
2834d5abbe8Smrg 	}
2844d5abbe8Smrg       else if (__c == ',')
2854d5abbe8Smrg 	_M_token = _S_token_comma;
2864d5abbe8Smrg       // basic use \}.
2874d5abbe8Smrg       else if (_M_is_basic())
2884d5abbe8Smrg 	{
2894d5abbe8Smrg 	  if (__c == '\\' && _M_current != _M_end && *_M_current == '}')
2904d5abbe8Smrg 	    {
2914d5abbe8Smrg 	      _M_state = _S_state_normal;
2924d5abbe8Smrg 	      _M_token = _S_token_interval_end;
2934d5abbe8Smrg 	      ++_M_current;
2944d5abbe8Smrg 	    }
2954d5abbe8Smrg 	  else
296b1e83836Smrg 	    __throw_regex_error(regex_constants::error_badbrace);
2974d5abbe8Smrg 	}
2984d5abbe8Smrg       else if (__c == '}')
2994d5abbe8Smrg 	{
3004d5abbe8Smrg 	  _M_state = _S_state_normal;
3014d5abbe8Smrg 	  _M_token = _S_token_interval_end;
3024d5abbe8Smrg 	}
3034d5abbe8Smrg       else
304b1e83836Smrg 	__throw_regex_error(regex_constants::error_badbrace);
3054d5abbe8Smrg     }
3064d5abbe8Smrg 
3074d5abbe8Smrg   template<typename _CharT>
3084d5abbe8Smrg     void
3094d5abbe8Smrg     _Scanner<_CharT>::
_M_eat_escape_ecma()3104d5abbe8Smrg     _M_eat_escape_ecma()
3114d5abbe8Smrg     {
3124d5abbe8Smrg       if (_M_current == _M_end)
313b1e83836Smrg 	__throw_regex_error(regex_constants::error_escape);
3144d5abbe8Smrg 
3154d5abbe8Smrg       auto __c = *_M_current++;
3164d5abbe8Smrg       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
3174d5abbe8Smrg 
3184d5abbe8Smrg       if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
3194d5abbe8Smrg 	{
3204d5abbe8Smrg 	  _M_token = _S_token_ord_char;
3214d5abbe8Smrg 	  _M_value.assign(1, *__pos);
3224d5abbe8Smrg 	}
3234d5abbe8Smrg       else if (__c == 'b')
3244d5abbe8Smrg 	{
3254d5abbe8Smrg 	  _M_token = _S_token_word_bound;
3264d5abbe8Smrg 	  _M_value.assign(1, 'p');
3274d5abbe8Smrg 	}
3284d5abbe8Smrg       else if (__c == 'B')
3294d5abbe8Smrg 	{
3304d5abbe8Smrg 	  _M_token = _S_token_word_bound;
3314d5abbe8Smrg 	  _M_value.assign(1, 'n');
3324d5abbe8Smrg 	}
3334d5abbe8Smrg       // N3376 28.13
3344d5abbe8Smrg       else if (__c == 'd'
3354d5abbe8Smrg 	       || __c == 'D'
3364d5abbe8Smrg 	       || __c == 's'
3374d5abbe8Smrg 	       || __c == 'S'
3384d5abbe8Smrg 	       || __c == 'w'
3394d5abbe8Smrg 	       || __c == 'W')
3404d5abbe8Smrg 	{
3414d5abbe8Smrg 	  _M_token = _S_token_quoted_class;
3424d5abbe8Smrg 	  _M_value.assign(1, __c);
3434d5abbe8Smrg 	}
3444d5abbe8Smrg       else if (__c == 'c')
3454d5abbe8Smrg 	{
3464d5abbe8Smrg 	  if (_M_current == _M_end)
347b1e83836Smrg 	    __throw_regex_error(regex_constants::error_escape,
348b1e83836Smrg 				"invalid '\\cX' control character in "
349b1e83836Smrg 				"regular expression");
3504d5abbe8Smrg 	  _M_token = _S_token_ord_char;
3514d5abbe8Smrg 	  _M_value.assign(1, *_M_current++);
3524d5abbe8Smrg 	}
3534d5abbe8Smrg       else if (__c == 'x' || __c == 'u')
3544d5abbe8Smrg 	{
355b1e83836Smrg 	  _M_value.clear();
356b1e83836Smrg 	  const int __n = __c == 'x' ? 2 : 4;
357b1e83836Smrg 	  for (int __i = 0; __i < __n; __i++)
3584d5abbe8Smrg 	    {
3594d5abbe8Smrg 	      if (_M_current == _M_end
3604d5abbe8Smrg 		  || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
361b1e83836Smrg 		__throw_regex_error(regex_constants::error_escape,
362b1e83836Smrg 				    __n == 2
363b1e83836Smrg 				    ? "Invalid '\\xNN' control character in "
364b1e83836Smrg 				      "regular expression"
365b1e83836Smrg 				    : "Invalid '\\uNNNN' control character in "
366b1e83836Smrg 				      "regular expression");
3674d5abbe8Smrg 	      _M_value += *_M_current++;
3684d5abbe8Smrg 	    }
3694d5abbe8Smrg 	  _M_token = _S_token_hex_num;
3704d5abbe8Smrg 	}
3714d5abbe8Smrg       // ECMAScript recognizes multi-digit back-references.
3724d5abbe8Smrg       else if (_M_ctype.is(_CtypeT::digit, __c))
3734d5abbe8Smrg 	{
3744d5abbe8Smrg 	  _M_value.assign(1, __c);
3754d5abbe8Smrg 	  while (_M_current != _M_end
3764d5abbe8Smrg 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
3774d5abbe8Smrg 	    _M_value += *_M_current++;
3784d5abbe8Smrg 	  _M_token = _S_token_backref;
3794d5abbe8Smrg 	}
3804d5abbe8Smrg       else
3814d5abbe8Smrg 	{
3824d5abbe8Smrg 	  _M_token = _S_token_ord_char;
3834d5abbe8Smrg 	  _M_value.assign(1, __c);
3844d5abbe8Smrg 	}
3854d5abbe8Smrg     }
3864d5abbe8Smrg 
3874d5abbe8Smrg   // Differences between styles:
3884d5abbe8Smrg   // 1) Extended doesn't support backref, but basic does.
3894d5abbe8Smrg   template<typename _CharT>
3904d5abbe8Smrg     void
3914d5abbe8Smrg     _Scanner<_CharT>::
_M_eat_escape_posix()3924d5abbe8Smrg     _M_eat_escape_posix()
3934d5abbe8Smrg     {
3944d5abbe8Smrg       if (_M_current == _M_end)
395b1e83836Smrg 	__throw_regex_error(regex_constants::error_escape);
3964d5abbe8Smrg 
3974d5abbe8Smrg       auto __c = *_M_current;
398b1e83836Smrg       auto __pos = __builtin_strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
3994d5abbe8Smrg 
4004d5abbe8Smrg       if (__pos != nullptr && *__pos != '\0')
4014d5abbe8Smrg 	{
4024d5abbe8Smrg 	  _M_token = _S_token_ord_char;
4034d5abbe8Smrg 	  _M_value.assign(1, __c);
4044d5abbe8Smrg 	}
4054d5abbe8Smrg       // We MUST judge awk before handling backrefs. There's no backref in awk.
4064d5abbe8Smrg       else if (_M_is_awk())
4074d5abbe8Smrg 	{
4084d5abbe8Smrg 	  _M_eat_escape_awk();
4094d5abbe8Smrg 	  return;
4104d5abbe8Smrg 	}
4114d5abbe8Smrg       else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0')
4124d5abbe8Smrg 	{
4134d5abbe8Smrg 	  _M_token = _S_token_backref;
4144d5abbe8Smrg 	  _M_value.assign(1, __c);
4154d5abbe8Smrg 	}
4164d5abbe8Smrg       else
4174d5abbe8Smrg 	{
4184d5abbe8Smrg #ifdef __STRICT_ANSI__
4194d5abbe8Smrg 	  // POSIX says it is undefined to escape ordinary characters
420b1e83836Smrg 	  __throw_regex_error(regex_constants::error_escape);
4214d5abbe8Smrg #else
4224d5abbe8Smrg 	  _M_token = _S_token_ord_char;
4234d5abbe8Smrg 	  _M_value.assign(1, __c);
4244d5abbe8Smrg #endif
4254d5abbe8Smrg 	}
4264d5abbe8Smrg       ++_M_current;
4274d5abbe8Smrg     }
4284d5abbe8Smrg 
4294d5abbe8Smrg   template<typename _CharT>
4304d5abbe8Smrg     void
4314d5abbe8Smrg     _Scanner<_CharT>::
_M_eat_escape_awk()4324d5abbe8Smrg     _M_eat_escape_awk()
4334d5abbe8Smrg     {
4344d5abbe8Smrg       auto __c = *_M_current++;
4354d5abbe8Smrg       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
4364d5abbe8Smrg 
4374d5abbe8Smrg       if (__pos != nullptr)
4384d5abbe8Smrg 	{
4394d5abbe8Smrg 	  _M_token = _S_token_ord_char;
4404d5abbe8Smrg 	  _M_value.assign(1, *__pos);
4414d5abbe8Smrg 	}
4424d5abbe8Smrg       // \ddd for oct representation
4434d5abbe8Smrg       else if (_M_ctype.is(_CtypeT::digit, __c)
4444d5abbe8Smrg 	       && __c != '8'
4454d5abbe8Smrg 	       && __c != '9')
4464d5abbe8Smrg 	{
4474d5abbe8Smrg 	  _M_value.assign(1,  __c);
4484d5abbe8Smrg 	  for (int __i = 0;
4494d5abbe8Smrg 	       __i < 2
4504d5abbe8Smrg 	       && _M_current != _M_end
4514d5abbe8Smrg 	       && _M_ctype.is(_CtypeT::digit, *_M_current)
4524d5abbe8Smrg 	       && *_M_current != '8'
4534d5abbe8Smrg 	       && *_M_current != '9';
4544d5abbe8Smrg 	       __i++)
4554d5abbe8Smrg 	    _M_value += *_M_current++;
4564d5abbe8Smrg 	  _M_token = _S_token_oct_num;
4574d5abbe8Smrg 	  return;
4584d5abbe8Smrg 	}
4594d5abbe8Smrg       else
460b1e83836Smrg 	__throw_regex_error(regex_constants::error_escape);
4614d5abbe8Smrg     }
4624d5abbe8Smrg 
4634d5abbe8Smrg   // Eats a character class or throws an exception.
4644d5abbe8Smrg   // __ch could be ':', '.' or '=', _M_current is the char after ']' when
4654d5abbe8Smrg   // returning.
4664d5abbe8Smrg   template<typename _CharT>
4674d5abbe8Smrg     void
4684d5abbe8Smrg     _Scanner<_CharT>::
_M_eat_class(char __ch)4694d5abbe8Smrg     _M_eat_class(char __ch)
4704d5abbe8Smrg     {
4714d5abbe8Smrg       for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
4724d5abbe8Smrg 	_M_value += *_M_current++;
4734d5abbe8Smrg       if (_M_current == _M_end
4744d5abbe8Smrg 	  || *_M_current++ != __ch
4754d5abbe8Smrg 	  || _M_current == _M_end // skip __ch
4764d5abbe8Smrg 	  || *_M_current++ != ']') // skip ']'
4774d5abbe8Smrg 	{
478b1e83836Smrg 	  __throw_regex_error(__ch == ':' ? regex_constants::error_ctype
479b1e83836Smrg 					  : regex_constants::error_collate);
4804d5abbe8Smrg 	}
4814d5abbe8Smrg     }
4824d5abbe8Smrg 
#ifdef _GLIBCXX_DEBUG
// Debug helper: write a one-line description of the current token to
// __ostr and return the stream.  Tokens that carry text also print
// _M_value.
//
// Every token kind this scanner assigns is handled, including
// _S_token_word_bound and _S_token_bracket_dash, so the debug assertion in
// the default branch only fires for a value that is not a known token.
template<typename _CharT>
  std::ostream&
  _Scanner<_CharT>::
  _M_print(std::ostream& __ostr)
  {
    switch (_M_token)
    {
    case _S_token_anychar:
      __ostr << "any-character\n";
      break;
    case _S_token_backref:
      __ostr << "backref\n";
      break;
    case _S_token_bracket_begin:
      __ostr << "bracket-begin\n";
      break;
    case _S_token_bracket_neg_begin:
      __ostr << "bracket-neg-begin\n";
      break;
    case _S_token_bracket_end:
      __ostr << "bracket-end\n";
      break;
    case _S_token_bracket_dash:
      __ostr << "bracket-dash\n";
      break;
    case _S_token_char_class_name:
      __ostr << "char-class-name \"" << _M_value << "\"\n";
      break;
    case _S_token_closure0:
      __ostr << "closure0\n";
      break;
    case _S_token_closure1:
      __ostr << "closure1\n";
      break;
    case _S_token_collsymbol:
      __ostr << "collsymbol \"" << _M_value << "\"\n";
      break;
    case _S_token_comma:
      __ostr << "comma\n";
      break;
    case _S_token_dup_count:
      __ostr << "dup count: " << _M_value << "\n";
      break;
    case _S_token_eof:
      __ostr << "EOF\n";
      break;
    case _S_token_equiv_class_name:
      __ostr << "equiv-class-name \"" << _M_value << "\"\n";
      break;
    case _S_token_interval_begin:
      __ostr << "interval begin\n";
      break;
    case _S_token_interval_end:
      __ostr << "interval end\n";
      break;
    case _S_token_line_begin:
      __ostr << "line begin\n";
      break;
    case _S_token_line_end:
      __ostr << "line end\n";
      break;
    case _S_token_opt:
      __ostr << "opt\n";
      break;
    case _S_token_or:
      __ostr << "or\n";
      break;
    case _S_token_ord_char:
      __ostr << "ordinary character: \"" << _M_value << "\"\n";
      break;
    case _S_token_subexpr_begin:
      __ostr << "subexpr begin\n";
      break;
    case _S_token_subexpr_no_group_begin:
      __ostr << "no grouping subexpr begin\n";
      break;
    case _S_token_subexpr_lookahead_begin:
      __ostr << "lookahead subexpr begin\n";
      break;
    case _S_token_subexpr_end:
      __ostr << "subexpr end\n";
      break;
    case _S_token_unknown:
      __ostr << "-- unknown token --\n";
      break;
    case _S_token_oct_num:
      __ostr << "oct number " << _M_value << "\n";
      break;
    case _S_token_hex_num:
      __ostr << "hex number " << _M_value << "\n";
      break;
    case _S_token_quoted_class:
      __ostr << "quoted class " << "\\" << _M_value << "\n";
      break;
    case _S_token_word_bound:
      __ostr << "word bound\n";
      break;
    default:
      _GLIBCXX_DEBUG_ASSERT(false);
    }
    return __ostr;
  }
#endif
5814d5abbe8Smrg 
5828b6133e5Smrg } // namespace __detail
583a3e9eb18Smrg _GLIBCXX_END_NAMESPACE_VERSION
5844d5abbe8Smrg } // namespace
585