xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_scanner.tcc (revision 23f5f46327e37e7811da3520f4bb933f9489322f)
11debfc3dSmrg // class template regex -*- C++ -*-
21debfc3dSmrg 
38feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc.
41debfc3dSmrg //
51debfc3dSmrg // This file is part of the GNU ISO C++ Library.  This library is free
61debfc3dSmrg // software; you can redistribute it and/or modify it under the
71debfc3dSmrg // terms of the GNU General Public License as published by the
81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option)
91debfc3dSmrg // any later version.
101debfc3dSmrg 
111debfc3dSmrg // This library is distributed in the hope that it will be useful,
121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
141debfc3dSmrg // GNU General Public License for more details.
151debfc3dSmrg 
161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional
171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version
181debfc3dSmrg // 3.1, as published by the Free Software Foundation.
191debfc3dSmrg 
201debfc3dSmrg // You should have received a copy of the GNU General Public License and
211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program;
221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
231debfc3dSmrg // <http://www.gnu.org/licenses/>.
241debfc3dSmrg 
251debfc3dSmrg /**
261debfc3dSmrg  *  @file bits/regex_scanner.tcc
271debfc3dSmrg  *  This is an internal header file, included by other library headers.
281debfc3dSmrg  *  Do not attempt to use it directly. @headername{regex}
291debfc3dSmrg  */
301debfc3dSmrg 
311debfc3dSmrg // FIXME make comments doxygen format.
321debfc3dSmrg 
// N3376 specifies six regex styles: ECMAScript, basic, extended, grep,
// egrep and awk.
// 1) grep is basic, except that '\n' is treated as '|'.
// 2) egrep is extended, except that '\n' is treated as '|'.
// 3) awk is extended, except that it has special escaping rules and no
//    back-references.
391debfc3dSmrg //
401debfc3dSmrg // References:
411debfc3dSmrg //
421debfc3dSmrg // ECMAScript: ECMA-262 15.10
431debfc3dSmrg //
441debfc3dSmrg // basic, extended:
451debfc3dSmrg // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
461debfc3dSmrg //
471debfc3dSmrg // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html
481debfc3dSmrg 
491debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
501debfc3dSmrg {
511debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
521debfc3dSmrg 
53a2dc1f3fSmrg namespace __detail
54a2dc1f3fSmrg {
551debfc3dSmrg   template<typename _CharT>
561debfc3dSmrg     _Scanner<_CharT>::
_Scanner(typename _Scanner::_IterT __begin,typename _Scanner::_IterT __end,_FlagT __flags,std::locale __loc)571debfc3dSmrg     _Scanner(typename _Scanner::_IterT __begin,
581debfc3dSmrg 	     typename _Scanner::_IterT __end,
591debfc3dSmrg 	     _FlagT __flags, std::locale __loc)
601debfc3dSmrg     : _ScannerBase(__flags),
611debfc3dSmrg       _M_current(__begin), _M_end(__end),
621debfc3dSmrg       _M_ctype(std::use_facet<_CtypeT>(__loc)),
631debfc3dSmrg       _M_eat_escape(_M_is_ecma()
641debfc3dSmrg 		    ? &_Scanner::_M_eat_escape_ecma
651debfc3dSmrg 		    : &_Scanner::_M_eat_escape_posix)
661debfc3dSmrg     { _M_advance(); }
671debfc3dSmrg 
681debfc3dSmrg   template<typename _CharT>
691debfc3dSmrg     void
701debfc3dSmrg     _Scanner<_CharT>::
_M_advance()711debfc3dSmrg     _M_advance()
721debfc3dSmrg     {
731debfc3dSmrg       if (_M_current == _M_end)
741debfc3dSmrg 	{
751debfc3dSmrg 	  _M_token = _S_token_eof;
761debfc3dSmrg 	  return;
771debfc3dSmrg 	}
781debfc3dSmrg 
791debfc3dSmrg       if (_M_state == _S_state_normal)
801debfc3dSmrg 	_M_scan_normal();
811debfc3dSmrg       else if (_M_state == _S_state_in_bracket)
821debfc3dSmrg 	_M_scan_in_bracket();
831debfc3dSmrg       else if (_M_state == _S_state_in_brace)
841debfc3dSmrg 	_M_scan_in_brace();
851debfc3dSmrg       else
861debfc3dSmrg 	{
871debfc3dSmrg 	  __glibcxx_assert(false);
881debfc3dSmrg 	}
891debfc3dSmrg     }
901debfc3dSmrg 
911debfc3dSmrg   // Differences between styles:
921debfc3dSmrg   // 1) "\(", "\)", "\{" in basic. It's not escaping.
931debfc3dSmrg   // 2) "(?:", "(?=", "(?!" in ECMAScript.
941debfc3dSmrg   template<typename _CharT>
951debfc3dSmrg     void
961debfc3dSmrg     _Scanner<_CharT>::
_M_scan_normal()971debfc3dSmrg     _M_scan_normal()
981debfc3dSmrg     {
991debfc3dSmrg       auto __c = *_M_current++;
1001debfc3dSmrg 
1011debfc3dSmrg       if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
1021debfc3dSmrg 	{
1031debfc3dSmrg 	  _M_token = _S_token_ord_char;
1041debfc3dSmrg 	  _M_value.assign(1, __c);
1051debfc3dSmrg 	  return;
1061debfc3dSmrg 	}
1071debfc3dSmrg       if (__c == '\\')
1081debfc3dSmrg 	{
1091debfc3dSmrg 	  if (_M_current == _M_end)
1101debfc3dSmrg 	    __throw_regex_error(
1111debfc3dSmrg 	      regex_constants::error_escape,
1121debfc3dSmrg 	      "Unexpected end of regex when escaping.");
1131debfc3dSmrg 
1141debfc3dSmrg 	  if (!_M_is_basic()
1151debfc3dSmrg 	      || (*_M_current != '('
1161debfc3dSmrg 		  && *_M_current != ')'
1171debfc3dSmrg 		  && *_M_current != '{'))
1181debfc3dSmrg 	    {
1191debfc3dSmrg 	      (this->*_M_eat_escape)();
1201debfc3dSmrg 	      return;
1211debfc3dSmrg 	    }
1221debfc3dSmrg 	  __c = *_M_current++;
1231debfc3dSmrg 	}
1241debfc3dSmrg       if (__c == '(')
1251debfc3dSmrg 	{
1261debfc3dSmrg 	  if (_M_is_ecma() && *_M_current == '?')
1271debfc3dSmrg 	    {
1281debfc3dSmrg 	      if (++_M_current == _M_end)
1291debfc3dSmrg 		__throw_regex_error(
1301debfc3dSmrg 		  regex_constants::error_paren,
1311debfc3dSmrg 		  "Unexpected end of regex when in an open parenthesis.");
1321debfc3dSmrg 
1331debfc3dSmrg 	      if (*_M_current == ':')
1341debfc3dSmrg 		{
1351debfc3dSmrg 		  ++_M_current;
1361debfc3dSmrg 		  _M_token = _S_token_subexpr_no_group_begin;
1371debfc3dSmrg 		}
1381debfc3dSmrg 	      else if (*_M_current == '=')
1391debfc3dSmrg 		{
1401debfc3dSmrg 		  ++_M_current;
1411debfc3dSmrg 		  _M_token = _S_token_subexpr_lookahead_begin;
1421debfc3dSmrg 		  _M_value.assign(1, 'p');
1431debfc3dSmrg 		}
1441debfc3dSmrg 	      else if (*_M_current == '!')
1451debfc3dSmrg 		{
1461debfc3dSmrg 		  ++_M_current;
1471debfc3dSmrg 		  _M_token = _S_token_subexpr_lookahead_begin;
1481debfc3dSmrg 		  _M_value.assign(1, 'n');
1491debfc3dSmrg 		}
1501debfc3dSmrg 	      else
1511debfc3dSmrg 		__throw_regex_error(
1521debfc3dSmrg 		  regex_constants::error_paren,
1531debfc3dSmrg 		  "Invalid special open parenthesis.");
1541debfc3dSmrg 	    }
1551debfc3dSmrg 	  else if (_M_flags & regex_constants::nosubs)
1561debfc3dSmrg 	    _M_token = _S_token_subexpr_no_group_begin;
1571debfc3dSmrg 	  else
1581debfc3dSmrg 	    _M_token = _S_token_subexpr_begin;
1591debfc3dSmrg 	}
1601debfc3dSmrg       else if (__c == ')')
1611debfc3dSmrg 	_M_token = _S_token_subexpr_end;
1621debfc3dSmrg       else if (__c == '[')
1631debfc3dSmrg 	{
1641debfc3dSmrg 	  _M_state = _S_state_in_bracket;
1651debfc3dSmrg 	  _M_at_bracket_start = true;
1661debfc3dSmrg 	  if (_M_current != _M_end && *_M_current == '^')
1671debfc3dSmrg 	    {
1681debfc3dSmrg 	      _M_token = _S_token_bracket_neg_begin;
1691debfc3dSmrg 	      ++_M_current;
1701debfc3dSmrg 	    }
1711debfc3dSmrg 	  else
1721debfc3dSmrg 	    _M_token = _S_token_bracket_begin;
1731debfc3dSmrg 	}
1741debfc3dSmrg       else if (__c == '{')
1751debfc3dSmrg 	{
1761debfc3dSmrg 	  _M_state = _S_state_in_brace;
1771debfc3dSmrg 	  _M_token = _S_token_interval_begin;
1781debfc3dSmrg 	}
179*23f5f463Smrg       else if (__builtin_expect(__c == _CharT(0), false))
180*23f5f463Smrg 	{
181*23f5f463Smrg 	  if (!_M_is_ecma())
182*23f5f463Smrg 	    {
183*23f5f463Smrg 	      __throw_regex_error(regex_constants::_S_null,
184*23f5f463Smrg 		  "Unexpected null character in regular expression");
185*23f5f463Smrg 	    }
186*23f5f463Smrg 	  _M_token = _S_token_ord_char;
187*23f5f463Smrg 	  _M_value.assign(1, __c);
188*23f5f463Smrg 	}
1891debfc3dSmrg       else if (__c != ']' && __c != '}')
1901debfc3dSmrg 	{
1911debfc3dSmrg 	  auto __it = _M_token_tbl;
1921debfc3dSmrg 	  auto __narrowc = _M_ctype.narrow(__c, '\0');
1931debfc3dSmrg 	  for (; __it->first != '\0'; ++__it)
1941debfc3dSmrg 	    if (__it->first == __narrowc)
1951debfc3dSmrg 	      {
1961debfc3dSmrg 		_M_token = __it->second;
1971debfc3dSmrg 		return;
1981debfc3dSmrg 	      }
1991debfc3dSmrg 	  __glibcxx_assert(false);
2001debfc3dSmrg 	}
2011debfc3dSmrg       else
2021debfc3dSmrg 	{
2031debfc3dSmrg 	  _M_token = _S_token_ord_char;
2041debfc3dSmrg 	  _M_value.assign(1, __c);
2051debfc3dSmrg 	}
2061debfc3dSmrg     }
2071debfc3dSmrg 
2081debfc3dSmrg   // Differences between styles:
2091debfc3dSmrg   // 1) different semantics of "[]" and "[^]".
2101debfc3dSmrg   // 2) Escaping in bracket expr.
2111debfc3dSmrg   template<typename _CharT>
2121debfc3dSmrg     void
2131debfc3dSmrg     _Scanner<_CharT>::
_M_scan_in_bracket()2141debfc3dSmrg     _M_scan_in_bracket()
2151debfc3dSmrg     {
2161debfc3dSmrg       if (_M_current == _M_end)
2171debfc3dSmrg 	__throw_regex_error(
2181debfc3dSmrg 	  regex_constants::error_brack,
2191debfc3dSmrg 	  "Unexpected end of regex when in bracket expression.");
2201debfc3dSmrg 
2211debfc3dSmrg       auto __c = *_M_current++;
2221debfc3dSmrg 
2231debfc3dSmrg       if (__c == '-')
2241debfc3dSmrg 	_M_token = _S_token_bracket_dash;
2251debfc3dSmrg       else if (__c == '[')
2261debfc3dSmrg 	{
2271debfc3dSmrg 	  if (_M_current == _M_end)
2281debfc3dSmrg 	    __throw_regex_error(regex_constants::error_brack,
2291debfc3dSmrg 				"Unexpected character class open bracket.");
2301debfc3dSmrg 
2311debfc3dSmrg 	  if (*_M_current == '.')
2321debfc3dSmrg 	    {
2331debfc3dSmrg 	      _M_token = _S_token_collsymbol;
2341debfc3dSmrg 	      _M_eat_class(*_M_current++);
2351debfc3dSmrg 	    }
2361debfc3dSmrg 	  else if (*_M_current == ':')
2371debfc3dSmrg 	    {
2381debfc3dSmrg 	      _M_token = _S_token_char_class_name;
2391debfc3dSmrg 	      _M_eat_class(*_M_current++);
2401debfc3dSmrg 	    }
2411debfc3dSmrg 	  else if (*_M_current == '=')
2421debfc3dSmrg 	    {
2431debfc3dSmrg 	      _M_token = _S_token_equiv_class_name;
2441debfc3dSmrg 	      _M_eat_class(*_M_current++);
2451debfc3dSmrg 	    }
2461debfc3dSmrg 	  else
2471debfc3dSmrg 	    {
2481debfc3dSmrg 	      _M_token = _S_token_ord_char;
2491debfc3dSmrg 	      _M_value.assign(1, __c);
2501debfc3dSmrg 	    }
2511debfc3dSmrg 	}
2521debfc3dSmrg       // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
2531debfc3dSmrg       // literally. So "[]]" and "[^]]" are valid regexes. See the testcases
2541debfc3dSmrg       // `*/empty_range.cc`.
2551debfc3dSmrg       else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
2561debfc3dSmrg 	{
2571debfc3dSmrg 	  _M_token = _S_token_bracket_end;
2581debfc3dSmrg 	  _M_state = _S_state_normal;
2591debfc3dSmrg 	}
2601debfc3dSmrg       // ECMAScript and awk permits escaping in bracket.
2611debfc3dSmrg       else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
2621debfc3dSmrg 	(this->*_M_eat_escape)();
2631debfc3dSmrg       else
2641debfc3dSmrg 	{
2651debfc3dSmrg 	  _M_token = _S_token_ord_char;
2661debfc3dSmrg 	  _M_value.assign(1, __c);
2671debfc3dSmrg 	}
2681debfc3dSmrg       _M_at_bracket_start = false;
2691debfc3dSmrg     }
2701debfc3dSmrg 
2711debfc3dSmrg   // Differences between styles:
2721debfc3dSmrg   // 1) "\}" in basic style.
2731debfc3dSmrg   template<typename _CharT>
2741debfc3dSmrg     void
2751debfc3dSmrg     _Scanner<_CharT>::
_M_scan_in_brace()2761debfc3dSmrg     _M_scan_in_brace()
2771debfc3dSmrg     {
2781debfc3dSmrg       if (_M_current == _M_end)
2791debfc3dSmrg 	__throw_regex_error(
2801debfc3dSmrg 	  regex_constants::error_brace,
2811debfc3dSmrg 	  "Unexpected end of regex when in brace expression.");
2821debfc3dSmrg 
2831debfc3dSmrg       auto __c = *_M_current++;
2841debfc3dSmrg 
2851debfc3dSmrg       if (_M_ctype.is(_CtypeT::digit, __c))
2861debfc3dSmrg 	{
2871debfc3dSmrg 	  _M_token = _S_token_dup_count;
2881debfc3dSmrg 	  _M_value.assign(1, __c);
2891debfc3dSmrg 	  while (_M_current != _M_end
2901debfc3dSmrg 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
2911debfc3dSmrg 	    _M_value += *_M_current++;
2921debfc3dSmrg 	}
2931debfc3dSmrg       else if (__c == ',')
2941debfc3dSmrg 	_M_token = _S_token_comma;
2951debfc3dSmrg       // basic use \}.
2961debfc3dSmrg       else if (_M_is_basic())
2971debfc3dSmrg 	{
2981debfc3dSmrg 	  if (__c == '\\' && _M_current != _M_end && *_M_current == '}')
2991debfc3dSmrg 	    {
3001debfc3dSmrg 	      _M_state = _S_state_normal;
3011debfc3dSmrg 	      _M_token = _S_token_interval_end;
3021debfc3dSmrg 	      ++_M_current;
3031debfc3dSmrg 	    }
3041debfc3dSmrg 	  else
3051debfc3dSmrg 	    __throw_regex_error(regex_constants::error_badbrace,
3061debfc3dSmrg 				"Unexpected character in brace expression.");
3071debfc3dSmrg 	}
3081debfc3dSmrg       else if (__c == '}')
3091debfc3dSmrg 	{
3101debfc3dSmrg 	  _M_state = _S_state_normal;
3111debfc3dSmrg 	  _M_token = _S_token_interval_end;
3121debfc3dSmrg 	}
3131debfc3dSmrg       else
3141debfc3dSmrg 	__throw_regex_error(regex_constants::error_badbrace,
3151debfc3dSmrg 			    "Unexpected character in brace expression.");
3161debfc3dSmrg     }
3171debfc3dSmrg 
3181debfc3dSmrg   template<typename _CharT>
3191debfc3dSmrg     void
3201debfc3dSmrg     _Scanner<_CharT>::
_M_eat_escape_ecma()3211debfc3dSmrg     _M_eat_escape_ecma()
3221debfc3dSmrg     {
3231debfc3dSmrg       if (_M_current == _M_end)
3241debfc3dSmrg 	__throw_regex_error(regex_constants::error_escape,
3251debfc3dSmrg 			    "Unexpected end of regex when escaping.");
3261debfc3dSmrg 
3271debfc3dSmrg       auto __c = *_M_current++;
3281debfc3dSmrg       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
3291debfc3dSmrg 
3301debfc3dSmrg       if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
3311debfc3dSmrg 	{
3321debfc3dSmrg 	  _M_token = _S_token_ord_char;
3331debfc3dSmrg 	  _M_value.assign(1, *__pos);
3341debfc3dSmrg 	}
3351debfc3dSmrg       else if (__c == 'b')
3361debfc3dSmrg 	{
3371debfc3dSmrg 	  _M_token = _S_token_word_bound;
3381debfc3dSmrg 	  _M_value.assign(1, 'p');
3391debfc3dSmrg 	}
3401debfc3dSmrg       else if (__c == 'B')
3411debfc3dSmrg 	{
3421debfc3dSmrg 	  _M_token = _S_token_word_bound;
3431debfc3dSmrg 	  _M_value.assign(1, 'n');
3441debfc3dSmrg 	}
3451debfc3dSmrg       // N3376 28.13
3461debfc3dSmrg       else if (__c == 'd'
3471debfc3dSmrg 	       || __c == 'D'
3481debfc3dSmrg 	       || __c == 's'
3491debfc3dSmrg 	       || __c == 'S'
3501debfc3dSmrg 	       || __c == 'w'
3511debfc3dSmrg 	       || __c == 'W')
3521debfc3dSmrg 	{
3531debfc3dSmrg 	  _M_token = _S_token_quoted_class;
3541debfc3dSmrg 	  _M_value.assign(1, __c);
3551debfc3dSmrg 	}
3561debfc3dSmrg       else if (__c == 'c')
3571debfc3dSmrg 	{
3581debfc3dSmrg 	  if (_M_current == _M_end)
3591debfc3dSmrg 	    __throw_regex_error(
3601debfc3dSmrg 	      regex_constants::error_escape,
3611debfc3dSmrg 	      "Unexpected end of regex when reading control code.");
3621debfc3dSmrg 	  _M_token = _S_token_ord_char;
3631debfc3dSmrg 	  _M_value.assign(1, *_M_current++);
3641debfc3dSmrg 	}
3651debfc3dSmrg       else if (__c == 'x' || __c == 'u')
3661debfc3dSmrg 	{
3671debfc3dSmrg 	  _M_value.erase();
3681debfc3dSmrg 	  for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
3691debfc3dSmrg 	    {
3701debfc3dSmrg 	      if (_M_current == _M_end
3711debfc3dSmrg 		  || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
3721debfc3dSmrg 		__throw_regex_error(
3731debfc3dSmrg 		  regex_constants::error_escape,
3741debfc3dSmrg 		  "Unexpected end of regex when ascii character.");
3751debfc3dSmrg 	      _M_value += *_M_current++;
3761debfc3dSmrg 	    }
3771debfc3dSmrg 	  _M_token = _S_token_hex_num;
3781debfc3dSmrg 	}
3791debfc3dSmrg       // ECMAScript recognizes multi-digit back-references.
3801debfc3dSmrg       else if (_M_ctype.is(_CtypeT::digit, __c))
3811debfc3dSmrg 	{
3821debfc3dSmrg 	  _M_value.assign(1, __c);
3831debfc3dSmrg 	  while (_M_current != _M_end
3841debfc3dSmrg 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
3851debfc3dSmrg 	    _M_value += *_M_current++;
3861debfc3dSmrg 	  _M_token = _S_token_backref;
3871debfc3dSmrg 	}
3881debfc3dSmrg       else
3891debfc3dSmrg 	{
3901debfc3dSmrg 	  _M_token = _S_token_ord_char;
3911debfc3dSmrg 	  _M_value.assign(1, __c);
3921debfc3dSmrg 	}
3931debfc3dSmrg     }
3941debfc3dSmrg 
3951debfc3dSmrg   // Differences between styles:
3961debfc3dSmrg   // 1) Extended doesn't support backref, but basic does.
3971debfc3dSmrg   template<typename _CharT>
3981debfc3dSmrg     void
3991debfc3dSmrg     _Scanner<_CharT>::
_M_eat_escape_posix()4001debfc3dSmrg     _M_eat_escape_posix()
4011debfc3dSmrg     {
4021debfc3dSmrg       if (_M_current == _M_end)
4031debfc3dSmrg 	__throw_regex_error(regex_constants::error_escape,
4041debfc3dSmrg 			    "Unexpected end of regex when escaping.");
4051debfc3dSmrg 
4061debfc3dSmrg       auto __c = *_M_current;
4071debfc3dSmrg       auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
4081debfc3dSmrg 
4091debfc3dSmrg       if (__pos != nullptr && *__pos != '\0')
4101debfc3dSmrg 	{
4111debfc3dSmrg 	  _M_token = _S_token_ord_char;
4121debfc3dSmrg 	  _M_value.assign(1, __c);
4131debfc3dSmrg 	}
4141debfc3dSmrg       // We MUST judge awk before handling backrefs. There's no backref in awk.
4151debfc3dSmrg       else if (_M_is_awk())
4161debfc3dSmrg 	{
4171debfc3dSmrg 	  _M_eat_escape_awk();
4181debfc3dSmrg 	  return;
4191debfc3dSmrg 	}
4201debfc3dSmrg       else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0')
4211debfc3dSmrg 	{
4221debfc3dSmrg 	  _M_token = _S_token_backref;
4231debfc3dSmrg 	  _M_value.assign(1, __c);
4241debfc3dSmrg 	}
4251debfc3dSmrg       else
4261debfc3dSmrg 	{
4271debfc3dSmrg #ifdef __STRICT_ANSI__
4281debfc3dSmrg 	  // POSIX says it is undefined to escape ordinary characters
4291debfc3dSmrg 	  __throw_regex_error(regex_constants::error_escape,
4301debfc3dSmrg 			      "Unexpected escape character.");
4311debfc3dSmrg #else
4321debfc3dSmrg 	  _M_token = _S_token_ord_char;
4331debfc3dSmrg 	  _M_value.assign(1, __c);
4341debfc3dSmrg #endif
4351debfc3dSmrg 	}
4361debfc3dSmrg       ++_M_current;
4371debfc3dSmrg     }
4381debfc3dSmrg 
4391debfc3dSmrg   template<typename _CharT>
4401debfc3dSmrg     void
4411debfc3dSmrg     _Scanner<_CharT>::
_M_eat_escape_awk()4421debfc3dSmrg     _M_eat_escape_awk()
4431debfc3dSmrg     {
4441debfc3dSmrg       auto __c = *_M_current++;
4451debfc3dSmrg       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
4461debfc3dSmrg 
4471debfc3dSmrg       if (__pos != nullptr)
4481debfc3dSmrg 	{
4491debfc3dSmrg 	  _M_token = _S_token_ord_char;
4501debfc3dSmrg 	  _M_value.assign(1, *__pos);
4511debfc3dSmrg 	}
4521debfc3dSmrg       // \ddd for oct representation
4531debfc3dSmrg       else if (_M_ctype.is(_CtypeT::digit, __c)
4541debfc3dSmrg 	       && __c != '8'
4551debfc3dSmrg 	       && __c != '9')
4561debfc3dSmrg 	{
4571debfc3dSmrg 	  _M_value.assign(1,  __c);
4581debfc3dSmrg 	  for (int __i = 0;
4591debfc3dSmrg 	       __i < 2
4601debfc3dSmrg 	       && _M_current != _M_end
4611debfc3dSmrg 	       && _M_ctype.is(_CtypeT::digit, *_M_current)
4621debfc3dSmrg 	       && *_M_current != '8'
4631debfc3dSmrg 	       && *_M_current != '9';
4641debfc3dSmrg 	       __i++)
4651debfc3dSmrg 	    _M_value += *_M_current++;
4661debfc3dSmrg 	  _M_token = _S_token_oct_num;
4671debfc3dSmrg 	  return;
4681debfc3dSmrg 	}
4691debfc3dSmrg       else
4701debfc3dSmrg 	__throw_regex_error(regex_constants::error_escape,
4711debfc3dSmrg 			    "Unexpected escape character.");
4721debfc3dSmrg     }
4731debfc3dSmrg 
4741debfc3dSmrg   // Eats a character class or throws an exception.
4751debfc3dSmrg   // __ch could be ':', '.' or '=', _M_current is the char after ']' when
4761debfc3dSmrg   // returning.
4771debfc3dSmrg   template<typename _CharT>
4781debfc3dSmrg     void
4791debfc3dSmrg     _Scanner<_CharT>::
_M_eat_class(char __ch)4801debfc3dSmrg     _M_eat_class(char __ch)
4811debfc3dSmrg     {
4821debfc3dSmrg       for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
4831debfc3dSmrg 	_M_value += *_M_current++;
4841debfc3dSmrg       if (_M_current == _M_end
4851debfc3dSmrg 	  || *_M_current++ != __ch
4861debfc3dSmrg 	  || _M_current == _M_end // skip __ch
4871debfc3dSmrg 	  || *_M_current++ != ']') // skip ']'
4881debfc3dSmrg 	{
4891debfc3dSmrg 	  if (__ch == ':')
4901debfc3dSmrg 	    __throw_regex_error(regex_constants::error_ctype,
4911debfc3dSmrg 				"Unexpected end of character class.");
4921debfc3dSmrg 	  else
4931debfc3dSmrg 	    __throw_regex_error(regex_constants::error_collate,
4941debfc3dSmrg 				"Unexpected end of character class.");
4951debfc3dSmrg 	}
4961debfc3dSmrg     }
4971debfc3dSmrg 
#ifdef _GLIBCXX_DEBUG
// Debug helper: writes a one-line description of the current token to ostr
// and returns ostr.  Tokens that carry text also print _M_value.
//
// Every token this scanner can produce has a case, including
// _S_token_bracket_dash and _S_token_word_bound, so the assertion in the
// default branch is reached only for a value that is not a known token.
template<typename _CharT>
  std::ostream&
  _Scanner<_CharT>::
  _M_print(std::ostream& ostr)
  {
    switch (_M_token)
    {
    case _S_token_anychar:
      ostr << "any-character\n";
      break;
    case _S_token_backref:
      ostr << "backref\n";
      break;
    case _S_token_bracket_begin:
      ostr << "bracket-begin\n";
      break;
    case _S_token_bracket_neg_begin:
      ostr << "bracket-neg-begin\n";
      break;
    case _S_token_bracket_end:
      ostr << "bracket-end\n";
      break;
    case _S_token_bracket_dash:
      ostr << "bracket-dash\n";
      break;
    case _S_token_char_class_name:
      ostr << "char-class-name \"" << _M_value << "\"\n";
      break;
    case _S_token_closure0:
      ostr << "closure0\n";
      break;
    case _S_token_closure1:
      ostr << "closure1\n";
      break;
    case _S_token_collsymbol:
      ostr << "collsymbol \"" << _M_value << "\"\n";
      break;
    case _S_token_comma:
      ostr << "comma\n";
      break;
    case _S_token_dup_count:
      ostr << "dup count: " << _M_value << "\n";
      break;
    case _S_token_eof:
      ostr << "EOF\n";
      break;
    case _S_token_equiv_class_name:
      ostr << "equiv-class-name \"" << _M_value << "\"\n";
      break;
    case _S_token_interval_begin:
      ostr << "interval begin\n";
      break;
    case _S_token_interval_end:
      ostr << "interval end\n";
      break;
    case _S_token_line_begin:
      ostr << "line begin\n";
      break;
    case _S_token_line_end:
      ostr << "line end\n";
      break;
    case _S_token_opt:
      ostr << "opt\n";
      break;
    case _S_token_or:
      ostr << "or\n";
      break;
    case _S_token_ord_char:
      ostr << "ordinary character: \"" << _M_value << "\"\n";
      break;
    case _S_token_subexpr_begin:
      ostr << "subexpr begin\n";
      break;
    case _S_token_subexpr_no_group_begin:
      ostr << "no grouping subexpr begin\n";
      break;
    case _S_token_subexpr_lookahead_begin:
      ostr << "lookahead subexpr begin\n";
      break;
    case _S_token_subexpr_end:
      ostr << "subexpr end\n";
      break;
    case _S_token_unknown:
      ostr << "-- unknown token --\n";
      break;
    case _S_token_oct_num:
      ostr << "oct number " << _M_value << "\n";
      break;
    case _S_token_hex_num:
      ostr << "hex number " << _M_value << "\n";
      break;
    case _S_token_quoted_class:
      ostr << "quoted class " << "\\" << _M_value << "\n";
      break;
    case _S_token_word_bound:
      ostr << "word bound\n";
      break;
    default:
      _GLIBCXX_DEBUG_ASSERT(false);
    }
    return ostr;
  }
#endif
5961debfc3dSmrg 
5971debfc3dSmrg } // namespace __detail
598a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION
5991debfc3dSmrg } // namespace
600