xref: /dflybsd-src/contrib/gcc-8.0/libstdc++-v3/include/bits/regex_compiler.tcc (revision 95059079af47f9a66a175f374f2da1a5020e3255)
138fd1498Szrj // class template regex -*- C++ -*-
238fd1498Szrj 
338fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc.
438fd1498Szrj //
538fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
638fd1498Szrj // software; you can redistribute it and/or modify it under the
738fd1498Szrj // terms of the GNU General Public License as published by the
838fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
938fd1498Szrj // any later version.
1038fd1498Szrj 
1138fd1498Szrj // This library is distributed in the hope that it will be useful,
1238fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
1338fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1438fd1498Szrj // GNU General Public License for more details.
1538fd1498Szrj 
1638fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
1738fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
1838fd1498Szrj // 3.1, as published by the Free Software Foundation.
1938fd1498Szrj 
2038fd1498Szrj // You should have received a copy of the GNU General Public License and
2138fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
2238fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2338fd1498Szrj // <http://www.gnu.org/licenses/>.
2438fd1498Szrj 
2538fd1498Szrj /**
2638fd1498Szrj  *  @file bits/regex_compiler.tcc
2738fd1498Szrj  *  This is an internal header file, included by other library headers.
2838fd1498Szrj  *  Do not attempt to use it directly. @headername{regex}
2938fd1498Szrj  */
3038fd1498Szrj 
3138fd1498Szrj // FIXME make comments doxygen format.
3238fd1498Szrj 
3338fd1498Szrj /*
3438fd1498Szrj // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
3538fd1498Szrj // (http://swtch.com/~rsc/regexp/regexp1.html),
3638fd1498Szrj // but doesn't strictly follow it.
3738fd1498Szrj //
3838fd1498Szrj // When compiling, states are *chained* instead of tree- or graph-constructed.
3938fd1498Szrj // It's more like structured programs: there's if statement and loop statement.
4038fd1498Szrj //
4138fd1498Szrj // For alternative structure (say "a|b"), aka "if statement", two branches
4238fd1498Szrj // should be constructed. However, these two shall merge to an "end_tag" at
4338fd1498Szrj // the end of this operator:
4438fd1498Szrj //
4538fd1498Szrj //                branch1
4638fd1498Szrj //              /        \
4738fd1498Szrj // => begin_tag            end_tag =>
4838fd1498Szrj //              \        /
4938fd1498Szrj //                branch2
5038fd1498Szrj //
5138fd1498Szrj // This is the difference between this implementation and that in Russ's
5238fd1498Szrj // article.
5338fd1498Szrj //
5438fd1498Szrj // That's why we introduced dummy node here ------ "end_tag" is a dummy node.
5538fd1498Szrj // All dummy nodes will be eliminated at the end of compilation.
5638fd1498Szrj */
5738fd1498Szrj 
5838fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
5938fd1498Szrj {
6038fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
6138fd1498Szrj 
6238fd1498Szrj namespace __detail
6338fd1498Szrj {
6438fd1498Szrj   template<typename _TraitsT>
6538fd1498Szrj     _Compiler<_TraitsT>::
_Compiler(_IterT __b,_IterT __e,const typename _TraitsT::locale_type & __loc,_FlagT __flags)6638fd1498Szrj     _Compiler(_IterT __b, _IterT __e,
6738fd1498Szrj 	      const typename _TraitsT::locale_type& __loc, _FlagT __flags)
6838fd1498Szrj     : _M_flags((__flags
6938fd1498Szrj 		& (regex_constants::ECMAScript
7038fd1498Szrj 		   | regex_constants::basic
7138fd1498Szrj 		   | regex_constants::extended
7238fd1498Szrj 		   | regex_constants::grep
7338fd1498Szrj 		   | regex_constants::egrep
7438fd1498Szrj 		   | regex_constants::awk))
7538fd1498Szrj 	       ? __flags
7638fd1498Szrj 	       : __flags | regex_constants::ECMAScript),
7738fd1498Szrj       _M_scanner(__b, __e, _M_flags, __loc),
7838fd1498Szrj       _M_nfa(make_shared<_RegexT>(__loc, _M_flags)),
7938fd1498Szrj       _M_traits(_M_nfa->_M_traits),
8038fd1498Szrj       _M_ctype(std::use_facet<_CtypeT>(__loc))
8138fd1498Szrj     {
8238fd1498Szrj       _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
8338fd1498Szrj       __r._M_append(_M_nfa->_M_insert_subexpr_begin());
8438fd1498Szrj       this->_M_disjunction();
8538fd1498Szrj       if (!_M_match_token(_ScannerT::_S_token_eof))
8638fd1498Szrj 	__throw_regex_error(regex_constants::error_paren);
8738fd1498Szrj       __r._M_append(_M_pop());
8838fd1498Szrj       __glibcxx_assert(_M_stack.empty());
8938fd1498Szrj       __r._M_append(_M_nfa->_M_insert_subexpr_end());
9038fd1498Szrj       __r._M_append(_M_nfa->_M_insert_accept());
9138fd1498Szrj       _M_nfa->_M_eliminate_dummy();
9238fd1498Szrj     }
9338fd1498Szrj 
9438fd1498Szrj   template<typename _TraitsT>
9538fd1498Szrj     void
9638fd1498Szrj     _Compiler<_TraitsT>::
_M_disjunction()9738fd1498Szrj     _M_disjunction()
9838fd1498Szrj     {
9938fd1498Szrj       this->_M_alternative();
10038fd1498Szrj       while (_M_match_token(_ScannerT::_S_token_or))
10138fd1498Szrj 	{
10238fd1498Szrj 	  _StateSeqT __alt1 = _M_pop();
10338fd1498Szrj 	  this->_M_alternative();
10438fd1498Szrj 	  _StateSeqT __alt2 = _M_pop();
10538fd1498Szrj 	  auto __end = _M_nfa->_M_insert_dummy();
10638fd1498Szrj 	  __alt1._M_append(__end);
10738fd1498Szrj 	  __alt2._M_append(__end);
10838fd1498Szrj 	  // __alt2 is state._M_next, __alt1 is state._M_alt. The executor
10938fd1498Szrj 	  // executes _M_alt before _M_next, as well as executing left
11038fd1498Szrj 	  // alternative before right one.
11138fd1498Szrj 	  _M_stack.push(_StateSeqT(*_M_nfa,
11238fd1498Szrj 				   _M_nfa->_M_insert_alt(
11338fd1498Szrj 				     __alt2._M_start, __alt1._M_start, false),
11438fd1498Szrj 				   __end));
11538fd1498Szrj 	}
11638fd1498Szrj     }
11738fd1498Szrj 
11838fd1498Szrj   template<typename _TraitsT>
11938fd1498Szrj     void
12038fd1498Szrj     _Compiler<_TraitsT>::
_M_alternative()12138fd1498Szrj     _M_alternative()
12238fd1498Szrj     {
12338fd1498Szrj       if (this->_M_term())
12438fd1498Szrj 	{
12538fd1498Szrj 	  _StateSeqT __re = _M_pop();
12638fd1498Szrj 	  this->_M_alternative();
12738fd1498Szrj 	  __re._M_append(_M_pop());
12838fd1498Szrj 	  _M_stack.push(__re);
12938fd1498Szrj 	}
13038fd1498Szrj       else
13138fd1498Szrj 	_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_dummy()));
13238fd1498Szrj     }
13338fd1498Szrj 
13438fd1498Szrj   template<typename _TraitsT>
13538fd1498Szrj     bool
13638fd1498Szrj     _Compiler<_TraitsT>::
_M_term()13738fd1498Szrj     _M_term()
13838fd1498Szrj     {
13938fd1498Szrj       if (this->_M_assertion())
14038fd1498Szrj 	return true;
14138fd1498Szrj       if (this->_M_atom())
14238fd1498Szrj 	{
14338fd1498Szrj 	  while (this->_M_quantifier());
14438fd1498Szrj 	  return true;
14538fd1498Szrj 	}
14638fd1498Szrj       return false;
14738fd1498Szrj     }
14838fd1498Szrj 
14938fd1498Szrj   template<typename _TraitsT>
15038fd1498Szrj     bool
15138fd1498Szrj     _Compiler<_TraitsT>::
_M_assertion()15238fd1498Szrj     _M_assertion()
15338fd1498Szrj     {
15438fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_line_begin))
15538fd1498Szrj 	_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_begin()));
15638fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_line_end))
15738fd1498Szrj 	_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->_M_insert_line_end()));
15838fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_word_bound))
15938fd1498Szrj 	// _M_value[0] == 'n' means it's negative, say "not word boundary".
16038fd1498Szrj 	_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->
16138fd1498Szrj 	      _M_insert_word_bound(_M_value[0] == 'n')));
16238fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
16338fd1498Szrj 	{
16438fd1498Szrj 	  auto __neg = _M_value[0] == 'n';
16538fd1498Szrj 	  this->_M_disjunction();
16638fd1498Szrj 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
16738fd1498Szrj 	    __throw_regex_error(regex_constants::error_paren,
16838fd1498Szrj 				"Parenthesis is not closed.");
16938fd1498Szrj 	  auto __tmp = _M_pop();
17038fd1498Szrj 	  __tmp._M_append(_M_nfa->_M_insert_accept());
17138fd1498Szrj 	  _M_stack.push(
17238fd1498Szrj 	      _StateSeqT(
17338fd1498Szrj 		*_M_nfa,
17438fd1498Szrj 		_M_nfa->_M_insert_lookahead(__tmp._M_start, __neg)));
17538fd1498Szrj 	}
17638fd1498Szrj       else
17738fd1498Szrj 	return false;
17838fd1498Szrj       return true;
17938fd1498Szrj     }
18038fd1498Szrj 
18138fd1498Szrj   template<typename _TraitsT>
18238fd1498Szrj     bool
18338fd1498Szrj     _Compiler<_TraitsT>::
_M_quantifier()18438fd1498Szrj     _M_quantifier()
18538fd1498Szrj     {
18638fd1498Szrj       bool __neg = (_M_flags & regex_constants::ECMAScript);
18738fd1498Szrj       auto __init = [this, &__neg]()
18838fd1498Szrj 	{
18938fd1498Szrj 	  if (_M_stack.empty())
19038fd1498Szrj 	    __throw_regex_error(regex_constants::error_badrepeat,
19138fd1498Szrj 				"Nothing to repeat before a quantifier.");
19238fd1498Szrj 	  __neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
19338fd1498Szrj 	};
19438fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_closure0))
19538fd1498Szrj 	{
19638fd1498Szrj 	  __init();
19738fd1498Szrj 	  auto __e = _M_pop();
19838fd1498Szrj 	  _StateSeqT __r(*_M_nfa,
19938fd1498Szrj 			 _M_nfa->_M_insert_repeat(_S_invalid_state_id,
20038fd1498Szrj 						  __e._M_start, __neg));
20138fd1498Szrj 	  __e._M_append(__r);
20238fd1498Szrj 	  _M_stack.push(__r);
20338fd1498Szrj 	}
20438fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_closure1))
20538fd1498Szrj 	{
20638fd1498Szrj 	  __init();
20738fd1498Szrj 	  auto __e = _M_pop();
20838fd1498Szrj 	  __e._M_append(_M_nfa->_M_insert_repeat(_S_invalid_state_id,
20938fd1498Szrj 						 __e._M_start, __neg));
21038fd1498Szrj 	  _M_stack.push(__e);
21138fd1498Szrj 	}
21238fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_opt))
21338fd1498Szrj 	{
21438fd1498Szrj 	  __init();
21538fd1498Szrj 	  auto __e = _M_pop();
21638fd1498Szrj 	  auto __end = _M_nfa->_M_insert_dummy();
21738fd1498Szrj 	  _StateSeqT __r(*_M_nfa,
21838fd1498Szrj 			 _M_nfa->_M_insert_repeat(_S_invalid_state_id,
21938fd1498Szrj 						  __e._M_start, __neg));
22038fd1498Szrj 	  __e._M_append(__end);
22138fd1498Szrj 	  __r._M_append(__end);
22238fd1498Szrj 	  _M_stack.push(__r);
22338fd1498Szrj 	}
22438fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_interval_begin))
22538fd1498Szrj 	{
22638fd1498Szrj 	  if (_M_stack.empty())
22738fd1498Szrj 	    __throw_regex_error(regex_constants::error_badrepeat,
22838fd1498Szrj 				"Nothing to repeat before a quantifier.");
22938fd1498Szrj 	  if (!_M_match_token(_ScannerT::_S_token_dup_count))
23038fd1498Szrj 	    __throw_regex_error(regex_constants::error_badbrace,
23138fd1498Szrj 				"Unexpected token in brace expression.");
23238fd1498Szrj 	  _StateSeqT __r(_M_pop());
23338fd1498Szrj 	  _StateSeqT __e(*_M_nfa, _M_nfa->_M_insert_dummy());
23438fd1498Szrj 	  long __min_rep = _M_cur_int_value(10);
23538fd1498Szrj 	  bool __infi = false;
23638fd1498Szrj 	  long __n;
23738fd1498Szrj 
23838fd1498Szrj 	  // {3
23938fd1498Szrj 	  if (_M_match_token(_ScannerT::_S_token_comma))
24038fd1498Szrj 	    if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
24138fd1498Szrj 	      __n = _M_cur_int_value(10) - __min_rep;
24238fd1498Szrj 	    else
24338fd1498Szrj 	      __infi = true;
24438fd1498Szrj 	  else
24538fd1498Szrj 	    __n = 0;
24638fd1498Szrj 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
24738fd1498Szrj 	    __throw_regex_error(regex_constants::error_brace,
24838fd1498Szrj 				"Unexpected end of brace expression.");
24938fd1498Szrj 
25038fd1498Szrj 	  __neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
25138fd1498Szrj 
25238fd1498Szrj 	  for (long __i = 0; __i < __min_rep; ++__i)
25338fd1498Szrj 	    __e._M_append(__r._M_clone());
25438fd1498Szrj 
25538fd1498Szrj 	  if (__infi)
25638fd1498Szrj 	    {
25738fd1498Szrj 	      auto __tmp = __r._M_clone();
25838fd1498Szrj 	      _StateSeqT __s(*_M_nfa,
25938fd1498Szrj 			     _M_nfa->_M_insert_repeat(_S_invalid_state_id,
26038fd1498Szrj 						      __tmp._M_start, __neg));
26138fd1498Szrj 	      __tmp._M_append(__s);
26238fd1498Szrj 	      __e._M_append(__s);
26338fd1498Szrj 	    }
26438fd1498Szrj 	  else
26538fd1498Szrj 	    {
26638fd1498Szrj 	      if (__n < 0)
26738fd1498Szrj 		__throw_regex_error(regex_constants::error_badbrace,
26838fd1498Szrj 				    "Invalid range in brace expression.");
26938fd1498Szrj 	      auto __end = _M_nfa->_M_insert_dummy();
27038fd1498Szrj 	      // _M_alt is the "match more" branch, and _M_next is the
27138fd1498Szrj 	      // "match less" one. Switch _M_alt and _M_next of all created
27238fd1498Szrj 	      // nodes. This is a hack but IMO works well.
27338fd1498Szrj 	      std::stack<_StateIdT> __stack;
27438fd1498Szrj 	      for (long __i = 0; __i < __n; ++__i)
27538fd1498Szrj 		{
27638fd1498Szrj 		  auto __tmp = __r._M_clone();
27738fd1498Szrj 		  auto __alt = _M_nfa->_M_insert_repeat(__tmp._M_start,
27838fd1498Szrj 							__end, __neg);
27938fd1498Szrj 		  __stack.push(__alt);
28038fd1498Szrj 		  __e._M_append(_StateSeqT(*_M_nfa, __alt, __tmp._M_end));
28138fd1498Szrj 		}
28238fd1498Szrj 	      __e._M_append(__end);
28338fd1498Szrj 	      while (!__stack.empty())
28438fd1498Szrj 		{
28538fd1498Szrj 		  auto& __tmp = (*_M_nfa)[__stack.top()];
28638fd1498Szrj 		  __stack.pop();
28738fd1498Szrj 		  std::swap(__tmp._M_next, __tmp._M_alt);
28838fd1498Szrj 		}
28938fd1498Szrj 	    }
29038fd1498Szrj 	  _M_stack.push(__e);
29138fd1498Szrj 	}
29238fd1498Szrj       else
29338fd1498Szrj 	return false;
29438fd1498Szrj       return true;
29538fd1498Szrj     }
29638fd1498Szrj 
29738fd1498Szrj #define __INSERT_REGEX_MATCHER(__func, ...)\
298*58e805e6Szrj 	do {\
29938fd1498Szrj 	  if (!(_M_flags & regex_constants::icase))\
30038fd1498Szrj 	    if (!(_M_flags & regex_constants::collate))\
30138fd1498Szrj 	      __func<false, false>(__VA_ARGS__);\
30238fd1498Szrj 	    else\
30338fd1498Szrj 	      __func<false, true>(__VA_ARGS__);\
30438fd1498Szrj 	  else\
30538fd1498Szrj 	    if (!(_M_flags & regex_constants::collate))\
30638fd1498Szrj 	      __func<true, false>(__VA_ARGS__);\
30738fd1498Szrj 	    else\
30838fd1498Szrj 	      __func<true, true>(__VA_ARGS__);\
309*58e805e6Szrj 	} while (false)
31038fd1498Szrj 
31138fd1498Szrj   template<typename _TraitsT>
31238fd1498Szrj     bool
31338fd1498Szrj     _Compiler<_TraitsT>::
_M_atom()31438fd1498Szrj     _M_atom()
31538fd1498Szrj     {
31638fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_anychar))
31738fd1498Szrj 	{
31838fd1498Szrj 	  if (!(_M_flags & regex_constants::ECMAScript))
31938fd1498Szrj 	    __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
32038fd1498Szrj 	  else
32138fd1498Szrj 	    __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
32238fd1498Szrj 	}
32338fd1498Szrj       else if (_M_try_char())
32438fd1498Szrj 	__INSERT_REGEX_MATCHER(_M_insert_char_matcher);
32538fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_backref))
32638fd1498Szrj 	_M_stack.push(_StateSeqT(*_M_nfa, _M_nfa->
32738fd1498Szrj 				 _M_insert_backref(_M_cur_int_value(10))));
32838fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
32938fd1498Szrj 	__INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
33038fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
33138fd1498Szrj 	{
33238fd1498Szrj 	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_dummy());
33338fd1498Szrj 	  this->_M_disjunction();
33438fd1498Szrj 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
33538fd1498Szrj 	    __throw_regex_error(regex_constants::error_paren,
33638fd1498Szrj 				"Parenthesis is not closed.");
33738fd1498Szrj 	  __r._M_append(_M_pop());
33838fd1498Szrj 	  _M_stack.push(__r);
33938fd1498Szrj 	}
34038fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
34138fd1498Szrj 	{
34238fd1498Szrj 	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_insert_subexpr_begin());
34338fd1498Szrj 	  this->_M_disjunction();
34438fd1498Szrj 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
34538fd1498Szrj 	    __throw_regex_error(regex_constants::error_paren,
34638fd1498Szrj 				"Parenthesis is not closed.");
34738fd1498Szrj 	  __r._M_append(_M_pop());
34838fd1498Szrj 	  __r._M_append(_M_nfa->_M_insert_subexpr_end());
34938fd1498Szrj 	  _M_stack.push(__r);
35038fd1498Szrj 	}
35138fd1498Szrj       else if (!_M_bracket_expression())
35238fd1498Szrj 	return false;
35338fd1498Szrj       return true;
35438fd1498Szrj     }
35538fd1498Szrj 
35638fd1498Szrj   template<typename _TraitsT>
35738fd1498Szrj     bool
35838fd1498Szrj     _Compiler<_TraitsT>::
_M_bracket_expression()35938fd1498Szrj     _M_bracket_expression()
36038fd1498Szrj     {
36138fd1498Szrj       bool __neg =
36238fd1498Szrj 	_M_match_token(_ScannerT::_S_token_bracket_neg_begin);
36338fd1498Szrj       if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
36438fd1498Szrj 	return false;
36538fd1498Szrj       __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
36638fd1498Szrj       return true;
36738fd1498Szrj     }
36838fd1498Szrj #undef __INSERT_REGEX_MATCHER
36938fd1498Szrj 
37038fd1498Szrj   template<typename _TraitsT>
37138fd1498Szrj   template<bool __icase, bool __collate>
37238fd1498Szrj     void
37338fd1498Szrj     _Compiler<_TraitsT>::
_M_insert_any_matcher_ecma()37438fd1498Szrj     _M_insert_any_matcher_ecma()
37538fd1498Szrj     {
37638fd1498Szrj       _M_stack.push(_StateSeqT(*_M_nfa,
37738fd1498Szrj 	_M_nfa->_M_insert_matcher
37838fd1498Szrj 	  (_AnyMatcher<_TraitsT, true, __icase, __collate>
37938fd1498Szrj 	    (_M_traits))));
38038fd1498Szrj     }
38138fd1498Szrj 
38238fd1498Szrj   template<typename _TraitsT>
38338fd1498Szrj   template<bool __icase, bool __collate>
38438fd1498Szrj     void
38538fd1498Szrj     _Compiler<_TraitsT>::
_M_insert_any_matcher_posix()38638fd1498Szrj     _M_insert_any_matcher_posix()
38738fd1498Szrj     {
38838fd1498Szrj       _M_stack.push(_StateSeqT(*_M_nfa,
38938fd1498Szrj 	_M_nfa->_M_insert_matcher
39038fd1498Szrj 	  (_AnyMatcher<_TraitsT, false, __icase, __collate>
39138fd1498Szrj 	    (_M_traits))));
39238fd1498Szrj     }
39338fd1498Szrj 
39438fd1498Szrj   template<typename _TraitsT>
39538fd1498Szrj   template<bool __icase, bool __collate>
39638fd1498Szrj     void
39738fd1498Szrj     _Compiler<_TraitsT>::
_M_insert_char_matcher()39838fd1498Szrj     _M_insert_char_matcher()
39938fd1498Szrj     {
40038fd1498Szrj       _M_stack.push(_StateSeqT(*_M_nfa,
40138fd1498Szrj 	_M_nfa->_M_insert_matcher
40238fd1498Szrj 	  (_CharMatcher<_TraitsT, __icase, __collate>
40338fd1498Szrj 	    (_M_value[0], _M_traits))));
40438fd1498Szrj     }
40538fd1498Szrj 
40638fd1498Szrj   template<typename _TraitsT>
40738fd1498Szrj   template<bool __icase, bool __collate>
40838fd1498Szrj     void
40938fd1498Szrj     _Compiler<_TraitsT>::
_M_insert_character_class_matcher()41038fd1498Szrj     _M_insert_character_class_matcher()
41138fd1498Szrj     {
41238fd1498Szrj       __glibcxx_assert(_M_value.size() == 1);
41338fd1498Szrj       _BracketMatcher<_TraitsT, __icase, __collate> __matcher
41438fd1498Szrj 	(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
41538fd1498Szrj       __matcher._M_add_character_class(_M_value, false);
41638fd1498Szrj       __matcher._M_ready();
41738fd1498Szrj       _M_stack.push(_StateSeqT(*_M_nfa,
41838fd1498Szrj 	_M_nfa->_M_insert_matcher(std::move(__matcher))));
41938fd1498Szrj     }
42038fd1498Szrj 
42138fd1498Szrj   template<typename _TraitsT>
42238fd1498Szrj   template<bool __icase, bool __collate>
42338fd1498Szrj     void
42438fd1498Szrj     _Compiler<_TraitsT>::
_M_insert_bracket_matcher(bool __neg)42538fd1498Szrj     _M_insert_bracket_matcher(bool __neg)
42638fd1498Szrj     {
42738fd1498Szrj       _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
42838fd1498Szrj       pair<bool, _CharT> __last_char; // Optional<_CharT>
42938fd1498Szrj       __last_char.first = false;
43038fd1498Szrj       if (!(_M_flags & regex_constants::ECMAScript))
43138fd1498Szrj 	{
43238fd1498Szrj 	  if (_M_try_char())
43338fd1498Szrj 	    {
43438fd1498Szrj 	      __last_char.first = true;
43538fd1498Szrj 	      __last_char.second = _M_value[0];
43638fd1498Szrj 	    }
43738fd1498Szrj 	  else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
43838fd1498Szrj 	    {
43938fd1498Szrj 	      __last_char.first = true;
44038fd1498Szrj 	      __last_char.second = '-';
44138fd1498Szrj 	    }
44238fd1498Szrj 	}
44338fd1498Szrj       while (_M_expression_term(__last_char, __matcher));
44438fd1498Szrj       if (__last_char.first)
44538fd1498Szrj 	__matcher._M_add_char(__last_char.second);
44638fd1498Szrj       __matcher._M_ready();
44738fd1498Szrj       _M_stack.push(_StateSeqT(
44838fd1498Szrj 		      *_M_nfa,
44938fd1498Szrj 		      _M_nfa->_M_insert_matcher(std::move(__matcher))));
45038fd1498Szrj     }
45138fd1498Szrj 
45238fd1498Szrj   template<typename _TraitsT>
45338fd1498Szrj   template<bool __icase, bool __collate>
45438fd1498Szrj     bool
45538fd1498Szrj     _Compiler<_TraitsT>::
_M_expression_term(pair<bool,_CharT> & __last_char,_BracketMatcher<_TraitsT,__icase,__collate> & __matcher)45638fd1498Szrj     _M_expression_term(pair<bool, _CharT>& __last_char,
45738fd1498Szrj 		       _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
45838fd1498Szrj     {
45938fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_bracket_end))
46038fd1498Szrj 	return false;
46138fd1498Szrj 
46238fd1498Szrj       const auto __push_char = [&](_CharT __ch)
46338fd1498Szrj       {
46438fd1498Szrj 	if (__last_char.first)
46538fd1498Szrj 	  __matcher._M_add_char(__last_char.second);
46638fd1498Szrj 	else
46738fd1498Szrj 	  __last_char.first = true;
46838fd1498Szrj 	__last_char.second = __ch;
46938fd1498Szrj       };
47038fd1498Szrj       const auto __flush = [&]
47138fd1498Szrj       {
47238fd1498Szrj 	if (__last_char.first)
47338fd1498Szrj 	  {
47438fd1498Szrj 	    __matcher._M_add_char(__last_char.second);
47538fd1498Szrj 	    __last_char.first = false;
47638fd1498Szrj 	  }
47738fd1498Szrj       };
47838fd1498Szrj 
47938fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_collsymbol))
48038fd1498Szrj 	{
48138fd1498Szrj 	  auto __symbol = __matcher._M_add_collate_element(_M_value);
48238fd1498Szrj 	  if (__symbol.size() == 1)
48338fd1498Szrj 	    __push_char(__symbol[0]);
48438fd1498Szrj 	  else
48538fd1498Szrj 	    __flush();
48638fd1498Szrj 	}
48738fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
48838fd1498Szrj 	{
48938fd1498Szrj 	  __flush();
49038fd1498Szrj 	  __matcher._M_add_equivalence_class(_M_value);
49138fd1498Szrj 	}
49238fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
49338fd1498Szrj 	{
49438fd1498Szrj 	  __flush();
49538fd1498Szrj 	  __matcher._M_add_character_class(_M_value, false);
49638fd1498Szrj 	}
49738fd1498Szrj       else if (_M_try_char())
49838fd1498Szrj 	__push_char(_M_value[0]);
49938fd1498Szrj       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
50038fd1498Szrj       // except when the '-' is the first or last character in the bracket
50138fd1498Szrj       // expression ([--0]). ECMAScript treats all '-' after a range as a
50238fd1498Szrj       // normal character. Also see above, where _M_expression_term gets called.
50338fd1498Szrj       //
50438fd1498Szrj       // As a result, POSIX rejects [-----], but ECMAScript doesn't.
50538fd1498Szrj       // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
50638fd1498Szrj       // Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
50738fd1498Szrj       //
50838fd1498Szrj       // It turns out that no one reads BNFs ;)
50938fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
51038fd1498Szrj 	{
51138fd1498Szrj 	  if (!__last_char.first)
51238fd1498Szrj 	    {
51338fd1498Szrj 	      if (!(_M_flags & regex_constants::ECMAScript))
51438fd1498Szrj 		{
51538fd1498Szrj 		  if (_M_match_token(_ScannerT::_S_token_bracket_end))
51638fd1498Szrj 		    {
51738fd1498Szrj 		      __push_char('-');
51838fd1498Szrj 		      return false;
51938fd1498Szrj 		    }
52038fd1498Szrj 		  __throw_regex_error(
52138fd1498Szrj 		    regex_constants::error_range,
52238fd1498Szrj 		    "Unexpected dash in bracket expression. For POSIX syntax, "
52338fd1498Szrj 		    "a dash is not treated literally only when it is at "
52438fd1498Szrj 		    "beginning or end.");
52538fd1498Szrj 		}
52638fd1498Szrj 	      __push_char('-');
52738fd1498Szrj 	    }
52838fd1498Szrj 	  else
52938fd1498Szrj 	    {
53038fd1498Szrj 	      if (_M_try_char())
53138fd1498Szrj 		{
53238fd1498Szrj 		  __matcher._M_make_range(__last_char.second, _M_value[0]);
53338fd1498Szrj 		  __last_char.first = false;
53438fd1498Szrj 		}
53538fd1498Szrj 	      else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
53638fd1498Szrj 		{
53738fd1498Szrj 		  __matcher._M_make_range(__last_char.second, '-');
53838fd1498Szrj 		  __last_char.first = false;
53938fd1498Szrj 		}
54038fd1498Szrj 	      else
54138fd1498Szrj 		{
54238fd1498Szrj 		  if (_M_scanner._M_get_token()
54338fd1498Szrj 		      != _ScannerT::_S_token_bracket_end)
54438fd1498Szrj 		    __throw_regex_error(
54538fd1498Szrj 		      regex_constants::error_range,
54638fd1498Szrj 		      "Character is expected after a dash.");
54738fd1498Szrj 		  __push_char('-');
54838fd1498Szrj 		}
54938fd1498Szrj 	    }
55038fd1498Szrj 	}
55138fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
55238fd1498Szrj 	{
55338fd1498Szrj 	  __flush();
55438fd1498Szrj 	  __matcher._M_add_character_class(_M_value,
55538fd1498Szrj 					   _M_ctype.is(_CtypeT::upper,
55638fd1498Szrj 						       _M_value[0]));
55738fd1498Szrj 	}
55838fd1498Szrj       else
55938fd1498Szrj 	__throw_regex_error(regex_constants::error_brack,
56038fd1498Szrj 			    "Unexpected character in bracket expression.");
56138fd1498Szrj 
56238fd1498Szrj       return true;
56338fd1498Szrj     }
56438fd1498Szrj 
56538fd1498Szrj   template<typename _TraitsT>
56638fd1498Szrj     bool
56738fd1498Szrj     _Compiler<_TraitsT>::
_M_try_char()56838fd1498Szrj     _M_try_char()
56938fd1498Szrj     {
57038fd1498Szrj       bool __is_char = false;
57138fd1498Szrj       if (_M_match_token(_ScannerT::_S_token_oct_num))
57238fd1498Szrj 	{
57338fd1498Szrj 	  __is_char = true;
57438fd1498Szrj 	  _M_value.assign(1, _M_cur_int_value(8));
57538fd1498Szrj 	}
57638fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_hex_num))
57738fd1498Szrj 	{
57838fd1498Szrj 	  __is_char = true;
57938fd1498Szrj 	  _M_value.assign(1, _M_cur_int_value(16));
58038fd1498Szrj 	}
58138fd1498Szrj       else if (_M_match_token(_ScannerT::_S_token_ord_char))
58238fd1498Szrj 	__is_char = true;
58338fd1498Szrj       return __is_char;
58438fd1498Szrj     }
58538fd1498Szrj 
58638fd1498Szrj   template<typename _TraitsT>
58738fd1498Szrj     bool
58838fd1498Szrj     _Compiler<_TraitsT>::
_M_match_token(_TokenT token)58938fd1498Szrj     _M_match_token(_TokenT token)
59038fd1498Szrj     {
59138fd1498Szrj       if (token == _M_scanner._M_get_token())
59238fd1498Szrj 	{
59338fd1498Szrj 	  _M_value = _M_scanner._M_get_value();
59438fd1498Szrj 	  _M_scanner._M_advance();
59538fd1498Szrj 	  return true;
59638fd1498Szrj 	}
59738fd1498Szrj       return false;
59838fd1498Szrj     }
59938fd1498Szrj 
60038fd1498Szrj   template<typename _TraitsT>
60138fd1498Szrj     int
60238fd1498Szrj     _Compiler<_TraitsT>::
_M_cur_int_value(int __radix)60338fd1498Szrj     _M_cur_int_value(int __radix)
60438fd1498Szrj     {
60538fd1498Szrj       long __v = 0;
60638fd1498Szrj       for (typename _StringT::size_type __i = 0;
60738fd1498Szrj 	   __i < _M_value.length(); ++__i)
60838fd1498Szrj 	__v =__v * __radix + _M_traits.value(_M_value[__i], __radix);
60938fd1498Szrj       return __v;
61038fd1498Szrj     }
61138fd1498Szrj 
61238fd1498Szrj   template<typename _TraitsT, bool __icase, bool __collate>
61338fd1498Szrj     bool
61438fd1498Szrj     _BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch,false_type) const61538fd1498Szrj     _M_apply(_CharT __ch, false_type) const
61638fd1498Szrj     {
61738fd1498Szrj       return [this, __ch]
61838fd1498Szrj       {
61938fd1498Szrj 	if (std::binary_search(_M_char_set.begin(), _M_char_set.end(),
62038fd1498Szrj 			       _M_translator._M_translate(__ch)))
62138fd1498Szrj 	  return true;
62238fd1498Szrj 	auto __s = _M_translator._M_transform(__ch);
62338fd1498Szrj 	for (auto& __it : _M_range_set)
62438fd1498Szrj 	  if (_M_translator._M_match_range(__it.first, __it.second, __s))
62538fd1498Szrj 	    return true;
62638fd1498Szrj 	if (_M_traits.isctype(__ch, _M_class_set))
62738fd1498Szrj 	  return true;
62838fd1498Szrj 	if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
62938fd1498Szrj 		      _M_traits.transform_primary(&__ch, &__ch+1))
63038fd1498Szrj 	    != _M_equiv_set.end())
63138fd1498Szrj 	  return true;
63238fd1498Szrj 	for (auto& __it : _M_neg_class_set)
63338fd1498Szrj 	  if (!_M_traits.isctype(__ch, __it))
63438fd1498Szrj 	    return true;
63538fd1498Szrj 	return false;
63638fd1498Szrj       }() ^ _M_is_non_matching;
63738fd1498Szrj     }
63838fd1498Szrj } // namespace __detail
63938fd1498Szrj 
64038fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
64138fd1498Szrj } // namespace
642