xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_automaton.tcc (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg // class template regex -*- C++ -*-
21debfc3dSmrg 
3*8feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc.
41debfc3dSmrg //
51debfc3dSmrg // This file is part of the GNU ISO C++ Library.  This library is free
61debfc3dSmrg // software; you can redistribute it and/or modify it under the
71debfc3dSmrg // terms of the GNU General Public License as published by the
81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option)
91debfc3dSmrg // any later version.
101debfc3dSmrg 
111debfc3dSmrg // This library is distributed in the hope that it will be useful,
121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
141debfc3dSmrg // GNU General Public License for more details.
151debfc3dSmrg 
161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional
171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version
181debfc3dSmrg // 3.1, as published by the Free Software Foundation.
191debfc3dSmrg 
201debfc3dSmrg // You should have received a copy of the GNU General Public License and
211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program;
221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
231debfc3dSmrg // <http://www.gnu.org/licenses/>.
241debfc3dSmrg 
251debfc3dSmrg /**
261debfc3dSmrg  *  @file bits/regex_automaton.tcc
271debfc3dSmrg  *  This is an internal header file, included by other library headers.
281debfc3dSmrg  *  Do not attempt to use it directly. @headername{regex}
291debfc3dSmrg  */
301debfc3dSmrg 
311debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
321debfc3dSmrg {
331debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
341debfc3dSmrg 
35a2dc1f3fSmrg namespace __detail
36a2dc1f3fSmrg {
371debfc3dSmrg #ifdef _GLIBCXX_DEBUG
381debfc3dSmrg   inline std::ostream&
_M_print(std::ostream & ostr) const391debfc3dSmrg   _State_base::_M_print(std::ostream& ostr) const
401debfc3dSmrg   {
411debfc3dSmrg     switch (_M_opcode)
421debfc3dSmrg     {
431debfc3dSmrg       case _S_opcode_alternative:
441debfc3dSmrg       case _S_opcode_repeat:
451debfc3dSmrg 	ostr << "alt next=" << _M_next << " alt=" << _M_alt;
461debfc3dSmrg 	break;
471debfc3dSmrg       case _S_opcode_subexpr_begin:
481debfc3dSmrg 	ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
491debfc3dSmrg 	break;
501debfc3dSmrg       case _S_opcode_subexpr_end:
511debfc3dSmrg 	ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
521debfc3dSmrg 	break;
531debfc3dSmrg       case _S_opcode_backref:
541debfc3dSmrg 	ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
551debfc3dSmrg 	break;
561debfc3dSmrg       case _S_opcode_match:
571debfc3dSmrg 	ostr << "match next=" << _M_next;
581debfc3dSmrg 	break;
591debfc3dSmrg       case _S_opcode_accept:
601debfc3dSmrg 	ostr << "accept next=" << _M_next;
611debfc3dSmrg 	break;
621debfc3dSmrg       default:
631debfc3dSmrg 	ostr << "unknown next=" << _M_next;
641debfc3dSmrg 	break;
651debfc3dSmrg     }
661debfc3dSmrg     return ostr;
671debfc3dSmrg   }
681debfc3dSmrg 
691debfc3dSmrg   // Prints graphviz dot commands for state.
701debfc3dSmrg   inline std::ostream&
_M_dot(std::ostream & __ostr,_StateIdT __id) const711debfc3dSmrg   _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
721debfc3dSmrg   {
731debfc3dSmrg     switch (_M_opcode)
741debfc3dSmrg     {
751debfc3dSmrg       case _S_opcode_alternative:
761debfc3dSmrg       case _S_opcode_repeat:
771debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
781debfc3dSmrg 	       << __id << " -> " << _M_next
791debfc3dSmrg 	       << " [label=\"next\", tailport=\"s\"];\n"
801debfc3dSmrg 	       << __id << " -> " << _M_alt
811debfc3dSmrg 	       << " [label=\"alt\", tailport=\"n\"];\n";
821debfc3dSmrg 	break;
831debfc3dSmrg       case _S_opcode_backref:
841debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
851debfc3dSmrg 	       << _M_subexpr << "\"];\n"
861debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
871debfc3dSmrg 	break;
881debfc3dSmrg       case _S_opcode_line_begin_assertion:
891debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
901debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
911debfc3dSmrg 	break;
921debfc3dSmrg       case _S_opcode_line_end_assertion:
931debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
941debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
951debfc3dSmrg 	break;
961debfc3dSmrg       case _S_opcode_word_boundary:
971debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
981debfc3dSmrg 	       << _M_neg << "\"];\n"
991debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
1001debfc3dSmrg 	break;
1011debfc3dSmrg       case _S_opcode_subexpr_lookahead:
1021debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
1031debfc3dSmrg 	       << __id << " -> " << _M_next
1041debfc3dSmrg 	       << " [label=\"epsilon\", tailport=\"s\"];\n"
1051debfc3dSmrg 	       << __id << " -> " << _M_alt
1061debfc3dSmrg 	       << " [label=\"<assert>\", tailport=\"n\"];\n";
1071debfc3dSmrg 	break;
1081debfc3dSmrg       case _S_opcode_subexpr_begin:
1091debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
1101debfc3dSmrg 	       << _M_subexpr << "\"];\n"
1111debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
1121debfc3dSmrg 	break;
1131debfc3dSmrg       case _S_opcode_subexpr_end:
1141debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nSEND "
1151debfc3dSmrg 	       << _M_subexpr << "\"];\n"
1161debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
1171debfc3dSmrg 	break;
1181debfc3dSmrg       case _S_opcode_dummy:
1191debfc3dSmrg 	break;
1201debfc3dSmrg       case _S_opcode_match:
1211debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
1221debfc3dSmrg 	       << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
1231debfc3dSmrg 	break;
1241debfc3dSmrg       case _S_opcode_accept:
1251debfc3dSmrg 	__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
1261debfc3dSmrg 	break;
1271debfc3dSmrg       default:
1281debfc3dSmrg 	_GLIBCXX_DEBUG_ASSERT(false);
1291debfc3dSmrg 	break;
1301debfc3dSmrg     }
1311debfc3dSmrg     return __ostr;
1321debfc3dSmrg   }
1331debfc3dSmrg 
1341debfc3dSmrg   template<typename _TraitsT>
1351debfc3dSmrg     std::ostream&
_M_dot(std::ostream & __ostr) const1361debfc3dSmrg     _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
1371debfc3dSmrg     {
1381debfc3dSmrg       __ostr << "digraph _Nfa {\n"
1391debfc3dSmrg 		"  rankdir=LR;\n";
1401debfc3dSmrg       for (size_t __i = 0; __i < this->size(); ++__i)
1411debfc3dSmrg 	(*this)[__i]._M_dot(__ostr, __i);
1421debfc3dSmrg       __ostr << "}\n";
1431debfc3dSmrg       return __ostr;
1441debfc3dSmrg     }
1451debfc3dSmrg #endif
1461debfc3dSmrg 
1471debfc3dSmrg   template<typename _TraitsT>
1481debfc3dSmrg     _StateIdT
_M_insert_backref(size_t __index)1491debfc3dSmrg     _NFA<_TraitsT>::_M_insert_backref(size_t __index)
1501debfc3dSmrg     {
1511debfc3dSmrg       if (this->_M_flags & regex_constants::__polynomial)
1521debfc3dSmrg 	__throw_regex_error(regex_constants::error_complexity,
1531debfc3dSmrg 			    "Unexpected back-reference in polynomial mode.");
1541debfc3dSmrg       // To figure out whether a backref is valid, a stack is used to store
1551debfc3dSmrg       // unfinished sub-expressions. For example, when parsing
1561debfc3dSmrg       // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
1571debfc3dSmrg       // sub expressions are parsed or partially parsed(in the stack), aka,
1581debfc3dSmrg       // "(a..", "(b)" and "(c..").
1591debfc3dSmrg       // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
1601debfc3dSmrg       // time, "\\2" is valid, but "\\1" and "\\3" are not.
1611debfc3dSmrg       if (__index >= _M_subexpr_count)
1621debfc3dSmrg 	__throw_regex_error(
1631debfc3dSmrg 	  regex_constants::error_backref,
1641debfc3dSmrg 	  "Back-reference index exceeds current sub-expression count.");
1651debfc3dSmrg       for (auto __it : this->_M_paren_stack)
1661debfc3dSmrg 	if (__index == __it)
1671debfc3dSmrg 	  __throw_regex_error(
1681debfc3dSmrg 	    regex_constants::error_backref,
1691debfc3dSmrg 	    "Back-reference referred to an opened sub-expression.");
1701debfc3dSmrg       this->_M_has_backref = true;
1711debfc3dSmrg       _StateT __tmp(_S_opcode_backref);
1721debfc3dSmrg       __tmp._M_backref_index = __index;
1731debfc3dSmrg       return _M_insert_state(std::move(__tmp));
1741debfc3dSmrg     }
1751debfc3dSmrg 
1761debfc3dSmrg   template<typename _TraitsT>
1771debfc3dSmrg     void
_M_eliminate_dummy()1781debfc3dSmrg     _NFA<_TraitsT>::_M_eliminate_dummy()
1791debfc3dSmrg     {
1801debfc3dSmrg       for (auto& __it : *this)
1811debfc3dSmrg 	{
1821debfc3dSmrg 	  while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
1831debfc3dSmrg 		 == _S_opcode_dummy)
1841debfc3dSmrg 	    __it._M_next = (*this)[__it._M_next]._M_next;
1851debfc3dSmrg 	  if (__it._M_has_alt())
1861debfc3dSmrg 	    while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
1871debfc3dSmrg 		   == _S_opcode_dummy)
1881debfc3dSmrg 	      __it._M_alt = (*this)[__it._M_alt]._M_next;
1891debfc3dSmrg 	}
1901debfc3dSmrg     }
1911debfc3dSmrg 
1921debfc3dSmrg   // Just apply DFS on the sequence and re-link their links.
1931debfc3dSmrg   template<typename _TraitsT>
1941debfc3dSmrg     _StateSeq<_TraitsT>
_M_clone()1951debfc3dSmrg     _StateSeq<_TraitsT>::_M_clone()
1961debfc3dSmrg     {
1971debfc3dSmrg       std::map<_StateIdT, _StateIdT> __m;
1981debfc3dSmrg       std::stack<_StateIdT> __stack;
1991debfc3dSmrg       __stack.push(_M_start);
2001debfc3dSmrg       while (!__stack.empty())
2011debfc3dSmrg 	{
2021debfc3dSmrg 	  auto __u = __stack.top();
2031debfc3dSmrg 	  __stack.pop();
2041debfc3dSmrg 	  auto __dup = _M_nfa[__u];
2051debfc3dSmrg 	  // _M_insert_state() never return -1
2061debfc3dSmrg 	  auto __id = _M_nfa._M_insert_state(std::move(__dup));
2071debfc3dSmrg 	  __m[__u] = __id;
2081debfc3dSmrg 	  if (__dup._M_has_alt())
2091debfc3dSmrg 	    if (__dup._M_alt != _S_invalid_state_id
2101debfc3dSmrg 		&& __m.count(__dup._M_alt) == 0)
2111debfc3dSmrg 	      __stack.push(__dup._M_alt);
2121debfc3dSmrg 	  if (__u == _M_end)
2131debfc3dSmrg 	    continue;
2141debfc3dSmrg 	  if (__dup._M_next != _S_invalid_state_id
2151debfc3dSmrg 	      && __m.count(__dup._M_next) == 0)
2161debfc3dSmrg 	    __stack.push(__dup._M_next);
2171debfc3dSmrg 	}
2181debfc3dSmrg       for (auto __it : __m)
2191debfc3dSmrg 	{
2201debfc3dSmrg 	  auto __v = __it.second;
2211debfc3dSmrg 	  auto& __ref = _M_nfa[__v];
2221debfc3dSmrg 	  if (__ref._M_next != _S_invalid_state_id)
223c0a68be4Smrg 	    __ref._M_next = __m.find(__ref._M_next)->second;
224c0a68be4Smrg 	  if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id)
225c0a68be4Smrg 	    __ref._M_alt = __m.find(__ref._M_alt)->second;
2261debfc3dSmrg 	}
2271debfc3dSmrg       return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
2281debfc3dSmrg     }
229a2dc1f3fSmrg } // namespace __detail
2301debfc3dSmrg 
2311debfc3dSmrg _GLIBCXX_END_NAMESPACE_VERSION
2321debfc3dSmrg } // namespace
233