11debfc3dSmrg // class template regex -*- C++ -*- 21debfc3dSmrg 3*8feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc. 41debfc3dSmrg // 51debfc3dSmrg // This file is part of the GNU ISO C++ Library. This library is free 61debfc3dSmrg // software; you can redistribute it and/or modify it under the 71debfc3dSmrg // terms of the GNU General Public License as published by the 81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option) 91debfc3dSmrg // any later version. 101debfc3dSmrg 111debfc3dSmrg // This library is distributed in the hope that it will be useful, 121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of 131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 141debfc3dSmrg // GNU General Public License for more details. 151debfc3dSmrg 161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional 171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version 181debfc3dSmrg // 3.1, as published by the Free Software Foundation. 191debfc3dSmrg 201debfc3dSmrg // You should have received a copy of the GNU General Public License and 211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program; 221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 231debfc3dSmrg // <http://www.gnu.org/licenses/>. 241debfc3dSmrg 251debfc3dSmrg /** 261debfc3dSmrg * @file bits/regex_automaton.tcc 271debfc3dSmrg * This is an internal header file, included by other library headers. 281debfc3dSmrg * Do not attempt to use it directly. @headername{regex} 291debfc3dSmrg */ 301debfc3dSmrg 311debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default) 321debfc3dSmrg { 331debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 341debfc3dSmrg 35a2dc1f3fSmrg namespace __detail 36a2dc1f3fSmrg { 371debfc3dSmrg #ifdef _GLIBCXX_DEBUG 381debfc3dSmrg inline std::ostream& _M_print(std::ostream & ostr) const391debfc3dSmrg _State_base::_M_print(std::ostream& ostr) const 401debfc3dSmrg { 411debfc3dSmrg switch (_M_opcode) 421debfc3dSmrg { 431debfc3dSmrg case _S_opcode_alternative: 441debfc3dSmrg case _S_opcode_repeat: 451debfc3dSmrg ostr << "alt next=" << _M_next << " alt=" << _M_alt; 461debfc3dSmrg break; 471debfc3dSmrg case _S_opcode_subexpr_begin: 481debfc3dSmrg ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; 491debfc3dSmrg break; 501debfc3dSmrg case _S_opcode_subexpr_end: 511debfc3dSmrg ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; 521debfc3dSmrg break; 531debfc3dSmrg case _S_opcode_backref: 541debfc3dSmrg ostr << "backref next=" << _M_next << " index=" << _M_backref_index; 551debfc3dSmrg break; 561debfc3dSmrg case _S_opcode_match: 571debfc3dSmrg ostr << "match next=" << _M_next; 581debfc3dSmrg break; 591debfc3dSmrg case _S_opcode_accept: 601debfc3dSmrg ostr << "accept next=" << _M_next; 611debfc3dSmrg break; 621debfc3dSmrg default: 631debfc3dSmrg ostr << "unknown next=" << _M_next; 641debfc3dSmrg break; 651debfc3dSmrg } 661debfc3dSmrg return ostr; 671debfc3dSmrg } 681debfc3dSmrg 691debfc3dSmrg // Prints graphviz dot commands for state. 701debfc3dSmrg inline std::ostream& _M_dot(std::ostream & __ostr,_StateIdT __id) const711debfc3dSmrg _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const 721debfc3dSmrg { 731debfc3dSmrg switch (_M_opcode) 741debfc3dSmrg { 751debfc3dSmrg case _S_opcode_alternative: 761debfc3dSmrg case _S_opcode_repeat: 771debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" 781debfc3dSmrg << __id << " -> " << _M_next 791debfc3dSmrg << " [label=\"next\", tailport=\"s\"];\n" 801debfc3dSmrg << __id << " -> " << _M_alt 811debfc3dSmrg << " [label=\"alt\", tailport=\"n\"];\n"; 821debfc3dSmrg break; 831debfc3dSmrg case _S_opcode_backref: 841debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nBACKREF " 851debfc3dSmrg << _M_subexpr << "\"];\n" 861debfc3dSmrg << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 871debfc3dSmrg break; 881debfc3dSmrg case _S_opcode_line_begin_assertion: 891debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n" 901debfc3dSmrg << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 911debfc3dSmrg break; 921debfc3dSmrg case _S_opcode_line_end_assertion: 931debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n" 941debfc3dSmrg << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 951debfc3dSmrg break; 961debfc3dSmrg case _S_opcode_word_boundary: 971debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY " 981debfc3dSmrg << _M_neg << "\"];\n" 991debfc3dSmrg << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 1001debfc3dSmrg break; 1011debfc3dSmrg case _S_opcode_subexpr_lookahead: 1021debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n" 1031debfc3dSmrg << __id << " -> " << _M_next 1041debfc3dSmrg << " [label=\"epsilon\", tailport=\"s\"];\n" 1051debfc3dSmrg << __id << " -> " << _M_alt 1061debfc3dSmrg << " [label=\"<assert>\", tailport=\"n\"];\n"; 1071debfc3dSmrg break; 1081debfc3dSmrg case _S_opcode_subexpr_begin: 1091debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " 1101debfc3dSmrg << _M_subexpr << "\"];\n" 1111debfc3dSmrg << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 1121debfc3dSmrg break; 1131debfc3dSmrg case _S_opcode_subexpr_end: 1141debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nSEND " 1151debfc3dSmrg << _M_subexpr << "\"];\n" 1161debfc3dSmrg << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 1171debfc3dSmrg break; 1181debfc3dSmrg case _S_opcode_dummy: 1191debfc3dSmrg break; 1201debfc3dSmrg case _S_opcode_match: 1211debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" 1221debfc3dSmrg << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 1231debfc3dSmrg break; 1241debfc3dSmrg case _S_opcode_accept: 1251debfc3dSmrg __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; 1261debfc3dSmrg break; 1271debfc3dSmrg default: 1281debfc3dSmrg _GLIBCXX_DEBUG_ASSERT(false); 1291debfc3dSmrg break; 1301debfc3dSmrg } 1311debfc3dSmrg return __ostr; 1321debfc3dSmrg } 1331debfc3dSmrg 1341debfc3dSmrg template<typename _TraitsT> 1351debfc3dSmrg std::ostream& _M_dot(std::ostream & __ostr) const1361debfc3dSmrg _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const 1371debfc3dSmrg { 1381debfc3dSmrg __ostr << "digraph _Nfa {\n" 1391debfc3dSmrg " rankdir=LR;\n"; 1401debfc3dSmrg for (size_t __i = 0; __i < this->size(); ++__i) 1411debfc3dSmrg (*this)[__i]._M_dot(__ostr, __i); 1421debfc3dSmrg __ostr << "}\n"; 1431debfc3dSmrg return __ostr; 1441debfc3dSmrg } 1451debfc3dSmrg #endif 1461debfc3dSmrg 1471debfc3dSmrg template<typename _TraitsT> 1481debfc3dSmrg _StateIdT _M_insert_backref(size_t __index)1491debfc3dSmrg _NFA<_TraitsT>::_M_insert_backref(size_t __index) 1501debfc3dSmrg { 1511debfc3dSmrg if (this->_M_flags & regex_constants::__polynomial) 1521debfc3dSmrg __throw_regex_error(regex_constants::error_complexity, 1531debfc3dSmrg "Unexpected back-reference in polynomial mode."); 1541debfc3dSmrg // To figure out whether a backref is valid, a stack is used to store 1551debfc3dSmrg // unfinished sub-expressions. For example, when parsing 1561debfc3dSmrg // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3 1571debfc3dSmrg // sub expressions are parsed or partially parsed(in the stack), aka, 1581debfc3dSmrg // "(a..", "(b)" and "(c.."). 1591debfc3dSmrg // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this 1601debfc3dSmrg // time, "\\2" is valid, but "\\1" and "\\3" are not. 1611debfc3dSmrg if (__index >= _M_subexpr_count) 1621debfc3dSmrg __throw_regex_error( 1631debfc3dSmrg regex_constants::error_backref, 1641debfc3dSmrg "Back-reference index exceeds current sub-expression count."); 1651debfc3dSmrg for (auto __it : this->_M_paren_stack) 1661debfc3dSmrg if (__index == __it) 1671debfc3dSmrg __throw_regex_error( 1681debfc3dSmrg regex_constants::error_backref, 1691debfc3dSmrg "Back-reference referred to an opened sub-expression."); 1701debfc3dSmrg this->_M_has_backref = true; 1711debfc3dSmrg _StateT __tmp(_S_opcode_backref); 1721debfc3dSmrg __tmp._M_backref_index = __index; 1731debfc3dSmrg return _M_insert_state(std::move(__tmp)); 1741debfc3dSmrg } 1751debfc3dSmrg 1761debfc3dSmrg template<typename _TraitsT> 1771debfc3dSmrg void _M_eliminate_dummy()1781debfc3dSmrg _NFA<_TraitsT>::_M_eliminate_dummy() 1791debfc3dSmrg { 1801debfc3dSmrg for (auto& __it : *this) 1811debfc3dSmrg { 1821debfc3dSmrg while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode() 1831debfc3dSmrg == _S_opcode_dummy) 1841debfc3dSmrg __it._M_next = (*this)[__it._M_next]._M_next; 1851debfc3dSmrg if (__it._M_has_alt()) 1861debfc3dSmrg while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode() 1871debfc3dSmrg == _S_opcode_dummy) 1881debfc3dSmrg __it._M_alt = (*this)[__it._M_alt]._M_next; 1891debfc3dSmrg } 1901debfc3dSmrg } 1911debfc3dSmrg 1921debfc3dSmrg // Just apply DFS on the sequence and re-link their links. 1931debfc3dSmrg template<typename _TraitsT> 1941debfc3dSmrg _StateSeq<_TraitsT> _M_clone()1951debfc3dSmrg _StateSeq<_TraitsT>::_M_clone() 1961debfc3dSmrg { 1971debfc3dSmrg std::map<_StateIdT, _StateIdT> __m; 1981debfc3dSmrg std::stack<_StateIdT> __stack; 1991debfc3dSmrg __stack.push(_M_start); 2001debfc3dSmrg while (!__stack.empty()) 2011debfc3dSmrg { 2021debfc3dSmrg auto __u = __stack.top(); 2031debfc3dSmrg __stack.pop(); 2041debfc3dSmrg auto __dup = _M_nfa[__u]; 2051debfc3dSmrg // _M_insert_state() never return -1 2061debfc3dSmrg auto __id = _M_nfa._M_insert_state(std::move(__dup)); 2071debfc3dSmrg __m[__u] = __id; 2081debfc3dSmrg if (__dup._M_has_alt()) 2091debfc3dSmrg if (__dup._M_alt != _S_invalid_state_id 2101debfc3dSmrg && __m.count(__dup._M_alt) == 0) 2111debfc3dSmrg __stack.push(__dup._M_alt); 2121debfc3dSmrg if (__u == _M_end) 2131debfc3dSmrg continue; 2141debfc3dSmrg if (__dup._M_next != _S_invalid_state_id 2151debfc3dSmrg && __m.count(__dup._M_next) == 0) 2161debfc3dSmrg __stack.push(__dup._M_next); 2171debfc3dSmrg } 2181debfc3dSmrg for (auto __it : __m) 2191debfc3dSmrg { 2201debfc3dSmrg auto __v = __it.second; 2211debfc3dSmrg auto& __ref = _M_nfa[__v]; 2221debfc3dSmrg if (__ref._M_next != _S_invalid_state_id) 223c0a68be4Smrg __ref._M_next = __m.find(__ref._M_next)->second; 224c0a68be4Smrg if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id) 225c0a68be4Smrg __ref._M_alt = __m.find(__ref._M_alt)->second; 2261debfc3dSmrg } 2271debfc3dSmrg return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]); 2281debfc3dSmrg } 229a2dc1f3fSmrg } // namespace __detail 2301debfc3dSmrg 2311debfc3dSmrg _GLIBCXX_END_NAMESPACE_VERSION 2321debfc3dSmrg } // namespace 233