1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2015 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex_automaton.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 namespace __detail 34 { 35 _GLIBCXX_BEGIN_NAMESPACE_VERSION 36 37 #ifdef _GLIBCXX_DEBUG 38 inline std::ostream& 39 _State_base::_M_print(std::ostream& ostr) const 40 { 41 switch (_M_opcode) 42 { 43 case _S_opcode_alternative: 44 case _S_opcode_repeat: 45 ostr << "alt next=" << _M_next << " alt=" << _M_alt; 46 break; 47 case _S_opcode_subexpr_begin: 48 ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; 49 break; 50 case _S_opcode_subexpr_end: 51 ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; 52 break; 53 case _S_opcode_backref: 54 ostr << "backref next=" << _M_next << " index=" << _M_backref_index; 55 break; 56 case _S_opcode_match: 57 ostr << "match next=" << _M_next; 58 break; 59 case _S_opcode_accept: 60 ostr << "accept next=" << _M_next; 61 break; 62 default: 63 ostr << "unknown next=" << _M_next; 64 break; 65 } 66 return ostr; 67 } 68 69 // Prints graphviz dot commands for state. 70 inline std::ostream& 71 _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const 72 { 73 switch (_M_opcode) 74 { 75 case _S_opcode_alternative: 76 case _S_opcode_repeat: 77 __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" 78 << __id << " -> " << _M_next 79 << " [label=\"next\", tailport=\"s\"];\n" 80 << __id << " -> " << _M_alt 81 << " [label=\"alt\", tailport=\"n\"];\n"; 82 break; 83 case _S_opcode_backref: 84 __ostr << __id << " [label=\"" << __id << "\\nBACKREF " 85 << _M_subexpr << "\"];\n" 86 << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 87 break; 88 case _S_opcode_line_begin_assertion: 89 __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n" 90 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 91 break; 92 case _S_opcode_line_end_assertion: 93 __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n" 94 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 95 break; 96 case _S_opcode_word_boundary: 97 __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY " 98 << _M_neg << "\"];\n" 99 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 100 break; 101 case _S_opcode_subexpr_lookahead: 102 __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n" 103 << __id << " -> " << _M_next 104 << " [label=\"epsilon\", tailport=\"s\"];\n" 105 << __id << " -> " << _M_alt 106 << " [label=\"<assert>\", tailport=\"n\"];\n"; 107 break; 108 case _S_opcode_subexpr_begin: 109 __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " 110 << _M_subexpr << "\"];\n" 111 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 112 break; 113 case _S_opcode_subexpr_end: 114 __ostr << __id << " [label=\"" << __id << "\\nSEND " 115 << _M_subexpr << "\"];\n" 116 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 117 break; 118 case _S_opcode_dummy: 119 break; 120 case _S_opcode_match: 121 __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" 122 << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 123 break; 124 case _S_opcode_accept: 125 __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; 126 break; 127 default: 128 _GLIBCXX_DEBUG_ASSERT(false); 129 break; 130 } 131 return __ostr; 132 } 133 134 template<typename _TraitsT> 135 std::ostream& 136 _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const 137 { 138 __ostr << "digraph _Nfa {\n" 139 " rankdir=LR;\n"; 140 for (size_t __i = 0; __i < this->size(); ++__i) 141 (*this)[__i]._M_dot(__ostr, __i); 142 __ostr << "}\n"; 143 return __ostr; 144 } 145 #endif 146 147 template<typename _TraitsT> 148 _StateIdT 149 _NFA<_TraitsT>::_M_insert_backref(size_t __index) 150 { 151 // To figure out whether a backref is valid, a stack is used to store 152 // unfinished sub-expressions. For example, when parsing 153 // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3 154 // sub expressions are parsed or partially parsed(in the stack), aka, 155 // "(a..", "(b)" and "(c.."). 156 // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this 157 // time, "\\2" is valid, but "\\1" and "\\3" are not. 158 if (__index >= _M_subexpr_count) 159 __throw_regex_error(regex_constants::error_backref); 160 for (auto __it : this->_M_paren_stack) 161 if (__index == __it) 162 __throw_regex_error(regex_constants::error_backref); 163 this->_M_has_backref = true; 164 _StateT __tmp(_S_opcode_backref); 165 __tmp._M_backref_index = __index; 166 return _M_insert_state(std::move(__tmp)); 167 } 168 169 template<typename _TraitsT> 170 void 171 _NFA<_TraitsT>::_M_eliminate_dummy() 172 { 173 for (auto& __it : *this) 174 { 175 while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode 176 == _S_opcode_dummy) 177 __it._M_next = (*this)[__it._M_next]._M_next; 178 if (__it._M_opcode == _S_opcode_alternative 179 || __it._M_opcode == _S_opcode_repeat 180 || __it._M_opcode == _S_opcode_subexpr_lookahead) 181 while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode 182 == _S_opcode_dummy) 183 __it._M_alt = (*this)[__it._M_alt]._M_next; 184 } 185 } 186 187 // Just apply DFS on the sequence and re-link their links. 188 template<typename _TraitsT> 189 _StateSeq<_TraitsT> 190 _StateSeq<_TraitsT>::_M_clone() 191 { 192 std::map<_StateIdT, _StateIdT> __m; 193 std::stack<_StateIdT> __stack; 194 __stack.push(_M_start); 195 while (!__stack.empty()) 196 { 197 auto __u = __stack.top(); 198 __stack.pop(); 199 auto __dup = _M_nfa[__u]; 200 // _M_insert_state() never return -1 201 auto __id = _M_nfa._M_insert_state(__dup); 202 __m[__u] = __id; 203 if (__dup._M_opcode == _S_opcode_alternative 204 || __dup._M_opcode == _S_opcode_repeat 205 || __dup._M_opcode == _S_opcode_subexpr_lookahead) 206 if (__dup._M_alt != _S_invalid_state_id 207 && __m.count(__dup._M_alt) == 0) 208 __stack.push(__dup._M_alt); 209 if (__u == _M_end) 210 continue; 211 if (__dup._M_next != _S_invalid_state_id 212 && __m.count(__dup._M_next) == 0) 213 __stack.push(__dup._M_next); 214 } 215 for (auto __it : __m) 216 { 217 auto __v = __it.second; 218 auto& __ref = _M_nfa[__v]; 219 if (__ref._M_next != _S_invalid_state_id) 220 { 221 _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0); 222 __ref._M_next = __m[__ref._M_next]; 223 } 224 if (__ref._M_opcode == _S_opcode_alternative 225 || __ref._M_opcode == _S_opcode_repeat 226 || __ref._M_opcode == _S_opcode_subexpr_lookahead) 227 if (__ref._M_alt != _S_invalid_state_id) 228 { 229 _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0); 230 __ref._M_alt = __m[__ref._M_alt]; 231 } 232 } 233 return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]); 234 } 235 236 _GLIBCXX_END_NAMESPACE_VERSION 237 } // namespace __detail 238 } // namespace 239