1*38fd1498Szrj // class template regex -*- C++ -*- 2*38fd1498Szrj 3*38fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc. 4*38fd1498Szrj // 5*38fd1498Szrj // This file is part of the GNU ISO C++ Library. This library is free 6*38fd1498Szrj // software; you can redistribute it and/or modify it under the 7*38fd1498Szrj // terms of the GNU General Public License as published by the 8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option) 9*38fd1498Szrj // any later version. 10*38fd1498Szrj 11*38fd1498Szrj // This library is distributed in the hope that it will be useful, 12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of 13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14*38fd1498Szrj // GNU General Public License for more details. 15*38fd1498Szrj 16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional 17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version 18*38fd1498Szrj // 3.1, as published by the Free Software Foundation. 19*38fd1498Szrj 20*38fd1498Szrj // You should have received a copy of the GNU General Public License and 21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program; 22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*38fd1498Szrj // <http://www.gnu.org/licenses/>. 24*38fd1498Szrj 25*38fd1498Szrj /** 26*38fd1498Szrj * @file bits/regex_executor.tcc 27*38fd1498Szrj * This is an internal header file, included by other library headers. 28*38fd1498Szrj * Do not attempt to use it directly. @headername{regex} 29*38fd1498Szrj */ 30*38fd1498Szrj 31*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default) 32*38fd1498Szrj { 33*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION 34*38fd1498Szrj 35*38fd1498Szrj namespace __detail 36*38fd1498Szrj { 37*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 38*38fd1498Szrj bool __dfs_mode> 39*38fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 40*38fd1498Szrj _M_search() 41*38fd1498Szrj { 42*38fd1498Szrj if (_M_search_from_first()) 43*38fd1498Szrj return true; 44*38fd1498Szrj if (_M_flags & regex_constants::match_continuous) 45*38fd1498Szrj return false; 46*38fd1498Szrj _M_flags |= regex_constants::match_prev_avail; 47*38fd1498Szrj while (_M_begin != _M_end) 48*38fd1498Szrj { 49*38fd1498Szrj ++_M_begin; 50*38fd1498Szrj if (_M_search_from_first()) 51*38fd1498Szrj return true; 52*38fd1498Szrj } 53*38fd1498Szrj return false; 54*38fd1498Szrj } 55*38fd1498Szrj 56*38fd1498Szrj // The _M_main function operates in different modes, DFS mode or BFS mode, 57*38fd1498Szrj // indicated by template parameter __dfs_mode, and dispatches to one of the 58*38fd1498Szrj // _M_main_dispatch overloads. 59*38fd1498Szrj // 60*38fd1498Szrj // ------------------------------------------------------------ 61*38fd1498Szrj // 62*38fd1498Szrj // DFS mode: 63*38fd1498Szrj // 64*38fd1498Szrj // It applies a Depth-First-Search (aka backtracking) on given NFA and input 65*38fd1498Szrj // string. 66*38fd1498Szrj // At the very beginning the executor stands in the start state, then it 67*38fd1498Szrj // tries every possible state transition in current state recursively. Some 68*38fd1498Szrj // state transitions consume input string, say, a single-char-matcher or a 69*38fd1498Szrj // back-reference matcher; some don't, like assertion or other anchor nodes. 70*38fd1498Szrj // When the input is exhausted and/or the current state is an accepting 71*38fd1498Szrj // state, the whole executor returns true. 72*38fd1498Szrj // 73*38fd1498Szrj // TODO: This approach is exponentially slow for certain input. 74*38fd1498Szrj // Try to compile the NFA to a DFA. 75*38fd1498Szrj // 76*38fd1498Szrj // Time complexity: \Omega(match_length), O(2^(_M_nfa.size())) 77*38fd1498Szrj // Space complexity: \theta(match_results.size() + match_length) 78*38fd1498Szrj // 79*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 80*38fd1498Szrj bool __dfs_mode> 81*38fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 82*38fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __dfs) 83*38fd1498Szrj { 84*38fd1498Szrj _M_has_sol = false; 85*38fd1498Szrj *_M_states._M_get_sol_pos() = _BiIter(); 86*38fd1498Szrj _M_cur_results = _M_results; 87*38fd1498Szrj _M_dfs(__match_mode, _M_states._M_start); 88*38fd1498Szrj return _M_has_sol; 89*38fd1498Szrj } 90*38fd1498Szrj 91*38fd1498Szrj // ------------------------------------------------------------ 92*38fd1498Szrj // 93*38fd1498Szrj // BFS mode: 94*38fd1498Szrj // 95*38fd1498Szrj // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html) 96*38fd1498Szrj // explained this algorithm clearly. 97*38fd1498Szrj // 98*38fd1498Szrj // It first computes epsilon closure (states that can be achieved without 99*38fd1498Szrj // consuming characters) for every state that's still matching, 100*38fd1498Szrj // using the same DFS algorithm, but doesn't re-enter states (using 101*38fd1498Szrj // _M_states._M_visited to check), nor follow _S_opcode_match. 102*38fd1498Szrj // 103*38fd1498Szrj // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) 104*38fd1498Szrj // as the start state. 105*38fd1498Szrj // 106*38fd1498Szrj // It significantly reduces potential duplicate states, so has a better 107*38fd1498Szrj // upper bound; but it requires more overhead. 108*38fd1498Szrj // 109*38fd1498Szrj // Time complexity: \Omega(match_length * match_results.size()) 110*38fd1498Szrj // O(match_length * _M_nfa.size() * match_results.size()) 111*38fd1498Szrj // Space complexity: \Omega(_M_nfa.size() + match_results.size()) 112*38fd1498Szrj // O(_M_nfa.size() * match_results.size()) 113*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 114*38fd1498Szrj bool __dfs_mode> 115*38fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 116*38fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __bfs) 117*38fd1498Szrj { 118*38fd1498Szrj _M_states._M_queue(_M_states._M_start, _M_results); 119*38fd1498Szrj bool __ret = false; 120*38fd1498Szrj while (1) 121*38fd1498Szrj { 122*38fd1498Szrj _M_has_sol = false; 123*38fd1498Szrj if (_M_states._M_match_queue.empty()) 124*38fd1498Szrj break; 125*38fd1498Szrj std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false); 126*38fd1498Szrj auto __old_queue = std::move(_M_states._M_match_queue); 127*38fd1498Szrj for (auto& __task : __old_queue) 128*38fd1498Szrj { 129*38fd1498Szrj _M_cur_results = std::move(__task.second); 130*38fd1498Szrj _M_dfs(__match_mode, __task.first); 131*38fd1498Szrj } 132*38fd1498Szrj if (__match_mode == _Match_mode::_Prefix) 133*38fd1498Szrj __ret |= _M_has_sol; 134*38fd1498Szrj if (_M_current == _M_end) 135*38fd1498Szrj break; 136*38fd1498Szrj ++_M_current; 137*38fd1498Szrj } 138*38fd1498Szrj if (__match_mode == _Match_mode::_Exact) 139*38fd1498Szrj __ret = _M_has_sol; 140*38fd1498Szrj _M_states._M_match_queue.clear(); 141*38fd1498Szrj return __ret; 142*38fd1498Szrj } 143*38fd1498Szrj 144*38fd1498Szrj // Return whether now match the given sub-NFA. 145*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 146*38fd1498Szrj bool __dfs_mode> 147*38fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 148*38fd1498Szrj _M_lookahead(_StateIdT __next) 149*38fd1498Szrj { 150*38fd1498Szrj // Backreferences may refer to captured content. 151*38fd1498Szrj // We may want to make this faster by not copying, 152*38fd1498Szrj // but let's not be clever prematurely. 153*38fd1498Szrj _ResultsVec __what(_M_cur_results); 154*38fd1498Szrj _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); 155*38fd1498Szrj __sub._M_states._M_start = __next; 156*38fd1498Szrj if (__sub._M_search_from_first()) 157*38fd1498Szrj { 158*38fd1498Szrj for (size_t __i = 0; __i < __what.size(); __i++) 159*38fd1498Szrj if (__what[__i].matched) 160*38fd1498Szrj _M_cur_results[__i] = __what[__i]; 161*38fd1498Szrj return true; 162*38fd1498Szrj } 163*38fd1498Szrj return false; 164*38fd1498Szrj } 165*38fd1498Szrj 166*38fd1498Szrj // __rep_count records how many times (__rep_count.second) 167*38fd1498Szrj // this node is visited under certain input iterator 168*38fd1498Szrj // (__rep_count.first). This prevent the executor from entering 169*38fd1498Szrj // infinite loop by refusing to continue when it's already been 170*38fd1498Szrj // visited more than twice. It's `twice` instead of `once` because 171*38fd1498Szrj // we need to spare one more time for potential group capture. 172*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 173*38fd1498Szrj bool __dfs_mode> 174*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 175*38fd1498Szrj _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i) 176*38fd1498Szrj { 177*38fd1498Szrj const auto& __state = _M_nfa[__i]; 178*38fd1498Szrj auto& __rep_count = _M_rep_count[__i]; 179*38fd1498Szrj if (__rep_count.second == 0 || __rep_count.first != _M_current) 180*38fd1498Szrj { 181*38fd1498Szrj auto __back = __rep_count; 182*38fd1498Szrj __rep_count.first = _M_current; 183*38fd1498Szrj __rep_count.second = 1; 184*38fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 185*38fd1498Szrj __rep_count = __back; 186*38fd1498Szrj } 187*38fd1498Szrj else 188*38fd1498Szrj { 189*38fd1498Szrj if (__rep_count.second < 2) 190*38fd1498Szrj { 191*38fd1498Szrj __rep_count.second++; 192*38fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 193*38fd1498Szrj __rep_count.second--; 194*38fd1498Szrj } 195*38fd1498Szrj } 196*38fd1498Szrj } 197*38fd1498Szrj 198*38fd1498Szrj // _M_alt branch is "match once more", while _M_next is "get me out 199*38fd1498Szrj // of this quantifier". Executing _M_next first or _M_alt first don't 200*38fd1498Szrj // mean the same thing, and we need to choose the correct order under 201*38fd1498Szrj // given greedy mode. 202*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 203*38fd1498Szrj bool __dfs_mode> 204*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 205*38fd1498Szrj _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i) 206*38fd1498Szrj { 207*38fd1498Szrj const auto& __state = _M_nfa[__i]; 208*38fd1498Szrj 209*38fd1498Szrj // Greedy. 210*38fd1498Szrj if (!__state._M_neg) 211*38fd1498Szrj { 212*38fd1498Szrj _M_rep_once_more(__match_mode, __i); 213*38fd1498Szrj // If it's DFS executor and already accepted, we're done. 214*38fd1498Szrj if (!__dfs_mode || !_M_has_sol) 215*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 216*38fd1498Szrj } 217*38fd1498Szrj else // Non-greedy mode 218*38fd1498Szrj { 219*38fd1498Szrj if (__dfs_mode) 220*38fd1498Szrj { 221*38fd1498Szrj // vice-versa. 222*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 223*38fd1498Szrj if (!_M_has_sol) 224*38fd1498Szrj _M_rep_once_more(__match_mode, __i); 225*38fd1498Szrj } 226*38fd1498Szrj else 227*38fd1498Szrj { 228*38fd1498Szrj // DON'T attempt anything, because there's already another 229*38fd1498Szrj // state with higher priority accepted. This state cannot 230*38fd1498Szrj // be better by attempting its next node. 231*38fd1498Szrj if (!_M_has_sol) 232*38fd1498Szrj { 233*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 234*38fd1498Szrj // DON'T attempt anything if it's already accepted. An 235*38fd1498Szrj // accepted state *must* be better than a solution that 236*38fd1498Szrj // matches a non-greedy quantifier one more time. 237*38fd1498Szrj if (!_M_has_sol) 238*38fd1498Szrj _M_rep_once_more(__match_mode, __i); 239*38fd1498Szrj } 240*38fd1498Szrj } 241*38fd1498Szrj } 242*38fd1498Szrj } 243*38fd1498Szrj 244*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 245*38fd1498Szrj bool __dfs_mode> 246*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 247*38fd1498Szrj _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i) 248*38fd1498Szrj { 249*38fd1498Szrj const auto& __state = _M_nfa[__i]; 250*38fd1498Szrj 251*38fd1498Szrj auto& __res = _M_cur_results[__state._M_subexpr]; 252*38fd1498Szrj auto __back = __res.first; 253*38fd1498Szrj __res.first = _M_current; 254*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 255*38fd1498Szrj __res.first = __back; 256*38fd1498Szrj } 257*38fd1498Szrj 258*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 259*38fd1498Szrj bool __dfs_mode> 260*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 261*38fd1498Szrj _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i) 262*38fd1498Szrj { 263*38fd1498Szrj const auto& __state = _M_nfa[__i]; 264*38fd1498Szrj 265*38fd1498Szrj auto& __res = _M_cur_results[__state._M_subexpr]; 266*38fd1498Szrj auto __back = __res; 267*38fd1498Szrj __res.second = _M_current; 268*38fd1498Szrj __res.matched = true; 269*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 270*38fd1498Szrj __res = __back; 271*38fd1498Szrj } 272*38fd1498Szrj 273*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 274*38fd1498Szrj bool __dfs_mode> 275*38fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 276*38fd1498Szrj _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i) 277*38fd1498Szrj { 278*38fd1498Szrj const auto& __state = _M_nfa[__i]; 279*38fd1498Szrj if (_M_at_begin()) 280*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 281*38fd1498Szrj } 282*38fd1498Szrj 283*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 284*38fd1498Szrj bool __dfs_mode> 285*38fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 286*38fd1498Szrj _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i) 287*38fd1498Szrj { 288*38fd1498Szrj const auto& __state = _M_nfa[__i]; 289*38fd1498Szrj if (_M_at_end()) 290*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 291*38fd1498Szrj } 292*38fd1498Szrj 293*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 294*38fd1498Szrj bool __dfs_mode> 295*38fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 296*38fd1498Szrj _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i) 297*38fd1498Szrj { 298*38fd1498Szrj const auto& __state = _M_nfa[__i]; 299*38fd1498Szrj if (_M_word_boundary() == !__state._M_neg) 300*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 301*38fd1498Szrj } 302*38fd1498Szrj 303*38fd1498Szrj // Here __state._M_alt offers a single start node for a sub-NFA. 304*38fd1498Szrj // We recursively invoke our algorithm to match the sub-NFA. 305*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 306*38fd1498Szrj bool __dfs_mode> 307*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 308*38fd1498Szrj _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i) 309*38fd1498Szrj { 310*38fd1498Szrj const auto& __state = _M_nfa[__i]; 311*38fd1498Szrj if (_M_lookahead(__state._M_alt) == !__state._M_neg) 312*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 313*38fd1498Szrj } 314*38fd1498Szrj 315*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 316*38fd1498Szrj bool __dfs_mode> 317*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 318*38fd1498Szrj _M_handle_match(_Match_mode __match_mode, _StateIdT __i) 319*38fd1498Szrj { 320*38fd1498Szrj const auto& __state = _M_nfa[__i]; 321*38fd1498Szrj 322*38fd1498Szrj if (_M_current == _M_end) 323*38fd1498Szrj return; 324*38fd1498Szrj if (__dfs_mode) 325*38fd1498Szrj { 326*38fd1498Szrj if (__state._M_matches(*_M_current)) 327*38fd1498Szrj { 328*38fd1498Szrj ++_M_current; 329*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 330*38fd1498Szrj --_M_current; 331*38fd1498Szrj } 332*38fd1498Szrj } 333*38fd1498Szrj else 334*38fd1498Szrj if (__state._M_matches(*_M_current)) 335*38fd1498Szrj _M_states._M_queue(__state._M_next, _M_cur_results); 336*38fd1498Szrj } 337*38fd1498Szrj 338*38fd1498Szrj template<typename _BiIter, typename _TraitsT> 339*38fd1498Szrj struct _Backref_matcher 340*38fd1498Szrj { 341*38fd1498Szrj _Backref_matcher(bool __icase, const _TraitsT& __traits) 342*38fd1498Szrj : _M_traits(__traits) { } 343*38fd1498Szrj 344*38fd1498Szrj bool 345*38fd1498Szrj _M_apply(_BiIter __expected_begin, 346*38fd1498Szrj _BiIter __expected_end, _BiIter __actual_begin, 347*38fd1498Szrj _BiIter __actual_end) 348*38fd1498Szrj { 349*38fd1498Szrj return _M_traits.transform(__expected_begin, __expected_end) 350*38fd1498Szrj == _M_traits.transform(__actual_begin, __actual_end); 351*38fd1498Szrj } 352*38fd1498Szrj 353*38fd1498Szrj const _TraitsT& _M_traits; 354*38fd1498Szrj }; 355*38fd1498Szrj 356*38fd1498Szrj template<typename _BiIter, typename _CharT> 357*38fd1498Szrj struct _Backref_matcher<_BiIter, std::regex_traits<_CharT>> 358*38fd1498Szrj { 359*38fd1498Szrj using _TraitsT = std::regex_traits<_CharT>; 360*38fd1498Szrj _Backref_matcher(bool __icase, const _TraitsT& __traits) 361*38fd1498Szrj : _M_icase(__icase), _M_traits(__traits) { } 362*38fd1498Szrj 363*38fd1498Szrj bool 364*38fd1498Szrj _M_apply(_BiIter __expected_begin, 365*38fd1498Szrj _BiIter __expected_end, _BiIter __actual_begin, 366*38fd1498Szrj _BiIter __actual_end) 367*38fd1498Szrj { 368*38fd1498Szrj if (!_M_icase) 369*38fd1498Szrj return std::__equal4(__expected_begin, __expected_end, 370*38fd1498Szrj __actual_begin, __actual_end); 371*38fd1498Szrj typedef std::ctype<_CharT> __ctype_type; 372*38fd1498Szrj const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc()); 373*38fd1498Szrj return std::__equal4(__expected_begin, __expected_end, 374*38fd1498Szrj __actual_begin, __actual_end, 375*38fd1498Szrj [this, &__fctyp](_CharT __lhs, _CharT __rhs) 376*38fd1498Szrj { 377*38fd1498Szrj return __fctyp.tolower(__lhs) 378*38fd1498Szrj == __fctyp.tolower(__rhs); 379*38fd1498Szrj }); 380*38fd1498Szrj } 381*38fd1498Szrj 382*38fd1498Szrj bool _M_icase; 383*38fd1498Szrj const _TraitsT& _M_traits; 384*38fd1498Szrj }; 385*38fd1498Szrj 386*38fd1498Szrj // First fetch the matched result from _M_cur_results as __submatch; 387*38fd1498Szrj // then compare it with 388*38fd1498Szrj // (_M_current, _M_current + (__submatch.second - __submatch.first)). 389*38fd1498Szrj // If matched, keep going; else just return and try another state. 390*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 391*38fd1498Szrj bool __dfs_mode> 392*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 393*38fd1498Szrj _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) 394*38fd1498Szrj { 395*38fd1498Szrj __glibcxx_assert(__dfs_mode); 396*38fd1498Szrj 397*38fd1498Szrj const auto& __state = _M_nfa[__i]; 398*38fd1498Szrj auto& __submatch = _M_cur_results[__state._M_backref_index]; 399*38fd1498Szrj if (!__submatch.matched) 400*38fd1498Szrj return; 401*38fd1498Szrj auto __last = _M_current; 402*38fd1498Szrj for (auto __tmp = __submatch.first; 403*38fd1498Szrj __last != _M_end && __tmp != __submatch.second; 404*38fd1498Szrj ++__tmp) 405*38fd1498Szrj ++__last; 406*38fd1498Szrj if (_Backref_matcher<_BiIter, _TraitsT>( 407*38fd1498Szrj _M_re.flags() & regex_constants::icase, 408*38fd1498Szrj _M_re._M_automaton->_M_traits)._M_apply( 409*38fd1498Szrj __submatch.first, __submatch.second, _M_current, __last)) 410*38fd1498Szrj { 411*38fd1498Szrj if (__last != _M_current) 412*38fd1498Szrj { 413*38fd1498Szrj auto __backup = _M_current; 414*38fd1498Szrj _M_current = __last; 415*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 416*38fd1498Szrj _M_current = __backup; 417*38fd1498Szrj } 418*38fd1498Szrj else 419*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 420*38fd1498Szrj } 421*38fd1498Szrj } 422*38fd1498Szrj 423*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 424*38fd1498Szrj bool __dfs_mode> 425*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 426*38fd1498Szrj _M_handle_accept(_Match_mode __match_mode, _StateIdT __i) 427*38fd1498Szrj { 428*38fd1498Szrj if (__dfs_mode) 429*38fd1498Szrj { 430*38fd1498Szrj __glibcxx_assert(!_M_has_sol); 431*38fd1498Szrj if (__match_mode == _Match_mode::_Exact) 432*38fd1498Szrj _M_has_sol = _M_current == _M_end; 433*38fd1498Szrj else 434*38fd1498Szrj _M_has_sol = true; 435*38fd1498Szrj if (_M_current == _M_begin 436*38fd1498Szrj && (_M_flags & regex_constants::match_not_null)) 437*38fd1498Szrj _M_has_sol = false; 438*38fd1498Szrj if (_M_has_sol) 439*38fd1498Szrj { 440*38fd1498Szrj if (_M_nfa._M_flags & regex_constants::ECMAScript) 441*38fd1498Szrj _M_results = _M_cur_results; 442*38fd1498Szrj else // POSIX 443*38fd1498Szrj { 444*38fd1498Szrj __glibcxx_assert(_M_states._M_get_sol_pos()); 445*38fd1498Szrj // Here's POSIX's logic: match the longest one. However 446*38fd1498Szrj // we never know which one (lhs or rhs of "|") is longer 447*38fd1498Szrj // unless we try both of them and compare the results. 448*38fd1498Szrj // The member variable _M_sol_pos records the end 449*38fd1498Szrj // position of the last successful match. It's better 450*38fd1498Szrj // to be larger, because POSIX regex is always greedy. 451*38fd1498Szrj // TODO: This could be slow. 452*38fd1498Szrj if (*_M_states._M_get_sol_pos() == _BiIter() 453*38fd1498Szrj || std::distance(_M_begin, 454*38fd1498Szrj *_M_states._M_get_sol_pos()) 455*38fd1498Szrj < std::distance(_M_begin, _M_current)) 456*38fd1498Szrj { 457*38fd1498Szrj *_M_states._M_get_sol_pos() = _M_current; 458*38fd1498Szrj _M_results = _M_cur_results; 459*38fd1498Szrj } 460*38fd1498Szrj } 461*38fd1498Szrj } 462*38fd1498Szrj } 463*38fd1498Szrj else 464*38fd1498Szrj { 465*38fd1498Szrj if (_M_current == _M_begin 466*38fd1498Szrj && (_M_flags & regex_constants::match_not_null)) 467*38fd1498Szrj return; 468*38fd1498Szrj if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end) 469*38fd1498Szrj if (!_M_has_sol) 470*38fd1498Szrj { 471*38fd1498Szrj _M_has_sol = true; 472*38fd1498Szrj _M_results = _M_cur_results; 473*38fd1498Szrj } 474*38fd1498Szrj } 475*38fd1498Szrj } 476*38fd1498Szrj 477*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 478*38fd1498Szrj bool __dfs_mode> 479*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 480*38fd1498Szrj _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i) 481*38fd1498Szrj { 482*38fd1498Szrj const auto& __state = _M_nfa[__i]; 483*38fd1498Szrj 484*38fd1498Szrj if (_M_nfa._M_flags & regex_constants::ECMAScript) 485*38fd1498Szrj { 486*38fd1498Szrj // TODO: Fix BFS support. It is wrong. 487*38fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 488*38fd1498Szrj // Pick lhs if it matches. Only try rhs if it doesn't. 489*38fd1498Szrj if (!_M_has_sol) 490*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 491*38fd1498Szrj } 492*38fd1498Szrj else 493*38fd1498Szrj { 494*38fd1498Szrj // Try both and compare the result. 495*38fd1498Szrj // See "case _S_opcode_accept:" handling above. 496*38fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 497*38fd1498Szrj auto __has_sol = _M_has_sol; 498*38fd1498Szrj _M_has_sol = false; 499*38fd1498Szrj _M_dfs(__match_mode, __state._M_next); 500*38fd1498Szrj _M_has_sol |= __has_sol; 501*38fd1498Szrj } 502*38fd1498Szrj } 503*38fd1498Szrj 504*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 505*38fd1498Szrj bool __dfs_mode> 506*38fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 507*38fd1498Szrj _M_dfs(_Match_mode __match_mode, _StateIdT __i) 508*38fd1498Szrj { 509*38fd1498Szrj if (_M_states._M_visited(__i)) 510*38fd1498Szrj return; 511*38fd1498Szrj 512*38fd1498Szrj switch (_M_nfa[__i]._M_opcode()) 513*38fd1498Szrj { 514*38fd1498Szrj case _S_opcode_repeat: 515*38fd1498Szrj _M_handle_repeat(__match_mode, __i); break; 516*38fd1498Szrj case _S_opcode_subexpr_begin: 517*38fd1498Szrj _M_handle_subexpr_begin(__match_mode, __i); break; 518*38fd1498Szrj case _S_opcode_subexpr_end: 519*38fd1498Szrj _M_handle_subexpr_end(__match_mode, __i); break; 520*38fd1498Szrj case _S_opcode_line_begin_assertion: 521*38fd1498Szrj _M_handle_line_begin_assertion(__match_mode, __i); break; 522*38fd1498Szrj case _S_opcode_line_end_assertion: 523*38fd1498Szrj _M_handle_line_end_assertion(__match_mode, __i); break; 524*38fd1498Szrj case _S_opcode_word_boundary: 525*38fd1498Szrj _M_handle_word_boundary(__match_mode, __i); break; 526*38fd1498Szrj case _S_opcode_subexpr_lookahead: 527*38fd1498Szrj _M_handle_subexpr_lookahead(__match_mode, __i); break; 528*38fd1498Szrj case _S_opcode_match: 529*38fd1498Szrj _M_handle_match(__match_mode, __i); break; 530*38fd1498Szrj case _S_opcode_backref: 531*38fd1498Szrj _M_handle_backref(__match_mode, __i); break; 532*38fd1498Szrj case _S_opcode_accept: 533*38fd1498Szrj _M_handle_accept(__match_mode, __i); break; 534*38fd1498Szrj case _S_opcode_alternative: 535*38fd1498Szrj _M_handle_alternative(__match_mode, __i); break; 536*38fd1498Szrj default: 537*38fd1498Szrj __glibcxx_assert(false); 538*38fd1498Szrj } 539*38fd1498Szrj } 540*38fd1498Szrj 541*38fd1498Szrj // Return whether now is at some word boundary. 542*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 543*38fd1498Szrj bool __dfs_mode> 544*38fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 545*38fd1498Szrj _M_word_boundary() const 546*38fd1498Szrj { 547*38fd1498Szrj if (_M_current == _M_begin && (_M_flags & regex_constants::match_not_bow)) 548*38fd1498Szrj return false; 549*38fd1498Szrj if (_M_current == _M_end && (_M_flags & regex_constants::match_not_eow)) 550*38fd1498Szrj return false; 551*38fd1498Szrj 552*38fd1498Szrj bool __left_is_word = false; 553*38fd1498Szrj if (_M_current != _M_begin 554*38fd1498Szrj || (_M_flags & regex_constants::match_prev_avail)) 555*38fd1498Szrj { 556*38fd1498Szrj auto __prev = _M_current; 557*38fd1498Szrj if (_M_is_word(*std::prev(__prev))) 558*38fd1498Szrj __left_is_word = true; 559*38fd1498Szrj } 560*38fd1498Szrj bool __right_is_word = 561*38fd1498Szrj _M_current != _M_end && _M_is_word(*_M_current); 562*38fd1498Szrj 563*38fd1498Szrj return __left_is_word != __right_is_word; 564*38fd1498Szrj } 565*38fd1498Szrj } // namespace __detail 566*38fd1498Szrj 567*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION 568*38fd1498Szrj } // namespace 569