11debfc3dSmrg // class template regex -*- C++ -*- 21debfc3dSmrg 38feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc. 41debfc3dSmrg // 51debfc3dSmrg // This file is part of the GNU ISO C++ Library. This library is free 61debfc3dSmrg // software; you can redistribute it and/or modify it under the 71debfc3dSmrg // terms of the GNU General Public License as published by the 81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option) 91debfc3dSmrg // any later version. 101debfc3dSmrg 111debfc3dSmrg // This library is distributed in the hope that it will be useful, 121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of 131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 141debfc3dSmrg // GNU General Public License for more details. 151debfc3dSmrg 161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional 171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version 181debfc3dSmrg // 3.1, as published by the Free Software Foundation. 191debfc3dSmrg 201debfc3dSmrg // You should have received a copy of the GNU General Public License and 211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program; 221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 231debfc3dSmrg // <http://www.gnu.org/licenses/>. 241debfc3dSmrg 251debfc3dSmrg /** 261debfc3dSmrg * @file bits/regex_executor.tcc 271debfc3dSmrg * This is an internal header file, included by other library headers. 281debfc3dSmrg * Do not attempt to use it directly. @headername{regex} 291debfc3dSmrg */ 301debfc3dSmrg 311debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default) 321debfc3dSmrg { 331debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 341debfc3dSmrg 35a2dc1f3fSmrg namespace __detail 36a2dc1f3fSmrg { 371debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 381debfc3dSmrg bool __dfs_mode> 391debfc3dSmrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_search()401debfc3dSmrg _M_search() 411debfc3dSmrg { 421debfc3dSmrg if (_M_search_from_first()) 431debfc3dSmrg return true; 441debfc3dSmrg if (_M_flags & regex_constants::match_continuous) 451debfc3dSmrg return false; 461debfc3dSmrg _M_flags |= regex_constants::match_prev_avail; 471debfc3dSmrg while (_M_begin != _M_end) 481debfc3dSmrg { 491debfc3dSmrg ++_M_begin; 501debfc3dSmrg if (_M_search_from_first()) 511debfc3dSmrg return true; 521debfc3dSmrg } 531debfc3dSmrg return false; 541debfc3dSmrg } 551debfc3dSmrg 561debfc3dSmrg // The _M_main function operates in different modes, DFS mode or BFS mode, 571debfc3dSmrg // indicated by template parameter __dfs_mode, and dispatches to one of the 581debfc3dSmrg // _M_main_dispatch overloads. 591debfc3dSmrg // 601debfc3dSmrg // ------------------------------------------------------------ 611debfc3dSmrg // 621debfc3dSmrg // DFS mode: 631debfc3dSmrg // 641debfc3dSmrg // It applies a Depth-First-Search (aka backtracking) on given NFA and input 651debfc3dSmrg // string. 661debfc3dSmrg // At the very beginning the executor stands in the start state, then it 671debfc3dSmrg // tries every possible state transition in current state recursively. Some 681debfc3dSmrg // state transitions consume input string, say, a single-char-matcher or a 691debfc3dSmrg // back-reference matcher; some don't, like assertion or other anchor nodes. 701debfc3dSmrg // When the input is exhausted and/or the current state is an accepting 711debfc3dSmrg // state, the whole executor returns true. 721debfc3dSmrg // 731debfc3dSmrg // TODO: This approach is exponentially slow for certain input. 741debfc3dSmrg // Try to compile the NFA to a DFA. 751debfc3dSmrg // 761debfc3dSmrg // Time complexity: \Omega(match_length), O(2^(_M_nfa.size())) 771debfc3dSmrg // Space complexity: \theta(match_results.size() + match_length) 781debfc3dSmrg // 791debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 801debfc3dSmrg bool __dfs_mode> 811debfc3dSmrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__dfs)821debfc3dSmrg _M_main_dispatch(_Match_mode __match_mode, __dfs) 831debfc3dSmrg { 841debfc3dSmrg _M_has_sol = false; 851debfc3dSmrg *_M_states._M_get_sol_pos() = _BiIter(); 861debfc3dSmrg _M_cur_results = _M_results; 871debfc3dSmrg _M_dfs(__match_mode, _M_states._M_start); 881debfc3dSmrg return _M_has_sol; 891debfc3dSmrg } 901debfc3dSmrg 911debfc3dSmrg // ------------------------------------------------------------ 921debfc3dSmrg // 931debfc3dSmrg // BFS mode: 941debfc3dSmrg // 951debfc3dSmrg // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html) 961debfc3dSmrg // explained this algorithm clearly. 971debfc3dSmrg // 981debfc3dSmrg // It first computes epsilon closure (states that can be achieved without 991debfc3dSmrg // consuming characters) for every state that's still matching, 1001debfc3dSmrg // using the same DFS algorithm, but doesn't re-enter states (using 1011debfc3dSmrg // _M_states._M_visited to check), nor follow _S_opcode_match. 1021debfc3dSmrg // 1031debfc3dSmrg // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) 1041debfc3dSmrg // as the start state. 1051debfc3dSmrg // 1061debfc3dSmrg // It significantly reduces potential duplicate states, so has a better 1071debfc3dSmrg // upper bound; but it requires more overhead. 1081debfc3dSmrg // 1091debfc3dSmrg // Time complexity: \Omega(match_length * match_results.size()) 1101debfc3dSmrg // O(match_length * _M_nfa.size() * match_results.size()) 1111debfc3dSmrg // Space complexity: \Omega(_M_nfa.size() + match_results.size()) 1121debfc3dSmrg // O(_M_nfa.size() * match_results.size()) 1131debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1141debfc3dSmrg bool __dfs_mode> 1151debfc3dSmrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__bfs)1161debfc3dSmrg _M_main_dispatch(_Match_mode __match_mode, __bfs) 1171debfc3dSmrg { 1181debfc3dSmrg _M_states._M_queue(_M_states._M_start, _M_results); 1191debfc3dSmrg bool __ret = false; 1201debfc3dSmrg while (1) 1211debfc3dSmrg { 1221debfc3dSmrg _M_has_sol = false; 1231debfc3dSmrg if (_M_states._M_match_queue.empty()) 1241debfc3dSmrg break; 1251debfc3dSmrg std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false); 1261debfc3dSmrg auto __old_queue = std::move(_M_states._M_match_queue); 1271debfc3dSmrg for (auto& __task : __old_queue) 1281debfc3dSmrg { 1291debfc3dSmrg _M_cur_results = std::move(__task.second); 1301debfc3dSmrg _M_dfs(__match_mode, __task.first); 1311debfc3dSmrg } 1321debfc3dSmrg if (__match_mode == _Match_mode::_Prefix) 1331debfc3dSmrg __ret |= _M_has_sol; 1341debfc3dSmrg if (_M_current == _M_end) 1351debfc3dSmrg break; 1361debfc3dSmrg ++_M_current; 1371debfc3dSmrg } 1381debfc3dSmrg if (__match_mode == _Match_mode::_Exact) 1391debfc3dSmrg __ret = _M_has_sol; 1401debfc3dSmrg _M_states._M_match_queue.clear(); 1411debfc3dSmrg return __ret; 1421debfc3dSmrg } 1431debfc3dSmrg 1441debfc3dSmrg // Return whether now match the given sub-NFA. 1451debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1461debfc3dSmrg bool __dfs_mode> 1471debfc3dSmrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_lookahead(_StateIdT __next)1481debfc3dSmrg _M_lookahead(_StateIdT __next) 1491debfc3dSmrg { 1501debfc3dSmrg // Backreferences may refer to captured content. 1511debfc3dSmrg // We may want to make this faster by not copying, 1521debfc3dSmrg // but let's not be clever prematurely. 1531debfc3dSmrg _ResultsVec __what(_M_cur_results); 1541debfc3dSmrg _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); 1551debfc3dSmrg __sub._M_states._M_start = __next; 1561debfc3dSmrg if (__sub._M_search_from_first()) 1571debfc3dSmrg { 1581debfc3dSmrg for (size_t __i = 0; __i < __what.size(); __i++) 1591debfc3dSmrg if (__what[__i].matched) 1601debfc3dSmrg _M_cur_results[__i] = __what[__i]; 1611debfc3dSmrg return true; 1621debfc3dSmrg } 1631debfc3dSmrg return false; 1641debfc3dSmrg } 1651debfc3dSmrg 1661debfc3dSmrg // __rep_count records how many times (__rep_count.second) 1671debfc3dSmrg // this node is visited under certain input iterator 1681debfc3dSmrg // (__rep_count.first). This prevent the executor from entering 1691debfc3dSmrg // infinite loop by refusing to continue when it's already been 1701debfc3dSmrg // visited more than twice. It's `twice` instead of `once` because 1711debfc3dSmrg // we need to spare one more time for potential group capture. 1721debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1731debfc3dSmrg bool __dfs_mode> 1741debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_rep_once_more(_Match_mode __match_mode,_StateIdT __i)1751debfc3dSmrg _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i) 1761debfc3dSmrg { 1771debfc3dSmrg const auto& __state = _M_nfa[__i]; 1781debfc3dSmrg auto& __rep_count = _M_rep_count[__i]; 1791debfc3dSmrg if (__rep_count.second == 0 || __rep_count.first != _M_current) 1801debfc3dSmrg { 1811debfc3dSmrg auto __back = __rep_count; 1821debfc3dSmrg __rep_count.first = _M_current; 1831debfc3dSmrg __rep_count.second = 1; 1841debfc3dSmrg _M_dfs(__match_mode, __state._M_alt); 1851debfc3dSmrg __rep_count = __back; 1861debfc3dSmrg } 1871debfc3dSmrg else 1881debfc3dSmrg { 1891debfc3dSmrg if (__rep_count.second < 2) 1901debfc3dSmrg { 1911debfc3dSmrg __rep_count.second++; 1921debfc3dSmrg _M_dfs(__match_mode, __state._M_alt); 1931debfc3dSmrg __rep_count.second--; 1941debfc3dSmrg } 1951debfc3dSmrg } 196a2dc1f3fSmrg } 1971debfc3dSmrg 1981debfc3dSmrg // _M_alt branch is "match once more", while _M_next is "get me out 1991debfc3dSmrg // of this quantifier". Executing _M_next first or _M_alt first don't 2001debfc3dSmrg // mean the same thing, and we need to choose the correct order under 2011debfc3dSmrg // given greedy mode. 2021debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2031debfc3dSmrg bool __dfs_mode> 2041debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_repeat(_Match_mode __match_mode,_StateIdT __i)2051debfc3dSmrg _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i) 2061debfc3dSmrg { 2071debfc3dSmrg const auto& __state = _M_nfa[__i]; 2081debfc3dSmrg 2091debfc3dSmrg // Greedy. 2101debfc3dSmrg if (!__state._M_neg) 2111debfc3dSmrg { 2121debfc3dSmrg _M_rep_once_more(__match_mode, __i); 2131debfc3dSmrg // If it's DFS executor and already accepted, we're done. 2141debfc3dSmrg if (!__dfs_mode || !_M_has_sol) 2151debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2161debfc3dSmrg } 2171debfc3dSmrg else // Non-greedy mode 2181debfc3dSmrg { 2191debfc3dSmrg if (__dfs_mode) 2201debfc3dSmrg { 2211debfc3dSmrg // vice-versa. 2221debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2231debfc3dSmrg if (!_M_has_sol) 2241debfc3dSmrg _M_rep_once_more(__match_mode, __i); 2251debfc3dSmrg } 2261debfc3dSmrg else 2271debfc3dSmrg { 2281debfc3dSmrg // DON'T attempt anything, because there's already another 2291debfc3dSmrg // state with higher priority accepted. This state cannot 2301debfc3dSmrg // be better by attempting its next node. 2311debfc3dSmrg if (!_M_has_sol) 2321debfc3dSmrg { 2331debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2341debfc3dSmrg // DON'T attempt anything if it's already accepted. An 2351debfc3dSmrg // accepted state *must* be better than a solution that 2361debfc3dSmrg // matches a non-greedy quantifier one more time. 2371debfc3dSmrg if (!_M_has_sol) 2381debfc3dSmrg _M_rep_once_more(__match_mode, __i); 2391debfc3dSmrg } 2401debfc3dSmrg } 2411debfc3dSmrg } 2421debfc3dSmrg } 2431debfc3dSmrg 2441debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2451debfc3dSmrg bool __dfs_mode> 2461debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_begin(_Match_mode __match_mode,_StateIdT __i)2471debfc3dSmrg _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i) 2481debfc3dSmrg { 2491debfc3dSmrg const auto& __state = _M_nfa[__i]; 2501debfc3dSmrg 2511debfc3dSmrg auto& __res = _M_cur_results[__state._M_subexpr]; 2521debfc3dSmrg auto __back = __res.first; 2531debfc3dSmrg __res.first = _M_current; 2541debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2551debfc3dSmrg __res.first = __back; 2561debfc3dSmrg } 2571debfc3dSmrg 2581debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2591debfc3dSmrg bool __dfs_mode> 2601debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_end(_Match_mode __match_mode,_StateIdT __i)2611debfc3dSmrg _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i) 2621debfc3dSmrg { 2631debfc3dSmrg const auto& __state = _M_nfa[__i]; 2641debfc3dSmrg 2651debfc3dSmrg auto& __res = _M_cur_results[__state._M_subexpr]; 2661debfc3dSmrg auto __back = __res; 2671debfc3dSmrg __res.second = _M_current; 2681debfc3dSmrg __res.matched = true; 2691debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2701debfc3dSmrg __res = __back; 2711debfc3dSmrg } 2721debfc3dSmrg 2731debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2741debfc3dSmrg bool __dfs_mode> 2751debfc3dSmrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_begin_assertion(_Match_mode __match_mode,_StateIdT __i)2761debfc3dSmrg _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i) 2771debfc3dSmrg { 2781debfc3dSmrg const auto& __state = _M_nfa[__i]; 2791debfc3dSmrg if (_M_at_begin()) 2801debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2811debfc3dSmrg } 2821debfc3dSmrg 2831debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2841debfc3dSmrg bool __dfs_mode> 2851debfc3dSmrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_end_assertion(_Match_mode __match_mode,_StateIdT __i)2861debfc3dSmrg _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i) 2871debfc3dSmrg { 2881debfc3dSmrg const auto& __state = _M_nfa[__i]; 2891debfc3dSmrg if (_M_at_end()) 2901debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 2911debfc3dSmrg } 2921debfc3dSmrg 2931debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 2941debfc3dSmrg bool __dfs_mode> 2951debfc3dSmrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_word_boundary(_Match_mode __match_mode,_StateIdT __i)2961debfc3dSmrg _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i) 2971debfc3dSmrg { 2981debfc3dSmrg const auto& __state = _M_nfa[__i]; 2991debfc3dSmrg if (_M_word_boundary() == !__state._M_neg) 3001debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 3011debfc3dSmrg } 3021debfc3dSmrg 3031debfc3dSmrg // Here __state._M_alt offers a single start node for a sub-NFA. 3041debfc3dSmrg // We recursively invoke our algorithm to match the sub-NFA. 3051debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 3061debfc3dSmrg bool __dfs_mode> 3071debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_lookahead(_Match_mode __match_mode,_StateIdT __i)3081debfc3dSmrg _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i) 3091debfc3dSmrg { 3101debfc3dSmrg const auto& __state = _M_nfa[__i]; 3111debfc3dSmrg if (_M_lookahead(__state._M_alt) == !__state._M_neg) 3121debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 3131debfc3dSmrg } 3141debfc3dSmrg 3151debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 3161debfc3dSmrg bool __dfs_mode> 3171debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_match(_Match_mode __match_mode,_StateIdT __i)3181debfc3dSmrg _M_handle_match(_Match_mode __match_mode, _StateIdT __i) 3191debfc3dSmrg { 3201debfc3dSmrg const auto& __state = _M_nfa[__i]; 3211debfc3dSmrg 3221debfc3dSmrg if (_M_current == _M_end) 3231debfc3dSmrg return; 3241debfc3dSmrg if (__dfs_mode) 3251debfc3dSmrg { 3261debfc3dSmrg if (__state._M_matches(*_M_current)) 3271debfc3dSmrg { 3281debfc3dSmrg ++_M_current; 3291debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 3301debfc3dSmrg --_M_current; 3311debfc3dSmrg } 3321debfc3dSmrg } 3331debfc3dSmrg else 3341debfc3dSmrg if (__state._M_matches(*_M_current)) 3351debfc3dSmrg _M_states._M_queue(__state._M_next, _M_cur_results); 3361debfc3dSmrg } 3371debfc3dSmrg 338a2dc1f3fSmrg template<typename _BiIter, typename _TraitsT> 339a2dc1f3fSmrg struct _Backref_matcher 340a2dc1f3fSmrg { _Backref_matcherstd::__detail::_Backref_matcher341a2dc1f3fSmrg _Backref_matcher(bool __icase, const _TraitsT& __traits) 342a2dc1f3fSmrg : _M_traits(__traits) { } 343a2dc1f3fSmrg 344a2dc1f3fSmrg bool _M_applystd::__detail::_Backref_matcher345a2dc1f3fSmrg _M_apply(_BiIter __expected_begin, 346a2dc1f3fSmrg _BiIter __expected_end, _BiIter __actual_begin, 347a2dc1f3fSmrg _BiIter __actual_end) 348a2dc1f3fSmrg { 349a2dc1f3fSmrg return _M_traits.transform(__expected_begin, __expected_end) 350a2dc1f3fSmrg == _M_traits.transform(__actual_begin, __actual_end); 351a2dc1f3fSmrg } 352a2dc1f3fSmrg 353a2dc1f3fSmrg const _TraitsT& _M_traits; 354a2dc1f3fSmrg }; 355a2dc1f3fSmrg 356a2dc1f3fSmrg template<typename _BiIter, typename _CharT> 357a2dc1f3fSmrg struct _Backref_matcher<_BiIter, std::regex_traits<_CharT>> 358a2dc1f3fSmrg { 359a2dc1f3fSmrg using _TraitsT = std::regex_traits<_CharT>; _Backref_matcherstd::__detail::_Backref_matcher360a2dc1f3fSmrg _Backref_matcher(bool __icase, const _TraitsT& __traits) 361a2dc1f3fSmrg : _M_icase(__icase), _M_traits(__traits) { } 362a2dc1f3fSmrg 363a2dc1f3fSmrg bool _M_applystd::__detail::_Backref_matcher364a2dc1f3fSmrg _M_apply(_BiIter __expected_begin, 365a2dc1f3fSmrg _BiIter __expected_end, _BiIter __actual_begin, 366a2dc1f3fSmrg _BiIter __actual_end) 367a2dc1f3fSmrg { 368a2dc1f3fSmrg if (!_M_icase) 369a2dc1f3fSmrg return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 370a2dc1f3fSmrg __actual_begin, __actual_end); 371a2dc1f3fSmrg typedef std::ctype<_CharT> __ctype_type; 372a2dc1f3fSmrg const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc()); 373a2dc1f3fSmrg return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 374a2dc1f3fSmrg __actual_begin, __actual_end, 375a2dc1f3fSmrg [this, &__fctyp](_CharT __lhs, _CharT __rhs) 376a2dc1f3fSmrg { 377a2dc1f3fSmrg return __fctyp.tolower(__lhs) 378a2dc1f3fSmrg == __fctyp.tolower(__rhs); 379a2dc1f3fSmrg }); 380a2dc1f3fSmrg } 381a2dc1f3fSmrg 382a2dc1f3fSmrg bool _M_icase; 383a2dc1f3fSmrg const _TraitsT& _M_traits; 384a2dc1f3fSmrg }; 385a2dc1f3fSmrg 3861debfc3dSmrg // First fetch the matched result from _M_cur_results as __submatch; 3871debfc3dSmrg // then compare it with 3881debfc3dSmrg // (_M_current, _M_current + (__submatch.second - __submatch.first)). 3891debfc3dSmrg // If matched, keep going; else just return and try another state. 3901debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 3911debfc3dSmrg bool __dfs_mode> 3921debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_backref(_Match_mode __match_mode,_StateIdT __i)3931debfc3dSmrg _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) 3941debfc3dSmrg { 3951debfc3dSmrg __glibcxx_assert(__dfs_mode); 3961debfc3dSmrg 3971debfc3dSmrg const auto& __state = _M_nfa[__i]; 3981debfc3dSmrg auto& __submatch = _M_cur_results[__state._M_backref_index]; 3991debfc3dSmrg if (!__submatch.matched) 4001debfc3dSmrg return; 4011debfc3dSmrg auto __last = _M_current; 4021debfc3dSmrg for (auto __tmp = __submatch.first; 4031debfc3dSmrg __last != _M_end && __tmp != __submatch.second; 4041debfc3dSmrg ++__tmp) 4051debfc3dSmrg ++__last; 406a2dc1f3fSmrg if (_Backref_matcher<_BiIter, _TraitsT>( 407a2dc1f3fSmrg _M_re.flags() & regex_constants::icase, 408a2dc1f3fSmrg _M_re._M_automaton->_M_traits)._M_apply( 409a2dc1f3fSmrg __submatch.first, __submatch.second, _M_current, __last)) 4101debfc3dSmrg { 4111debfc3dSmrg if (__last != _M_current) 4121debfc3dSmrg { 4131debfc3dSmrg auto __backup = _M_current; 4141debfc3dSmrg _M_current = __last; 4151debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 4161debfc3dSmrg _M_current = __backup; 4171debfc3dSmrg } 4181debfc3dSmrg else 4191debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 4201debfc3dSmrg } 4211debfc3dSmrg } 4221debfc3dSmrg 4231debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 4241debfc3dSmrg bool __dfs_mode> 4251debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_accept(_Match_mode __match_mode,_StateIdT)426*23f5f463Smrg _M_handle_accept(_Match_mode __match_mode, _StateIdT) 4271debfc3dSmrg { 4281debfc3dSmrg if (__dfs_mode) 4291debfc3dSmrg { 4301debfc3dSmrg __glibcxx_assert(!_M_has_sol); 4311debfc3dSmrg if (__match_mode == _Match_mode::_Exact) 4321debfc3dSmrg _M_has_sol = _M_current == _M_end; 4331debfc3dSmrg else 4341debfc3dSmrg _M_has_sol = true; 4351debfc3dSmrg if (_M_current == _M_begin 4361debfc3dSmrg && (_M_flags & regex_constants::match_not_null)) 4371debfc3dSmrg _M_has_sol = false; 4381debfc3dSmrg if (_M_has_sol) 4391debfc3dSmrg { 4401debfc3dSmrg if (_M_nfa._M_flags & regex_constants::ECMAScript) 4411debfc3dSmrg _M_results = _M_cur_results; 4421debfc3dSmrg else // POSIX 4431debfc3dSmrg { 4441debfc3dSmrg __glibcxx_assert(_M_states._M_get_sol_pos()); 4451debfc3dSmrg // Here's POSIX's logic: match the longest one. However 4461debfc3dSmrg // we never know which one (lhs or rhs of "|") is longer 4471debfc3dSmrg // unless we try both of them and compare the results. 4481debfc3dSmrg // The member variable _M_sol_pos records the end 4491debfc3dSmrg // position of the last successful match. It's better 4501debfc3dSmrg // to be larger, because POSIX regex is always greedy. 4511debfc3dSmrg // TODO: This could be slow. 4521debfc3dSmrg if (*_M_states._M_get_sol_pos() == _BiIter() 4531debfc3dSmrg || std::distance(_M_begin, 4541debfc3dSmrg *_M_states._M_get_sol_pos()) 4551debfc3dSmrg < std::distance(_M_begin, _M_current)) 4561debfc3dSmrg { 4571debfc3dSmrg *_M_states._M_get_sol_pos() = _M_current; 4581debfc3dSmrg _M_results = _M_cur_results; 4591debfc3dSmrg } 4601debfc3dSmrg } 4611debfc3dSmrg } 4621debfc3dSmrg } 4631debfc3dSmrg else 4641debfc3dSmrg { 4651debfc3dSmrg if (_M_current == _M_begin 4661debfc3dSmrg && (_M_flags & regex_constants::match_not_null)) 4671debfc3dSmrg return; 4681debfc3dSmrg if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end) 4691debfc3dSmrg if (!_M_has_sol) 4701debfc3dSmrg { 4711debfc3dSmrg _M_has_sol = true; 4721debfc3dSmrg _M_results = _M_cur_results; 4731debfc3dSmrg } 4741debfc3dSmrg } 4751debfc3dSmrg } 4761debfc3dSmrg 4771debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 4781debfc3dSmrg bool __dfs_mode> 4791debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_alternative(_Match_mode __match_mode,_StateIdT __i)4801debfc3dSmrg _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i) 4811debfc3dSmrg { 4821debfc3dSmrg const auto& __state = _M_nfa[__i]; 4831debfc3dSmrg 4841debfc3dSmrg if (_M_nfa._M_flags & regex_constants::ECMAScript) 4851debfc3dSmrg { 4861debfc3dSmrg // TODO: Fix BFS support. It is wrong. 4871debfc3dSmrg _M_dfs(__match_mode, __state._M_alt); 4881debfc3dSmrg // Pick lhs if it matches. Only try rhs if it doesn't. 4891debfc3dSmrg if (!_M_has_sol) 4901debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 4911debfc3dSmrg } 4921debfc3dSmrg else 4931debfc3dSmrg { 4941debfc3dSmrg // Try both and compare the result. 4951debfc3dSmrg // See "case _S_opcode_accept:" handling above. 4961debfc3dSmrg _M_dfs(__match_mode, __state._M_alt); 4971debfc3dSmrg auto __has_sol = _M_has_sol; 4981debfc3dSmrg _M_has_sol = false; 4991debfc3dSmrg _M_dfs(__match_mode, __state._M_next); 5001debfc3dSmrg _M_has_sol |= __has_sol; 5011debfc3dSmrg } 5021debfc3dSmrg } 5031debfc3dSmrg 5041debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 5051debfc3dSmrg bool __dfs_mode> 5061debfc3dSmrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_dfs(_Match_mode __match_mode,_StateIdT __i)5071debfc3dSmrg _M_dfs(_Match_mode __match_mode, _StateIdT __i) 5081debfc3dSmrg { 5091debfc3dSmrg if (_M_states._M_visited(__i)) 5101debfc3dSmrg return; 5111debfc3dSmrg 5121debfc3dSmrg switch (_M_nfa[__i]._M_opcode()) 5131debfc3dSmrg { 5141debfc3dSmrg case _S_opcode_repeat: 5151debfc3dSmrg _M_handle_repeat(__match_mode, __i); break; 5161debfc3dSmrg case _S_opcode_subexpr_begin: 5171debfc3dSmrg _M_handle_subexpr_begin(__match_mode, __i); break; 5181debfc3dSmrg case _S_opcode_subexpr_end: 5191debfc3dSmrg _M_handle_subexpr_end(__match_mode, __i); break; 5201debfc3dSmrg case _S_opcode_line_begin_assertion: 5211debfc3dSmrg _M_handle_line_begin_assertion(__match_mode, __i); break; 5221debfc3dSmrg case _S_opcode_line_end_assertion: 5231debfc3dSmrg _M_handle_line_end_assertion(__match_mode, __i); break; 5241debfc3dSmrg case _S_opcode_word_boundary: 5251debfc3dSmrg _M_handle_word_boundary(__match_mode, __i); break; 5261debfc3dSmrg case _S_opcode_subexpr_lookahead: 5271debfc3dSmrg _M_handle_subexpr_lookahead(__match_mode, __i); break; 5281debfc3dSmrg case _S_opcode_match: 5291debfc3dSmrg _M_handle_match(__match_mode, __i); break; 5301debfc3dSmrg case _S_opcode_backref: 5311debfc3dSmrg _M_handle_backref(__match_mode, __i); break; 5321debfc3dSmrg case _S_opcode_accept: 5331debfc3dSmrg _M_handle_accept(__match_mode, __i); break; 5341debfc3dSmrg case _S_opcode_alternative: 5351debfc3dSmrg _M_handle_alternative(__match_mode, __i); break; 5361debfc3dSmrg default: 5371debfc3dSmrg __glibcxx_assert(false); 5381debfc3dSmrg } 5391debfc3dSmrg } 5401debfc3dSmrg 5411debfc3dSmrg // Return whether now is at some word boundary. 5421debfc3dSmrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 5431debfc3dSmrg bool __dfs_mode> 5441debfc3dSmrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_word_boundary() const5451debfc3dSmrg _M_word_boundary() const 5461debfc3dSmrg { 5471debfc3dSmrg if (_M_current == _M_begin && (_M_flags & regex_constants::match_not_bow)) 5481debfc3dSmrg return false; 5491debfc3dSmrg if (_M_current == _M_end && (_M_flags & regex_constants::match_not_eow)) 5501debfc3dSmrg return false; 5511debfc3dSmrg 5521debfc3dSmrg bool __left_is_word = false; 5531debfc3dSmrg if (_M_current != _M_begin 5541debfc3dSmrg || (_M_flags & regex_constants::match_prev_avail)) 5551debfc3dSmrg { 5561debfc3dSmrg auto __prev = _M_current; 5571debfc3dSmrg if (_M_is_word(*std::prev(__prev))) 5581debfc3dSmrg __left_is_word = true; 5591debfc3dSmrg } 5601debfc3dSmrg bool __right_is_word = 5611debfc3dSmrg _M_current != _M_end && _M_is_word(*_M_current); 5621debfc3dSmrg 5631debfc3dSmrg return __left_is_word != __right_is_word; 5641debfc3dSmrg } 565a2dc1f3fSmrg } // namespace __detail 5661debfc3dSmrg 5671debfc3dSmrg _GLIBCXX_END_NAMESPACE_VERSION 5681debfc3dSmrg } // namespace 569