// class template regex -*- C++ -*-

// Copyright (C) 2013-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_executor.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly.
@headername{regex} 2938fd1498Szrj */ 3038fd1498Szrj 3138fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default) 3238fd1498Szrj { 3338fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION 3438fd1498Szrj 3538fd1498Szrj namespace __detail 3638fd1498Szrj { 3738fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 3838fd1498Szrj bool __dfs_mode> 3938fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_search()4038fd1498Szrj _M_search() 4138fd1498Szrj { 4238fd1498Szrj if (_M_search_from_first()) 4338fd1498Szrj return true; 4438fd1498Szrj if (_M_flags & regex_constants::match_continuous) 4538fd1498Szrj return false; 4638fd1498Szrj _M_flags |= regex_constants::match_prev_avail; 4738fd1498Szrj while (_M_begin != _M_end) 4838fd1498Szrj { 4938fd1498Szrj ++_M_begin; 5038fd1498Szrj if (_M_search_from_first()) 5138fd1498Szrj return true; 5238fd1498Szrj } 5338fd1498Szrj return false; 5438fd1498Szrj } 5538fd1498Szrj 5638fd1498Szrj // The _M_main function operates in different modes, DFS mode or BFS mode, 5738fd1498Szrj // indicated by template parameter __dfs_mode, and dispatches to one of the 5838fd1498Szrj // _M_main_dispatch overloads. 5938fd1498Szrj // 6038fd1498Szrj // ------------------------------------------------------------ 6138fd1498Szrj // 6238fd1498Szrj // DFS mode: 6338fd1498Szrj // 6438fd1498Szrj // It applies a Depth-First-Search (aka backtracking) on given NFA and input 6538fd1498Szrj // string. 6638fd1498Szrj // At the very beginning the executor stands in the start state, then it 6738fd1498Szrj // tries every possible state transition in current state recursively. Some 6838fd1498Szrj // state transitions consume input string, say, a single-char-matcher or a 6938fd1498Szrj // back-reference matcher; some don't, like assertion or other anchor nodes. 7038fd1498Szrj // When the input is exhausted and/or the current state is an accepting 7138fd1498Szrj // state, the whole executor returns true. 
7238fd1498Szrj // 7338fd1498Szrj // TODO: This approach is exponentially slow for certain input. 7438fd1498Szrj // Try to compile the NFA to a DFA. 7538fd1498Szrj // 7638fd1498Szrj // Time complexity: \Omega(match_length), O(2^(_M_nfa.size())) 7738fd1498Szrj // Space complexity: \theta(match_results.size() + match_length) 7838fd1498Szrj // 7938fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 8038fd1498Szrj bool __dfs_mode> 8138fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__dfs)8238fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __dfs) 8338fd1498Szrj { 8438fd1498Szrj _M_has_sol = false; 8538fd1498Szrj *_M_states._M_get_sol_pos() = _BiIter(); 8638fd1498Szrj _M_cur_results = _M_results; 8738fd1498Szrj _M_dfs(__match_mode, _M_states._M_start); 8838fd1498Szrj return _M_has_sol; 8938fd1498Szrj } 9038fd1498Szrj 9138fd1498Szrj // ------------------------------------------------------------ 9238fd1498Szrj // 9338fd1498Szrj // BFS mode: 9438fd1498Szrj // 9538fd1498Szrj // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html) 9638fd1498Szrj // explained this algorithm clearly. 9738fd1498Szrj // 9838fd1498Szrj // It first computes epsilon closure (states that can be achieved without 9938fd1498Szrj // consuming characters) for every state that's still matching, 10038fd1498Szrj // using the same DFS algorithm, but doesn't re-enter states (using 10138fd1498Szrj // _M_states._M_visited to check), nor follow _S_opcode_match. 10238fd1498Szrj // 10338fd1498Szrj // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) 10438fd1498Szrj // as the start state. 10538fd1498Szrj // 10638fd1498Szrj // It significantly reduces potential duplicate states, so has a better 10738fd1498Szrj // upper bound; but it requires more overhead. 
10838fd1498Szrj // 10938fd1498Szrj // Time complexity: \Omega(match_length * match_results.size()) 11038fd1498Szrj // O(match_length * _M_nfa.size() * match_results.size()) 11138fd1498Szrj // Space complexity: \Omega(_M_nfa.size() + match_results.size()) 11238fd1498Szrj // O(_M_nfa.size() * match_results.size()) 11338fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 11438fd1498Szrj bool __dfs_mode> 11538fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__bfs)11638fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __bfs) 11738fd1498Szrj { 11838fd1498Szrj _M_states._M_queue(_M_states._M_start, _M_results); 11938fd1498Szrj bool __ret = false; 12038fd1498Szrj while (1) 12138fd1498Szrj { 12238fd1498Szrj _M_has_sol = false; 12338fd1498Szrj if (_M_states._M_match_queue.empty()) 12438fd1498Szrj break; 12538fd1498Szrj std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false); 12638fd1498Szrj auto __old_queue = std::move(_M_states._M_match_queue); 12738fd1498Szrj for (auto& __task : __old_queue) 12838fd1498Szrj { 12938fd1498Szrj _M_cur_results = std::move(__task.second); 13038fd1498Szrj _M_dfs(__match_mode, __task.first); 13138fd1498Szrj } 13238fd1498Szrj if (__match_mode == _Match_mode::_Prefix) 13338fd1498Szrj __ret |= _M_has_sol; 13438fd1498Szrj if (_M_current == _M_end) 13538fd1498Szrj break; 13638fd1498Szrj ++_M_current; 13738fd1498Szrj } 13838fd1498Szrj if (__match_mode == _Match_mode::_Exact) 13938fd1498Szrj __ret = _M_has_sol; 14038fd1498Szrj _M_states._M_match_queue.clear(); 14138fd1498Szrj return __ret; 14238fd1498Szrj } 14338fd1498Szrj 14438fd1498Szrj // Return whether now match the given sub-NFA. 
14538fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 14638fd1498Szrj bool __dfs_mode> 14738fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_lookahead(_StateIdT __next)14838fd1498Szrj _M_lookahead(_StateIdT __next) 14938fd1498Szrj { 15038fd1498Szrj // Backreferences may refer to captured content. 15138fd1498Szrj // We may want to make this faster by not copying, 15238fd1498Szrj // but let's not be clever prematurely. 15338fd1498Szrj _ResultsVec __what(_M_cur_results); 15438fd1498Szrj _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); 15538fd1498Szrj __sub._M_states._M_start = __next; 15638fd1498Szrj if (__sub._M_search_from_first()) 15738fd1498Szrj { 15838fd1498Szrj for (size_t __i = 0; __i < __what.size(); __i++) 15938fd1498Szrj if (__what[__i].matched) 16038fd1498Szrj _M_cur_results[__i] = __what[__i]; 16138fd1498Szrj return true; 16238fd1498Szrj } 16338fd1498Szrj return false; 16438fd1498Szrj } 16538fd1498Szrj 16638fd1498Szrj // __rep_count records how many times (__rep_count.second) 16738fd1498Szrj // this node is visited under certain input iterator 16838fd1498Szrj // (__rep_count.first). This prevent the executor from entering 16938fd1498Szrj // infinite loop by refusing to continue when it's already been 17038fd1498Szrj // visited more than twice. It's `twice` instead of `once` because 17138fd1498Szrj // we need to spare one more time for potential group capture. 
17238fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 17338fd1498Szrj bool __dfs_mode> 17438fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_rep_once_more(_Match_mode __match_mode,_StateIdT __i)17538fd1498Szrj _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i) 17638fd1498Szrj { 17738fd1498Szrj const auto& __state = _M_nfa[__i]; 17838fd1498Szrj auto& __rep_count = _M_rep_count[__i]; 17938fd1498Szrj if (__rep_count.second == 0 || __rep_count.first != _M_current) 18038fd1498Szrj { 18138fd1498Szrj auto __back = __rep_count; 18238fd1498Szrj __rep_count.first = _M_current; 18338fd1498Szrj __rep_count.second = 1; 18438fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 18538fd1498Szrj __rep_count = __back; 18638fd1498Szrj } 18738fd1498Szrj else 18838fd1498Szrj { 18938fd1498Szrj if (__rep_count.second < 2) 19038fd1498Szrj { 19138fd1498Szrj __rep_count.second++; 19238fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 19338fd1498Szrj __rep_count.second--; 19438fd1498Szrj } 19538fd1498Szrj } 19638fd1498Szrj } 19738fd1498Szrj 19838fd1498Szrj // _M_alt branch is "match once more", while _M_next is "get me out 19938fd1498Szrj // of this quantifier". Executing _M_next first or _M_alt first don't 20038fd1498Szrj // mean the same thing, and we need to choose the correct order under 20138fd1498Szrj // given greedy mode. 20238fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 20338fd1498Szrj bool __dfs_mode> 20438fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_repeat(_Match_mode __match_mode,_StateIdT __i)20538fd1498Szrj _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i) 20638fd1498Szrj { 20738fd1498Szrj const auto& __state = _M_nfa[__i]; 20838fd1498Szrj 20938fd1498Szrj // Greedy. 21038fd1498Szrj if (!__state._M_neg) 21138fd1498Szrj { 21238fd1498Szrj _M_rep_once_more(__match_mode, __i); 21338fd1498Szrj // If it's DFS executor and already accepted, we're done. 
21438fd1498Szrj if (!__dfs_mode || !_M_has_sol) 21538fd1498Szrj _M_dfs(__match_mode, __state._M_next); 21638fd1498Szrj } 21738fd1498Szrj else // Non-greedy mode 21838fd1498Szrj { 21938fd1498Szrj if (__dfs_mode) 22038fd1498Szrj { 22138fd1498Szrj // vice-versa. 22238fd1498Szrj _M_dfs(__match_mode, __state._M_next); 22338fd1498Szrj if (!_M_has_sol) 22438fd1498Szrj _M_rep_once_more(__match_mode, __i); 22538fd1498Szrj } 22638fd1498Szrj else 22738fd1498Szrj { 22838fd1498Szrj // DON'T attempt anything, because there's already another 22938fd1498Szrj // state with higher priority accepted. This state cannot 23038fd1498Szrj // be better by attempting its next node. 23138fd1498Szrj if (!_M_has_sol) 23238fd1498Szrj { 23338fd1498Szrj _M_dfs(__match_mode, __state._M_next); 23438fd1498Szrj // DON'T attempt anything if it's already accepted. An 23538fd1498Szrj // accepted state *must* be better than a solution that 23638fd1498Szrj // matches a non-greedy quantifier one more time. 23738fd1498Szrj if (!_M_has_sol) 23838fd1498Szrj _M_rep_once_more(__match_mode, __i); 23938fd1498Szrj } 24038fd1498Szrj } 24138fd1498Szrj } 24238fd1498Szrj } 24338fd1498Szrj 24438fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 24538fd1498Szrj bool __dfs_mode> 24638fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_begin(_Match_mode __match_mode,_StateIdT __i)24738fd1498Szrj _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i) 24838fd1498Szrj { 24938fd1498Szrj const auto& __state = _M_nfa[__i]; 25038fd1498Szrj 25138fd1498Szrj auto& __res = _M_cur_results[__state._M_subexpr]; 25238fd1498Szrj auto __back = __res.first; 25338fd1498Szrj __res.first = _M_current; 25438fd1498Szrj _M_dfs(__match_mode, __state._M_next); 25538fd1498Szrj __res.first = __back; 25638fd1498Szrj } 25738fd1498Szrj 25838fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 25938fd1498Szrj bool __dfs_mode> 26038fd1498Szrj void _Executor<_BiIter, 
_Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_end(_Match_mode __match_mode,_StateIdT __i)26138fd1498Szrj _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i) 26238fd1498Szrj { 26338fd1498Szrj const auto& __state = _M_nfa[__i]; 26438fd1498Szrj 26538fd1498Szrj auto& __res = _M_cur_results[__state._M_subexpr]; 26638fd1498Szrj auto __back = __res; 26738fd1498Szrj __res.second = _M_current; 26838fd1498Szrj __res.matched = true; 26938fd1498Szrj _M_dfs(__match_mode, __state._M_next); 27038fd1498Szrj __res = __back; 27138fd1498Szrj } 27238fd1498Szrj 27338fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 27438fd1498Szrj bool __dfs_mode> 27538fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_begin_assertion(_Match_mode __match_mode,_StateIdT __i)27638fd1498Szrj _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i) 27738fd1498Szrj { 27838fd1498Szrj const auto& __state = _M_nfa[__i]; 27938fd1498Szrj if (_M_at_begin()) 28038fd1498Szrj _M_dfs(__match_mode, __state._M_next); 28138fd1498Szrj } 28238fd1498Szrj 28338fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 28438fd1498Szrj bool __dfs_mode> 28538fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_end_assertion(_Match_mode __match_mode,_StateIdT __i)28638fd1498Szrj _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i) 28738fd1498Szrj { 28838fd1498Szrj const auto& __state = _M_nfa[__i]; 28938fd1498Szrj if (_M_at_end()) 29038fd1498Szrj _M_dfs(__match_mode, __state._M_next); 29138fd1498Szrj } 29238fd1498Szrj 29338fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 29438fd1498Szrj bool __dfs_mode> 29538fd1498Szrj inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_word_boundary(_Match_mode __match_mode,_StateIdT __i)29638fd1498Szrj _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i) 29738fd1498Szrj { 
29838fd1498Szrj const auto& __state = _M_nfa[__i]; 29938fd1498Szrj if (_M_word_boundary() == !__state._M_neg) 30038fd1498Szrj _M_dfs(__match_mode, __state._M_next); 30138fd1498Szrj } 30238fd1498Szrj 30338fd1498Szrj // Here __state._M_alt offers a single start node for a sub-NFA. 30438fd1498Szrj // We recursively invoke our algorithm to match the sub-NFA. 30538fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 30638fd1498Szrj bool __dfs_mode> 30738fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_lookahead(_Match_mode __match_mode,_StateIdT __i)30838fd1498Szrj _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i) 30938fd1498Szrj { 31038fd1498Szrj const auto& __state = _M_nfa[__i]; 31138fd1498Szrj if (_M_lookahead(__state._M_alt) == !__state._M_neg) 31238fd1498Szrj _M_dfs(__match_mode, __state._M_next); 31338fd1498Szrj } 31438fd1498Szrj 31538fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 31638fd1498Szrj bool __dfs_mode> 31738fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_match(_Match_mode __match_mode,_StateIdT __i)31838fd1498Szrj _M_handle_match(_Match_mode __match_mode, _StateIdT __i) 31938fd1498Szrj { 32038fd1498Szrj const auto& __state = _M_nfa[__i]; 32138fd1498Szrj 32238fd1498Szrj if (_M_current == _M_end) 32338fd1498Szrj return; 32438fd1498Szrj if (__dfs_mode) 32538fd1498Szrj { 32638fd1498Szrj if (__state._M_matches(*_M_current)) 32738fd1498Szrj { 32838fd1498Szrj ++_M_current; 32938fd1498Szrj _M_dfs(__match_mode, __state._M_next); 33038fd1498Szrj --_M_current; 33138fd1498Szrj } 33238fd1498Szrj } 33338fd1498Szrj else 33438fd1498Szrj if (__state._M_matches(*_M_current)) 33538fd1498Szrj _M_states._M_queue(__state._M_next, _M_cur_results); 33638fd1498Szrj } 33738fd1498Szrj 33838fd1498Szrj template<typename _BiIter, typename _TraitsT> 33938fd1498Szrj struct _Backref_matcher 34038fd1498Szrj { 
_Backref_matcherstd::__detail::_Backref_matcher34138fd1498Szrj _Backref_matcher(bool __icase, const _TraitsT& __traits) 34238fd1498Szrj : _M_traits(__traits) { } 34338fd1498Szrj 34438fd1498Szrj bool _M_applystd::__detail::_Backref_matcher34538fd1498Szrj _M_apply(_BiIter __expected_begin, 34638fd1498Szrj _BiIter __expected_end, _BiIter __actual_begin, 34738fd1498Szrj _BiIter __actual_end) 34838fd1498Szrj { 34938fd1498Szrj return _M_traits.transform(__expected_begin, __expected_end) 35038fd1498Szrj == _M_traits.transform(__actual_begin, __actual_end); 35138fd1498Szrj } 35238fd1498Szrj 35338fd1498Szrj const _TraitsT& _M_traits; 35438fd1498Szrj }; 35538fd1498Szrj 35638fd1498Szrj template<typename _BiIter, typename _CharT> 35738fd1498Szrj struct _Backref_matcher<_BiIter, std::regex_traits<_CharT>> 35838fd1498Szrj { 35938fd1498Szrj using _TraitsT = std::regex_traits<_CharT>; _Backref_matcherstd::__detail::_Backref_matcher36038fd1498Szrj _Backref_matcher(bool __icase, const _TraitsT& __traits) 36138fd1498Szrj : _M_icase(__icase), _M_traits(__traits) { } 36238fd1498Szrj 36338fd1498Szrj bool _M_applystd::__detail::_Backref_matcher36438fd1498Szrj _M_apply(_BiIter __expected_begin, 36538fd1498Szrj _BiIter __expected_end, _BiIter __actual_begin, 36638fd1498Szrj _BiIter __actual_end) 36738fd1498Szrj { 36838fd1498Szrj if (!_M_icase) 369*58e805e6Szrj return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 37038fd1498Szrj __actual_begin, __actual_end); 37138fd1498Szrj typedef std::ctype<_CharT> __ctype_type; 37238fd1498Szrj const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc()); 373*58e805e6Szrj return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 37438fd1498Szrj __actual_begin, __actual_end, 37538fd1498Szrj [this, &__fctyp](_CharT __lhs, _CharT __rhs) 37638fd1498Szrj { 37738fd1498Szrj return __fctyp.tolower(__lhs) 37838fd1498Szrj == __fctyp.tolower(__rhs); 37938fd1498Szrj }); 38038fd1498Szrj } 38138fd1498Szrj 38238fd1498Szrj bool _M_icase; 
38338fd1498Szrj const _TraitsT& _M_traits; 38438fd1498Szrj }; 38538fd1498Szrj 38638fd1498Szrj // First fetch the matched result from _M_cur_results as __submatch; 38738fd1498Szrj // then compare it with 38838fd1498Szrj // (_M_current, _M_current + (__submatch.second - __submatch.first)). 38938fd1498Szrj // If matched, keep going; else just return and try another state. 39038fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 39138fd1498Szrj bool __dfs_mode> 39238fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_backref(_Match_mode __match_mode,_StateIdT __i)39338fd1498Szrj _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) 39438fd1498Szrj { 39538fd1498Szrj __glibcxx_assert(__dfs_mode); 39638fd1498Szrj 39738fd1498Szrj const auto& __state = _M_nfa[__i]; 39838fd1498Szrj auto& __submatch = _M_cur_results[__state._M_backref_index]; 39938fd1498Szrj if (!__submatch.matched) 40038fd1498Szrj return; 40138fd1498Szrj auto __last = _M_current; 40238fd1498Szrj for (auto __tmp = __submatch.first; 40338fd1498Szrj __last != _M_end && __tmp != __submatch.second; 40438fd1498Szrj ++__tmp) 40538fd1498Szrj ++__last; 40638fd1498Szrj if (_Backref_matcher<_BiIter, _TraitsT>( 40738fd1498Szrj _M_re.flags() & regex_constants::icase, 40838fd1498Szrj _M_re._M_automaton->_M_traits)._M_apply( 40938fd1498Szrj __submatch.first, __submatch.second, _M_current, __last)) 41038fd1498Szrj { 41138fd1498Szrj if (__last != _M_current) 41238fd1498Szrj { 41338fd1498Szrj auto __backup = _M_current; 41438fd1498Szrj _M_current = __last; 41538fd1498Szrj _M_dfs(__match_mode, __state._M_next); 41638fd1498Szrj _M_current = __backup; 41738fd1498Szrj } 41838fd1498Szrj else 41938fd1498Szrj _M_dfs(__match_mode, __state._M_next); 42038fd1498Szrj } 42138fd1498Szrj } 42238fd1498Szrj 42338fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 42438fd1498Szrj bool __dfs_mode> 42538fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: 
_M_handle_accept(_Match_mode __match_mode,_StateIdT __i)42638fd1498Szrj _M_handle_accept(_Match_mode __match_mode, _StateIdT __i) 42738fd1498Szrj { 42838fd1498Szrj if (__dfs_mode) 42938fd1498Szrj { 43038fd1498Szrj __glibcxx_assert(!_M_has_sol); 43138fd1498Szrj if (__match_mode == _Match_mode::_Exact) 43238fd1498Szrj _M_has_sol = _M_current == _M_end; 43338fd1498Szrj else 43438fd1498Szrj _M_has_sol = true; 43538fd1498Szrj if (_M_current == _M_begin 43638fd1498Szrj && (_M_flags & regex_constants::match_not_null)) 43738fd1498Szrj _M_has_sol = false; 43838fd1498Szrj if (_M_has_sol) 43938fd1498Szrj { 44038fd1498Szrj if (_M_nfa._M_flags & regex_constants::ECMAScript) 44138fd1498Szrj _M_results = _M_cur_results; 44238fd1498Szrj else // POSIX 44338fd1498Szrj { 44438fd1498Szrj __glibcxx_assert(_M_states._M_get_sol_pos()); 44538fd1498Szrj // Here's POSIX's logic: match the longest one. However 44638fd1498Szrj // we never know which one (lhs or rhs of "|") is longer 44738fd1498Szrj // unless we try both of them and compare the results. 44838fd1498Szrj // The member variable _M_sol_pos records the end 44938fd1498Szrj // position of the last successful match. It's better 45038fd1498Szrj // to be larger, because POSIX regex is always greedy. 45138fd1498Szrj // TODO: This could be slow. 
45238fd1498Szrj if (*_M_states._M_get_sol_pos() == _BiIter() 45338fd1498Szrj || std::distance(_M_begin, 45438fd1498Szrj *_M_states._M_get_sol_pos()) 45538fd1498Szrj < std::distance(_M_begin, _M_current)) 45638fd1498Szrj { 45738fd1498Szrj *_M_states._M_get_sol_pos() = _M_current; 45838fd1498Szrj _M_results = _M_cur_results; 45938fd1498Szrj } 46038fd1498Szrj } 46138fd1498Szrj } 46238fd1498Szrj } 46338fd1498Szrj else 46438fd1498Szrj { 46538fd1498Szrj if (_M_current == _M_begin 46638fd1498Szrj && (_M_flags & regex_constants::match_not_null)) 46738fd1498Szrj return; 46838fd1498Szrj if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end) 46938fd1498Szrj if (!_M_has_sol) 47038fd1498Szrj { 47138fd1498Szrj _M_has_sol = true; 47238fd1498Szrj _M_results = _M_cur_results; 47338fd1498Szrj } 47438fd1498Szrj } 47538fd1498Szrj } 47638fd1498Szrj 47738fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 47838fd1498Szrj bool __dfs_mode> 47938fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_alternative(_Match_mode __match_mode,_StateIdT __i)48038fd1498Szrj _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i) 48138fd1498Szrj { 48238fd1498Szrj const auto& __state = _M_nfa[__i]; 48338fd1498Szrj 48438fd1498Szrj if (_M_nfa._M_flags & regex_constants::ECMAScript) 48538fd1498Szrj { 48638fd1498Szrj // TODO: Fix BFS support. It is wrong. 48738fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 48838fd1498Szrj // Pick lhs if it matches. Only try rhs if it doesn't. 48938fd1498Szrj if (!_M_has_sol) 49038fd1498Szrj _M_dfs(__match_mode, __state._M_next); 49138fd1498Szrj } 49238fd1498Szrj else 49338fd1498Szrj { 49438fd1498Szrj // Try both and compare the result. 49538fd1498Szrj // See "case _S_opcode_accept:" handling above. 
49638fd1498Szrj _M_dfs(__match_mode, __state._M_alt); 49738fd1498Szrj auto __has_sol = _M_has_sol; 49838fd1498Szrj _M_has_sol = false; 49938fd1498Szrj _M_dfs(__match_mode, __state._M_next); 50038fd1498Szrj _M_has_sol |= __has_sol; 50138fd1498Szrj } 50238fd1498Szrj } 50338fd1498Szrj 50438fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 50538fd1498Szrj bool __dfs_mode> 50638fd1498Szrj void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_dfs(_Match_mode __match_mode,_StateIdT __i)50738fd1498Szrj _M_dfs(_Match_mode __match_mode, _StateIdT __i) 50838fd1498Szrj { 50938fd1498Szrj if (_M_states._M_visited(__i)) 51038fd1498Szrj return; 51138fd1498Szrj 51238fd1498Szrj switch (_M_nfa[__i]._M_opcode()) 51338fd1498Szrj { 51438fd1498Szrj case _S_opcode_repeat: 51538fd1498Szrj _M_handle_repeat(__match_mode, __i); break; 51638fd1498Szrj case _S_opcode_subexpr_begin: 51738fd1498Szrj _M_handle_subexpr_begin(__match_mode, __i); break; 51838fd1498Szrj case _S_opcode_subexpr_end: 51938fd1498Szrj _M_handle_subexpr_end(__match_mode, __i); break; 52038fd1498Szrj case _S_opcode_line_begin_assertion: 52138fd1498Szrj _M_handle_line_begin_assertion(__match_mode, __i); break; 52238fd1498Szrj case _S_opcode_line_end_assertion: 52338fd1498Szrj _M_handle_line_end_assertion(__match_mode, __i); break; 52438fd1498Szrj case _S_opcode_word_boundary: 52538fd1498Szrj _M_handle_word_boundary(__match_mode, __i); break; 52638fd1498Szrj case _S_opcode_subexpr_lookahead: 52738fd1498Szrj _M_handle_subexpr_lookahead(__match_mode, __i); break; 52838fd1498Szrj case _S_opcode_match: 52938fd1498Szrj _M_handle_match(__match_mode, __i); break; 53038fd1498Szrj case _S_opcode_backref: 53138fd1498Szrj _M_handle_backref(__match_mode, __i); break; 53238fd1498Szrj case _S_opcode_accept: 53338fd1498Szrj _M_handle_accept(__match_mode, __i); break; 53438fd1498Szrj case _S_opcode_alternative: 53538fd1498Szrj _M_handle_alternative(__match_mode, __i); break; 53638fd1498Szrj default: 
53738fd1498Szrj __glibcxx_assert(false); 53838fd1498Szrj } 53938fd1498Szrj } 54038fd1498Szrj 54138fd1498Szrj // Return whether now is at some word boundary. 54238fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT, 54338fd1498Szrj bool __dfs_mode> 54438fd1498Szrj bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_word_boundary() const54538fd1498Szrj _M_word_boundary() const 54638fd1498Szrj { 54738fd1498Szrj if (_M_current == _M_begin && (_M_flags & regex_constants::match_not_bow)) 54838fd1498Szrj return false; 54938fd1498Szrj if (_M_current == _M_end && (_M_flags & regex_constants::match_not_eow)) 55038fd1498Szrj return false; 55138fd1498Szrj 55238fd1498Szrj bool __left_is_word = false; 55338fd1498Szrj if (_M_current != _M_begin 55438fd1498Szrj || (_M_flags & regex_constants::match_prev_avail)) 55538fd1498Szrj { 55638fd1498Szrj auto __prev = _M_current; 55738fd1498Szrj if (_M_is_word(*std::prev(__prev))) 55838fd1498Szrj __left_is_word = true; 55938fd1498Szrj } 56038fd1498Szrj bool __right_is_word = 56138fd1498Szrj _M_current != _M_end && _M_is_word(*_M_current); 56238fd1498Szrj 56338fd1498Szrj return __left_is_word != __right_is_word; 56438fd1498Szrj } 56538fd1498Szrj } // namespace __detail 56638fd1498Szrj 56738fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION 56838fd1498Szrj } // namespace 569