14d5abbe8Smrg // class template regex -*- C++ -*- 24d5abbe8Smrg 3b1e83836Smrg // Copyright (C) 2013-2022 Free Software Foundation, Inc. 44d5abbe8Smrg // 54d5abbe8Smrg // This file is part of the GNU ISO C++ Library. This library is free 64d5abbe8Smrg // software; you can redistribute it and/or modify it under the 74d5abbe8Smrg // terms of the GNU General Public License as published by the 84d5abbe8Smrg // Free Software Foundation; either version 3, or (at your option) 94d5abbe8Smrg // any later version. 104d5abbe8Smrg 114d5abbe8Smrg // This library is distributed in the hope that it will be useful, 124d5abbe8Smrg // but WITHOUT ANY WARRANTY; without even the implied warranty of 134d5abbe8Smrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 144d5abbe8Smrg // GNU General Public License for more details. 154d5abbe8Smrg 164d5abbe8Smrg // Under Section 7 of GPL version 3, you are granted additional 174d5abbe8Smrg // permissions described in the GCC Runtime Library Exception, version 184d5abbe8Smrg // 3.1, as published by the Free Software Foundation. 194d5abbe8Smrg 204d5abbe8Smrg // You should have received a copy of the GNU General Public License and 214d5abbe8Smrg // a copy of the GCC Runtime Library Exception along with this program; 224d5abbe8Smrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 234d5abbe8Smrg // <http://www.gnu.org/licenses/>. 244d5abbe8Smrg 254d5abbe8Smrg /** 264d5abbe8Smrg * @file bits/regex_executor.tcc 274d5abbe8Smrg * This is an internal header file, included by other library headers. 284d5abbe8Smrg * Do not attempt to use it directly. @headername{regex} 294d5abbe8Smrg */ 304d5abbe8Smrg 314d5abbe8Smrg namespace std _GLIBCXX_VISIBILITY(default) 324d5abbe8Smrg { 338b6133e5Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION 348b6133e5Smrg 35a3e9eb18Smrg namespace __detail 36a3e9eb18Smrg { 374d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 384d5abbe8Smrg bool __dfs_mode> 394d5abbe8Smrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_search()404d5abbe8Smrg _M_search() 414d5abbe8Smrg { 424d5abbe8Smrg if (_M_search_from_first()) 434d5abbe8Smrg return true; 444d5abbe8Smrg if (_M_flags & regex_constants::match_continuous) 454d5abbe8Smrg return false; 464d5abbe8Smrg _M_flags |= regex_constants::match_prev_avail; 474d5abbe8Smrg while (_M_begin != _M_end) 484d5abbe8Smrg { 494d5abbe8Smrg ++_M_begin; 504d5abbe8Smrg if (_M_search_from_first()) 514d5abbe8Smrg return true; 524d5abbe8Smrg } 534d5abbe8Smrg return false; 544d5abbe8Smrg } 554d5abbe8Smrg 564d5abbe8Smrg // The _M_main function operates in different modes, DFS mode or BFS mode, 574d5abbe8Smrg // indicated by template parameter __dfs_mode, and dispatches to one of the 584d5abbe8Smrg // _M_main_dispatch overloads. 594d5abbe8Smrg // 604d5abbe8Smrg // ------------------------------------------------------------ 614d5abbe8Smrg // 624d5abbe8Smrg // DFS mode: 634d5abbe8Smrg // 644d5abbe8Smrg // It applies a Depth-First-Search (aka backtracking) on given NFA and input 654d5abbe8Smrg // string. 664d5abbe8Smrg // At the very beginning the executor stands in the start state, then it 674d5abbe8Smrg // tries every possible state transition in current state recursively. Some 684d5abbe8Smrg // state transitions consume input string, say, a single-char-matcher or a 694d5abbe8Smrg // back-reference matcher; some don't, like assertion or other anchor nodes. 704d5abbe8Smrg // When the input is exhausted and/or the current state is an accepting 714d5abbe8Smrg // state, the whole executor returns true. 724d5abbe8Smrg // 734d5abbe8Smrg // TODO: This approach is exponentially slow for certain input. 744d5abbe8Smrg // Try to compile the NFA to a DFA. 754d5abbe8Smrg // 764d5abbe8Smrg // Time complexity: \Omega(match_length), O(2^(_M_nfa.size())) 774d5abbe8Smrg // Space complexity: \theta(match_results.size() + match_length) 784d5abbe8Smrg // 794d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 804d5abbe8Smrg bool __dfs_mode> 814d5abbe8Smrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__dfs)824d5abbe8Smrg _M_main_dispatch(_Match_mode __match_mode, __dfs) 834d5abbe8Smrg { 844d5abbe8Smrg _M_has_sol = false; 854d5abbe8Smrg *_M_states._M_get_sol_pos() = _BiIter(); 864d5abbe8Smrg _M_cur_results = _M_results; 874d5abbe8Smrg _M_dfs(__match_mode, _M_states._M_start); 884d5abbe8Smrg return _M_has_sol; 894d5abbe8Smrg } 904d5abbe8Smrg 914d5abbe8Smrg // ------------------------------------------------------------ 924d5abbe8Smrg // 934d5abbe8Smrg // BFS mode: 944d5abbe8Smrg // 954d5abbe8Smrg // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html) 964d5abbe8Smrg // explained this algorithm clearly. 974d5abbe8Smrg // 984d5abbe8Smrg // It first computes epsilon closure (states that can be achieved without 994d5abbe8Smrg // consuming characters) for every state that's still matching, 1004d5abbe8Smrg // using the same DFS algorithm, but doesn't re-enter states (using 1014d5abbe8Smrg // _M_states._M_visited to check), nor follow _S_opcode_match. 1024d5abbe8Smrg // 1034d5abbe8Smrg // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue) 1044d5abbe8Smrg // as the start state. 1054d5abbe8Smrg // 1064d5abbe8Smrg // It significantly reduces potential duplicate states, so has a better 1074d5abbe8Smrg // upper bound; but it requires more overhead. 1084d5abbe8Smrg // 1094d5abbe8Smrg // Time complexity: \Omega(match_length * match_results.size()) 1104d5abbe8Smrg // O(match_length * _M_nfa.size() * match_results.size()) 1114d5abbe8Smrg // Space complexity: \Omega(_M_nfa.size() + match_results.size()) 1124d5abbe8Smrg // O(_M_nfa.size() * match_results.size()) 1134d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1144d5abbe8Smrg bool __dfs_mode> 1154d5abbe8Smrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_main_dispatch(_Match_mode __match_mode,__bfs)1164d5abbe8Smrg _M_main_dispatch(_Match_mode __match_mode, __bfs) 1174d5abbe8Smrg { 1184d5abbe8Smrg _M_states._M_queue(_M_states._M_start, _M_results); 1194d5abbe8Smrg bool __ret = false; 1204d5abbe8Smrg while (1) 1214d5abbe8Smrg { 1224d5abbe8Smrg _M_has_sol = false; 1234d5abbe8Smrg if (_M_states._M_match_queue.empty()) 1244d5abbe8Smrg break; 125b1e83836Smrg std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false); 1264d5abbe8Smrg auto __old_queue = std::move(_M_states._M_match_queue); 127*0a307195Smrg auto __alloc = _M_cur_results.get_allocator(); 1284d5abbe8Smrg for (auto& __task : __old_queue) 1294d5abbe8Smrg { 130*0a307195Smrg _M_cur_results = _ResultsVec(std::move(__task.second), __alloc); 1314d5abbe8Smrg _M_dfs(__match_mode, __task.first); 1324d5abbe8Smrg } 1334d5abbe8Smrg if (__match_mode == _Match_mode::_Prefix) 1344d5abbe8Smrg __ret |= _M_has_sol; 1354d5abbe8Smrg if (_M_current == _M_end) 1364d5abbe8Smrg break; 1374d5abbe8Smrg ++_M_current; 1384d5abbe8Smrg } 1394d5abbe8Smrg if (__match_mode == _Match_mode::_Exact) 1404d5abbe8Smrg __ret = _M_has_sol; 1414d5abbe8Smrg _M_states._M_match_queue.clear(); 1424d5abbe8Smrg return __ret; 1434d5abbe8Smrg } 1444d5abbe8Smrg 1454d5abbe8Smrg // Return whether now match the given sub-NFA. 1464d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1474d5abbe8Smrg bool __dfs_mode> 1484d5abbe8Smrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_lookahead(_StateIdT __next)149f9a78e0eSmrg _M_lookahead(_StateIdT __next) 1504d5abbe8Smrg { 151f30ff588Smrg // Backreferences may refer to captured content. 152f30ff588Smrg // We may want to make this faster by not copying, 153f30ff588Smrg // but let's not be clever prematurely. 154f30ff588Smrg _ResultsVec __what(_M_cur_results); 1554d5abbe8Smrg _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags); 156f9a78e0eSmrg __sub._M_states._M_start = __next; 1574d5abbe8Smrg if (__sub._M_search_from_first()) 1584d5abbe8Smrg { 1594d5abbe8Smrg for (size_t __i = 0; __i < __what.size(); __i++) 1604d5abbe8Smrg if (__what[__i].matched) 1614d5abbe8Smrg _M_cur_results[__i] = __what[__i]; 1624d5abbe8Smrg return true; 1634d5abbe8Smrg } 1644d5abbe8Smrg return false; 1654d5abbe8Smrg } 1664d5abbe8Smrg 1674d5abbe8Smrg // __rep_count records how many times (__rep_count.second) 1684d5abbe8Smrg // this node is visited under certain input iterator 1694d5abbe8Smrg // (__rep_count.first). This prevent the executor from entering 1704d5abbe8Smrg // infinite loop by refusing to continue when it's already been 1714d5abbe8Smrg // visited more than twice. It's `twice` instead of `once` because 1724d5abbe8Smrg // we need to spare one more time for potential group capture. 1734d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 1744d5abbe8Smrg bool __dfs_mode> 1754d5abbe8Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_rep_once_more(_Match_mode __match_mode,_StateIdT __i)1764d5abbe8Smrg _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i) 1774d5abbe8Smrg { 1784d5abbe8Smrg const auto& __state = _M_nfa[__i]; 1794d5abbe8Smrg auto& __rep_count = _M_rep_count[__i]; 1804d5abbe8Smrg if (__rep_count.second == 0 || __rep_count.first != _M_current) 1814d5abbe8Smrg { 1824d5abbe8Smrg auto __back = __rep_count; 1834d5abbe8Smrg __rep_count.first = _M_current; 1844d5abbe8Smrg __rep_count.second = 1; 1854d5abbe8Smrg _M_dfs(__match_mode, __state._M_alt); 1864d5abbe8Smrg __rep_count = __back; 1874d5abbe8Smrg } 1884d5abbe8Smrg else 1894d5abbe8Smrg { 1904d5abbe8Smrg if (__rep_count.second < 2) 1914d5abbe8Smrg { 1924d5abbe8Smrg __rep_count.second++; 1934d5abbe8Smrg _M_dfs(__match_mode, __state._M_alt); 1944d5abbe8Smrg __rep_count.second--; 1954d5abbe8Smrg } 1964d5abbe8Smrg } 197a3e9eb18Smrg } 1984d5abbe8Smrg 1994d5abbe8Smrg // _M_alt branch is "match once more", while _M_next is "get me out 2004d5abbe8Smrg // of this quantifier". Executing _M_next first or _M_alt first don't 2014d5abbe8Smrg // mean the same thing, and we need to choose the correct order under 2024d5abbe8Smrg // given greedy mode. 203b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 204b17d1066Smrg bool __dfs_mode> 205b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_repeat(_Match_mode __match_mode,_StateIdT __i)206b17d1066Smrg _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i) 2074d5abbe8Smrg { 208b17d1066Smrg const auto& __state = _M_nfa[__i]; 209b17d1066Smrg 2104d5abbe8Smrg // Greedy. 2114d5abbe8Smrg if (!__state._M_neg) 2124d5abbe8Smrg { 2134d5abbe8Smrg _M_rep_once_more(__match_mode, __i); 2144d5abbe8Smrg // If it's DFS executor and already accepted, we're done. 2154d5abbe8Smrg if (!__dfs_mode || !_M_has_sol) 2164d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 2174d5abbe8Smrg } 2184d5abbe8Smrg else // Non-greedy mode 2194d5abbe8Smrg { 2204d5abbe8Smrg if (__dfs_mode) 2214d5abbe8Smrg { 2224d5abbe8Smrg // vice-versa. 2234d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 2244d5abbe8Smrg if (!_M_has_sol) 2254d5abbe8Smrg _M_rep_once_more(__match_mode, __i); 2264d5abbe8Smrg } 2274d5abbe8Smrg else 2284d5abbe8Smrg { 2294d5abbe8Smrg // DON'T attempt anything, because there's already another 2304d5abbe8Smrg // state with higher priority accepted. This state cannot 2314d5abbe8Smrg // be better by attempting its next node. 2324d5abbe8Smrg if (!_M_has_sol) 2334d5abbe8Smrg { 2344d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 2354d5abbe8Smrg // DON'T attempt anything if it's already accepted. An 2364d5abbe8Smrg // accepted state *must* be better than a solution that 2374d5abbe8Smrg // matches a non-greedy quantifier one more time. 2384d5abbe8Smrg if (!_M_has_sol) 2394d5abbe8Smrg _M_rep_once_more(__match_mode, __i); 2404d5abbe8Smrg } 2414d5abbe8Smrg } 2424d5abbe8Smrg } 2434d5abbe8Smrg } 244b17d1066Smrg 245b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 246b17d1066Smrg bool __dfs_mode> 247b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_begin(_Match_mode __match_mode,_StateIdT __i)248b17d1066Smrg _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i) 2494d5abbe8Smrg { 250b17d1066Smrg const auto& __state = _M_nfa[__i]; 251b17d1066Smrg 2524d5abbe8Smrg auto& __res = _M_cur_results[__state._M_subexpr]; 2534d5abbe8Smrg auto __back = __res.first; 2544d5abbe8Smrg __res.first = _M_current; 2554d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 2564d5abbe8Smrg __res.first = __back; 2574d5abbe8Smrg } 258b17d1066Smrg 259b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 260b17d1066Smrg bool __dfs_mode> 261b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_end(_Match_mode __match_mode,_StateIdT __i)262b17d1066Smrg _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i) 2634d5abbe8Smrg { 264b17d1066Smrg const auto& __state = _M_nfa[__i]; 265b17d1066Smrg 2664d5abbe8Smrg auto& __res = _M_cur_results[__state._M_subexpr]; 2674d5abbe8Smrg auto __back = __res; 2684d5abbe8Smrg __res.second = _M_current; 2694d5abbe8Smrg __res.matched = true; 2704d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 2714d5abbe8Smrg __res = __back; 2724d5abbe8Smrg } 273b17d1066Smrg 274b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 275b17d1066Smrg bool __dfs_mode> 276b17d1066Smrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_begin_assertion(_Match_mode __match_mode,_StateIdT __i)277b17d1066Smrg _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i) 278b17d1066Smrg { 279b17d1066Smrg const auto& __state = _M_nfa[__i]; 2804d5abbe8Smrg if (_M_at_begin()) 2814d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 282b17d1066Smrg } 283b17d1066Smrg 284b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 285b17d1066Smrg bool __dfs_mode> 286b17d1066Smrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_line_end_assertion(_Match_mode __match_mode,_StateIdT __i)287b17d1066Smrg _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i) 288b17d1066Smrg { 289b17d1066Smrg const auto& __state = _M_nfa[__i]; 2904d5abbe8Smrg if (_M_at_end()) 2914d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 292b17d1066Smrg } 293b17d1066Smrg 294b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 295b17d1066Smrg bool __dfs_mode> 296b17d1066Smrg inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_word_boundary(_Match_mode __match_mode,_StateIdT __i)297b17d1066Smrg _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i) 298b17d1066Smrg { 299b17d1066Smrg const auto& __state = _M_nfa[__i]; 3004d5abbe8Smrg if (_M_word_boundary() == !__state._M_neg) 3014d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 302b17d1066Smrg } 303b17d1066Smrg 3044d5abbe8Smrg // Here __state._M_alt offers a single start node for a sub-NFA. 3054d5abbe8Smrg // We recursively invoke our algorithm to match the sub-NFA. 306b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 307b17d1066Smrg bool __dfs_mode> 308b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_subexpr_lookahead(_Match_mode __match_mode,_StateIdT __i)309b17d1066Smrg _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i) 310b17d1066Smrg { 311b17d1066Smrg const auto& __state = _M_nfa[__i]; 312f9a78e0eSmrg if (_M_lookahead(__state._M_alt) == !__state._M_neg) 3134d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 314b17d1066Smrg } 315b17d1066Smrg 316b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 317b17d1066Smrg bool __dfs_mode> 318b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_match(_Match_mode __match_mode,_StateIdT __i)319b17d1066Smrg _M_handle_match(_Match_mode __match_mode, _StateIdT __i) 320b17d1066Smrg { 321b17d1066Smrg const auto& __state = _M_nfa[__i]; 322b17d1066Smrg 3234d5abbe8Smrg if (_M_current == _M_end) 324b17d1066Smrg return; 3254d5abbe8Smrg if (__dfs_mode) 3264d5abbe8Smrg { 3274d5abbe8Smrg if (__state._M_matches(*_M_current)) 3284d5abbe8Smrg { 3294d5abbe8Smrg ++_M_current; 3304d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 3314d5abbe8Smrg --_M_current; 3324d5abbe8Smrg } 3334d5abbe8Smrg } 3344d5abbe8Smrg else 3354d5abbe8Smrg if (__state._M_matches(*_M_current)) 3364d5abbe8Smrg _M_states._M_queue(__state._M_next, _M_cur_results); 337b17d1066Smrg } 338b17d1066Smrg 339a3e9eb18Smrg template<typename _BiIter, typename _TraitsT> 340a3e9eb18Smrg struct _Backref_matcher 341a3e9eb18Smrg { _Backref_matcherstd::__detail::_Backref_matcher342a3e9eb18Smrg _Backref_matcher(bool __icase, const _TraitsT& __traits) 343a3e9eb18Smrg : _M_traits(__traits) { } 344a3e9eb18Smrg 345a3e9eb18Smrg bool _M_applystd::__detail::_Backref_matcher346a3e9eb18Smrg _M_apply(_BiIter __expected_begin, 347a3e9eb18Smrg _BiIter __expected_end, _BiIter __actual_begin, 348a3e9eb18Smrg _BiIter __actual_end) 349a3e9eb18Smrg { 350a3e9eb18Smrg return _M_traits.transform(__expected_begin, __expected_end) 351a3e9eb18Smrg == _M_traits.transform(__actual_begin, __actual_end); 352a3e9eb18Smrg } 353a3e9eb18Smrg 354a3e9eb18Smrg const _TraitsT& _M_traits; 355a3e9eb18Smrg }; 356a3e9eb18Smrg 357a3e9eb18Smrg template<typename _BiIter, typename _CharT> 358a3e9eb18Smrg struct _Backref_matcher<_BiIter, std::regex_traits<_CharT>> 359a3e9eb18Smrg { 360a3e9eb18Smrg using _TraitsT = std::regex_traits<_CharT>; _Backref_matcherstd::__detail::_Backref_matcher361a3e9eb18Smrg _Backref_matcher(bool __icase, const _TraitsT& __traits) 362a3e9eb18Smrg : _M_icase(__icase), _M_traits(__traits) { } 363a3e9eb18Smrg 364a3e9eb18Smrg bool _M_applystd::__detail::_Backref_matcher365a3e9eb18Smrg _M_apply(_BiIter __expected_begin, 366a3e9eb18Smrg _BiIter __expected_end, _BiIter __actual_begin, 367a3e9eb18Smrg _BiIter __actual_end) 368a3e9eb18Smrg { 369a3e9eb18Smrg if (!_M_icase) 370a3e9eb18Smrg return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 371a3e9eb18Smrg __actual_begin, __actual_end); 372a3e9eb18Smrg typedef std::ctype<_CharT> __ctype_type; 373a3e9eb18Smrg const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc()); 374a3e9eb18Smrg return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end, 375a3e9eb18Smrg __actual_begin, __actual_end, 376a3e9eb18Smrg [this, &__fctyp](_CharT __lhs, _CharT __rhs) 377a3e9eb18Smrg { 378a3e9eb18Smrg return __fctyp.tolower(__lhs) 379a3e9eb18Smrg == __fctyp.tolower(__rhs); 380a3e9eb18Smrg }); 381a3e9eb18Smrg } 382a3e9eb18Smrg 383a3e9eb18Smrg bool _M_icase; 384a3e9eb18Smrg const _TraitsT& _M_traits; 385a3e9eb18Smrg }; 386a3e9eb18Smrg 3874d5abbe8Smrg // First fetch the matched result from _M_cur_results as __submatch; 3884d5abbe8Smrg // then compare it with 3894d5abbe8Smrg // (_M_current, _M_current + (__submatch.second - __submatch.first)). 3904d5abbe8Smrg // If matched, keep going; else just return and try another state. 391b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 392b17d1066Smrg bool __dfs_mode> 393b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_backref(_Match_mode __match_mode,_StateIdT __i)394b17d1066Smrg _M_handle_backref(_Match_mode __match_mode, _StateIdT __i) 3954d5abbe8Smrg { 396f9a78e0eSmrg __glibcxx_assert(__dfs_mode); 397b17d1066Smrg 398b17d1066Smrg const auto& __state = _M_nfa[__i]; 3994d5abbe8Smrg auto& __submatch = _M_cur_results[__state._M_backref_index]; 4004d5abbe8Smrg if (!__submatch.matched) 401b17d1066Smrg return; 4024d5abbe8Smrg auto __last = _M_current; 4034d5abbe8Smrg for (auto __tmp = __submatch.first; 4044d5abbe8Smrg __last != _M_end && __tmp != __submatch.second; 4054d5abbe8Smrg ++__tmp) 4064d5abbe8Smrg ++__last; 407a3e9eb18Smrg if (_Backref_matcher<_BiIter, _TraitsT>( 408a3e9eb18Smrg _M_re.flags() & regex_constants::icase, 409a3e9eb18Smrg _M_re._M_automaton->_M_traits)._M_apply( 410a3e9eb18Smrg __submatch.first, __submatch.second, _M_current, __last)) 4114d5abbe8Smrg { 4124d5abbe8Smrg if (__last != _M_current) 4134d5abbe8Smrg { 4144d5abbe8Smrg auto __backup = _M_current; 4154d5abbe8Smrg _M_current = __last; 4164d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 4174d5abbe8Smrg _M_current = __backup; 4184d5abbe8Smrg } 4194d5abbe8Smrg else 4204d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 4214d5abbe8Smrg } 4224d5abbe8Smrg } 423b17d1066Smrg 424b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 425b17d1066Smrg bool __dfs_mode> 426b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_accept(_Match_mode __match_mode,_StateIdT)4277d4dc15bSmrg _M_handle_accept(_Match_mode __match_mode, _StateIdT) 428b17d1066Smrg { 429b1e83836Smrg if _GLIBCXX17_CONSTEXPR (__dfs_mode) 4304d5abbe8Smrg { 431f9a78e0eSmrg __glibcxx_assert(!_M_has_sol); 4324d5abbe8Smrg if (__match_mode == _Match_mode::_Exact) 4334d5abbe8Smrg _M_has_sol = _M_current == _M_end; 4344d5abbe8Smrg else 4354d5abbe8Smrg _M_has_sol = true; 4364d5abbe8Smrg if (_M_current == _M_begin 4374d5abbe8Smrg && (_M_flags & regex_constants::match_not_null)) 4384d5abbe8Smrg _M_has_sol = false; 4394d5abbe8Smrg if (_M_has_sol) 4404d5abbe8Smrg { 4414d5abbe8Smrg if (_M_nfa._M_flags & regex_constants::ECMAScript) 4424d5abbe8Smrg _M_results = _M_cur_results; 4434d5abbe8Smrg else // POSIX 4444d5abbe8Smrg { 445f9a78e0eSmrg __glibcxx_assert(_M_states._M_get_sol_pos()); 4464d5abbe8Smrg // Here's POSIX's logic: match the longest one. However 4474d5abbe8Smrg // we never know which one (lhs or rhs of "|") is longer 4484d5abbe8Smrg // unless we try both of them and compare the results. 4494d5abbe8Smrg // The member variable _M_sol_pos records the end 4504d5abbe8Smrg // position of the last successful match. It's better 4514d5abbe8Smrg // to be larger, because POSIX regex is always greedy. 4524d5abbe8Smrg // TODO: This could be slow. 4534d5abbe8Smrg if (*_M_states._M_get_sol_pos() == _BiIter() 4544d5abbe8Smrg || std::distance(_M_begin, 4554d5abbe8Smrg *_M_states._M_get_sol_pos()) 4564d5abbe8Smrg < std::distance(_M_begin, _M_current)) 4574d5abbe8Smrg { 4584d5abbe8Smrg *_M_states._M_get_sol_pos() = _M_current; 4594d5abbe8Smrg _M_results = _M_cur_results; 4604d5abbe8Smrg } 4614d5abbe8Smrg } 4624d5abbe8Smrg } 4634d5abbe8Smrg } 4644d5abbe8Smrg else 4654d5abbe8Smrg { 4664d5abbe8Smrg if (_M_current == _M_begin 4674d5abbe8Smrg && (_M_flags & regex_constants::match_not_null)) 468b17d1066Smrg return; 4694d5abbe8Smrg if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end) 4704d5abbe8Smrg if (!_M_has_sol) 4714d5abbe8Smrg { 4724d5abbe8Smrg _M_has_sol = true; 4734d5abbe8Smrg _M_results = _M_cur_results; 4744d5abbe8Smrg } 4754d5abbe8Smrg } 476b17d1066Smrg } 477b17d1066Smrg 478b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 479b17d1066Smrg bool __dfs_mode> 480b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_handle_alternative(_Match_mode __match_mode,_StateIdT __i)481b17d1066Smrg _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i) 482b17d1066Smrg { 483b17d1066Smrg const auto& __state = _M_nfa[__i]; 484b17d1066Smrg 4854d5abbe8Smrg if (_M_nfa._M_flags & regex_constants::ECMAScript) 4864d5abbe8Smrg { 487f9a78e0eSmrg // TODO: Fix BFS support. It is wrong. 4884d5abbe8Smrg _M_dfs(__match_mode, __state._M_alt); 4894d5abbe8Smrg // Pick lhs if it matches. Only try rhs if it doesn't. 4904d5abbe8Smrg if (!_M_has_sol) 4914d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 4924d5abbe8Smrg } 4934d5abbe8Smrg else 4944d5abbe8Smrg { 4954d5abbe8Smrg // Try both and compare the result. 4964d5abbe8Smrg // See "case _S_opcode_accept:" handling above. 4974d5abbe8Smrg _M_dfs(__match_mode, __state._M_alt); 4984d5abbe8Smrg auto __has_sol = _M_has_sol; 4994d5abbe8Smrg _M_has_sol = false; 5004d5abbe8Smrg _M_dfs(__match_mode, __state._M_next); 5014d5abbe8Smrg _M_has_sol |= __has_sol; 5024d5abbe8Smrg } 503b17d1066Smrg } 504b17d1066Smrg 505b17d1066Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 506b17d1066Smrg bool __dfs_mode> 507b17d1066Smrg void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_dfs(_Match_mode __match_mode,_StateIdT __i)508b17d1066Smrg _M_dfs(_Match_mode __match_mode, _StateIdT __i) 509b17d1066Smrg { 510b17d1066Smrg if (_M_states._M_visited(__i)) 511b17d1066Smrg return; 512b17d1066Smrg 513b17d1066Smrg switch (_M_nfa[__i]._M_opcode()) 514b17d1066Smrg { 515b17d1066Smrg case _S_opcode_repeat: 516b17d1066Smrg _M_handle_repeat(__match_mode, __i); break; 517b17d1066Smrg case _S_opcode_subexpr_begin: 518b17d1066Smrg _M_handle_subexpr_begin(__match_mode, __i); break; 519b17d1066Smrg case _S_opcode_subexpr_end: 520b17d1066Smrg _M_handle_subexpr_end(__match_mode, __i); break; 521b17d1066Smrg case _S_opcode_line_begin_assertion: 522b17d1066Smrg _M_handle_line_begin_assertion(__match_mode, __i); break; 523b17d1066Smrg case _S_opcode_line_end_assertion: 524b17d1066Smrg _M_handle_line_end_assertion(__match_mode, __i); break; 525b17d1066Smrg case _S_opcode_word_boundary: 526b17d1066Smrg _M_handle_word_boundary(__match_mode, __i); break; 527b17d1066Smrg case _S_opcode_subexpr_lookahead: 528b17d1066Smrg _M_handle_subexpr_lookahead(__match_mode, __i); break; 529b17d1066Smrg case _S_opcode_match: 530b17d1066Smrg _M_handle_match(__match_mode, __i); break; 531b17d1066Smrg case _S_opcode_backref: 532b17d1066Smrg _M_handle_backref(__match_mode, __i); break; 533b17d1066Smrg case _S_opcode_accept: 534b17d1066Smrg _M_handle_accept(__match_mode, __i); break; 535b17d1066Smrg case _S_opcode_alternative: 536b17d1066Smrg _M_handle_alternative(__match_mode, __i); break; 5374d5abbe8Smrg default: 538f9a78e0eSmrg __glibcxx_assert(false); 5394d5abbe8Smrg } 5404d5abbe8Smrg } 5414d5abbe8Smrg 5424d5abbe8Smrg // Return whether now is at some word boundary. 5434d5abbe8Smrg template<typename _BiIter, typename _Alloc, typename _TraitsT, 5444d5abbe8Smrg bool __dfs_mode> 5454d5abbe8Smrg bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_word_boundary() const5464d5abbe8Smrg _M_word_boundary() const 5474d5abbe8Smrg { 548b17d1066Smrg if (_M_current == _M_begin && (_M_flags & regex_constants::match_not_bow)) 549b17d1066Smrg return false; 550b17d1066Smrg if (_M_current == _M_end && (_M_flags & regex_constants::match_not_eow)) 551b17d1066Smrg return false; 552b17d1066Smrg 5534d5abbe8Smrg bool __left_is_word = false; 5544d5abbe8Smrg if (_M_current != _M_begin 5554d5abbe8Smrg || (_M_flags & regex_constants::match_prev_avail)) 5564d5abbe8Smrg { 5574d5abbe8Smrg auto __prev = _M_current; 5584d5abbe8Smrg if (_M_is_word(*std::prev(__prev))) 5594d5abbe8Smrg __left_is_word = true; 5604d5abbe8Smrg } 5614d5abbe8Smrg bool __right_is_word = 5624d5abbe8Smrg _M_current != _M_end && _M_is_word(*_M_current); 5634d5abbe8Smrg 564b17d1066Smrg return __left_is_word != __right_is_word; 5654d5abbe8Smrg } 566a3e9eb18Smrg } // namespace __detail 5674d5abbe8Smrg 5684d5abbe8Smrg _GLIBCXX_END_NAMESPACE_VERSION 5694d5abbe8Smrg } // namespace 570