xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/bits/regex_executor.tcc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
14d5abbe8Smrg // class template regex -*- C++ -*-
24d5abbe8Smrg 
3b1e83836Smrg // Copyright (C) 2013-2022 Free Software Foundation, Inc.
44d5abbe8Smrg //
54d5abbe8Smrg // This file is part of the GNU ISO C++ Library.  This library is free
64d5abbe8Smrg // software; you can redistribute it and/or modify it under the
74d5abbe8Smrg // terms of the GNU General Public License as published by the
84d5abbe8Smrg // Free Software Foundation; either version 3, or (at your option)
94d5abbe8Smrg // any later version.
104d5abbe8Smrg 
114d5abbe8Smrg // This library is distributed in the hope that it will be useful,
124d5abbe8Smrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
134d5abbe8Smrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
144d5abbe8Smrg // GNU General Public License for more details.
154d5abbe8Smrg 
164d5abbe8Smrg // Under Section 7 of GPL version 3, you are granted additional
174d5abbe8Smrg // permissions described in the GCC Runtime Library Exception, version
184d5abbe8Smrg // 3.1, as published by the Free Software Foundation.
194d5abbe8Smrg 
204d5abbe8Smrg // You should have received a copy of the GNU General Public License and
214d5abbe8Smrg // a copy of the GCC Runtime Library Exception along with this program;
224d5abbe8Smrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
234d5abbe8Smrg // <http://www.gnu.org/licenses/>.
244d5abbe8Smrg 
254d5abbe8Smrg /**
264d5abbe8Smrg  *  @file bits/regex_executor.tcc
274d5abbe8Smrg  *  This is an internal header file, included by other library headers.
284d5abbe8Smrg  *  Do not attempt to use it directly. @headername{regex}
294d5abbe8Smrg  */
304d5abbe8Smrg 
314d5abbe8Smrg namespace std _GLIBCXX_VISIBILITY(default)
324d5abbe8Smrg {
338b6133e5Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
348b6133e5Smrg 
35a3e9eb18Smrg namespace __detail
36a3e9eb18Smrg {
374d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
384d5abbe8Smrg 	   bool __dfs_mode>
394d5abbe8Smrg     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_search()404d5abbe8Smrg     _M_search()
414d5abbe8Smrg     {
424d5abbe8Smrg       if (_M_search_from_first())
434d5abbe8Smrg 	return true;
444d5abbe8Smrg       if (_M_flags & regex_constants::match_continuous)
454d5abbe8Smrg 	return false;
464d5abbe8Smrg       _M_flags |= regex_constants::match_prev_avail;
474d5abbe8Smrg       while (_M_begin != _M_end)
484d5abbe8Smrg 	{
494d5abbe8Smrg 	  ++_M_begin;
504d5abbe8Smrg 	  if (_M_search_from_first())
514d5abbe8Smrg 	    return true;
524d5abbe8Smrg 	}
534d5abbe8Smrg       return false;
544d5abbe8Smrg     }
554d5abbe8Smrg 
564d5abbe8Smrg   // The _M_main function operates in different modes, DFS mode or BFS mode,
574d5abbe8Smrg   // indicated by template parameter __dfs_mode, and dispatches to one of the
584d5abbe8Smrg   // _M_main_dispatch overloads.
594d5abbe8Smrg   //
604d5abbe8Smrg   // ------------------------------------------------------------
614d5abbe8Smrg   //
624d5abbe8Smrg   // DFS mode:
634d5abbe8Smrg   //
644d5abbe8Smrg   // It applies a Depth-First-Search (aka backtracking) on given NFA and input
654d5abbe8Smrg   // string.
664d5abbe8Smrg   // At the very beginning the executor stands in the start state, then it
674d5abbe8Smrg   // tries every possible state transition in current state recursively. Some
684d5abbe8Smrg   // state transitions consume input string, say, a single-char-matcher or a
694d5abbe8Smrg   // back-reference matcher; some don't, like assertion or other anchor nodes.
704d5abbe8Smrg   // When the input is exhausted and/or the current state is an accepting
714d5abbe8Smrg   // state, the whole executor returns true.
724d5abbe8Smrg   //
734d5abbe8Smrg   // TODO: This approach is exponentially slow for certain input.
744d5abbe8Smrg   //       Try to compile the NFA to a DFA.
754d5abbe8Smrg   //
764d5abbe8Smrg   // Time complexity: \Omega(match_length), O(2^(_M_nfa.size()))
774d5abbe8Smrg   // Space complexity: \theta(match_results.size() + match_length)
784d5abbe8Smrg   //
794d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
804d5abbe8Smrg 	   bool __dfs_mode>
814d5abbe8Smrg     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main_dispatch(_Match_mode __match_mode,__dfs)824d5abbe8Smrg     _M_main_dispatch(_Match_mode __match_mode, __dfs)
834d5abbe8Smrg     {
844d5abbe8Smrg       _M_has_sol = false;
854d5abbe8Smrg       *_M_states._M_get_sol_pos() = _BiIter();
864d5abbe8Smrg       _M_cur_results = _M_results;
874d5abbe8Smrg       _M_dfs(__match_mode, _M_states._M_start);
884d5abbe8Smrg       return _M_has_sol;
894d5abbe8Smrg     }
904d5abbe8Smrg 
914d5abbe8Smrg   // ------------------------------------------------------------
924d5abbe8Smrg   //
934d5abbe8Smrg   // BFS mode:
944d5abbe8Smrg   //
954d5abbe8Smrg   // Russ Cox's article (http://swtch.com/~rsc/regexp/regexp1.html)
964d5abbe8Smrg   // explained this algorithm clearly.
974d5abbe8Smrg   //
984d5abbe8Smrg   // It first computes epsilon closure (states that can be achieved without
994d5abbe8Smrg   // consuming characters) for every state that's still matching,
1004d5abbe8Smrg   // using the same DFS algorithm, but doesn't re-enter states (using
1014d5abbe8Smrg   // _M_states._M_visited to check), nor follow _S_opcode_match.
1024d5abbe8Smrg   //
1034d5abbe8Smrg   // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
1044d5abbe8Smrg   // as the start state.
1054d5abbe8Smrg   //
1064d5abbe8Smrg   // It significantly reduces potential duplicate states, so has a better
1074d5abbe8Smrg   // upper bound; but it requires more overhead.
1084d5abbe8Smrg   //
1094d5abbe8Smrg   // Time complexity: \Omega(match_length * match_results.size())
1104d5abbe8Smrg   //                  O(match_length * _M_nfa.size() * match_results.size())
1114d5abbe8Smrg   // Space complexity: \Omega(_M_nfa.size() + match_results.size())
1124d5abbe8Smrg   //                   O(_M_nfa.size() * match_results.size())
1134d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
1144d5abbe8Smrg 	   bool __dfs_mode>
1154d5abbe8Smrg     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main_dispatch(_Match_mode __match_mode,__bfs)1164d5abbe8Smrg     _M_main_dispatch(_Match_mode __match_mode, __bfs)
1174d5abbe8Smrg     {
1184d5abbe8Smrg       _M_states._M_queue(_M_states._M_start, _M_results);
1194d5abbe8Smrg       bool __ret = false;
1204d5abbe8Smrg       while (1)
1214d5abbe8Smrg 	{
1224d5abbe8Smrg 	  _M_has_sol = false;
1234d5abbe8Smrg 	  if (_M_states._M_match_queue.empty())
1244d5abbe8Smrg 	    break;
125b1e83836Smrg 	  std::fill_n(_M_states._M_visited_states, _M_nfa.size(), false);
1264d5abbe8Smrg 	  auto __old_queue = std::move(_M_states._M_match_queue);
127*0a307195Smrg 	  auto __alloc = _M_cur_results.get_allocator();
1284d5abbe8Smrg 	  for (auto& __task : __old_queue)
1294d5abbe8Smrg 	    {
130*0a307195Smrg 	      _M_cur_results = _ResultsVec(std::move(__task.second), __alloc);
1314d5abbe8Smrg 	      _M_dfs(__match_mode, __task.first);
1324d5abbe8Smrg 	    }
1334d5abbe8Smrg 	  if (__match_mode == _Match_mode::_Prefix)
1344d5abbe8Smrg 	    __ret |= _M_has_sol;
1354d5abbe8Smrg 	  if (_M_current == _M_end)
1364d5abbe8Smrg 	    break;
1374d5abbe8Smrg 	  ++_M_current;
1384d5abbe8Smrg 	}
1394d5abbe8Smrg       if (__match_mode == _Match_mode::_Exact)
1404d5abbe8Smrg 	__ret = _M_has_sol;
1414d5abbe8Smrg       _M_states._M_match_queue.clear();
1424d5abbe8Smrg       return __ret;
1434d5abbe8Smrg     }
1444d5abbe8Smrg 
1454d5abbe8Smrg   // Return whether now match the given sub-NFA.
1464d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
1474d5abbe8Smrg 	   bool __dfs_mode>
1484d5abbe8Smrg     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_lookahead(_StateIdT __next)149f9a78e0eSmrg     _M_lookahead(_StateIdT __next)
1504d5abbe8Smrg     {
151f30ff588Smrg       // Backreferences may refer to captured content.
152f30ff588Smrg       // We may want to make this faster by not copying,
153f30ff588Smrg       // but let's not be clever prematurely.
154f30ff588Smrg       _ResultsVec __what(_M_cur_results);
1554d5abbe8Smrg       _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
156f9a78e0eSmrg       __sub._M_states._M_start = __next;
1574d5abbe8Smrg       if (__sub._M_search_from_first())
1584d5abbe8Smrg 	{
1594d5abbe8Smrg 	  for (size_t __i = 0; __i < __what.size(); __i++)
1604d5abbe8Smrg 	    if (__what[__i].matched)
1614d5abbe8Smrg 	      _M_cur_results[__i] = __what[__i];
1624d5abbe8Smrg 	  return true;
1634d5abbe8Smrg 	}
1644d5abbe8Smrg       return false;
1654d5abbe8Smrg     }
1664d5abbe8Smrg 
1674d5abbe8Smrg   // __rep_count records how many times (__rep_count.second)
1684d5abbe8Smrg   // this node is visited under certain input iterator
1694d5abbe8Smrg   // (__rep_count.first). This prevent the executor from entering
1704d5abbe8Smrg   // infinite loop by refusing to continue when it's already been
1714d5abbe8Smrg   // visited more than twice. It's `twice` instead of `once` because
1724d5abbe8Smrg   // we need to spare one more time for potential group capture.
1734d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
1744d5abbe8Smrg 	   bool __dfs_mode>
1754d5abbe8Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_rep_once_more(_Match_mode __match_mode,_StateIdT __i)1764d5abbe8Smrg     _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i)
1774d5abbe8Smrg     {
1784d5abbe8Smrg       const auto& __state = _M_nfa[__i];
1794d5abbe8Smrg       auto& __rep_count = _M_rep_count[__i];
1804d5abbe8Smrg       if (__rep_count.second == 0 || __rep_count.first != _M_current)
1814d5abbe8Smrg 	{
1824d5abbe8Smrg 	  auto __back = __rep_count;
1834d5abbe8Smrg 	  __rep_count.first = _M_current;
1844d5abbe8Smrg 	  __rep_count.second = 1;
1854d5abbe8Smrg 	  _M_dfs(__match_mode, __state._M_alt);
1864d5abbe8Smrg 	  __rep_count = __back;
1874d5abbe8Smrg 	}
1884d5abbe8Smrg       else
1894d5abbe8Smrg 	{
1904d5abbe8Smrg 	  if (__rep_count.second < 2)
1914d5abbe8Smrg 	    {
1924d5abbe8Smrg 	      __rep_count.second++;
1934d5abbe8Smrg 	      _M_dfs(__match_mode, __state._M_alt);
1944d5abbe8Smrg 	      __rep_count.second--;
1954d5abbe8Smrg 	    }
1964d5abbe8Smrg 	}
197a3e9eb18Smrg     }
1984d5abbe8Smrg 
1994d5abbe8Smrg   // _M_alt branch is "match once more", while _M_next is "get me out
2004d5abbe8Smrg   // of this quantifier". Executing _M_next first or _M_alt first don't
2014d5abbe8Smrg   // mean the same thing, and we need to choose the correct order under
2024d5abbe8Smrg   // given greedy mode.
203b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
204b17d1066Smrg 	   bool __dfs_mode>
205b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_repeat(_Match_mode __match_mode,_StateIdT __i)206b17d1066Smrg     _M_handle_repeat(_Match_mode __match_mode, _StateIdT __i)
2074d5abbe8Smrg     {
208b17d1066Smrg       const auto& __state = _M_nfa[__i];
209b17d1066Smrg 
2104d5abbe8Smrg       // Greedy.
2114d5abbe8Smrg       if (!__state._M_neg)
2124d5abbe8Smrg 	{
2134d5abbe8Smrg 	  _M_rep_once_more(__match_mode, __i);
2144d5abbe8Smrg 	  // If it's DFS executor and already accepted, we're done.
2154d5abbe8Smrg 	  if (!__dfs_mode || !_M_has_sol)
2164d5abbe8Smrg 	    _M_dfs(__match_mode, __state._M_next);
2174d5abbe8Smrg 	}
2184d5abbe8Smrg       else // Non-greedy mode
2194d5abbe8Smrg 	{
2204d5abbe8Smrg 	  if (__dfs_mode)
2214d5abbe8Smrg 	    {
2224d5abbe8Smrg 	      // vice-versa.
2234d5abbe8Smrg 	      _M_dfs(__match_mode, __state._M_next);
2244d5abbe8Smrg 	      if (!_M_has_sol)
2254d5abbe8Smrg 		_M_rep_once_more(__match_mode, __i);
2264d5abbe8Smrg 	    }
2274d5abbe8Smrg 	  else
2284d5abbe8Smrg 	    {
2294d5abbe8Smrg 	      // DON'T attempt anything, because there's already another
2304d5abbe8Smrg 	      // state with higher priority accepted. This state cannot
2314d5abbe8Smrg 	      // be better by attempting its next node.
2324d5abbe8Smrg 	      if (!_M_has_sol)
2334d5abbe8Smrg 		{
2344d5abbe8Smrg 		  _M_dfs(__match_mode, __state._M_next);
2354d5abbe8Smrg 		  // DON'T attempt anything if it's already accepted. An
2364d5abbe8Smrg 		  // accepted state *must* be better than a solution that
2374d5abbe8Smrg 		  // matches a non-greedy quantifier one more time.
2384d5abbe8Smrg 		  if (!_M_has_sol)
2394d5abbe8Smrg 		    _M_rep_once_more(__match_mode, __i);
2404d5abbe8Smrg 		}
2414d5abbe8Smrg 	    }
2424d5abbe8Smrg 	}
2434d5abbe8Smrg     }
244b17d1066Smrg 
245b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
246b17d1066Smrg 	   bool __dfs_mode>
247b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_subexpr_begin(_Match_mode __match_mode,_StateIdT __i)248b17d1066Smrg     _M_handle_subexpr_begin(_Match_mode __match_mode, _StateIdT __i)
2494d5abbe8Smrg     {
250b17d1066Smrg       const auto& __state = _M_nfa[__i];
251b17d1066Smrg 
2524d5abbe8Smrg       auto& __res = _M_cur_results[__state._M_subexpr];
2534d5abbe8Smrg       auto __back = __res.first;
2544d5abbe8Smrg       __res.first = _M_current;
2554d5abbe8Smrg       _M_dfs(__match_mode, __state._M_next);
2564d5abbe8Smrg       __res.first = __back;
2574d5abbe8Smrg     }
258b17d1066Smrg 
259b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
260b17d1066Smrg 	   bool __dfs_mode>
261b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_subexpr_end(_Match_mode __match_mode,_StateIdT __i)262b17d1066Smrg     _M_handle_subexpr_end(_Match_mode __match_mode, _StateIdT __i)
2634d5abbe8Smrg     {
264b17d1066Smrg       const auto& __state = _M_nfa[__i];
265b17d1066Smrg 
2664d5abbe8Smrg       auto& __res = _M_cur_results[__state._M_subexpr];
2674d5abbe8Smrg       auto __back = __res;
2684d5abbe8Smrg       __res.second = _M_current;
2694d5abbe8Smrg       __res.matched = true;
2704d5abbe8Smrg       _M_dfs(__match_mode, __state._M_next);
2714d5abbe8Smrg       __res = __back;
2724d5abbe8Smrg     }
273b17d1066Smrg 
274b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
275b17d1066Smrg 	   bool __dfs_mode>
276b17d1066Smrg     inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_line_begin_assertion(_Match_mode __match_mode,_StateIdT __i)277b17d1066Smrg     _M_handle_line_begin_assertion(_Match_mode __match_mode, _StateIdT __i)
278b17d1066Smrg     {
279b17d1066Smrg       const auto& __state = _M_nfa[__i];
2804d5abbe8Smrg       if (_M_at_begin())
2814d5abbe8Smrg 	_M_dfs(__match_mode, __state._M_next);
282b17d1066Smrg     }
283b17d1066Smrg 
284b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
285b17d1066Smrg 	   bool __dfs_mode>
286b17d1066Smrg     inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_line_end_assertion(_Match_mode __match_mode,_StateIdT __i)287b17d1066Smrg     _M_handle_line_end_assertion(_Match_mode __match_mode, _StateIdT __i)
288b17d1066Smrg     {
289b17d1066Smrg       const auto& __state = _M_nfa[__i];
2904d5abbe8Smrg       if (_M_at_end())
2914d5abbe8Smrg 	_M_dfs(__match_mode, __state._M_next);
292b17d1066Smrg     }
293b17d1066Smrg 
294b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
295b17d1066Smrg 	   bool __dfs_mode>
296b17d1066Smrg     inline void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_word_boundary(_Match_mode __match_mode,_StateIdT __i)297b17d1066Smrg     _M_handle_word_boundary(_Match_mode __match_mode, _StateIdT __i)
298b17d1066Smrg     {
299b17d1066Smrg       const auto& __state = _M_nfa[__i];
3004d5abbe8Smrg       if (_M_word_boundary() == !__state._M_neg)
3014d5abbe8Smrg 	_M_dfs(__match_mode, __state._M_next);
302b17d1066Smrg     }
303b17d1066Smrg 
3044d5abbe8Smrg   // Here __state._M_alt offers a single start node for a sub-NFA.
3054d5abbe8Smrg   // We recursively invoke our algorithm to match the sub-NFA.
306b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
307b17d1066Smrg 	   bool __dfs_mode>
308b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_subexpr_lookahead(_Match_mode __match_mode,_StateIdT __i)309b17d1066Smrg     _M_handle_subexpr_lookahead(_Match_mode __match_mode, _StateIdT __i)
310b17d1066Smrg     {
311b17d1066Smrg       const auto& __state = _M_nfa[__i];
312f9a78e0eSmrg       if (_M_lookahead(__state._M_alt) == !__state._M_neg)
3134d5abbe8Smrg 	_M_dfs(__match_mode, __state._M_next);
314b17d1066Smrg     }
315b17d1066Smrg 
316b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
317b17d1066Smrg 	   bool __dfs_mode>
318b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_match(_Match_mode __match_mode,_StateIdT __i)319b17d1066Smrg     _M_handle_match(_Match_mode __match_mode, _StateIdT __i)
320b17d1066Smrg     {
321b17d1066Smrg       const auto& __state = _M_nfa[__i];
322b17d1066Smrg 
3234d5abbe8Smrg       if (_M_current == _M_end)
324b17d1066Smrg 	return;
3254d5abbe8Smrg       if (__dfs_mode)
3264d5abbe8Smrg 	{
3274d5abbe8Smrg 	  if (__state._M_matches(*_M_current))
3284d5abbe8Smrg 	    {
3294d5abbe8Smrg 	      ++_M_current;
3304d5abbe8Smrg 	      _M_dfs(__match_mode, __state._M_next);
3314d5abbe8Smrg 	      --_M_current;
3324d5abbe8Smrg 	    }
3334d5abbe8Smrg 	}
3344d5abbe8Smrg       else
3354d5abbe8Smrg 	if (__state._M_matches(*_M_current))
3364d5abbe8Smrg 	  _M_states._M_queue(__state._M_next, _M_cur_results);
337b17d1066Smrg     }
338b17d1066Smrg 
339a3e9eb18Smrg   template<typename _BiIter, typename _TraitsT>
340a3e9eb18Smrg     struct _Backref_matcher
341a3e9eb18Smrg     {
_Backref_matcherstd::__detail::_Backref_matcher342a3e9eb18Smrg       _Backref_matcher(bool __icase, const _TraitsT& __traits)
343a3e9eb18Smrg       : _M_traits(__traits) { }
344a3e9eb18Smrg 
345a3e9eb18Smrg       bool
_M_applystd::__detail::_Backref_matcher346a3e9eb18Smrg       _M_apply(_BiIter __expected_begin,
347a3e9eb18Smrg 	       _BiIter __expected_end, _BiIter __actual_begin,
348a3e9eb18Smrg 	       _BiIter __actual_end)
349a3e9eb18Smrg       {
350a3e9eb18Smrg 	return _M_traits.transform(__expected_begin, __expected_end)
351a3e9eb18Smrg 	    == _M_traits.transform(__actual_begin, __actual_end);
352a3e9eb18Smrg       }
353a3e9eb18Smrg 
354a3e9eb18Smrg       const _TraitsT& _M_traits;
355a3e9eb18Smrg     };
356a3e9eb18Smrg 
357a3e9eb18Smrg   template<typename _BiIter, typename _CharT>
358a3e9eb18Smrg     struct _Backref_matcher<_BiIter, std::regex_traits<_CharT>>
359a3e9eb18Smrg     {
360a3e9eb18Smrg       using _TraitsT = std::regex_traits<_CharT>;
_Backref_matcherstd::__detail::_Backref_matcher361a3e9eb18Smrg       _Backref_matcher(bool __icase, const _TraitsT& __traits)
362a3e9eb18Smrg       : _M_icase(__icase), _M_traits(__traits) { }
363a3e9eb18Smrg 
364a3e9eb18Smrg       bool
_M_applystd::__detail::_Backref_matcher365a3e9eb18Smrg       _M_apply(_BiIter __expected_begin,
366a3e9eb18Smrg 	       _BiIter __expected_end, _BiIter __actual_begin,
367a3e9eb18Smrg 	       _BiIter __actual_end)
368a3e9eb18Smrg       {
369a3e9eb18Smrg 	if (!_M_icase)
370a3e9eb18Smrg 	  return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end,
371a3e9eb18Smrg 			       __actual_begin, __actual_end);
372a3e9eb18Smrg 	typedef std::ctype<_CharT> __ctype_type;
373a3e9eb18Smrg 	const auto& __fctyp = use_facet<__ctype_type>(_M_traits.getloc());
374a3e9eb18Smrg 	return _GLIBCXX_STD_A::__equal4(__expected_begin, __expected_end,
375a3e9eb18Smrg 			     __actual_begin, __actual_end,
376a3e9eb18Smrg 			     [this, &__fctyp](_CharT __lhs, _CharT __rhs)
377a3e9eb18Smrg 			     {
378a3e9eb18Smrg 			       return __fctyp.tolower(__lhs)
379a3e9eb18Smrg 				 == __fctyp.tolower(__rhs);
380a3e9eb18Smrg 			     });
381a3e9eb18Smrg       }
382a3e9eb18Smrg 
383a3e9eb18Smrg       bool _M_icase;
384a3e9eb18Smrg       const _TraitsT& _M_traits;
385a3e9eb18Smrg     };
386a3e9eb18Smrg 
3874d5abbe8Smrg   // First fetch the matched result from _M_cur_results as __submatch;
3884d5abbe8Smrg   // then compare it with
3894d5abbe8Smrg   // (_M_current, _M_current + (__submatch.second - __submatch.first)).
3904d5abbe8Smrg   // If matched, keep going; else just return and try another state.
391b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
392b17d1066Smrg 	   bool __dfs_mode>
393b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_backref(_Match_mode __match_mode,_StateIdT __i)394b17d1066Smrg     _M_handle_backref(_Match_mode __match_mode, _StateIdT __i)
3954d5abbe8Smrg     {
396f9a78e0eSmrg       __glibcxx_assert(__dfs_mode);
397b17d1066Smrg 
398b17d1066Smrg       const auto& __state = _M_nfa[__i];
3994d5abbe8Smrg       auto& __submatch = _M_cur_results[__state._M_backref_index];
4004d5abbe8Smrg       if (!__submatch.matched)
401b17d1066Smrg 	return;
4024d5abbe8Smrg       auto __last = _M_current;
4034d5abbe8Smrg       for (auto __tmp = __submatch.first;
4044d5abbe8Smrg 	   __last != _M_end && __tmp != __submatch.second;
4054d5abbe8Smrg 	   ++__tmp)
4064d5abbe8Smrg 	++__last;
407a3e9eb18Smrg       if (_Backref_matcher<_BiIter, _TraitsT>(
408a3e9eb18Smrg 	      _M_re.flags() & regex_constants::icase,
409a3e9eb18Smrg 	      _M_re._M_automaton->_M_traits)._M_apply(
410a3e9eb18Smrg 		  __submatch.first, __submatch.second, _M_current, __last))
4114d5abbe8Smrg 	{
4124d5abbe8Smrg 	  if (__last != _M_current)
4134d5abbe8Smrg 	    {
4144d5abbe8Smrg 	      auto __backup = _M_current;
4154d5abbe8Smrg 	      _M_current = __last;
4164d5abbe8Smrg 	      _M_dfs(__match_mode, __state._M_next);
4174d5abbe8Smrg 	      _M_current = __backup;
4184d5abbe8Smrg 	    }
4194d5abbe8Smrg 	  else
4204d5abbe8Smrg 	    _M_dfs(__match_mode, __state._M_next);
4214d5abbe8Smrg 	}
4224d5abbe8Smrg     }
423b17d1066Smrg 
424b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
425b17d1066Smrg 	   bool __dfs_mode>
426b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_accept(_Match_mode __match_mode,_StateIdT)4277d4dc15bSmrg     _M_handle_accept(_Match_mode __match_mode, _StateIdT)
428b17d1066Smrg     {
429b1e83836Smrg       if _GLIBCXX17_CONSTEXPR (__dfs_mode)
4304d5abbe8Smrg 	{
431f9a78e0eSmrg 	  __glibcxx_assert(!_M_has_sol);
4324d5abbe8Smrg 	  if (__match_mode == _Match_mode::_Exact)
4334d5abbe8Smrg 	    _M_has_sol = _M_current == _M_end;
4344d5abbe8Smrg 	  else
4354d5abbe8Smrg 	    _M_has_sol = true;
4364d5abbe8Smrg 	  if (_M_current == _M_begin
4374d5abbe8Smrg 	      && (_M_flags & regex_constants::match_not_null))
4384d5abbe8Smrg 	    _M_has_sol = false;
4394d5abbe8Smrg 	  if (_M_has_sol)
4404d5abbe8Smrg 	    {
4414d5abbe8Smrg 	      if (_M_nfa._M_flags & regex_constants::ECMAScript)
4424d5abbe8Smrg 		_M_results = _M_cur_results;
4434d5abbe8Smrg 	      else // POSIX
4444d5abbe8Smrg 		{
445f9a78e0eSmrg 		  __glibcxx_assert(_M_states._M_get_sol_pos());
4464d5abbe8Smrg 		  // Here's POSIX's logic: match the longest one. However
4474d5abbe8Smrg 		  // we never know which one (lhs or rhs of "|") is longer
4484d5abbe8Smrg 		  // unless we try both of them and compare the results.
4494d5abbe8Smrg 		  // The member variable _M_sol_pos records the end
4504d5abbe8Smrg 		  // position of the last successful match. It's better
4514d5abbe8Smrg 		  // to be larger, because POSIX regex is always greedy.
4524d5abbe8Smrg 		  // TODO: This could be slow.
4534d5abbe8Smrg 		  if (*_M_states._M_get_sol_pos() == _BiIter()
4544d5abbe8Smrg 		      || std::distance(_M_begin,
4554d5abbe8Smrg 				       *_M_states._M_get_sol_pos())
4564d5abbe8Smrg 			 < std::distance(_M_begin, _M_current))
4574d5abbe8Smrg 		    {
4584d5abbe8Smrg 		      *_M_states._M_get_sol_pos() = _M_current;
4594d5abbe8Smrg 		      _M_results = _M_cur_results;
4604d5abbe8Smrg 		    }
4614d5abbe8Smrg 		}
4624d5abbe8Smrg 	    }
4634d5abbe8Smrg 	}
4644d5abbe8Smrg       else
4654d5abbe8Smrg 	{
4664d5abbe8Smrg 	  if (_M_current == _M_begin
4674d5abbe8Smrg 	      && (_M_flags & regex_constants::match_not_null))
468b17d1066Smrg 	    return;
4694d5abbe8Smrg 	  if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
4704d5abbe8Smrg 	    if (!_M_has_sol)
4714d5abbe8Smrg 	      {
4724d5abbe8Smrg 		_M_has_sol = true;
4734d5abbe8Smrg 		_M_results = _M_cur_results;
4744d5abbe8Smrg 	      }
4754d5abbe8Smrg 	}
476b17d1066Smrg     }
477b17d1066Smrg 
478b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
479b17d1066Smrg 	   bool __dfs_mode>
480b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_handle_alternative(_Match_mode __match_mode,_StateIdT __i)481b17d1066Smrg     _M_handle_alternative(_Match_mode __match_mode, _StateIdT __i)
482b17d1066Smrg     {
483b17d1066Smrg       const auto& __state = _M_nfa[__i];
484b17d1066Smrg 
4854d5abbe8Smrg       if (_M_nfa._M_flags & regex_constants::ECMAScript)
4864d5abbe8Smrg 	{
487f9a78e0eSmrg 	  // TODO: Fix BFS support. It is wrong.
4884d5abbe8Smrg 	  _M_dfs(__match_mode, __state._M_alt);
4894d5abbe8Smrg 	  // Pick lhs if it matches. Only try rhs if it doesn't.
4904d5abbe8Smrg 	  if (!_M_has_sol)
4914d5abbe8Smrg 	    _M_dfs(__match_mode, __state._M_next);
4924d5abbe8Smrg 	}
4934d5abbe8Smrg       else
4944d5abbe8Smrg 	{
4954d5abbe8Smrg 	  // Try both and compare the result.
4964d5abbe8Smrg 	  // See "case _S_opcode_accept:" handling above.
4974d5abbe8Smrg 	  _M_dfs(__match_mode, __state._M_alt);
4984d5abbe8Smrg 	  auto __has_sol = _M_has_sol;
4994d5abbe8Smrg 	  _M_has_sol = false;
5004d5abbe8Smrg 	  _M_dfs(__match_mode, __state._M_next);
5014d5abbe8Smrg 	  _M_has_sol |= __has_sol;
5024d5abbe8Smrg 	}
503b17d1066Smrg     }
504b17d1066Smrg 
505b17d1066Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
506b17d1066Smrg 	   bool __dfs_mode>
507b17d1066Smrg     void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_dfs(_Match_mode __match_mode,_StateIdT __i)508b17d1066Smrg     _M_dfs(_Match_mode __match_mode, _StateIdT __i)
509b17d1066Smrg     {
510b17d1066Smrg       if (_M_states._M_visited(__i))
511b17d1066Smrg 	return;
512b17d1066Smrg 
513b17d1066Smrg       switch (_M_nfa[__i]._M_opcode())
514b17d1066Smrg 	{
515b17d1066Smrg 	case _S_opcode_repeat:
516b17d1066Smrg 	  _M_handle_repeat(__match_mode, __i); break;
517b17d1066Smrg 	case _S_opcode_subexpr_begin:
518b17d1066Smrg 	  _M_handle_subexpr_begin(__match_mode, __i); break;
519b17d1066Smrg 	case _S_opcode_subexpr_end:
520b17d1066Smrg 	  _M_handle_subexpr_end(__match_mode, __i); break;
521b17d1066Smrg 	case _S_opcode_line_begin_assertion:
522b17d1066Smrg 	  _M_handle_line_begin_assertion(__match_mode, __i); break;
523b17d1066Smrg 	case _S_opcode_line_end_assertion:
524b17d1066Smrg 	  _M_handle_line_end_assertion(__match_mode, __i); break;
525b17d1066Smrg 	case _S_opcode_word_boundary:
526b17d1066Smrg 	  _M_handle_word_boundary(__match_mode, __i); break;
527b17d1066Smrg 	case _S_opcode_subexpr_lookahead:
528b17d1066Smrg 	  _M_handle_subexpr_lookahead(__match_mode, __i); break;
529b17d1066Smrg 	case _S_opcode_match:
530b17d1066Smrg 	  _M_handle_match(__match_mode, __i); break;
531b17d1066Smrg 	case _S_opcode_backref:
532b17d1066Smrg 	  _M_handle_backref(__match_mode, __i); break;
533b17d1066Smrg 	case _S_opcode_accept:
534b17d1066Smrg 	  _M_handle_accept(__match_mode, __i); break;
535b17d1066Smrg 	case _S_opcode_alternative:
536b17d1066Smrg 	  _M_handle_alternative(__match_mode, __i); break;
5374d5abbe8Smrg 	default:
538f9a78e0eSmrg 	  __glibcxx_assert(false);
5394d5abbe8Smrg 	}
5404d5abbe8Smrg     }
5414d5abbe8Smrg 
5424d5abbe8Smrg   // Return whether now is at some word boundary.
5434d5abbe8Smrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
5444d5abbe8Smrg 	   bool __dfs_mode>
5454d5abbe8Smrg     bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_word_boundary() const5464d5abbe8Smrg     _M_word_boundary() const
5474d5abbe8Smrg     {
548b17d1066Smrg       if (_M_current == _M_begin && (_M_flags & regex_constants::match_not_bow))
549b17d1066Smrg 	return false;
550b17d1066Smrg       if (_M_current == _M_end && (_M_flags & regex_constants::match_not_eow))
551b17d1066Smrg 	return false;
552b17d1066Smrg 
5534d5abbe8Smrg       bool __left_is_word = false;
5544d5abbe8Smrg       if (_M_current != _M_begin
5554d5abbe8Smrg 	  || (_M_flags & regex_constants::match_prev_avail))
5564d5abbe8Smrg 	{
5574d5abbe8Smrg 	  auto __prev = _M_current;
5584d5abbe8Smrg 	  if (_M_is_word(*std::prev(__prev)))
5594d5abbe8Smrg 	    __left_is_word = true;
5604d5abbe8Smrg 	}
5614d5abbe8Smrg       bool __right_is_word =
5624d5abbe8Smrg         _M_current != _M_end && _M_is_word(*_M_current);
5634d5abbe8Smrg 
564b17d1066Smrg       return __left_is_word != __right_is_word;
5654d5abbe8Smrg     }
566a3e9eb18Smrg } // namespace __detail
5674d5abbe8Smrg 
5684d5abbe8Smrg _GLIBCXX_END_NAMESPACE_VERSION
5694d5abbe8Smrg } // namespace
570