xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_executor.h (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg // class template regex -*- C++ -*-
21debfc3dSmrg 
3*8feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc.
41debfc3dSmrg //
51debfc3dSmrg // This file is part of the GNU ISO C++ Library.  This library is free
61debfc3dSmrg // software; you can redistribute it and/or modify it under the
71debfc3dSmrg // terms of the GNU General Public License as published by the
81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option)
91debfc3dSmrg // any later version.
101debfc3dSmrg 
111debfc3dSmrg // This library is distributed in the hope that it will be useful,
121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
141debfc3dSmrg // GNU General Public License for more details.
151debfc3dSmrg 
161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional
171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version
181debfc3dSmrg // 3.1, as published by the Free Software Foundation.
191debfc3dSmrg 
201debfc3dSmrg // You should have received a copy of the GNU General Public License and
211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program;
221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
231debfc3dSmrg // <http://www.gnu.org/licenses/>.
241debfc3dSmrg 
251debfc3dSmrg /**
261debfc3dSmrg  *  @file bits/regex_executor.h
271debfc3dSmrg  *  This is an internal header file, included by other library headers.
281debfc3dSmrg  *  Do not attempt to use it directly. @headername{regex}
291debfc3dSmrg  */
301debfc3dSmrg 
311debfc3dSmrg // FIXME convert comments to doxygen format.
321debfc3dSmrg 
_GLIBCXX_VISIBILITY(default)331debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
341debfc3dSmrg {
351debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
361debfc3dSmrg 
37a2dc1f3fSmrg namespace __detail
38a2dc1f3fSmrg {
391debfc3dSmrg   /**
401debfc3dSmrg    * @addtogroup regex-detail
411debfc3dSmrg    * @{
421debfc3dSmrg    */
431debfc3dSmrg 
441debfc3dSmrg   /**
451debfc3dSmrg    * @brief Takes a regex and an input string and does the matching.
461debfc3dSmrg    *
471debfc3dSmrg    * The %_Executor class has two modes: DFS mode and BFS mode, controlled
481debfc3dSmrg    * by the template parameter %__dfs_mode.
491debfc3dSmrg    */
501debfc3dSmrg   template<typename _BiIter, typename _Alloc, typename _TraitsT,
511debfc3dSmrg 	   bool __dfs_mode>
521debfc3dSmrg     class _Executor
531debfc3dSmrg     {
541debfc3dSmrg       using __search_mode = integral_constant<bool, __dfs_mode>;
551debfc3dSmrg       using __dfs = true_type;
561debfc3dSmrg       using __bfs = false_type;
571debfc3dSmrg 
581debfc3dSmrg       enum class _Match_mode : unsigned char { _Exact, _Prefix };
591debfc3dSmrg 
601debfc3dSmrg     public:
611debfc3dSmrg       typedef typename iterator_traits<_BiIter>::value_type _CharT;
621debfc3dSmrg       typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
631debfc3dSmrg       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
641debfc3dSmrg       typedef regex_constants::match_flag_type              _FlagT;
651debfc3dSmrg       typedef typename _TraitsT::char_class_type            _ClassT;
661debfc3dSmrg       typedef _NFA<_TraitsT>                                _NFAT;
671debfc3dSmrg 
681debfc3dSmrg     public:
691debfc3dSmrg       _Executor(_BiIter         __begin,
701debfc3dSmrg 		_BiIter         __end,
711debfc3dSmrg 		_ResultsVec&    __results,
721debfc3dSmrg 		const _RegexT&  __re,
731debfc3dSmrg 		_FlagT          __flags)
741debfc3dSmrg       : _M_begin(__begin),
751debfc3dSmrg       _M_end(__end),
761debfc3dSmrg       _M_re(__re),
771debfc3dSmrg       _M_nfa(*__re._M_automaton),
781debfc3dSmrg       _M_results(__results),
791debfc3dSmrg       _M_rep_count(_M_nfa.size()),
801debfc3dSmrg       _M_states(_M_nfa._M_start(), _M_nfa.size()),
811debfc3dSmrg       _M_flags((__flags & regex_constants::match_prev_avail)
821debfc3dSmrg 	       ? (__flags
831debfc3dSmrg 		  & ~regex_constants::match_not_bol
841debfc3dSmrg 		  & ~regex_constants::match_not_bow)
851debfc3dSmrg 	       : __flags)
861debfc3dSmrg       { }
871debfc3dSmrg 
881debfc3dSmrg       // Set matched when string exactly matches the pattern.
891debfc3dSmrg       bool
901debfc3dSmrg       _M_match()
911debfc3dSmrg       {
921debfc3dSmrg 	_M_current = _M_begin;
931debfc3dSmrg 	return _M_main(_Match_mode::_Exact);
941debfc3dSmrg       }
951debfc3dSmrg 
961debfc3dSmrg       // Set matched when some prefix of the string matches the pattern.
971debfc3dSmrg       bool
981debfc3dSmrg       _M_search_from_first()
991debfc3dSmrg       {
1001debfc3dSmrg 	_M_current = _M_begin;
1011debfc3dSmrg 	return _M_main(_Match_mode::_Prefix);
1021debfc3dSmrg       }
1031debfc3dSmrg 
1041debfc3dSmrg       bool
1051debfc3dSmrg       _M_search();
1061debfc3dSmrg 
1071debfc3dSmrg     private:
1081debfc3dSmrg       void
1091debfc3dSmrg       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
1101debfc3dSmrg 
1111debfc3dSmrg       void
1121debfc3dSmrg       _M_handle_repeat(_Match_mode, _StateIdT);
1131debfc3dSmrg 
1141debfc3dSmrg       void
1151debfc3dSmrg       _M_handle_subexpr_begin(_Match_mode, _StateIdT);
1161debfc3dSmrg 
1171debfc3dSmrg       void
1181debfc3dSmrg       _M_handle_subexpr_end(_Match_mode, _StateIdT);
1191debfc3dSmrg 
1201debfc3dSmrg       void
1211debfc3dSmrg       _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
1221debfc3dSmrg 
1231debfc3dSmrg       void
1241debfc3dSmrg       _M_handle_line_end_assertion(_Match_mode, _StateIdT);
1251debfc3dSmrg 
1261debfc3dSmrg       void
1271debfc3dSmrg       _M_handle_word_boundary(_Match_mode, _StateIdT);
1281debfc3dSmrg 
1291debfc3dSmrg       void
1301debfc3dSmrg       _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
1311debfc3dSmrg 
1321debfc3dSmrg       void
1331debfc3dSmrg       _M_handle_match(_Match_mode, _StateIdT);
1341debfc3dSmrg 
1351debfc3dSmrg       void
1361debfc3dSmrg       _M_handle_backref(_Match_mode, _StateIdT);
1371debfc3dSmrg 
1381debfc3dSmrg       void
1391debfc3dSmrg       _M_handle_accept(_Match_mode, _StateIdT);
1401debfc3dSmrg 
1411debfc3dSmrg       void
1421debfc3dSmrg       _M_handle_alternative(_Match_mode, _StateIdT);
1431debfc3dSmrg 
1441debfc3dSmrg       void
1451debfc3dSmrg       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
1461debfc3dSmrg 
1471debfc3dSmrg       bool
1481debfc3dSmrg       _M_main(_Match_mode __match_mode)
1491debfc3dSmrg       { return _M_main_dispatch(__match_mode, __search_mode{}); }
1501debfc3dSmrg 
1511debfc3dSmrg       bool
1521debfc3dSmrg       _M_main_dispatch(_Match_mode __match_mode, __dfs);
1531debfc3dSmrg 
1541debfc3dSmrg       bool
1551debfc3dSmrg       _M_main_dispatch(_Match_mode __match_mode, __bfs);
1561debfc3dSmrg 
1571debfc3dSmrg       bool
1581debfc3dSmrg       _M_is_word(_CharT __ch) const
1591debfc3dSmrg       {
1601debfc3dSmrg 	static const _CharT __s[2] = { 'w' };
1611debfc3dSmrg 	return _M_re._M_automaton->_M_traits.isctype
1621debfc3dSmrg 	  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
1631debfc3dSmrg       }
1641debfc3dSmrg 
1651debfc3dSmrg       bool
1661debfc3dSmrg       _M_at_begin() const
1671debfc3dSmrg       {
1681debfc3dSmrg 	return _M_current == _M_begin
1691debfc3dSmrg 	  && !(_M_flags & (regex_constants::match_not_bol
1701debfc3dSmrg 			   | regex_constants::match_prev_avail));
1711debfc3dSmrg       }
1721debfc3dSmrg 
1731debfc3dSmrg       bool
1741debfc3dSmrg       _M_at_end() const
1751debfc3dSmrg       {
1761debfc3dSmrg 	return _M_current == _M_end
1771debfc3dSmrg 	  && !(_M_flags & regex_constants::match_not_eol);
1781debfc3dSmrg       }
1791debfc3dSmrg 
1801debfc3dSmrg       bool
1811debfc3dSmrg       _M_word_boundary() const;
1821debfc3dSmrg 
1831debfc3dSmrg       bool
1841debfc3dSmrg       _M_lookahead(_StateIdT __next);
1851debfc3dSmrg 
1861debfc3dSmrg        // Holds additional information used in BFS-mode.
1871debfc3dSmrg       template<typename _SearchMode, typename _ResultsVec>
1881debfc3dSmrg 	struct _State_info;
1891debfc3dSmrg 
1901debfc3dSmrg       template<typename _ResultsVec>
1911debfc3dSmrg 	struct _State_info<__bfs, _ResultsVec>
1921debfc3dSmrg 	{
1931debfc3dSmrg 	  explicit
1941debfc3dSmrg 	  _State_info(_StateIdT __start, size_t __n)
1951debfc3dSmrg 	  : _M_visited_states(new bool[__n]()), _M_start(__start)
1961debfc3dSmrg 	  { }
1971debfc3dSmrg 
1981debfc3dSmrg 	  bool _M_visited(_StateIdT __i)
1991debfc3dSmrg 	  {
2001debfc3dSmrg 	    if (_M_visited_states[__i])
2011debfc3dSmrg 	      return true;
2021debfc3dSmrg 	    _M_visited_states[__i] = true;
2031debfc3dSmrg 	    return false;
2041debfc3dSmrg 	  }
2051debfc3dSmrg 
2061debfc3dSmrg 	  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
2071debfc3dSmrg 	  { _M_match_queue.emplace_back(__i, __res); }
2081debfc3dSmrg 
2091debfc3dSmrg 	  // Dummy implementations for BFS mode.
2101debfc3dSmrg 	  _BiIter* _M_get_sol_pos() { return nullptr; }
2111debfc3dSmrg 
2121debfc3dSmrg 	  // Saves states that need to be considered for the next character.
2131debfc3dSmrg 	  vector<pair<_StateIdT, _ResultsVec>>	_M_match_queue;
2141debfc3dSmrg 	  // Indicates which states are already visited.
2151debfc3dSmrg 	  unique_ptr<bool[]>			_M_visited_states;
2161debfc3dSmrg 	  // To record current solution.
2171debfc3dSmrg 	  _StateIdT _M_start;
2181debfc3dSmrg 	};
2191debfc3dSmrg 
2201debfc3dSmrg       template<typename _ResultsVec>
2211debfc3dSmrg 	struct _State_info<__dfs, _ResultsVec>
2221debfc3dSmrg 	{
2231debfc3dSmrg 	  explicit
2241debfc3dSmrg 	  _State_info(_StateIdT __start, size_t) : _M_start(__start)
2251debfc3dSmrg 	  { }
2261debfc3dSmrg 
2271debfc3dSmrg 	  // Dummy implementations for DFS mode.
2281debfc3dSmrg 	  bool _M_visited(_StateIdT) const { return false; }
2291debfc3dSmrg 	  void _M_queue(_StateIdT, const _ResultsVec&) { }
2301debfc3dSmrg 
2311debfc3dSmrg 	  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
2321debfc3dSmrg 
2331debfc3dSmrg 	  // To record current solution.
2341debfc3dSmrg 	  _StateIdT _M_start;
2351debfc3dSmrg 	  _BiIter   _M_sol_pos;
2361debfc3dSmrg 	};
2371debfc3dSmrg 
2381debfc3dSmrg     public:
2391debfc3dSmrg       _ResultsVec                                           _M_cur_results;
2401debfc3dSmrg       _BiIter                                               _M_current;
2411debfc3dSmrg       _BiIter                                               _M_begin;
2421debfc3dSmrg       const _BiIter                                         _M_end;
2431debfc3dSmrg       const _RegexT&                                        _M_re;
2441debfc3dSmrg       const _NFAT&                                          _M_nfa;
2451debfc3dSmrg       _ResultsVec&                                          _M_results;
2461debfc3dSmrg       vector<pair<_BiIter, int>>                            _M_rep_count;
2471debfc3dSmrg       _State_info<__search_mode, _ResultsVec>		    _M_states;
2481debfc3dSmrg       _FlagT                                                _M_flags;
2491debfc3dSmrg       // Do we have a solution so far?
2501debfc3dSmrg       bool                                                  _M_has_sol;
2511debfc3dSmrg     };
2521debfc3dSmrg 
253*8feb0f0bSmrg  ///@} regex-detail
2541debfc3dSmrg } // namespace __detail
255a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION
2561debfc3dSmrg } // namespace std
2571debfc3dSmrg 
2581debfc3dSmrg #include <bits/regex_executor.tcc>
259