11debfc3dSmrg // class template regex -*- C++ -*-
21debfc3dSmrg
38feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc.
41debfc3dSmrg //
51debfc3dSmrg // This file is part of the GNU ISO C++ Library. This library is free
61debfc3dSmrg // software; you can redistribute it and/or modify it under the
71debfc3dSmrg // terms of the GNU General Public License as published by the
81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option)
91debfc3dSmrg // any later version.
101debfc3dSmrg
111debfc3dSmrg // This library is distributed in the hope that it will be useful,
121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
141debfc3dSmrg // GNU General Public License for more details.
151debfc3dSmrg
161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional
171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version
181debfc3dSmrg // 3.1, as published by the Free Software Foundation.
191debfc3dSmrg
201debfc3dSmrg // You should have received a copy of the GNU General Public License and
211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program;
221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
231debfc3dSmrg // <http://www.gnu.org/licenses/>.
241debfc3dSmrg
251debfc3dSmrg /**
261debfc3dSmrg * @file bits/regex_automaton.h
271debfc3dSmrg * This is an internal header file, included by other library headers.
281debfc3dSmrg * Do not attempt to use it directly. @headername{regex}
291debfc3dSmrg */
301debfc3dSmrg
311debfc3dSmrg // This macro defines the maximal state number a NFA can have.
321debfc3dSmrg #ifndef _GLIBCXX_REGEX_STATE_LIMIT
331debfc3dSmrg #define _GLIBCXX_REGEX_STATE_LIMIT 100000
341debfc3dSmrg #endif
351debfc3dSmrg
_GLIBCXX_VISIBILITY(default)361debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
371debfc3dSmrg {
381debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
391debfc3dSmrg
40a2dc1f3fSmrg namespace __detail
41a2dc1f3fSmrg {
421debfc3dSmrg /**
431debfc3dSmrg * @defgroup regex-detail Base and Implementation Classes
441debfc3dSmrg * @ingroup regex
451debfc3dSmrg * @{
461debfc3dSmrg */
471debfc3dSmrg
481debfc3dSmrg typedef long _StateIdT;
491debfc3dSmrg static const _StateIdT _S_invalid_state_id = -1;
501debfc3dSmrg
511debfc3dSmrg template<typename _CharT>
521debfc3dSmrg using _Matcher = std::function<bool (_CharT)>;
531debfc3dSmrg
541debfc3dSmrg /// Operation codes that define the type of transitions within the base NFA
551debfc3dSmrg /// that represents the regular expression.
561debfc3dSmrg enum _Opcode : int
571debfc3dSmrg {
581debfc3dSmrg _S_opcode_unknown,
591debfc3dSmrg _S_opcode_alternative,
601debfc3dSmrg _S_opcode_repeat,
611debfc3dSmrg _S_opcode_backref,
621debfc3dSmrg _S_opcode_line_begin_assertion,
631debfc3dSmrg _S_opcode_line_end_assertion,
641debfc3dSmrg _S_opcode_word_boundary,
651debfc3dSmrg _S_opcode_subexpr_lookahead,
661debfc3dSmrg _S_opcode_subexpr_begin,
671debfc3dSmrg _S_opcode_subexpr_end,
681debfc3dSmrg _S_opcode_dummy,
691debfc3dSmrg _S_opcode_match,
701debfc3dSmrg _S_opcode_accept,
711debfc3dSmrg };
721debfc3dSmrg
731debfc3dSmrg struct _State_base
741debfc3dSmrg {
751debfc3dSmrg protected:
761debfc3dSmrg _Opcode _M_opcode; // type of outgoing transition
771debfc3dSmrg
781debfc3dSmrg public:
791debfc3dSmrg _StateIdT _M_next; // outgoing transition
801debfc3dSmrg union // Since they are mutually exclusive.
811debfc3dSmrg {
821debfc3dSmrg size_t _M_subexpr; // for _S_opcode_subexpr_*
831debfc3dSmrg size_t _M_backref_index; // for _S_opcode_backref
841debfc3dSmrg struct
851debfc3dSmrg {
861debfc3dSmrg // for _S_opcode_alternative, _S_opcode_repeat and
871debfc3dSmrg // _S_opcode_subexpr_lookahead
881debfc3dSmrg _StateIdT _M_alt;
891debfc3dSmrg // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
901debfc3dSmrg // quantifiers (ungreedy if set true)
911debfc3dSmrg bool _M_neg;
921debfc3dSmrg };
931debfc3dSmrg // For _S_opcode_match
941debfc3dSmrg __gnu_cxx::__aligned_membuf<_Matcher<char>> _M_matcher_storage;
951debfc3dSmrg };
961debfc3dSmrg
971debfc3dSmrg protected:
98*23f5f463Smrg explicit _State_base(_Opcode __opcode) noexcept
991debfc3dSmrg : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
1001debfc3dSmrg { }
1011debfc3dSmrg
1021debfc3dSmrg public:
1031debfc3dSmrg bool
104*23f5f463Smrg _M_has_alt() const noexcept
1051debfc3dSmrg {
1061debfc3dSmrg return _M_opcode == _S_opcode_alternative
1071debfc3dSmrg || _M_opcode == _S_opcode_repeat
1081debfc3dSmrg || _M_opcode == _S_opcode_subexpr_lookahead;
1091debfc3dSmrg }
1101debfc3dSmrg
1111debfc3dSmrg #ifdef _GLIBCXX_DEBUG
1121debfc3dSmrg std::ostream&
1131debfc3dSmrg _M_print(std::ostream& ostr) const;
1141debfc3dSmrg
1151debfc3dSmrg // Prints graphviz dot commands for state.
1161debfc3dSmrg std::ostream&
1171debfc3dSmrg _M_dot(std::ostream& __ostr, _StateIdT __id) const;
1181debfc3dSmrg #endif
1191debfc3dSmrg };
1201debfc3dSmrg
1211debfc3dSmrg template<typename _Char_type>
1221debfc3dSmrg struct _State : _State_base
1231debfc3dSmrg {
1241debfc3dSmrg typedef _Matcher<_Char_type> _MatcherT;
1251debfc3dSmrg static_assert(sizeof(_MatcherT) == sizeof(_Matcher<char>),
1261debfc3dSmrg "std::function<bool(T)> has the same size as "
1271debfc3dSmrg "std::function<bool(char)>");
1281debfc3dSmrg static_assert(alignof(_MatcherT) == alignof(_Matcher<char>),
1291debfc3dSmrg "std::function<bool(T)> has the same alignment as "
1301debfc3dSmrg "std::function<bool(char)>");
1311debfc3dSmrg
1321debfc3dSmrg explicit
133*23f5f463Smrg _State(_Opcode __opcode) noexcept : _State_base(__opcode)
1341debfc3dSmrg {
1351debfc3dSmrg if (_M_opcode() == _S_opcode_match)
1361debfc3dSmrg new (this->_M_matcher_storage._M_addr()) _MatcherT();
1371debfc3dSmrg }
1381debfc3dSmrg
1391debfc3dSmrg _State(const _State& __rhs) : _State_base(__rhs)
1401debfc3dSmrg {
1411debfc3dSmrg if (__rhs._M_opcode() == _S_opcode_match)
1421debfc3dSmrg new (this->_M_matcher_storage._M_addr())
1431debfc3dSmrg _MatcherT(__rhs._M_get_matcher());
1441debfc3dSmrg }
1451debfc3dSmrg
146*23f5f463Smrg _State(_State&& __rhs) noexcept : _State_base(__rhs)
1471debfc3dSmrg {
1481debfc3dSmrg if (__rhs._M_opcode() == _S_opcode_match)
1491debfc3dSmrg new (this->_M_matcher_storage._M_addr())
1501debfc3dSmrg _MatcherT(std::move(__rhs._M_get_matcher()));
1511debfc3dSmrg }
1521debfc3dSmrg
1531debfc3dSmrg _State&
1541debfc3dSmrg operator=(const _State&) = delete;
1551debfc3dSmrg
1561debfc3dSmrg ~_State()
1571debfc3dSmrg {
1581debfc3dSmrg if (_M_opcode() == _S_opcode_match)
1591debfc3dSmrg _M_get_matcher().~_MatcherT();
1601debfc3dSmrg }
1611debfc3dSmrg
1621debfc3dSmrg // Since correct ctor and dtor rely on _M_opcode, it's better not to
1631debfc3dSmrg // change it over time.
1641debfc3dSmrg _Opcode
165*23f5f463Smrg _M_opcode() const noexcept
1661debfc3dSmrg { return _State_base::_M_opcode; }
1671debfc3dSmrg
1681debfc3dSmrg bool
1691debfc3dSmrg _M_matches(_Char_type __char) const
1701debfc3dSmrg { return _M_get_matcher()(__char); }
1711debfc3dSmrg
1721debfc3dSmrg _MatcherT&
173*23f5f463Smrg _M_get_matcher() noexcept
1741debfc3dSmrg { return *static_cast<_MatcherT*>(this->_M_matcher_storage._M_addr()); }
1751debfc3dSmrg
1761debfc3dSmrg const _MatcherT&
177*23f5f463Smrg _M_get_matcher() const noexcept
1781debfc3dSmrg {
1791debfc3dSmrg return *static_cast<const _MatcherT*>(
1801debfc3dSmrg this->_M_matcher_storage._M_addr());
1811debfc3dSmrg }
1821debfc3dSmrg };
1831debfc3dSmrg
1841debfc3dSmrg struct _NFA_base
1851debfc3dSmrg {
1861debfc3dSmrg typedef size_t _SizeT;
1871debfc3dSmrg typedef regex_constants::syntax_option_type _FlagT;
1881debfc3dSmrg
1891debfc3dSmrg explicit
190*23f5f463Smrg _NFA_base(_FlagT __f) noexcept
1911debfc3dSmrg : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
1921debfc3dSmrg _M_has_backref(false)
1931debfc3dSmrg { }
1941debfc3dSmrg
1951debfc3dSmrg _NFA_base(_NFA_base&&) = default;
1961debfc3dSmrg
1971debfc3dSmrg protected:
1981debfc3dSmrg ~_NFA_base() = default;
1991debfc3dSmrg
2001debfc3dSmrg public:
2011debfc3dSmrg _FlagT
202*23f5f463Smrg _M_options() const noexcept
2031debfc3dSmrg { return _M_flags; }
2041debfc3dSmrg
2051debfc3dSmrg _StateIdT
206*23f5f463Smrg _M_start() const noexcept
2071debfc3dSmrg { return _M_start_state; }
2081debfc3dSmrg
2091debfc3dSmrg _SizeT
210*23f5f463Smrg _M_sub_count() const noexcept
2111debfc3dSmrg { return _M_subexpr_count; }
2121debfc3dSmrg
213c0a68be4Smrg _GLIBCXX_STD_C::vector<size_t> _M_paren_stack;
2141debfc3dSmrg _FlagT _M_flags;
2151debfc3dSmrg _StateIdT _M_start_state;
2161debfc3dSmrg _SizeT _M_subexpr_count;
2171debfc3dSmrg bool _M_has_backref;
2181debfc3dSmrg };
2191debfc3dSmrg
2201debfc3dSmrg template<typename _TraitsT>
2211debfc3dSmrg struct _NFA
222c0a68be4Smrg : _NFA_base, _GLIBCXX_STD_C::vector<_State<typename _TraitsT::char_type>>
2231debfc3dSmrg {
2241debfc3dSmrg typedef typename _TraitsT::char_type _Char_type;
2251debfc3dSmrg typedef _State<_Char_type> _StateT;
2261debfc3dSmrg typedef _Matcher<_Char_type> _MatcherT;
2271debfc3dSmrg
2281debfc3dSmrg _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
2291debfc3dSmrg : _NFA_base(__flags)
2301debfc3dSmrg { _M_traits.imbue(__loc); }
2311debfc3dSmrg
2321debfc3dSmrg // for performance reasons _NFA objects should only be moved not copied
2331debfc3dSmrg _NFA(const _NFA&) = delete;
2341debfc3dSmrg _NFA(_NFA&&) = default;
2351debfc3dSmrg
2361debfc3dSmrg _StateIdT
2371debfc3dSmrg _M_insert_accept()
2381debfc3dSmrg {
2391debfc3dSmrg auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
2401debfc3dSmrg return __ret;
2411debfc3dSmrg }
2421debfc3dSmrg
2431debfc3dSmrg _StateIdT
2441debfc3dSmrg _M_insert_alt(_StateIdT __next, _StateIdT __alt,
2451debfc3dSmrg bool __neg __attribute__((__unused__)))
2461debfc3dSmrg {
2471debfc3dSmrg _StateT __tmp(_S_opcode_alternative);
2481debfc3dSmrg // It labels every quantifier to make greedy comparison easier in BFS
2491debfc3dSmrg // approach.
2501debfc3dSmrg __tmp._M_next = __next;
2511debfc3dSmrg __tmp._M_alt = __alt;
2521debfc3dSmrg return _M_insert_state(std::move(__tmp));
2531debfc3dSmrg }
2541debfc3dSmrg
2551debfc3dSmrg _StateIdT
2561debfc3dSmrg _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
2571debfc3dSmrg {
2581debfc3dSmrg _StateT __tmp(_S_opcode_repeat);
2591debfc3dSmrg // It labels every quantifier to make greedy comparison easier in BFS
2601debfc3dSmrg // approach.
2611debfc3dSmrg __tmp._M_next = __next;
2621debfc3dSmrg __tmp._M_alt = __alt;
2631debfc3dSmrg __tmp._M_neg = __neg;
2641debfc3dSmrg return _M_insert_state(std::move(__tmp));
2651debfc3dSmrg }
2661debfc3dSmrg
2671debfc3dSmrg _StateIdT
2681debfc3dSmrg _M_insert_matcher(_MatcherT __m)
2691debfc3dSmrg {
2701debfc3dSmrg _StateT __tmp(_S_opcode_match);
2711debfc3dSmrg __tmp._M_get_matcher() = std::move(__m);
2721debfc3dSmrg return _M_insert_state(std::move(__tmp));
2731debfc3dSmrg }
2741debfc3dSmrg
2751debfc3dSmrg _StateIdT
2761debfc3dSmrg _M_insert_subexpr_begin()
2771debfc3dSmrg {
2781debfc3dSmrg auto __id = this->_M_subexpr_count++;
2791debfc3dSmrg this->_M_paren_stack.push_back(__id);
2801debfc3dSmrg _StateT __tmp(_S_opcode_subexpr_begin);
2811debfc3dSmrg __tmp._M_subexpr = __id;
2821debfc3dSmrg return _M_insert_state(std::move(__tmp));
2831debfc3dSmrg }
2841debfc3dSmrg
2851debfc3dSmrg _StateIdT
2861debfc3dSmrg _M_insert_subexpr_end()
2871debfc3dSmrg {
2881debfc3dSmrg _StateT __tmp(_S_opcode_subexpr_end);
2891debfc3dSmrg __tmp._M_subexpr = this->_M_paren_stack.back();
2901debfc3dSmrg this->_M_paren_stack.pop_back();
2911debfc3dSmrg return _M_insert_state(std::move(__tmp));
2921debfc3dSmrg }
2931debfc3dSmrg
2941debfc3dSmrg _StateIdT
2951debfc3dSmrg _M_insert_backref(size_t __index);
2961debfc3dSmrg
2971debfc3dSmrg _StateIdT
2981debfc3dSmrg _M_insert_line_begin()
2991debfc3dSmrg { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
3001debfc3dSmrg
3011debfc3dSmrg _StateIdT
3021debfc3dSmrg _M_insert_line_end()
3031debfc3dSmrg { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
3041debfc3dSmrg
3051debfc3dSmrg _StateIdT
3061debfc3dSmrg _M_insert_word_bound(bool __neg)
3071debfc3dSmrg {
3081debfc3dSmrg _StateT __tmp(_S_opcode_word_boundary);
3091debfc3dSmrg __tmp._M_neg = __neg;
3101debfc3dSmrg return _M_insert_state(std::move(__tmp));
3111debfc3dSmrg }
3121debfc3dSmrg
3131debfc3dSmrg _StateIdT
3141debfc3dSmrg _M_insert_lookahead(_StateIdT __alt, bool __neg)
3151debfc3dSmrg {
3161debfc3dSmrg _StateT __tmp(_S_opcode_subexpr_lookahead);
3171debfc3dSmrg __tmp._M_alt = __alt;
3181debfc3dSmrg __tmp._M_neg = __neg;
3191debfc3dSmrg return _M_insert_state(std::move(__tmp));
3201debfc3dSmrg }
3211debfc3dSmrg
3221debfc3dSmrg _StateIdT
3231debfc3dSmrg _M_insert_dummy()
3241debfc3dSmrg { return _M_insert_state(_StateT(_S_opcode_dummy)); }
3251debfc3dSmrg
3261debfc3dSmrg _StateIdT
3271debfc3dSmrg _M_insert_state(_StateT __s)
3281debfc3dSmrg {
3291debfc3dSmrg this->push_back(std::move(__s));
3301debfc3dSmrg if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
3311debfc3dSmrg __throw_regex_error(
3321debfc3dSmrg regex_constants::error_space,
3331debfc3dSmrg "Number of NFA states exceeds limit. Please use shorter regex "
3341debfc3dSmrg "string, or use smaller brace expression, or make "
3351debfc3dSmrg "_GLIBCXX_REGEX_STATE_LIMIT larger.");
3361debfc3dSmrg return this->size() - 1;
3371debfc3dSmrg }
3381debfc3dSmrg
3391debfc3dSmrg // Eliminate dummy node in this NFA to make it compact.
3401debfc3dSmrg void
3411debfc3dSmrg _M_eliminate_dummy();
3421debfc3dSmrg
3431debfc3dSmrg #ifdef _GLIBCXX_DEBUG
3441debfc3dSmrg std::ostream&
3451debfc3dSmrg _M_dot(std::ostream& __ostr) const;
3461debfc3dSmrg #endif
3471debfc3dSmrg public:
3481debfc3dSmrg _TraitsT _M_traits;
3491debfc3dSmrg };
3501debfc3dSmrg
3511debfc3dSmrg /// Describes a sequence of one or more %_State, its current start
3521debfc3dSmrg /// and end(s). This structure contains fragments of an NFA during
3531debfc3dSmrg /// construction.
3541debfc3dSmrg template<typename _TraitsT>
3551debfc3dSmrg class _StateSeq
3561debfc3dSmrg {
3571debfc3dSmrg public:
3581debfc3dSmrg typedef _NFA<_TraitsT> _RegexT;
3591debfc3dSmrg
3601debfc3dSmrg public:
3611debfc3dSmrg _StateSeq(_RegexT& __nfa, _StateIdT __s)
3621debfc3dSmrg : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
3631debfc3dSmrg { }
3641debfc3dSmrg
3651debfc3dSmrg _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
3661debfc3dSmrg : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
3671debfc3dSmrg { }
3681debfc3dSmrg
3691debfc3dSmrg // Append a state on *this and change *this to the new sequence.
3701debfc3dSmrg void
3711debfc3dSmrg _M_append(_StateIdT __id)
3721debfc3dSmrg {
3731debfc3dSmrg _M_nfa[_M_end]._M_next = __id;
3741debfc3dSmrg _M_end = __id;
3751debfc3dSmrg }
3761debfc3dSmrg
3771debfc3dSmrg // Append a sequence on *this and change *this to the new sequence.
3781debfc3dSmrg void
3791debfc3dSmrg _M_append(const _StateSeq& __s)
3801debfc3dSmrg {
3811debfc3dSmrg _M_nfa[_M_end]._M_next = __s._M_start;
3821debfc3dSmrg _M_end = __s._M_end;
3831debfc3dSmrg }
3841debfc3dSmrg
3851debfc3dSmrg // Clones an entire sequence.
3861debfc3dSmrg _StateSeq
3871debfc3dSmrg _M_clone();
3881debfc3dSmrg
3891debfc3dSmrg public:
3901debfc3dSmrg _RegexT& _M_nfa;
3911debfc3dSmrg _StateIdT _M_start;
3921debfc3dSmrg _StateIdT _M_end;
3931debfc3dSmrg };
3941debfc3dSmrg
3958feb0f0bSmrg ///@} regex-detail
3961debfc3dSmrg } // namespace __detail
397a2dc1f3fSmrg
398a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION
3991debfc3dSmrg } // namespace std
4001debfc3dSmrg
4011debfc3dSmrg #include <bits/regex_automaton.tcc>
402