148fb7bfaSmrg // class template regex -*- C++ -*-
248fb7bfaSmrg
3*b1e83836Smrg // Copyright (C) 2010-2022 Free Software Foundation, Inc.
448fb7bfaSmrg //
548fb7bfaSmrg // This file is part of the GNU ISO C++ Library. This library is free
648fb7bfaSmrg // software; you can redistribute it and/or modify it under the
748fb7bfaSmrg // terms of the GNU General Public License as published by the
848fb7bfaSmrg // Free Software Foundation; either version 3, or (at your option)
948fb7bfaSmrg // any later version.
1048fb7bfaSmrg
1148fb7bfaSmrg // This library is distributed in the hope that it will be useful,
1248fb7bfaSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
1348fb7bfaSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1448fb7bfaSmrg // GNU General Public License for more details.
1548fb7bfaSmrg
1648fb7bfaSmrg // Under Section 7 of GPL version 3, you are granted additional
1748fb7bfaSmrg // permissions described in the GCC Runtime Library Exception, version
1848fb7bfaSmrg // 3.1, as published by the Free Software Foundation.
1948fb7bfaSmrg
2048fb7bfaSmrg // You should have received a copy of the GNU General Public License and
2148fb7bfaSmrg // a copy of the GCC Runtime Library Exception along with this program;
2248fb7bfaSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
2348fb7bfaSmrg // <http://www.gnu.org/licenses/>.
2448fb7bfaSmrg
2548fb7bfaSmrg /**
2648fb7bfaSmrg * @file bits/regex_compiler.h
2748fb7bfaSmrg * This is an internal header file, included by other library headers.
2848fb7bfaSmrg * Do not attempt to use it directly. @headername{regex}
2948fb7bfaSmrg */
3048fb7bfaSmrg
_GLIBCXX_VISIBILITY(default)3148fb7bfaSmrg namespace std _GLIBCXX_VISIBILITY(default)
3248fb7bfaSmrg {
33b17d1066Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
34b17d1066Smrg _GLIBCXX_BEGIN_NAMESPACE_CXX11
35b17d1066Smrg
36b17d1066Smrg template<typename>
37b17d1066Smrg class regex_traits;
38b17d1066Smrg
39b17d1066Smrg _GLIBCXX_END_NAMESPACE_CXX11
40b17d1066Smrg
4148fb7bfaSmrg namespace __detail
4248fb7bfaSmrg {
4348fb7bfaSmrg /**
4448fb7bfaSmrg * @addtogroup regex-detail
4548fb7bfaSmrg * @{
4648fb7bfaSmrg */
4748fb7bfaSmrg
484d5abbe8Smrg template<typename, bool, bool>
494d5abbe8Smrg struct _BracketMatcher;
5048fb7bfaSmrg
5148fb7bfaSmrg /**
524d5abbe8Smrg * @brief Builds an NFA from an input iterator range.
5348fb7bfaSmrg *
544d5abbe8Smrg * The %_TraitsT type should fulfill requirements [28.3].
5548fb7bfaSmrg */
564d5abbe8Smrg template<typename _TraitsT>
5748fb7bfaSmrg class _Compiler
5848fb7bfaSmrg {
5948fb7bfaSmrg public:
604d5abbe8Smrg typedef typename _TraitsT::char_type _CharT;
614d5abbe8Smrg typedef _NFA<_TraitsT> _RegexT;
6248fb7bfaSmrg typedef regex_constants::syntax_option_type _FlagT;
6348fb7bfaSmrg
64*b1e83836Smrg _Compiler(const _CharT* __b, const _CharT* __e,
654d5abbe8Smrg const typename _TraitsT::locale_type& __traits, _FlagT __flags);
6648fb7bfaSmrg
674d5abbe8Smrg shared_ptr<const _RegexT>
687d4dc15bSmrg _M_get_nfa() noexcept
694d5abbe8Smrg { return std::move(_M_nfa); }
7048fb7bfaSmrg
7148fb7bfaSmrg private:
724d5abbe8Smrg typedef _Scanner<_CharT> _ScannerT;
734d5abbe8Smrg typedef typename _TraitsT::string_type _StringT;
7448fb7bfaSmrg typedef typename _ScannerT::_TokenT _TokenT;
754d5abbe8Smrg typedef _StateSeq<_TraitsT> _StateSeqT;
764d5abbe8Smrg typedef std::stack<_StateSeqT> _StackT;
774d5abbe8Smrg typedef std::ctype<_CharT> _CtypeT;
7848fb7bfaSmrg
7948fb7bfaSmrg // accepts a specific token or returns false.
8048fb7bfaSmrg bool
8148fb7bfaSmrg _M_match_token(_TokenT __token);
8248fb7bfaSmrg
8348fb7bfaSmrg void
8448fb7bfaSmrg _M_disjunction();
8548fb7bfaSmrg
864d5abbe8Smrg void
8748fb7bfaSmrg _M_alternative();
8848fb7bfaSmrg
8948fb7bfaSmrg bool
9048fb7bfaSmrg _M_term();
9148fb7bfaSmrg
9248fb7bfaSmrg bool
9348fb7bfaSmrg _M_assertion();
9448fb7bfaSmrg
9548fb7bfaSmrg bool
9648fb7bfaSmrg _M_quantifier();
9748fb7bfaSmrg
9848fb7bfaSmrg bool
9948fb7bfaSmrg _M_atom();
10048fb7bfaSmrg
10148fb7bfaSmrg bool
10248fb7bfaSmrg _M_bracket_expression();
10348fb7bfaSmrg
1044d5abbe8Smrg template<bool __icase, bool __collate>
1054d5abbe8Smrg void
1064d5abbe8Smrg _M_insert_any_matcher_ecma();
10748fb7bfaSmrg
1084d5abbe8Smrg template<bool __icase, bool __collate>
1094d5abbe8Smrg void
1104d5abbe8Smrg _M_insert_any_matcher_posix();
11148fb7bfaSmrg
1124d5abbe8Smrg template<bool __icase, bool __collate>
1134d5abbe8Smrg void
1144d5abbe8Smrg _M_insert_char_matcher();
11548fb7bfaSmrg
1164d5abbe8Smrg template<bool __icase, bool __collate>
1174d5abbe8Smrg void
1184d5abbe8Smrg _M_insert_character_class_matcher();
11948fb7bfaSmrg
1204d5abbe8Smrg template<bool __icase, bool __collate>
1214d5abbe8Smrg void
1224d5abbe8Smrg _M_insert_bracket_matcher(bool __neg);
12348fb7bfaSmrg
1247d4dc15bSmrg // Cache of the last atom seen in a bracketed range expression.
1257d4dc15bSmrg struct _BracketState
1267d4dc15bSmrg {
1277d4dc15bSmrg enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None;
128*b1e83836Smrg _CharT _M_char = _CharT();
1297d4dc15bSmrg
1307d4dc15bSmrg void
1317d4dc15bSmrg set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; }
1327d4dc15bSmrg
1337d4dc15bSmrg _GLIBCXX_NODISCARD _CharT
1347d4dc15bSmrg get() const noexcept { return _M_char; }
1357d4dc15bSmrg
1367d4dc15bSmrg void
1377d4dc15bSmrg reset(_Type __t = _Type::_None) noexcept { _M_type = __t; }
1387d4dc15bSmrg
1397d4dc15bSmrg explicit operator bool() const noexcept
1407d4dc15bSmrg { return _M_type != _Type::_None; }
1417d4dc15bSmrg
1427d4dc15bSmrg // Previous token was a single character.
1437d4dc15bSmrg _GLIBCXX_NODISCARD bool
1447d4dc15bSmrg _M_is_char() const noexcept { return _M_type == _Type::_Char; }
1457d4dc15bSmrg
1467d4dc15bSmrg // Previous token was a character class, equivalent class,
1477d4dc15bSmrg // collating symbol etc.
1487d4dc15bSmrg _GLIBCXX_NODISCARD bool
1497d4dc15bSmrg _M_is_class() const noexcept { return _M_type == _Type::_Class; }
1507d4dc15bSmrg };
1517d4dc15bSmrg
1527d4dc15bSmrg template<bool __icase, bool __collate>
1537d4dc15bSmrg using _BracketMatcher
1547d4dc15bSmrg = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>;
1557d4dc15bSmrg
1567d4dc15bSmrg // Returns true if successfully parsed one term and should continue
1577d4dc15bSmrg // compiling a bracket expression.
1584d5abbe8Smrg // Returns false if the compiler should move on.
1594d5abbe8Smrg template<bool __icase, bool __collate>
16048fb7bfaSmrg bool
1617d4dc15bSmrg _M_expression_term(_BracketState& __last_char,
1627d4dc15bSmrg _BracketMatcher<__icase, __collate>& __matcher);
16348fb7bfaSmrg
16448fb7bfaSmrg int
16548fb7bfaSmrg _M_cur_int_value(int __radix);
16648fb7bfaSmrg
1674d5abbe8Smrg bool
1684d5abbe8Smrg _M_try_char();
1694d5abbe8Smrg
1704d5abbe8Smrg _StateSeqT
1714d5abbe8Smrg _M_pop()
1724d5abbe8Smrg {
1734d5abbe8Smrg auto ret = _M_stack.top();
1744d5abbe8Smrg _M_stack.pop();
1754d5abbe8Smrg return ret;
1764d5abbe8Smrg }
1774d5abbe8Smrg
1787d4dc15bSmrg static _FlagT
1797d4dc15bSmrg _S_validate(_FlagT __f)
1807d4dc15bSmrg {
1817d4dc15bSmrg using namespace regex_constants;
1827d4dc15bSmrg switch (__f & (ECMAScript|basic|extended|awk|grep|egrep))
1837d4dc15bSmrg {
1847d4dc15bSmrg case ECMAScript:
1857d4dc15bSmrg case basic:
1867d4dc15bSmrg case extended:
1877d4dc15bSmrg case awk:
1887d4dc15bSmrg case grep:
1897d4dc15bSmrg case egrep:
1907d4dc15bSmrg return __f;
1917d4dc15bSmrg case _FlagT(0):
1927d4dc15bSmrg return __f | ECMAScript;
1937d4dc15bSmrg default:
1947d4dc15bSmrg std::__throw_regex_error(_S_grammar, "conflicting grammar options");
1957d4dc15bSmrg }
1967d4dc15bSmrg }
1977d4dc15bSmrg
1984d5abbe8Smrg _FlagT _M_flags;
19948fb7bfaSmrg _ScannerT _M_scanner;
2004d5abbe8Smrg shared_ptr<_RegexT> _M_nfa;
2014d5abbe8Smrg _StringT _M_value;
20248fb7bfaSmrg _StackT _M_stack;
2034d5abbe8Smrg const _TraitsT& _M_traits;
2044d5abbe8Smrg const _CtypeT& _M_ctype;
20548fb7bfaSmrg };
20648fb7bfaSmrg
// [28.13.14]
// Shared implementation of the character translation helpers used by the
// matchers.  __icase selects case-insensitive translation, __collate
// selects locale-aware translation; with neither, characters pass through
// unchanged.
template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslatorBase
{
public:
  using _CharT = typename _TraitsT::char_type;
  using _StringT = typename _TraitsT::string_type;
  using _StrTransT = _StringT;

  explicit
  _RegexTranslatorBase(const _TraitsT& __traits)
  : _M_traits(__traits)
  { }

  // Maps __ch according to the icase/collate flags; icase takes
  // precedence over collate.
  _CharT
  _M_translate(_CharT __ch) const
  {
    if _GLIBCXX17_CONSTEXPR (!__icase && !__collate)
      return __ch;
    else if _GLIBCXX17_CONSTEXPR (__icase)
      return _M_traits.translate_nocase(__ch);
    else
      return _M_traits.translate(__ch);
  }

  // Returns the traits' transform() of the one-character string made
  // from __ch.
  _StrTransT
  _M_transform(_CharT __ch) const
  {
    _StrTransT __single(1, __ch);
    return _M_traits.transform(__single.begin(), __single.end());
  }

  // See LWG 523. It's not efficiently implementable when _TraitsT is not
  // std::regex_traits<>, and __collate is true. See specializations for
  // implementations of other cases.
  bool
  _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
                 const _StrTransT& __s) const
  { return __s >= __first && __s <= __last; }

protected:
  // True if either the lower-case or the upper-case form of __ch (as
  // given by the ctype facet of the traits' locale) lies in
  // [__first, __last].
  bool
  _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
  {
    using __ctype_type = std::ctype<_CharT>;
    const __ctype_type& __ct
      = std::use_facet<__ctype_type>(_M_traits.getloc());
    const auto __lo = __ct.tolower(__ch);
    const auto __up = __ct.toupper(__ch);
    if (__first <= __lo && __lo <= __last)
      return true;
    return __first <= __up && __up <= __last;
  }

  const _TraitsT& _M_traits;
};
2604d5abbe8Smrg
261b17d1066Smrg template<typename _TraitsT, bool __icase, bool __collate>
262b17d1066Smrg class _RegexTranslator
263b17d1066Smrg : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
264b17d1066Smrg {
265b17d1066Smrg public:
266b17d1066Smrg typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
267b17d1066Smrg using _Base::_Base;
268b17d1066Smrg };
269b17d1066Smrg
270b17d1066Smrg template<typename _TraitsT, bool __icase>
271b17d1066Smrg class _RegexTranslator<_TraitsT, __icase, false>
272b17d1066Smrg : public _RegexTranslatorBase<_TraitsT, __icase, false>
273b17d1066Smrg {
274b17d1066Smrg public:
275b17d1066Smrg typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
276b17d1066Smrg typedef typename _Base::_CharT _CharT;
277b17d1066Smrg typedef _CharT _StrTransT;
278b17d1066Smrg
279b17d1066Smrg using _Base::_Base;
280b17d1066Smrg
281b17d1066Smrg _StrTransT
282b17d1066Smrg _M_transform(_CharT __ch) const
283b17d1066Smrg { return __ch; }
284b17d1066Smrg
285b17d1066Smrg bool
286b17d1066Smrg _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
287b17d1066Smrg {
288*b1e83836Smrg if _GLIBCXX17_CONSTEXPR (!__icase)
289b17d1066Smrg return __first <= __ch && __ch <= __last;
290*b1e83836Smrg else
291b17d1066Smrg return this->_M_in_range_icase(__first, __last, __ch);
292b17d1066Smrg }
293b17d1066Smrg };
294b17d1066Smrg
295b17d1066Smrg template<typename _CharType>
296b17d1066Smrg class _RegexTranslator<std::regex_traits<_CharType>, true, true>
297b17d1066Smrg : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
298b17d1066Smrg {
299b17d1066Smrg public:
300b17d1066Smrg typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
301b17d1066Smrg _Base;
302b17d1066Smrg typedef typename _Base::_CharT _CharT;
303b17d1066Smrg typedef typename _Base::_StrTransT _StrTransT;
304b17d1066Smrg
305b17d1066Smrg using _Base::_Base;
306b17d1066Smrg
307b17d1066Smrg bool
308b17d1066Smrg _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
309b17d1066Smrg const _StrTransT& __str) const
310b17d1066Smrg {
311b17d1066Smrg __glibcxx_assert(__first.size() == 1);
312b17d1066Smrg __glibcxx_assert(__last.size() == 1);
313b17d1066Smrg __glibcxx_assert(__str.size() == 1);
314b17d1066Smrg return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
315b17d1066Smrg }
316b17d1066Smrg };
317b17d1066Smrg
3184d5abbe8Smrg template<typename _TraitsT>
3194d5abbe8Smrg class _RegexTranslator<_TraitsT, false, false>
3204d5abbe8Smrg {
3214d5abbe8Smrg public:
3224d5abbe8Smrg typedef typename _TraitsT::char_type _CharT;
3234d5abbe8Smrg typedef _CharT _StrTransT;
3244d5abbe8Smrg
3254d5abbe8Smrg explicit
3264d5abbe8Smrg _RegexTranslator(const _TraitsT&)
3274d5abbe8Smrg { }
3284d5abbe8Smrg
3294d5abbe8Smrg _CharT
3304d5abbe8Smrg _M_translate(_CharT __ch) const
3314d5abbe8Smrg { return __ch; }
3324d5abbe8Smrg
3334d5abbe8Smrg _StrTransT
3344d5abbe8Smrg _M_transform(_CharT __ch) const
3354d5abbe8Smrg { return __ch; }
336b17d1066Smrg
337b17d1066Smrg bool
338b17d1066Smrg _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
339b17d1066Smrg { return __first <= __ch && __ch <= __last; }
3404d5abbe8Smrg };
3414d5abbe8Smrg
3424d5abbe8Smrg template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
3434d5abbe8Smrg struct _AnyMatcher;
3444d5abbe8Smrg
3454d5abbe8Smrg template<typename _TraitsT, bool __icase, bool __collate>
3464d5abbe8Smrg struct _AnyMatcher<_TraitsT, false, __icase, __collate>
3474d5abbe8Smrg {
3484d5abbe8Smrg typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
3494d5abbe8Smrg typedef typename _TransT::_CharT _CharT;
3504d5abbe8Smrg
3514d5abbe8Smrg explicit
3524d5abbe8Smrg _AnyMatcher(const _TraitsT& __traits)
3534d5abbe8Smrg : _M_translator(__traits)
3544d5abbe8Smrg { }
3554d5abbe8Smrg
35648fb7bfaSmrg bool
3574d5abbe8Smrg operator()(_CharT __ch) const
35848fb7bfaSmrg {
3594d5abbe8Smrg static auto __nul = _M_translator._M_translate('\0');
3604d5abbe8Smrg return _M_translator._M_translate(__ch) != __nul;
36148fb7bfaSmrg }
36248fb7bfaSmrg
3634d5abbe8Smrg _TransT _M_translator;
3644d5abbe8Smrg };
3654d5abbe8Smrg
3664d5abbe8Smrg template<typename _TraitsT, bool __icase, bool __collate>
3674d5abbe8Smrg struct _AnyMatcher<_TraitsT, true, __icase, __collate>
3684d5abbe8Smrg {
3694d5abbe8Smrg typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
3704d5abbe8Smrg typedef typename _TransT::_CharT _CharT;
3714d5abbe8Smrg
3724d5abbe8Smrg explicit
3734d5abbe8Smrg _AnyMatcher(const _TraitsT& __traits)
3744d5abbe8Smrg : _M_translator(__traits)
3754d5abbe8Smrg { }
3764d5abbe8Smrg
37748fb7bfaSmrg bool
3784d5abbe8Smrg operator()(_CharT __ch) const
3794d5abbe8Smrg { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
3804d5abbe8Smrg
3814d5abbe8Smrg bool
3824d5abbe8Smrg _M_apply(_CharT __ch, true_type) const
38348fb7bfaSmrg {
3844d5abbe8Smrg auto __c = _M_translator._M_translate(__ch);
3854d5abbe8Smrg auto __n = _M_translator._M_translate('\n');
3864d5abbe8Smrg auto __r = _M_translator._M_translate('\r');
3874d5abbe8Smrg return __c != __n && __c != __r;
38848fb7bfaSmrg }
38948fb7bfaSmrg
39048fb7bfaSmrg bool
3914d5abbe8Smrg _M_apply(_CharT __ch, false_type) const
39248fb7bfaSmrg {
3934d5abbe8Smrg auto __c = _M_translator._M_translate(__ch);
3944d5abbe8Smrg auto __n = _M_translator._M_translate('\n');
3954d5abbe8Smrg auto __r = _M_translator._M_translate('\r');
3964d5abbe8Smrg auto __u2028 = _M_translator._M_translate(u'\u2028');
3974d5abbe8Smrg auto __u2029 = _M_translator._M_translate(u'\u2029');
3984d5abbe8Smrg return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
39948fb7bfaSmrg }
40048fb7bfaSmrg
4014d5abbe8Smrg _TransT _M_translator;
4024d5abbe8Smrg };
40348fb7bfaSmrg
4044d5abbe8Smrg template<typename _TraitsT, bool __icase, bool __collate>
4054d5abbe8Smrg struct _CharMatcher
40648fb7bfaSmrg {
4074d5abbe8Smrg typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
4084d5abbe8Smrg typedef typename _TransT::_CharT _CharT;
4094d5abbe8Smrg
4104d5abbe8Smrg _CharMatcher(_CharT __ch, const _TraitsT& __traits)
4114d5abbe8Smrg : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
4124d5abbe8Smrg { }
4134d5abbe8Smrg
4144d5abbe8Smrg bool
4154d5abbe8Smrg operator()(_CharT __ch) const
4164d5abbe8Smrg { return _M_ch == _M_translator._M_translate(__ch); }
4174d5abbe8Smrg
4184d5abbe8Smrg _TransT _M_translator;
4194d5abbe8Smrg _CharT _M_ch;
4204d5abbe8Smrg };
4214d5abbe8Smrg
4224d5abbe8Smrg /// Matches a character range (bracket expression)
4234d5abbe8Smrg template<typename _TraitsT, bool __icase, bool __collate>
4244d5abbe8Smrg struct _BracketMatcher
4254d5abbe8Smrg {
4264d5abbe8Smrg public:
4274d5abbe8Smrg typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
4284d5abbe8Smrg typedef typename _TransT::_CharT _CharT;
4294d5abbe8Smrg typedef typename _TransT::_StrTransT _StrTransT;
4304d5abbe8Smrg typedef typename _TraitsT::string_type _StringT;
4314d5abbe8Smrg typedef typename _TraitsT::char_class_type _CharClassT;
4324d5abbe8Smrg
4334d5abbe8Smrg public:
4344d5abbe8Smrg _BracketMatcher(bool __is_non_matching,
4354d5abbe8Smrg const _TraitsT& __traits)
4364d5abbe8Smrg : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
4374d5abbe8Smrg _M_is_non_matching(__is_non_matching)
4384d5abbe8Smrg { }
4394d5abbe8Smrg
4404d5abbe8Smrg bool
4414d5abbe8Smrg operator()(_CharT __ch) const
4424d5abbe8Smrg {
4434d5abbe8Smrg _GLIBCXX_DEBUG_ASSERT(_M_is_ready);
4444d5abbe8Smrg return _M_apply(__ch, _UseCache());
44548fb7bfaSmrg }
44648fb7bfaSmrg
4474d5abbe8Smrg void
4484d5abbe8Smrg _M_add_char(_CharT __c)
44948fb7bfaSmrg {
4504d5abbe8Smrg _M_char_set.push_back(_M_translator._M_translate(__c));
451f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
45248fb7bfaSmrg }
45348fb7bfaSmrg
4544d5abbe8Smrg _StringT
4554d5abbe8Smrg _M_add_collate_element(const _StringT& __s)
45648fb7bfaSmrg {
4574d5abbe8Smrg auto __st = _M_traits.lookup_collatename(__s.data(),
4584d5abbe8Smrg __s.data() + __s.size());
4594d5abbe8Smrg if (__st.empty())
460f9a78e0eSmrg __throw_regex_error(regex_constants::error_collate,
461f9a78e0eSmrg "Invalid collate element.");
4624d5abbe8Smrg _M_char_set.push_back(_M_translator._M_translate(__st[0]));
463f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4644d5abbe8Smrg return __st;
4654d5abbe8Smrg }
4664d5abbe8Smrg
4674d5abbe8Smrg void
4684d5abbe8Smrg _M_add_equivalence_class(const _StringT& __s)
4694d5abbe8Smrg {
4704d5abbe8Smrg auto __st = _M_traits.lookup_collatename(__s.data(),
4714d5abbe8Smrg __s.data() + __s.size());
4724d5abbe8Smrg if (__st.empty())
473f9a78e0eSmrg __throw_regex_error(regex_constants::error_collate,
474f9a78e0eSmrg "Invalid equivalence class.");
4754d5abbe8Smrg __st = _M_traits.transform_primary(__st.data(),
4764d5abbe8Smrg __st.data() + __st.size());
4774d5abbe8Smrg _M_equiv_set.push_back(__st);
478f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4794d5abbe8Smrg }
4804d5abbe8Smrg
4814d5abbe8Smrg // __neg should be true for \D, \S and \W only.
4824d5abbe8Smrg void
4834d5abbe8Smrg _M_add_character_class(const _StringT& __s, bool __neg)
4844d5abbe8Smrg {
4854d5abbe8Smrg auto __mask = _M_traits.lookup_classname(__s.data(),
4864d5abbe8Smrg __s.data() + __s.size(),
4874d5abbe8Smrg __icase);
4884d5abbe8Smrg if (__mask == 0)
489f9a78e0eSmrg __throw_regex_error(regex_constants::error_collate,
490f9a78e0eSmrg "Invalid character class.");
4914d5abbe8Smrg if (!__neg)
4924d5abbe8Smrg _M_class_set |= __mask;
4934d5abbe8Smrg else
4944d5abbe8Smrg _M_neg_class_set.push_back(__mask);
495f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4964d5abbe8Smrg }
4974d5abbe8Smrg
4984d5abbe8Smrg void
4994d5abbe8Smrg _M_make_range(_CharT __l, _CharT __r)
5004d5abbe8Smrg {
5014d5abbe8Smrg if (__l > __r)
502f9a78e0eSmrg __throw_regex_error(regex_constants::error_range,
503f9a78e0eSmrg "Invalid range in bracket expression.");
5044d5abbe8Smrg _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
5054d5abbe8Smrg _M_translator._M_transform(__r)));
506f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
50748fb7bfaSmrg }
50848fb7bfaSmrg
5094d5abbe8Smrg void
5104d5abbe8Smrg _M_ready()
5114d5abbe8Smrg {
5124d5abbe8Smrg std::sort(_M_char_set.begin(), _M_char_set.end());
5134d5abbe8Smrg auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
5144d5abbe8Smrg _M_char_set.erase(__end, _M_char_set.end());
5154d5abbe8Smrg _M_make_cache(_UseCache());
516f9a78e0eSmrg _GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
5174d5abbe8Smrg }
5184d5abbe8Smrg
5194d5abbe8Smrg private:
5204d5abbe8Smrg // Currently we only use the cache for char
521*b1e83836Smrg using _UseCache = typename std::is_same<_CharT, char>::type;
5224d5abbe8Smrg
5234d5abbe8Smrg static constexpr size_t
524181254a7Smrg _S_cache_size =
525181254a7Smrg 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
5264d5abbe8Smrg
5274d5abbe8Smrg struct _Dummy { };
528*b1e83836Smrg using _CacheT = std::__conditional_t<_UseCache::value,
529181254a7Smrg std::bitset<_S_cache_size>,
530*b1e83836Smrg _Dummy>;
531*b1e83836Smrg using _UnsignedCharT = typename std::make_unsigned<_CharT>::type;
5324d5abbe8Smrg
53348fb7bfaSmrg bool
5344d5abbe8Smrg _M_apply(_CharT __ch, false_type) const;
53548fb7bfaSmrg
53648fb7bfaSmrg bool
5374d5abbe8Smrg _M_apply(_CharT __ch, true_type) const
5384d5abbe8Smrg { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
5394d5abbe8Smrg
5404d5abbe8Smrg void
5414d5abbe8Smrg _M_make_cache(true_type)
54248fb7bfaSmrg {
5434d5abbe8Smrg for (unsigned __i = 0; __i < _M_cache.size(); __i++)
5444d5abbe8Smrg _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
54548fb7bfaSmrg }
54648fb7bfaSmrg
5474d5abbe8Smrg void
5484d5abbe8Smrg _M_make_cache(false_type)
5494d5abbe8Smrg { }
55048fb7bfaSmrg
5514d5abbe8Smrg private:
552*b1e83836Smrg _GLIBCXX_STD_C::vector<_CharT> _M_char_set;
553*b1e83836Smrg _GLIBCXX_STD_C::vector<_StringT> _M_equiv_set;
554*b1e83836Smrg _GLIBCXX_STD_C::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
555*b1e83836Smrg _GLIBCXX_STD_C::vector<_CharClassT> _M_neg_class_set;
5564d5abbe8Smrg _CharClassT _M_class_set;
5574d5abbe8Smrg _TransT _M_translator;
5584d5abbe8Smrg const _TraitsT& _M_traits;
5594d5abbe8Smrg bool _M_is_non_matching;
5604d5abbe8Smrg _CacheT _M_cache;
5614d5abbe8Smrg #ifdef _GLIBCXX_DEBUG
562f9a78e0eSmrg bool _M_is_ready = false;
5634d5abbe8Smrg #endif
5644d5abbe8Smrg };
56548fb7bfaSmrg
566a448f87cSmrg ///@} regex-detail
5678b6133e5Smrg } // namespace __detail
568a3e9eb18Smrg _GLIBCXX_END_NAMESPACE_VERSION
56948fb7bfaSmrg } // namespace std
5704d5abbe8Smrg
5714d5abbe8Smrg #include <bits/regex_compiler.tcc>
572