xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/bits/regex_compiler.h (revision b1e838363e3c6fc78a55519254d99869742dd33c)
148fb7bfaSmrg // class template regex -*- C++ -*-
248fb7bfaSmrg 
3*b1e83836Smrg // Copyright (C) 2010-2022 Free Software Foundation, Inc.
448fb7bfaSmrg //
548fb7bfaSmrg // This file is part of the GNU ISO C++ Library.  This library is free
648fb7bfaSmrg // software; you can redistribute it and/or modify it under the
748fb7bfaSmrg // terms of the GNU General Public License as published by the
848fb7bfaSmrg // Free Software Foundation; either version 3, or (at your option)
948fb7bfaSmrg // any later version.
1048fb7bfaSmrg 
1148fb7bfaSmrg // This library is distributed in the hope that it will be useful,
1248fb7bfaSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
1348fb7bfaSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1448fb7bfaSmrg // GNU General Public License for more details.
1548fb7bfaSmrg 
1648fb7bfaSmrg // Under Section 7 of GPL version 3, you are granted additional
1748fb7bfaSmrg // permissions described in the GCC Runtime Library Exception, version
1848fb7bfaSmrg // 3.1, as published by the Free Software Foundation.
1948fb7bfaSmrg 
2048fb7bfaSmrg // You should have received a copy of the GNU General Public License and
2148fb7bfaSmrg // a copy of the GCC Runtime Library Exception along with this program;
2248fb7bfaSmrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2348fb7bfaSmrg // <http://www.gnu.org/licenses/>.
2448fb7bfaSmrg 
2548fb7bfaSmrg /**
2648fb7bfaSmrg  *  @file bits/regex_compiler.h
2748fb7bfaSmrg  *  This is an internal header file, included by other library headers.
2848fb7bfaSmrg  *  Do not attempt to use it directly. @headername{regex}
2948fb7bfaSmrg  */
3048fb7bfaSmrg 
_GLIBCXX_VISIBILITY(default)3148fb7bfaSmrg namespace std _GLIBCXX_VISIBILITY(default)
3248fb7bfaSmrg {
33b17d1066Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
34b17d1066Smrg _GLIBCXX_BEGIN_NAMESPACE_CXX11
35b17d1066Smrg 
// Declaration only: allows std::regex_traits to be named in this header
// before its definition has been seen.
template<typename _CharT>
  class regex_traits;
38b17d1066Smrg 
39b17d1066Smrg _GLIBCXX_END_NAMESPACE_CXX11
40b17d1066Smrg 
4148fb7bfaSmrg namespace __detail
4248fb7bfaSmrg {
4348fb7bfaSmrg   /**
4448fb7bfaSmrg    * @addtogroup regex-detail
4548fb7bfaSmrg    * @{
4648fb7bfaSmrg    */
4748fb7bfaSmrg 
// Declared early so that _Compiler can refer to it; the parameters are
// the traits type followed by the icase and collate switches.
template<typename _TraitsT, bool __icase, bool __collate>
  struct _BracketMatcher;
5048fb7bfaSmrg 
5148fb7bfaSmrg   /**
524d5abbe8Smrg    * @brief Builds an NFA from an input iterator range.
5348fb7bfaSmrg    *
544d5abbe8Smrg    * The %_TraitsT type should fulfill requirements [28.3].
5548fb7bfaSmrg    */
564d5abbe8Smrg   template<typename _TraitsT>
5748fb7bfaSmrg     class _Compiler
5848fb7bfaSmrg     {
5948fb7bfaSmrg     public:
604d5abbe8Smrg       typedef typename _TraitsT::char_type        _CharT;
614d5abbe8Smrg       typedef _NFA<_TraitsT>              	  _RegexT;
6248fb7bfaSmrg       typedef regex_constants::syntax_option_type _FlagT;
6348fb7bfaSmrg 
64*b1e83836Smrg       _Compiler(const _CharT* __b, const _CharT* __e,
654d5abbe8Smrg 		const typename _TraitsT::locale_type& __traits, _FlagT __flags);
6648fb7bfaSmrg 
674d5abbe8Smrg       shared_ptr<const _RegexT>
687d4dc15bSmrg       _M_get_nfa() noexcept
694d5abbe8Smrg       { return std::move(_M_nfa); }
7048fb7bfaSmrg 
7148fb7bfaSmrg     private:
724d5abbe8Smrg       typedef _Scanner<_CharT>               _ScannerT;
734d5abbe8Smrg       typedef typename _TraitsT::string_type _StringT;
7448fb7bfaSmrg       typedef typename _ScannerT::_TokenT    _TokenT;
754d5abbe8Smrg       typedef _StateSeq<_TraitsT>            _StateSeqT;
764d5abbe8Smrg       typedef std::stack<_StateSeqT>         _StackT;
774d5abbe8Smrg       typedef std::ctype<_CharT>             _CtypeT;
7848fb7bfaSmrg 
7948fb7bfaSmrg       // accepts a specific token or returns false.
8048fb7bfaSmrg       bool
8148fb7bfaSmrg       _M_match_token(_TokenT __token);
8248fb7bfaSmrg 
8348fb7bfaSmrg       void
8448fb7bfaSmrg       _M_disjunction();
8548fb7bfaSmrg 
864d5abbe8Smrg       void
8748fb7bfaSmrg       _M_alternative();
8848fb7bfaSmrg 
8948fb7bfaSmrg       bool
9048fb7bfaSmrg       _M_term();
9148fb7bfaSmrg 
9248fb7bfaSmrg       bool
9348fb7bfaSmrg       _M_assertion();
9448fb7bfaSmrg 
9548fb7bfaSmrg       bool
9648fb7bfaSmrg       _M_quantifier();
9748fb7bfaSmrg 
9848fb7bfaSmrg       bool
9948fb7bfaSmrg       _M_atom();
10048fb7bfaSmrg 
10148fb7bfaSmrg       bool
10248fb7bfaSmrg       _M_bracket_expression();
10348fb7bfaSmrg 
1044d5abbe8Smrg       template<bool __icase, bool __collate>
1054d5abbe8Smrg 	void
1064d5abbe8Smrg 	_M_insert_any_matcher_ecma();
10748fb7bfaSmrg 
1084d5abbe8Smrg       template<bool __icase, bool __collate>
1094d5abbe8Smrg 	void
1104d5abbe8Smrg 	_M_insert_any_matcher_posix();
11148fb7bfaSmrg 
1124d5abbe8Smrg       template<bool __icase, bool __collate>
1134d5abbe8Smrg 	void
1144d5abbe8Smrg 	_M_insert_char_matcher();
11548fb7bfaSmrg 
1164d5abbe8Smrg       template<bool __icase, bool __collate>
1174d5abbe8Smrg 	void
1184d5abbe8Smrg 	_M_insert_character_class_matcher();
11948fb7bfaSmrg 
1204d5abbe8Smrg       template<bool __icase, bool __collate>
1214d5abbe8Smrg 	void
1224d5abbe8Smrg 	_M_insert_bracket_matcher(bool __neg);
12348fb7bfaSmrg 
1247d4dc15bSmrg       // Cache of the last atom seen in a bracketed range expression.
1257d4dc15bSmrg       struct _BracketState
1267d4dc15bSmrg       {
1277d4dc15bSmrg 	enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None;
128*b1e83836Smrg 	_CharT _M_char = _CharT();
1297d4dc15bSmrg 
1307d4dc15bSmrg 	void
1317d4dc15bSmrg 	set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; }
1327d4dc15bSmrg 
1337d4dc15bSmrg 	_GLIBCXX_NODISCARD _CharT
1347d4dc15bSmrg 	get() const noexcept { return _M_char; }
1357d4dc15bSmrg 
1367d4dc15bSmrg 	void
1377d4dc15bSmrg 	reset(_Type __t = _Type::_None) noexcept { _M_type = __t; }
1387d4dc15bSmrg 
1397d4dc15bSmrg 	explicit operator bool() const noexcept
1407d4dc15bSmrg 	{ return _M_type != _Type::_None; }
1417d4dc15bSmrg 
1427d4dc15bSmrg 	// Previous token was a single character.
1437d4dc15bSmrg 	_GLIBCXX_NODISCARD bool
1447d4dc15bSmrg 	_M_is_char() const noexcept { return _M_type == _Type::_Char; }
1457d4dc15bSmrg 
1467d4dc15bSmrg 	// Previous token was a character class, equivalent class,
1477d4dc15bSmrg 	// collating symbol etc.
1487d4dc15bSmrg 	_GLIBCXX_NODISCARD bool
1497d4dc15bSmrg 	_M_is_class() const noexcept { return _M_type == _Type::_Class; }
1507d4dc15bSmrg       };
1517d4dc15bSmrg 
1527d4dc15bSmrg       template<bool __icase, bool __collate>
1537d4dc15bSmrg 	using _BracketMatcher
1547d4dc15bSmrg 	  = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>;
1557d4dc15bSmrg 
1567d4dc15bSmrg       // Returns true if successfully parsed one term and should continue
1577d4dc15bSmrg       // compiling a bracket expression.
1584d5abbe8Smrg       // Returns false if the compiler should move on.
1594d5abbe8Smrg       template<bool __icase, bool __collate>
16048fb7bfaSmrg 	bool
1617d4dc15bSmrg 	_M_expression_term(_BracketState& __last_char,
1627d4dc15bSmrg 			   _BracketMatcher<__icase, __collate>& __matcher);
16348fb7bfaSmrg 
16448fb7bfaSmrg       int
16548fb7bfaSmrg       _M_cur_int_value(int __radix);
16648fb7bfaSmrg 
1674d5abbe8Smrg       bool
1684d5abbe8Smrg       _M_try_char();
1694d5abbe8Smrg 
1704d5abbe8Smrg       _StateSeqT
1714d5abbe8Smrg       _M_pop()
1724d5abbe8Smrg       {
1734d5abbe8Smrg 	auto ret = _M_stack.top();
1744d5abbe8Smrg 	_M_stack.pop();
1754d5abbe8Smrg 	return ret;
1764d5abbe8Smrg       }
1774d5abbe8Smrg 
1787d4dc15bSmrg       static _FlagT
1797d4dc15bSmrg       _S_validate(_FlagT __f)
1807d4dc15bSmrg       {
1817d4dc15bSmrg 	using namespace regex_constants;
1827d4dc15bSmrg 	switch (__f & (ECMAScript|basic|extended|awk|grep|egrep))
1837d4dc15bSmrg 	  {
1847d4dc15bSmrg 	  case ECMAScript:
1857d4dc15bSmrg 	  case basic:
1867d4dc15bSmrg 	  case extended:
1877d4dc15bSmrg 	  case awk:
1887d4dc15bSmrg 	  case grep:
1897d4dc15bSmrg 	  case egrep:
1907d4dc15bSmrg 	    return __f;
1917d4dc15bSmrg 	  case _FlagT(0):
1927d4dc15bSmrg 	    return __f | ECMAScript;
1937d4dc15bSmrg 	  default:
1947d4dc15bSmrg 	    std::__throw_regex_error(_S_grammar, "conflicting grammar options");
1957d4dc15bSmrg 	  }
1967d4dc15bSmrg       }
1977d4dc15bSmrg 
1984d5abbe8Smrg       _FlagT              _M_flags;
19948fb7bfaSmrg       _ScannerT           _M_scanner;
2004d5abbe8Smrg       shared_ptr<_RegexT> _M_nfa;
2014d5abbe8Smrg       _StringT            _M_value;
20248fb7bfaSmrg       _StackT             _M_stack;
2034d5abbe8Smrg       const _TraitsT&     _M_traits;
2044d5abbe8Smrg       const _CtypeT&      _M_ctype;
20548fb7bfaSmrg     };
20648fb7bfaSmrg 
// [28.13.14]
// Common part of the translators used by the matchers.  The two boolean
// parameters choose, at compile time, how a character is translated.
template<typename _TraitsT, bool __icase, bool __collate>
  class _RegexTranslatorBase
  {
  public:
    using _CharT = typename _TraitsT::char_type;
    using _StringT = typename _TraitsT::string_type;
    using _StrTransT = _StringT;

    explicit
    _RegexTranslatorBase(const _TraitsT& __traits)
    : _M_traits(__traits)
    { }

    // With neither option the character is returned as is; otherwise
    // case-insensitivity takes precedence over collation.
    _CharT
    _M_translate(_CharT __ch) const
    {
      if _GLIBCXX17_CONSTEXPR (!__icase && !__collate)
        return __ch;
      else if _GLIBCXX17_CONSTEXPR (__icase)
        return _M_traits.translate_nocase(__ch);
      else
        return _M_traits.translate(__ch);
    }

    // Produces the traits' transformed form of the one-character
    // string made of __ch.
    _StrTransT
    _M_transform(_CharT __ch) const
    {
      _StrTransT __single(1, __ch);
      return _M_traits.transform(__single.begin(), __single.end());
    }

    // See LWG 523. It's not efficiently implementable when _TraitsT is not
    // std::regex_traits<>, and __collate is true. See specializations for
    // implementations of other cases.
    bool
    _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
                   const _StrTransT& __s) const
    {
      if (!(__first <= __s))
        return false;
      return __s <= __last;
    }

  protected:
    // True if either the lower-case or the upper-case form of __ch,
    // as given by the ctype facet of the traits' locale, lies within
    // [__first, __last].
    bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
    {
      const auto& __facet
        = std::use_facet<std::ctype<_CharT>>(_M_traits.getloc());
      const auto __lo = __facet.tolower(__ch);
      const auto __up = __facet.toupper(__ch);
      if (__first <= __lo && __lo <= __last)
        return true;
      return __first <= __up && __up <= __last;
    }

    const _TraitsT& _M_traits;
  };
2604d5abbe8Smrg 
261b17d1066Smrg   template<typename _TraitsT, bool __icase, bool __collate>
262b17d1066Smrg     class _RegexTranslator
263b17d1066Smrg     : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
264b17d1066Smrg     {
265b17d1066Smrg     public:
266b17d1066Smrg       typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
267b17d1066Smrg       using _Base::_Base;
268b17d1066Smrg     };
269b17d1066Smrg 
270b17d1066Smrg   template<typename _TraitsT, bool __icase>
271b17d1066Smrg     class _RegexTranslator<_TraitsT, __icase, false>
272b17d1066Smrg     : public _RegexTranslatorBase<_TraitsT, __icase, false>
273b17d1066Smrg     {
274b17d1066Smrg     public:
275b17d1066Smrg       typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
276b17d1066Smrg       typedef typename _Base::_CharT _CharT;
277b17d1066Smrg       typedef _CharT _StrTransT;
278b17d1066Smrg 
279b17d1066Smrg       using _Base::_Base;
280b17d1066Smrg 
281b17d1066Smrg       _StrTransT
282b17d1066Smrg       _M_transform(_CharT __ch) const
283b17d1066Smrg       { return __ch; }
284b17d1066Smrg 
285b17d1066Smrg       bool
286b17d1066Smrg       _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
287b17d1066Smrg       {
288*b1e83836Smrg 	if _GLIBCXX17_CONSTEXPR (!__icase)
289b17d1066Smrg 	  return __first <= __ch && __ch <= __last;
290*b1e83836Smrg 	else
291b17d1066Smrg 	  return this->_M_in_range_icase(__first, __last, __ch);
292b17d1066Smrg       }
293b17d1066Smrg     };
294b17d1066Smrg 
295b17d1066Smrg   template<typename _CharType>
296b17d1066Smrg     class _RegexTranslator<std::regex_traits<_CharType>, true, true>
297b17d1066Smrg     : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
298b17d1066Smrg     {
299b17d1066Smrg     public:
300b17d1066Smrg       typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
301b17d1066Smrg 	_Base;
302b17d1066Smrg       typedef typename _Base::_CharT _CharT;
303b17d1066Smrg       typedef typename _Base::_StrTransT _StrTransT;
304b17d1066Smrg 
305b17d1066Smrg       using _Base::_Base;
306b17d1066Smrg 
307b17d1066Smrg       bool
308b17d1066Smrg       _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
309b17d1066Smrg 		     const _StrTransT& __str) const
310b17d1066Smrg       {
311b17d1066Smrg 	__glibcxx_assert(__first.size() == 1);
312b17d1066Smrg 	__glibcxx_assert(__last.size() == 1);
313b17d1066Smrg 	__glibcxx_assert(__str.size() == 1);
314b17d1066Smrg 	return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
315b17d1066Smrg       }
316b17d1066Smrg     };
317b17d1066Smrg 
3184d5abbe8Smrg   template<typename _TraitsT>
3194d5abbe8Smrg     class _RegexTranslator<_TraitsT, false, false>
3204d5abbe8Smrg     {
3214d5abbe8Smrg     public:
3224d5abbe8Smrg       typedef typename _TraitsT::char_type _CharT;
3234d5abbe8Smrg       typedef _CharT                       _StrTransT;
3244d5abbe8Smrg 
3254d5abbe8Smrg       explicit
3264d5abbe8Smrg       _RegexTranslator(const _TraitsT&)
3274d5abbe8Smrg       { }
3284d5abbe8Smrg 
3294d5abbe8Smrg       _CharT
3304d5abbe8Smrg       _M_translate(_CharT __ch) const
3314d5abbe8Smrg       { return __ch; }
3324d5abbe8Smrg 
3334d5abbe8Smrg       _StrTransT
3344d5abbe8Smrg       _M_transform(_CharT __ch) const
3354d5abbe8Smrg       { return __ch; }
336b17d1066Smrg 
337b17d1066Smrg       bool
338b17d1066Smrg       _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
339b17d1066Smrg       { return __first <= __ch && __ch <= __last; }
3404d5abbe8Smrg     };
3414d5abbe8Smrg 
// Primary template is declared only; the partial specializations on
// __is_ecma supply the definitions.
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
  struct _AnyMatcher;
3444d5abbe8Smrg 
3454d5abbe8Smrg   template<typename _TraitsT, bool __icase, bool __collate>
3464d5abbe8Smrg     struct _AnyMatcher<_TraitsT, false, __icase, __collate>
3474d5abbe8Smrg     {
3484d5abbe8Smrg       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
3494d5abbe8Smrg       typedef typename _TransT::_CharT                       _CharT;
3504d5abbe8Smrg 
3514d5abbe8Smrg       explicit
3524d5abbe8Smrg       _AnyMatcher(const _TraitsT& __traits)
3534d5abbe8Smrg       : _M_translator(__traits)
3544d5abbe8Smrg       { }
3554d5abbe8Smrg 
35648fb7bfaSmrg       bool
3574d5abbe8Smrg       operator()(_CharT __ch) const
35848fb7bfaSmrg       {
3594d5abbe8Smrg 	static auto __nul = _M_translator._M_translate('\0');
3604d5abbe8Smrg 	return _M_translator._M_translate(__ch) != __nul;
36148fb7bfaSmrg       }
36248fb7bfaSmrg 
3634d5abbe8Smrg       _TransT _M_translator;
3644d5abbe8Smrg     };
3654d5abbe8Smrg 
3664d5abbe8Smrg   template<typename _TraitsT, bool __icase, bool __collate>
3674d5abbe8Smrg     struct _AnyMatcher<_TraitsT, true, __icase, __collate>
3684d5abbe8Smrg     {
3694d5abbe8Smrg       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
3704d5abbe8Smrg       typedef typename _TransT::_CharT                       _CharT;
3714d5abbe8Smrg 
3724d5abbe8Smrg       explicit
3734d5abbe8Smrg       _AnyMatcher(const _TraitsT& __traits)
3744d5abbe8Smrg       : _M_translator(__traits)
3754d5abbe8Smrg       { }
3764d5abbe8Smrg 
37748fb7bfaSmrg       bool
3784d5abbe8Smrg       operator()(_CharT __ch) const
3794d5abbe8Smrg       { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
3804d5abbe8Smrg 
3814d5abbe8Smrg       bool
3824d5abbe8Smrg       _M_apply(_CharT __ch, true_type) const
38348fb7bfaSmrg       {
3844d5abbe8Smrg 	auto __c = _M_translator._M_translate(__ch);
3854d5abbe8Smrg 	auto __n = _M_translator._M_translate('\n');
3864d5abbe8Smrg 	auto __r = _M_translator._M_translate('\r');
3874d5abbe8Smrg 	return __c != __n && __c != __r;
38848fb7bfaSmrg       }
38948fb7bfaSmrg 
39048fb7bfaSmrg       bool
3914d5abbe8Smrg       _M_apply(_CharT __ch, false_type) const
39248fb7bfaSmrg       {
3934d5abbe8Smrg 	auto __c = _M_translator._M_translate(__ch);
3944d5abbe8Smrg 	auto __n = _M_translator._M_translate('\n');
3954d5abbe8Smrg 	auto __r = _M_translator._M_translate('\r');
3964d5abbe8Smrg 	auto __u2028 = _M_translator._M_translate(u'\u2028');
3974d5abbe8Smrg 	auto __u2029 = _M_translator._M_translate(u'\u2029');
3984d5abbe8Smrg 	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
39948fb7bfaSmrg       }
40048fb7bfaSmrg 
4014d5abbe8Smrg       _TransT _M_translator;
4024d5abbe8Smrg     };
40348fb7bfaSmrg 
4044d5abbe8Smrg   template<typename _TraitsT, bool __icase, bool __collate>
4054d5abbe8Smrg     struct _CharMatcher
40648fb7bfaSmrg     {
4074d5abbe8Smrg       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
4084d5abbe8Smrg       typedef typename _TransT::_CharT                       _CharT;
4094d5abbe8Smrg 
4104d5abbe8Smrg       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
4114d5abbe8Smrg       : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
4124d5abbe8Smrg       { }
4134d5abbe8Smrg 
4144d5abbe8Smrg       bool
4154d5abbe8Smrg       operator()(_CharT __ch) const
4164d5abbe8Smrg       { return _M_ch == _M_translator._M_translate(__ch); }
4174d5abbe8Smrg 
4184d5abbe8Smrg       _TransT _M_translator;
4194d5abbe8Smrg       _CharT  _M_ch;
4204d5abbe8Smrg     };
4214d5abbe8Smrg 
4224d5abbe8Smrg   /// Matches a character range (bracket expression)
4234d5abbe8Smrg   template<typename _TraitsT, bool __icase, bool __collate>
4244d5abbe8Smrg     struct _BracketMatcher
4254d5abbe8Smrg     {
4264d5abbe8Smrg     public:
4274d5abbe8Smrg       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
4284d5abbe8Smrg       typedef typename _TransT::_CharT                       _CharT;
4294d5abbe8Smrg       typedef typename _TransT::_StrTransT                   _StrTransT;
4304d5abbe8Smrg       typedef typename _TraitsT::string_type                 _StringT;
4314d5abbe8Smrg       typedef typename _TraitsT::char_class_type             _CharClassT;
4324d5abbe8Smrg 
4334d5abbe8Smrg     public:
4344d5abbe8Smrg       _BracketMatcher(bool __is_non_matching,
4354d5abbe8Smrg 		      const _TraitsT& __traits)
4364d5abbe8Smrg       : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
4374d5abbe8Smrg       _M_is_non_matching(__is_non_matching)
4384d5abbe8Smrg       { }
4394d5abbe8Smrg 
4404d5abbe8Smrg       bool
4414d5abbe8Smrg       operator()(_CharT __ch) const
4424d5abbe8Smrg       {
4434d5abbe8Smrg 	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
4444d5abbe8Smrg 	return _M_apply(__ch, _UseCache());
44548fb7bfaSmrg       }
44648fb7bfaSmrg 
4474d5abbe8Smrg       void
4484d5abbe8Smrg       _M_add_char(_CharT __c)
44948fb7bfaSmrg       {
4504d5abbe8Smrg 	_M_char_set.push_back(_M_translator._M_translate(__c));
451f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
45248fb7bfaSmrg       }
45348fb7bfaSmrg 
4544d5abbe8Smrg       _StringT
4554d5abbe8Smrg       _M_add_collate_element(const _StringT& __s)
45648fb7bfaSmrg       {
4574d5abbe8Smrg 	auto __st = _M_traits.lookup_collatename(__s.data(),
4584d5abbe8Smrg 						 __s.data() + __s.size());
4594d5abbe8Smrg 	if (__st.empty())
460f9a78e0eSmrg 	  __throw_regex_error(regex_constants::error_collate,
461f9a78e0eSmrg 			      "Invalid collate element.");
4624d5abbe8Smrg 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
463f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4644d5abbe8Smrg 	return __st;
4654d5abbe8Smrg       }
4664d5abbe8Smrg 
4674d5abbe8Smrg       void
4684d5abbe8Smrg       _M_add_equivalence_class(const _StringT& __s)
4694d5abbe8Smrg       {
4704d5abbe8Smrg 	auto __st = _M_traits.lookup_collatename(__s.data(),
4714d5abbe8Smrg 						 __s.data() + __s.size());
4724d5abbe8Smrg 	if (__st.empty())
473f9a78e0eSmrg 	  __throw_regex_error(regex_constants::error_collate,
474f9a78e0eSmrg 			      "Invalid equivalence class.");
4754d5abbe8Smrg 	__st = _M_traits.transform_primary(__st.data(),
4764d5abbe8Smrg 					   __st.data() + __st.size());
4774d5abbe8Smrg 	_M_equiv_set.push_back(__st);
478f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4794d5abbe8Smrg       }
4804d5abbe8Smrg 
4814d5abbe8Smrg       // __neg should be true for \D, \S and \W only.
4824d5abbe8Smrg       void
4834d5abbe8Smrg       _M_add_character_class(const _StringT& __s, bool __neg)
4844d5abbe8Smrg       {
4854d5abbe8Smrg 	auto __mask = _M_traits.lookup_classname(__s.data(),
4864d5abbe8Smrg 						 __s.data() + __s.size(),
4874d5abbe8Smrg 						 __icase);
4884d5abbe8Smrg 	if (__mask == 0)
489f9a78e0eSmrg 	  __throw_regex_error(regex_constants::error_collate,
490f9a78e0eSmrg 			      "Invalid character class.");
4914d5abbe8Smrg 	if (!__neg)
4924d5abbe8Smrg 	  _M_class_set |= __mask;
4934d5abbe8Smrg 	else
4944d5abbe8Smrg 	  _M_neg_class_set.push_back(__mask);
495f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
4964d5abbe8Smrg       }
4974d5abbe8Smrg 
4984d5abbe8Smrg       void
4994d5abbe8Smrg       _M_make_range(_CharT __l, _CharT __r)
5004d5abbe8Smrg       {
5014d5abbe8Smrg 	if (__l > __r)
502f9a78e0eSmrg 	  __throw_regex_error(regex_constants::error_range,
503f9a78e0eSmrg 			      "Invalid range in bracket expression.");
5044d5abbe8Smrg 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
5054d5abbe8Smrg 					 _M_translator._M_transform(__r)));
506f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
50748fb7bfaSmrg       }
50848fb7bfaSmrg 
5094d5abbe8Smrg       void
5104d5abbe8Smrg       _M_ready()
5114d5abbe8Smrg       {
5124d5abbe8Smrg 	std::sort(_M_char_set.begin(), _M_char_set.end());
5134d5abbe8Smrg 	auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
5144d5abbe8Smrg 	_M_char_set.erase(__end, _M_char_set.end());
5154d5abbe8Smrg 	_M_make_cache(_UseCache());
516f9a78e0eSmrg 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
5174d5abbe8Smrg       }
5184d5abbe8Smrg 
5194d5abbe8Smrg     private:
5204d5abbe8Smrg       // Currently we only use the cache for char
521*b1e83836Smrg       using _UseCache = typename std::is_same<_CharT, char>::type;
5224d5abbe8Smrg 
5234d5abbe8Smrg       static constexpr size_t
524181254a7Smrg       _S_cache_size =
525181254a7Smrg 	1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
5264d5abbe8Smrg 
5274d5abbe8Smrg       struct _Dummy { };
528*b1e83836Smrg       using _CacheT = std::__conditional_t<_UseCache::value,
529181254a7Smrg 					   std::bitset<_S_cache_size>,
530*b1e83836Smrg 					   _Dummy>;
531*b1e83836Smrg       using _UnsignedCharT = typename std::make_unsigned<_CharT>::type;
5324d5abbe8Smrg 
53348fb7bfaSmrg       bool
5344d5abbe8Smrg       _M_apply(_CharT __ch, false_type) const;
53548fb7bfaSmrg 
53648fb7bfaSmrg       bool
5374d5abbe8Smrg       _M_apply(_CharT __ch, true_type) const
5384d5abbe8Smrg       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
5394d5abbe8Smrg 
5404d5abbe8Smrg       void
5414d5abbe8Smrg       _M_make_cache(true_type)
54248fb7bfaSmrg       {
5434d5abbe8Smrg 	for (unsigned __i = 0; __i < _M_cache.size(); __i++)
5444d5abbe8Smrg 	  _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
54548fb7bfaSmrg       }
54648fb7bfaSmrg 
5474d5abbe8Smrg       void
5484d5abbe8Smrg       _M_make_cache(false_type)
5494d5abbe8Smrg       { }
55048fb7bfaSmrg 
5514d5abbe8Smrg     private:
552*b1e83836Smrg       _GLIBCXX_STD_C::vector<_CharT>            _M_char_set;
553*b1e83836Smrg       _GLIBCXX_STD_C::vector<_StringT>          _M_equiv_set;
554*b1e83836Smrg       _GLIBCXX_STD_C::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
555*b1e83836Smrg       _GLIBCXX_STD_C::vector<_CharClassT>       _M_neg_class_set;
5564d5abbe8Smrg       _CharClassT                               _M_class_set;
5574d5abbe8Smrg       _TransT                                   _M_translator;
5584d5abbe8Smrg       const _TraitsT&                           _M_traits;
5594d5abbe8Smrg       bool                                      _M_is_non_matching;
5604d5abbe8Smrg       _CacheT					_M_cache;
5614d5abbe8Smrg #ifdef _GLIBCXX_DEBUG
562f9a78e0eSmrg       bool                                      _M_is_ready = false;
5634d5abbe8Smrg #endif
5644d5abbe8Smrg     };
56548fb7bfaSmrg 
566a448f87cSmrg  ///@} regex-detail
5678b6133e5Smrg } // namespace __detail
568a3e9eb18Smrg _GLIBCXX_END_NAMESPACE_VERSION
56948fb7bfaSmrg } // namespace std
5704d5abbe8Smrg 
5714d5abbe8Smrg #include <bits/regex_compiler.tcc>
572