xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_compiler.h (revision 76c7fc5f6b13ed0b1508e6b313e88e59977ed78e)
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2017 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_compiler.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 _GLIBCXX_BEGIN_NAMESPACE_CXX11
35 
36   template<typename>
37     class regex_traits;
38 
39 _GLIBCXX_END_NAMESPACE_CXX11
40 _GLIBCXX_END_NAMESPACE_VERSION
41 
42 namespace __detail
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 
46   /**
47    * @addtogroup regex-detail
48    * @{
49    */
50 
51   template<typename, bool, bool>
52     struct _BracketMatcher;
53 
54   /**
55    * @brief Builds an NFA from an input iterator range.
56    *
57    * The %_TraitsT type should fulfill requirements [28.3].
58    */
59   template<typename _TraitsT>
60     class _Compiler
61     {
62     public:
63       typedef typename _TraitsT::char_type        _CharT;
64       typedef const _CharT*                       _IterT;
65       typedef _NFA<_TraitsT>              	  _RegexT;
66       typedef regex_constants::syntax_option_type _FlagT;
67 
68       _Compiler(_IterT __b, _IterT __e,
69 		const typename _TraitsT::locale_type& __traits, _FlagT __flags);
70 
71       shared_ptr<const _RegexT>
72       _M_get_nfa()
73       { return std::move(_M_nfa); }
74 
75     private:
76       typedef _Scanner<_CharT>               _ScannerT;
77       typedef typename _TraitsT::string_type _StringT;
78       typedef typename _ScannerT::_TokenT    _TokenT;
79       typedef _StateSeq<_TraitsT>            _StateSeqT;
80       typedef std::stack<_StateSeqT>         _StackT;
81       typedef std::ctype<_CharT>             _CtypeT;
82 
83       // accepts a specific token or returns false.
84       bool
85       _M_match_token(_TokenT __token);
86 
87       void
88       _M_disjunction();
89 
90       void
91       _M_alternative();
92 
93       bool
94       _M_term();
95 
96       bool
97       _M_assertion();
98 
99       bool
100       _M_quantifier();
101 
102       bool
103       _M_atom();
104 
105       bool
106       _M_bracket_expression();
107 
108       template<bool __icase, bool __collate>
109 	void
110 	_M_insert_any_matcher_ecma();
111 
112       template<bool __icase, bool __collate>
113 	void
114 	_M_insert_any_matcher_posix();
115 
116       template<bool __icase, bool __collate>
117 	void
118 	_M_insert_char_matcher();
119 
120       template<bool __icase, bool __collate>
121 	void
122 	_M_insert_character_class_matcher();
123 
124       template<bool __icase, bool __collate>
125 	void
126 	_M_insert_bracket_matcher(bool __neg);
127 
128       // Returns true if successfully matched one term and should continue.
129       // Returns false if the compiler should move on.
130       template<bool __icase, bool __collate>
131 	bool
132 	_M_expression_term(pair<bool, _CharT>& __last_char,
133 			   _BracketMatcher<_TraitsT, __icase, __collate>&
134 			   __matcher);
135 
136       int
137       _M_cur_int_value(int __radix);
138 
139       bool
140       _M_try_char();
141 
142       _StateSeqT
143       _M_pop()
144       {
145 	auto ret = _M_stack.top();
146 	_M_stack.pop();
147 	return ret;
148       }
149 
150       _FlagT              _M_flags;
151       _ScannerT           _M_scanner;
152       shared_ptr<_RegexT> _M_nfa;
153       _StringT            _M_value;
154       _StackT             _M_stack;
155       const _TraitsT&     _M_traits;
156       const _CtypeT&      _M_ctype;
157     };
158 
159   template<typename _Tp>
160     struct __has_contiguous_iter : std::false_type { };
161 
162   template<typename _Ch, typename _Tr, typename _Alloc>
163     struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
164     : std::true_type
165     { };
166 
167   template<typename _Tp, typename _Alloc>
168     struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
169     : std::true_type
170     { };
171 
172   template<typename _Tp>
173     struct __is_contiguous_normal_iter : std::false_type { };
174 
175   template<typename _CharT>
176     struct __is_contiguous_normal_iter<_CharT*> : std::true_type { };
177 
178   template<typename _Tp, typename _Cont>
179     struct
180     __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
181     : __has_contiguous_iter<_Cont>::type
182     { };
183 
184   template<typename _Iter, typename _TraitsT>
185     using __enable_if_contiguous_normal_iter
186       = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
187                            std::shared_ptr<const _NFA<_TraitsT>> >::type;
188 
189   template<typename _Iter, typename _TraitsT>
190     using __disable_if_contiguous_normal_iter
191       = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
192                            std::shared_ptr<const _NFA<_TraitsT>> >::type;
193 
194   template<typename _FwdIter, typename _TraitsT>
195     inline __enable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
196     __compile_nfa(_FwdIter __first, _FwdIter __last,
197 		  const typename _TraitsT::locale_type& __loc,
198 		  regex_constants::syntax_option_type __flags)
199     {
200       size_t __len = __last - __first;
201       const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
202       using _Cmplr = _Compiler<_TraitsT>;
203       return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa();
204     }
205 
206   template<typename _FwdIter, typename _TraitsT>
207     inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
208     __compile_nfa(_FwdIter __first, _FwdIter __last,
209 		  const typename _TraitsT::locale_type& __loc,
210 		  regex_constants::syntax_option_type __flags)
211     {
212       using char_type = typename _TraitsT::char_type;
213       const basic_string<char_type> __str(__first, __last);
214       return __compile_nfa<const char_type*, _TraitsT>(__str.data(),
215 	  __str.data() + __str.size(), __loc, __flags);
216     }
217 
218   // [28.13.14]
219   template<typename _TraitsT, bool __icase, bool __collate>
220     class _RegexTranslatorBase
221     {
222     public:
223       typedef typename _TraitsT::char_type	      _CharT;
224       typedef typename _TraitsT::string_type	      _StringT;
225       typedef _StringT _StrTransT;
226 
227       explicit
228       _RegexTranslatorBase(const _TraitsT& __traits)
229       : _M_traits(__traits)
230       { }
231 
232       _CharT
233       _M_translate(_CharT __ch) const
234       {
235 	if (__icase)
236 	  return _M_traits.translate_nocase(__ch);
237 	else if (__collate)
238 	  return _M_traits.translate(__ch);
239 	else
240 	  return __ch;
241       }
242 
243       _StrTransT
244       _M_transform(_CharT __ch) const
245       {
246 	_StrTransT __str(1, __ch);
247 	return _M_traits.transform(__str.begin(), __str.end());
248       }
249 
250       // See LWG 523. It's not efficiently implementable when _TraitsT is not
251       // std::regex_traits<>, and __collate is true. See specializations for
252       // implementations of other cases.
253       bool
254       _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
255 		     const _StrTransT& __s) const
256       { return __first <= __s && __s <= __last; }
257 
258     protected:
259       bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
260       {
261 	typedef std::ctype<_CharT> __ctype_type;
262 	const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc());
263 	auto __lower = __fctyp.tolower(__ch);
264 	auto __upper = __fctyp.toupper(__ch);
265 	return (__first <= __lower && __lower <= __last)
266 	  || (__first <= __upper && __upper <= __last);
267       }
268 
269       const _TraitsT& _M_traits;
270     };
271 
272   template<typename _TraitsT, bool __icase, bool __collate>
273     class _RegexTranslator
274     : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
275     {
276     public:
277       typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
278       using _Base::_Base;
279     };
280 
281   template<typename _TraitsT, bool __icase>
282     class _RegexTranslator<_TraitsT, __icase, false>
283     : public _RegexTranslatorBase<_TraitsT, __icase, false>
284     {
285     public:
286       typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
287       typedef typename _Base::_CharT _CharT;
288       typedef _CharT _StrTransT;
289 
290       using _Base::_Base;
291 
292       _StrTransT
293       _M_transform(_CharT __ch) const
294       { return __ch; }
295 
296       bool
297       _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
298       {
299 	if (!__icase)
300 	  return __first <= __ch && __ch <= __last;
301 	return this->_M_in_range_icase(__first, __last, __ch);
302       }
303     };
304 
305   template<typename _CharType>
306     class _RegexTranslator<std::regex_traits<_CharType>, true, true>
307     : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
308     {
309     public:
310       typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
311 	_Base;
312       typedef typename _Base::_CharT _CharT;
313       typedef typename _Base::_StrTransT _StrTransT;
314 
315       using _Base::_Base;
316 
317       bool
318       _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
319 		     const _StrTransT& __str) const
320       {
321 	__glibcxx_assert(__first.size() == 1);
322 	__glibcxx_assert(__last.size() == 1);
323 	__glibcxx_assert(__str.size() == 1);
324 	return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
325       }
326     };
327 
328   template<typename _TraitsT>
329     class _RegexTranslator<_TraitsT, false, false>
330     {
331     public:
332       typedef typename _TraitsT::char_type _CharT;
333       typedef _CharT                       _StrTransT;
334 
335       explicit
336       _RegexTranslator(const _TraitsT&)
337       { }
338 
339       _CharT
340       _M_translate(_CharT __ch) const
341       { return __ch; }
342 
343       _StrTransT
344       _M_transform(_CharT __ch) const
345       { return __ch; }
346 
347       bool
348       _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
349       { return __first <= __ch && __ch <= __last; }
350     };
351 
352   template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
353     struct _AnyMatcher;
354 
355   template<typename _TraitsT, bool __icase, bool __collate>
356     struct _AnyMatcher<_TraitsT, false, __icase, __collate>
357     {
358       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
359       typedef typename _TransT::_CharT                       _CharT;
360 
361       explicit
362       _AnyMatcher(const _TraitsT& __traits)
363       : _M_translator(__traits)
364       { }
365 
366       bool
367       operator()(_CharT __ch) const
368       {
369 	static auto __nul = _M_translator._M_translate('\0');
370 	return _M_translator._M_translate(__ch) != __nul;
371       }
372 
373       _TransT _M_translator;
374     };
375 
376   template<typename _TraitsT, bool __icase, bool __collate>
377     struct _AnyMatcher<_TraitsT, true, __icase, __collate>
378     {
379       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
380       typedef typename _TransT::_CharT                       _CharT;
381 
382       explicit
383       _AnyMatcher(const _TraitsT& __traits)
384       : _M_translator(__traits)
385       { }
386 
387       bool
388       operator()(_CharT __ch) const
389       { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
390 
391       bool
392       _M_apply(_CharT __ch, true_type) const
393       {
394 	auto __c = _M_translator._M_translate(__ch);
395 	auto __n = _M_translator._M_translate('\n');
396 	auto __r = _M_translator._M_translate('\r');
397 	return __c != __n && __c != __r;
398       }
399 
400       bool
401       _M_apply(_CharT __ch, false_type) const
402       {
403 	auto __c = _M_translator._M_translate(__ch);
404 	auto __n = _M_translator._M_translate('\n');
405 	auto __r = _M_translator._M_translate('\r');
406 	auto __u2028 = _M_translator._M_translate(u'\u2028');
407 	auto __u2029 = _M_translator._M_translate(u'\u2029');
408 	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
409       }
410 
411       _TransT _M_translator;
412     };
413 
414   template<typename _TraitsT, bool __icase, bool __collate>
415     struct _CharMatcher
416     {
417       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
418       typedef typename _TransT::_CharT                       _CharT;
419 
420       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
421       : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
422       { }
423 
424       bool
425       operator()(_CharT __ch) const
426       { return _M_ch == _M_translator._M_translate(__ch); }
427 
428       _TransT _M_translator;
429       _CharT  _M_ch;
430     };
431 
432   /// Matches a character range (bracket expression)
433   template<typename _TraitsT, bool __icase, bool __collate>
434     struct _BracketMatcher
435     {
436     public:
437       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
438       typedef typename _TransT::_CharT                       _CharT;
439       typedef typename _TransT::_StrTransT                   _StrTransT;
440       typedef typename _TraitsT::string_type                 _StringT;
441       typedef typename _TraitsT::char_class_type             _CharClassT;
442 
443     public:
444       _BracketMatcher(bool __is_non_matching,
445 		      const _TraitsT& __traits)
446       : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
447       _M_is_non_matching(__is_non_matching)
448       { }
449 
450       bool
451       operator()(_CharT __ch) const
452       {
453 	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
454 	return _M_apply(__ch, _UseCache());
455       }
456 
457       void
458       _M_add_char(_CharT __c)
459       {
460 	_M_char_set.push_back(_M_translator._M_translate(__c));
461 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
462       }
463 
464       _StringT
465       _M_add_collate_element(const _StringT& __s)
466       {
467 	auto __st = _M_traits.lookup_collatename(__s.data(),
468 						 __s.data() + __s.size());
469 	if (__st.empty())
470 	  __throw_regex_error(regex_constants::error_collate,
471 			      "Invalid collate element.");
472 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
473 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
474 	return __st;
475       }
476 
477       void
478       _M_add_equivalence_class(const _StringT& __s)
479       {
480 	auto __st = _M_traits.lookup_collatename(__s.data(),
481 						 __s.data() + __s.size());
482 	if (__st.empty())
483 	  __throw_regex_error(regex_constants::error_collate,
484 			      "Invalid equivalence class.");
485 	__st = _M_traits.transform_primary(__st.data(),
486 					   __st.data() + __st.size());
487 	_M_equiv_set.push_back(__st);
488 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
489       }
490 
491       // __neg should be true for \D, \S and \W only.
492       void
493       _M_add_character_class(const _StringT& __s, bool __neg)
494       {
495 	auto __mask = _M_traits.lookup_classname(__s.data(),
496 						 __s.data() + __s.size(),
497 						 __icase);
498 	if (__mask == 0)
499 	  __throw_regex_error(regex_constants::error_collate,
500 			      "Invalid character class.");
501 	if (!__neg)
502 	  _M_class_set |= __mask;
503 	else
504 	  _M_neg_class_set.push_back(__mask);
505 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
506       }
507 
508       void
509       _M_make_range(_CharT __l, _CharT __r)
510       {
511 	if (__l > __r)
512 	  __throw_regex_error(regex_constants::error_range,
513 			      "Invalid range in bracket expression.");
514 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
515 					 _M_translator._M_transform(__r)));
516 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = false);
517       }
518 
519       void
520       _M_ready()
521       {
522 	std::sort(_M_char_set.begin(), _M_char_set.end());
523 	auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
524 	_M_char_set.erase(__end, _M_char_set.end());
525 	_M_make_cache(_UseCache());
526 	_GLIBCXX_DEBUG_ONLY(_M_is_ready = true);
527       }
528 
529     private:
530       // Currently we only use the cache for char
531       typedef typename std::is_same<_CharT, char>::type _UseCache;
532 
533       static constexpr size_t
534       _S_cache_size()
535       {
536 	return 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
537       }
538 
539       struct _Dummy { };
540       typedef typename std::conditional<_UseCache::value,
541 					std::bitset<_S_cache_size()>,
542 					_Dummy>::type _CacheT;
543       typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT;
544 
545       bool
546       _M_apply(_CharT __ch, false_type) const;
547 
548       bool
549       _M_apply(_CharT __ch, true_type) const
550       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
551 
552       void
553       _M_make_cache(true_type)
554       {
555 	for (unsigned __i = 0; __i < _M_cache.size(); __i++)
556 	  _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
557       }
558 
559       void
560       _M_make_cache(false_type)
561       { }
562 
563     private:
564       std::vector<_CharT>                       _M_char_set;
565       std::vector<_StringT>                     _M_equiv_set;
566       std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
567       std::vector<_CharClassT>                  _M_neg_class_set;
568       _CharClassT                               _M_class_set;
569       _TransT                                   _M_translator;
570       const _TraitsT&                           _M_traits;
571       bool                                      _M_is_non_matching;
572       _CacheT					_M_cache;
573 #ifdef _GLIBCXX_DEBUG
574       bool                                      _M_is_ready = false;
575 #endif
576     };
577 
578  //@} regex-detail
579 _GLIBCXX_END_NAMESPACE_VERSION
580 } // namespace __detail
581 } // namespace std
582 
583 #include <bits/regex_compiler.tcc>
584