xref: /netbsd-src/external/gpl3/gcc.old/dist/libstdc++-v3/include/bits/regex_scanner.h (revision 23f5f46327e37e7811da3520f4bb933f9489322f)
// class template regex -*- C++ -*-

// Copyright (C) 2013-2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
301debfc3dSmrg 
_GLIBCXX_VISIBILITY(default)311debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
321debfc3dSmrg {
331debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
341debfc3dSmrg 
35a2dc1f3fSmrg namespace __detail
36a2dc1f3fSmrg {
  /**
   * @addtogroup regex-detail
   * @{
   */
411debfc3dSmrg 
421debfc3dSmrg   struct _ScannerBase
431debfc3dSmrg   {
441debfc3dSmrg   public:
451debfc3dSmrg     /// Token types returned from the scanner.
461debfc3dSmrg     enum _TokenT : unsigned
471debfc3dSmrg     {
481debfc3dSmrg       _S_token_anychar,
491debfc3dSmrg       _S_token_ord_char,
501debfc3dSmrg       _S_token_oct_num,
511debfc3dSmrg       _S_token_hex_num,
521debfc3dSmrg       _S_token_backref,
531debfc3dSmrg       _S_token_subexpr_begin,
541debfc3dSmrg       _S_token_subexpr_no_group_begin,
551debfc3dSmrg       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
561debfc3dSmrg       _S_token_subexpr_end,
571debfc3dSmrg       _S_token_bracket_begin,
581debfc3dSmrg       _S_token_bracket_neg_begin,
591debfc3dSmrg       _S_token_bracket_end,
601debfc3dSmrg       _S_token_interval_begin,
611debfc3dSmrg       _S_token_interval_end,
621debfc3dSmrg       _S_token_quoted_class,
631debfc3dSmrg       _S_token_char_class_name,
641debfc3dSmrg       _S_token_collsymbol,
651debfc3dSmrg       _S_token_equiv_class_name,
661debfc3dSmrg       _S_token_opt,
671debfc3dSmrg       _S_token_or,
681debfc3dSmrg       _S_token_closure0,
691debfc3dSmrg       _S_token_closure1,
701debfc3dSmrg       _S_token_line_begin,
711debfc3dSmrg       _S_token_line_end,
721debfc3dSmrg       _S_token_word_bound, // neg if _M_value[0] == 'n'
731debfc3dSmrg       _S_token_comma,
741debfc3dSmrg       _S_token_dup_count,
751debfc3dSmrg       _S_token_eof,
761debfc3dSmrg       _S_token_bracket_dash,
771debfc3dSmrg       _S_token_unknown = -1u
781debfc3dSmrg     };
791debfc3dSmrg 
801debfc3dSmrg   protected:
811debfc3dSmrg     typedef regex_constants::syntax_option_type _FlagT;
821debfc3dSmrg 
831debfc3dSmrg     enum _StateT
841debfc3dSmrg     {
851debfc3dSmrg       _S_state_normal,
861debfc3dSmrg       _S_state_in_brace,
871debfc3dSmrg       _S_state_in_bracket,
881debfc3dSmrg     };
891debfc3dSmrg 
901debfc3dSmrg   protected:
911debfc3dSmrg     _ScannerBase(_FlagT __flags)
921debfc3dSmrg     : _M_state(_S_state_normal),
931debfc3dSmrg     _M_flags(__flags),
941debfc3dSmrg     _M_escape_tbl(_M_is_ecma()
951debfc3dSmrg 		  ? _M_ecma_escape_tbl
961debfc3dSmrg 		  : _M_awk_escape_tbl),
971debfc3dSmrg     _M_spec_char(_M_is_ecma()
981debfc3dSmrg 		 ? _M_ecma_spec_char
991debfc3dSmrg 		 : _M_flags & regex_constants::basic
1001debfc3dSmrg 		 ? _M_basic_spec_char
1011debfc3dSmrg 		 : _M_flags & regex_constants::extended
1021debfc3dSmrg 		 ? _M_extended_spec_char
1031debfc3dSmrg 		 : _M_flags & regex_constants::grep
1041debfc3dSmrg 		 ?  ".[\\*^$\n"
1051debfc3dSmrg 		 : _M_flags & regex_constants::egrep
1061debfc3dSmrg 		 ? ".[\\()*+?{|^$\n"
1071debfc3dSmrg 		 : _M_flags & regex_constants::awk
1081debfc3dSmrg 		 ? _M_extended_spec_char
1091debfc3dSmrg 		 : nullptr),
1101debfc3dSmrg     _M_at_bracket_start(false)
1111debfc3dSmrg     { __glibcxx_assert(_M_spec_char); }
1121debfc3dSmrg 
1131debfc3dSmrg   protected:
1141debfc3dSmrg     const char*
1151debfc3dSmrg     _M_find_escape(char __c)
1161debfc3dSmrg     {
1171debfc3dSmrg       auto __it = _M_escape_tbl;
1181debfc3dSmrg       for (; __it->first != '\0'; ++__it)
1191debfc3dSmrg 	if (__it->first == __c)
1201debfc3dSmrg 	  return &__it->second;
1211debfc3dSmrg       return nullptr;
1221debfc3dSmrg     }
1231debfc3dSmrg 
1241debfc3dSmrg     bool
1251debfc3dSmrg     _M_is_ecma() const
1261debfc3dSmrg     { return _M_flags & regex_constants::ECMAScript; }
1271debfc3dSmrg 
1281debfc3dSmrg     bool
1291debfc3dSmrg     _M_is_basic() const
1301debfc3dSmrg     { return _M_flags & (regex_constants::basic | regex_constants::grep); }
1311debfc3dSmrg 
1321debfc3dSmrg     bool
1331debfc3dSmrg     _M_is_extended() const
1341debfc3dSmrg     {
1351debfc3dSmrg       return _M_flags & (regex_constants::extended
1361debfc3dSmrg 			 | regex_constants::egrep
1371debfc3dSmrg 			 | regex_constants::awk);
1381debfc3dSmrg     }
1391debfc3dSmrg 
1401debfc3dSmrg     bool
1411debfc3dSmrg     _M_is_grep() const
1421debfc3dSmrg     { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
1431debfc3dSmrg 
1441debfc3dSmrg     bool
1451debfc3dSmrg     _M_is_awk() const
1461debfc3dSmrg     { return _M_flags & regex_constants::awk; }
1471debfc3dSmrg 
1481debfc3dSmrg   protected:
1491debfc3dSmrg     // TODO: Make them static in the next abi change.
1501debfc3dSmrg     const std::pair<char, _TokenT> _M_token_tbl[9] =
1511debfc3dSmrg       {
1521debfc3dSmrg 	{'^', _S_token_line_begin},
1531debfc3dSmrg 	{'$', _S_token_line_end},
1541debfc3dSmrg 	{'.', _S_token_anychar},
1551debfc3dSmrg 	{'*', _S_token_closure0},
1561debfc3dSmrg 	{'+', _S_token_closure1},
1571debfc3dSmrg 	{'?', _S_token_opt},
1581debfc3dSmrg 	{'|', _S_token_or},
1591debfc3dSmrg 	{'\n', _S_token_or}, // grep and egrep
1601debfc3dSmrg 	{'\0', _S_token_or},
1611debfc3dSmrg       };
1621debfc3dSmrg     const std::pair<char, char> _M_ecma_escape_tbl[8] =
1631debfc3dSmrg       {
1641debfc3dSmrg 	{'0', '\0'},
1651debfc3dSmrg 	{'b', '\b'},
1661debfc3dSmrg 	{'f', '\f'},
1671debfc3dSmrg 	{'n', '\n'},
1681debfc3dSmrg 	{'r', '\r'},
1691debfc3dSmrg 	{'t', '\t'},
1701debfc3dSmrg 	{'v', '\v'},
1711debfc3dSmrg 	{'\0', '\0'},
1721debfc3dSmrg       };
1731debfc3dSmrg     const std::pair<char, char> _M_awk_escape_tbl[11] =
1741debfc3dSmrg       {
1751debfc3dSmrg 	{'"', '"'},
1761debfc3dSmrg 	{'/', '/'},
1771debfc3dSmrg 	{'\\', '\\'},
1781debfc3dSmrg 	{'a', '\a'},
1791debfc3dSmrg 	{'b', '\b'},
1801debfc3dSmrg 	{'f', '\f'},
1811debfc3dSmrg 	{'n', '\n'},
1821debfc3dSmrg 	{'r', '\r'},
1831debfc3dSmrg 	{'t', '\t'},
1841debfc3dSmrg 	{'v', '\v'},
1851debfc3dSmrg 	{'\0', '\0'},
1861debfc3dSmrg       };
1871debfc3dSmrg     const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
1881debfc3dSmrg     const char* _M_basic_spec_char = ".[\\*^$";
1891debfc3dSmrg     const char* _M_extended_spec_char = ".[\\()*+?{|^$";
1901debfc3dSmrg 
1911debfc3dSmrg     _StateT                       _M_state;
1921debfc3dSmrg     _FlagT                        _M_flags;
1931debfc3dSmrg     _TokenT                       _M_token;
1941debfc3dSmrg     const std::pair<char, char>*  _M_escape_tbl;
1951debfc3dSmrg     const char*                   _M_spec_char;
1961debfc3dSmrg     bool                          _M_at_bracket_start;
1971debfc3dSmrg   };
1981debfc3dSmrg 
1991debfc3dSmrg   /**
2001debfc3dSmrg    * @brief Scans an input range for regex tokens.
2011debfc3dSmrg    *
2021debfc3dSmrg    * The %_Scanner class interprets the regular expression pattern in
2031debfc3dSmrg    * the input range passed to its constructor as a sequence of parse
2041debfc3dSmrg    * tokens passed to the regular expression compiler.  The sequence
2051debfc3dSmrg    * of tokens provided depends on the flag settings passed to the
2061debfc3dSmrg    * constructor: different regular expression grammars will interpret
2071debfc3dSmrg    * the same input pattern in syntactically different ways.
2081debfc3dSmrg    */
2091debfc3dSmrg   template<typename _CharT>
2101debfc3dSmrg     class _Scanner
2111debfc3dSmrg     : public _ScannerBase
2121debfc3dSmrg     {
2131debfc3dSmrg     public:
2141debfc3dSmrg       typedef const _CharT*                                       _IterT;
2151debfc3dSmrg       typedef std::basic_string<_CharT>                           _StringT;
2161debfc3dSmrg       typedef regex_constants::syntax_option_type                 _FlagT;
2171debfc3dSmrg       typedef const std::ctype<_CharT>                            _CtypeT;
2181debfc3dSmrg 
2191debfc3dSmrg       _Scanner(_IterT __begin, _IterT __end,
2201debfc3dSmrg 	       _FlagT __flags, std::locale __loc);
2211debfc3dSmrg 
2221debfc3dSmrg       void
2231debfc3dSmrg       _M_advance();
2241debfc3dSmrg 
2251debfc3dSmrg       _TokenT
226*23f5f463Smrg       _M_get_token() const noexcept
2271debfc3dSmrg       { return _M_token; }
2281debfc3dSmrg 
2291debfc3dSmrg       const _StringT&
230*23f5f463Smrg       _M_get_value() const noexcept
2311debfc3dSmrg       { return _M_value; }
2321debfc3dSmrg 
2331debfc3dSmrg #ifdef _GLIBCXX_DEBUG
2341debfc3dSmrg       std::ostream&
2351debfc3dSmrg       _M_print(std::ostream&);
2361debfc3dSmrg #endif
2371debfc3dSmrg 
2381debfc3dSmrg     private:
2391debfc3dSmrg       void
2401debfc3dSmrg       _M_scan_normal();
2411debfc3dSmrg 
2421debfc3dSmrg       void
2431debfc3dSmrg       _M_scan_in_bracket();
2441debfc3dSmrg 
2451debfc3dSmrg       void
2461debfc3dSmrg       _M_scan_in_brace();
2471debfc3dSmrg 
2481debfc3dSmrg       void
2491debfc3dSmrg       _M_eat_escape_ecma();
2501debfc3dSmrg 
2511debfc3dSmrg       void
2521debfc3dSmrg       _M_eat_escape_posix();
2531debfc3dSmrg 
2541debfc3dSmrg       void
2551debfc3dSmrg       _M_eat_escape_awk();
2561debfc3dSmrg 
2571debfc3dSmrg       void
2581debfc3dSmrg       _M_eat_class(char);
2591debfc3dSmrg 
2601debfc3dSmrg       _IterT                        _M_current;
2611debfc3dSmrg       _IterT                        _M_end;
2621debfc3dSmrg       _CtypeT&                      _M_ctype;
2631debfc3dSmrg       _StringT                      _M_value;
2641debfc3dSmrg       void (_Scanner::* _M_eat_escape)();
2651debfc3dSmrg     };
2661debfc3dSmrg 
 ///@} regex-detail
2681debfc3dSmrg } // namespace __detail
269a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION
2701debfc3dSmrg } // namespace std
2711debfc3dSmrg 
2721debfc3dSmrg #include <bits/regex_scanner.tcc>
273