xref: /netbsd-src/external/gpl3/gcc/dist/libstdc++-v3/include/bits/regex_scanner.h (revision b1e838363e3c6fc78a55519254d99869742dd33c)
// class template regex -*- C++ -*-

// Copyright (C) 2013-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */

_GLIBCXX_VISIBILITY(default)314d5abbe8Smrg namespace std _GLIBCXX_VISIBILITY(default)
324d5abbe8Smrg {
338b6133e5Smrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
348b6133e5Smrg 
35a3e9eb18Smrg namespace __detail
36a3e9eb18Smrg {
/**
 * @addtogroup regex-detail
 * @{
 */

424d5abbe8Smrg   struct _ScannerBase
434d5abbe8Smrg   {
444d5abbe8Smrg   public:
454d5abbe8Smrg     /// Token types returned from the scanner.
46b17d1066Smrg     enum _TokenT : unsigned
474d5abbe8Smrg     {
484d5abbe8Smrg       _S_token_anychar,
494d5abbe8Smrg       _S_token_ord_char,
504d5abbe8Smrg       _S_token_oct_num,
514d5abbe8Smrg       _S_token_hex_num,
524d5abbe8Smrg       _S_token_backref,
534d5abbe8Smrg       _S_token_subexpr_begin,
544d5abbe8Smrg       _S_token_subexpr_no_group_begin,
554d5abbe8Smrg       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
564d5abbe8Smrg       _S_token_subexpr_end,
574d5abbe8Smrg       _S_token_bracket_begin,
584d5abbe8Smrg       _S_token_bracket_neg_begin,
594d5abbe8Smrg       _S_token_bracket_end,
604d5abbe8Smrg       _S_token_interval_begin,
614d5abbe8Smrg       _S_token_interval_end,
624d5abbe8Smrg       _S_token_quoted_class,
634d5abbe8Smrg       _S_token_char_class_name,
644d5abbe8Smrg       _S_token_collsymbol,
654d5abbe8Smrg       _S_token_equiv_class_name,
664d5abbe8Smrg       _S_token_opt,
674d5abbe8Smrg       _S_token_or,
684d5abbe8Smrg       _S_token_closure0,
694d5abbe8Smrg       _S_token_closure1,
704d5abbe8Smrg       _S_token_line_begin,
714d5abbe8Smrg       _S_token_line_end,
724d5abbe8Smrg       _S_token_word_bound, // neg if _M_value[0] == 'n'
734d5abbe8Smrg       _S_token_comma,
744d5abbe8Smrg       _S_token_dup_count,
754d5abbe8Smrg       _S_token_eof,
76b17d1066Smrg       _S_token_bracket_dash,
77b17d1066Smrg       _S_token_unknown = -1u
784d5abbe8Smrg     };
794d5abbe8Smrg 
804d5abbe8Smrg   protected:
814d5abbe8Smrg     typedef regex_constants::syntax_option_type _FlagT;
824d5abbe8Smrg 
834d5abbe8Smrg     enum _StateT
844d5abbe8Smrg     {
854d5abbe8Smrg       _S_state_normal,
864d5abbe8Smrg       _S_state_in_brace,
874d5abbe8Smrg       _S_state_in_bracket,
884d5abbe8Smrg     };
894d5abbe8Smrg 
904d5abbe8Smrg   protected:
914d5abbe8Smrg     _ScannerBase(_FlagT __flags)
924d5abbe8Smrg     : _M_state(_S_state_normal),
934d5abbe8Smrg     _M_flags(__flags),
944d5abbe8Smrg     _M_escape_tbl(_M_is_ecma()
954d5abbe8Smrg 		  ? _M_ecma_escape_tbl
964d5abbe8Smrg 		  : _M_awk_escape_tbl),
974d5abbe8Smrg     _M_spec_char(_M_is_ecma()
984d5abbe8Smrg 		 ? _M_ecma_spec_char
99f30ff588Smrg 		 : _M_flags & regex_constants::basic
1004d5abbe8Smrg 		 ? _M_basic_spec_char
101f30ff588Smrg 		 : _M_flags & regex_constants::extended
102f30ff588Smrg 		 ? _M_extended_spec_char
103f30ff588Smrg 		 : _M_flags & regex_constants::grep
104f30ff588Smrg 		 ?  ".[\\*^$\n"
105f30ff588Smrg 		 : _M_flags & regex_constants::egrep
106f30ff588Smrg 		 ? ".[\\()*+?{|^$\n"
107f30ff588Smrg 		 : _M_flags & regex_constants::awk
108f30ff588Smrg 		 ? _M_extended_spec_char
109f30ff588Smrg 		 : nullptr),
1104d5abbe8Smrg     _M_at_bracket_start(false)
111f30ff588Smrg     { __glibcxx_assert(_M_spec_char); }
1124d5abbe8Smrg 
1134d5abbe8Smrg   protected:
1144d5abbe8Smrg     const char*
1154d5abbe8Smrg     _M_find_escape(char __c)
1164d5abbe8Smrg     {
1174d5abbe8Smrg       auto __it = _M_escape_tbl;
1184d5abbe8Smrg       for (; __it->first != '\0'; ++__it)
1194d5abbe8Smrg 	if (__it->first == __c)
1204d5abbe8Smrg 	  return &__it->second;
1214d5abbe8Smrg       return nullptr;
1224d5abbe8Smrg     }
1234d5abbe8Smrg 
1244d5abbe8Smrg     bool
1254d5abbe8Smrg     _M_is_ecma() const
1264d5abbe8Smrg     { return _M_flags & regex_constants::ECMAScript; }
1274d5abbe8Smrg 
1284d5abbe8Smrg     bool
1294d5abbe8Smrg     _M_is_basic() const
1304d5abbe8Smrg     { return _M_flags & (regex_constants::basic | regex_constants::grep); }
1314d5abbe8Smrg 
1324d5abbe8Smrg     bool
1334d5abbe8Smrg     _M_is_extended() const
1344d5abbe8Smrg     {
1354d5abbe8Smrg       return _M_flags & (regex_constants::extended
1364d5abbe8Smrg 			 | regex_constants::egrep
1374d5abbe8Smrg 			 | regex_constants::awk);
1384d5abbe8Smrg     }
1394d5abbe8Smrg 
1404d5abbe8Smrg     bool
1414d5abbe8Smrg     _M_is_grep() const
1424d5abbe8Smrg     { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
1434d5abbe8Smrg 
1444d5abbe8Smrg     bool
1454d5abbe8Smrg     _M_is_awk() const
1464d5abbe8Smrg     { return _M_flags & regex_constants::awk; }
1474d5abbe8Smrg 
1484d5abbe8Smrg   protected:
149f30ff588Smrg     // TODO: Make them static in the next abi change.
1504d5abbe8Smrg     const std::pair<char, _TokenT> _M_token_tbl[9] =
1514d5abbe8Smrg       {
1524d5abbe8Smrg 	{'^', _S_token_line_begin},
1534d5abbe8Smrg 	{'$', _S_token_line_end},
1544d5abbe8Smrg 	{'.', _S_token_anychar},
1554d5abbe8Smrg 	{'*', _S_token_closure0},
1564d5abbe8Smrg 	{'+', _S_token_closure1},
1574d5abbe8Smrg 	{'?', _S_token_opt},
1584d5abbe8Smrg 	{'|', _S_token_or},
1594d5abbe8Smrg 	{'\n', _S_token_or}, // grep and egrep
1604d5abbe8Smrg 	{'\0', _S_token_or},
1614d5abbe8Smrg       };
1624d5abbe8Smrg     const std::pair<char, char> _M_ecma_escape_tbl[8] =
1634d5abbe8Smrg       {
1644d5abbe8Smrg 	{'0', '\0'},
1654d5abbe8Smrg 	{'b', '\b'},
1664d5abbe8Smrg 	{'f', '\f'},
1674d5abbe8Smrg 	{'n', '\n'},
1684d5abbe8Smrg 	{'r', '\r'},
1694d5abbe8Smrg 	{'t', '\t'},
1704d5abbe8Smrg 	{'v', '\v'},
1714d5abbe8Smrg 	{'\0', '\0'},
1724d5abbe8Smrg       };
1734d5abbe8Smrg     const std::pair<char, char> _M_awk_escape_tbl[11] =
1744d5abbe8Smrg       {
1754d5abbe8Smrg 	{'"', '"'},
1764d5abbe8Smrg 	{'/', '/'},
1774d5abbe8Smrg 	{'\\', '\\'},
1784d5abbe8Smrg 	{'a', '\a'},
1794d5abbe8Smrg 	{'b', '\b'},
1804d5abbe8Smrg 	{'f', '\f'},
1814d5abbe8Smrg 	{'n', '\n'},
1824d5abbe8Smrg 	{'r', '\r'},
1834d5abbe8Smrg 	{'t', '\t'},
1844d5abbe8Smrg 	{'v', '\v'},
1854d5abbe8Smrg 	{'\0', '\0'},
1864d5abbe8Smrg       };
1874d5abbe8Smrg     const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
1884d5abbe8Smrg     const char* _M_basic_spec_char = ".[\\*^$";
1894d5abbe8Smrg     const char* _M_extended_spec_char = ".[\\()*+?{|^$";
1904d5abbe8Smrg 
1914d5abbe8Smrg     _StateT                       _M_state;
1924d5abbe8Smrg     _FlagT                        _M_flags;
1934d5abbe8Smrg     _TokenT                       _M_token;
1944d5abbe8Smrg     const std::pair<char, char>*  _M_escape_tbl;
1954d5abbe8Smrg     const char*                   _M_spec_char;
1964d5abbe8Smrg     bool                          _M_at_bracket_start;
1974d5abbe8Smrg   };
1984d5abbe8Smrg 
1994d5abbe8Smrg   /**
2004d5abbe8Smrg    * @brief Scans an input range for regex tokens.
2014d5abbe8Smrg    *
2024d5abbe8Smrg    * The %_Scanner class interprets the regular expression pattern in
2034d5abbe8Smrg    * the input range passed to its constructor as a sequence of parse
2044d5abbe8Smrg    * tokens passed to the regular expression compiler.  The sequence
2054d5abbe8Smrg    * of tokens provided depends on the flag settings passed to the
2064d5abbe8Smrg    * constructor: different regular expression grammars will interpret
2074d5abbe8Smrg    * the same input pattern in syntactically different ways.
2084d5abbe8Smrg    */
2094d5abbe8Smrg   template<typename _CharT>
2104d5abbe8Smrg     class _Scanner
2114d5abbe8Smrg     : public _ScannerBase
2124d5abbe8Smrg     {
2134d5abbe8Smrg     public:
2144d5abbe8Smrg       typedef std::basic_string<_CharT>                           _StringT;
2154d5abbe8Smrg       typedef regex_constants::syntax_option_type                 _FlagT;
2164d5abbe8Smrg       typedef const std::ctype<_CharT>                            _CtypeT;
2174d5abbe8Smrg 
218*b1e83836Smrg       _Scanner(const _CharT* __begin, const _CharT* __end,
2194d5abbe8Smrg 	       _FlagT __flags, std::locale __loc);
2204d5abbe8Smrg 
2214d5abbe8Smrg       void
2224d5abbe8Smrg       _M_advance();
2234d5abbe8Smrg 
2244d5abbe8Smrg       _TokenT
2257d4dc15bSmrg       _M_get_token() const noexcept
2264d5abbe8Smrg       { return _M_token; }
2274d5abbe8Smrg 
2284d5abbe8Smrg       const _StringT&
2297d4dc15bSmrg       _M_get_value() const noexcept
2304d5abbe8Smrg       { return _M_value; }
2314d5abbe8Smrg 
2324d5abbe8Smrg #ifdef _GLIBCXX_DEBUG
2334d5abbe8Smrg       std::ostream&
2344d5abbe8Smrg       _M_print(std::ostream&);
2354d5abbe8Smrg #endif
2364d5abbe8Smrg 
2374d5abbe8Smrg     private:
2384d5abbe8Smrg       void
2394d5abbe8Smrg       _M_scan_normal();
2404d5abbe8Smrg 
2414d5abbe8Smrg       void
2424d5abbe8Smrg       _M_scan_in_bracket();
2434d5abbe8Smrg 
2444d5abbe8Smrg       void
2454d5abbe8Smrg       _M_scan_in_brace();
2464d5abbe8Smrg 
2474d5abbe8Smrg       void
2484d5abbe8Smrg       _M_eat_escape_ecma();
2494d5abbe8Smrg 
2504d5abbe8Smrg       void
2514d5abbe8Smrg       _M_eat_escape_posix();
2524d5abbe8Smrg 
2534d5abbe8Smrg       void
2544d5abbe8Smrg       _M_eat_escape_awk();
2554d5abbe8Smrg 
2564d5abbe8Smrg       void
2574d5abbe8Smrg       _M_eat_class(char);
2584d5abbe8Smrg 
259*b1e83836Smrg       const _CharT*                 _M_current;
260*b1e83836Smrg       const _CharT*                 _M_end;
2614d5abbe8Smrg       _CtypeT&                      _M_ctype;
2624d5abbe8Smrg       _StringT                      _M_value;
2634d5abbe8Smrg       void (_Scanner::* _M_eat_escape)();
2644d5abbe8Smrg     };
2654d5abbe8Smrg 
///@} regex-detail
2678b6133e5Smrg } // namespace __detail
268a3e9eb18Smrg _GLIBCXX_END_NAMESPACE_VERSION
2694d5abbe8Smrg } // namespace std
2704d5abbe8Smrg 
2714d5abbe8Smrg #include <bits/regex_scanner.tcc>
272