11debfc3dSmrg // class template regex -*- C++ -*-
21debfc3dSmrg
38feb0f0bSmrg // Copyright (C) 2013-2020 Free Software Foundation, Inc.
41debfc3dSmrg //
51debfc3dSmrg // This file is part of the GNU ISO C++ Library. This library is free
61debfc3dSmrg // software; you can redistribute it and/or modify it under the
71debfc3dSmrg // terms of the GNU General Public License as published by the
81debfc3dSmrg // Free Software Foundation; either version 3, or (at your option)
91debfc3dSmrg // any later version.
101debfc3dSmrg
111debfc3dSmrg // This library is distributed in the hope that it will be useful,
121debfc3dSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
131debfc3dSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
141debfc3dSmrg // GNU General Public License for more details.
151debfc3dSmrg
161debfc3dSmrg // Under Section 7 of GPL version 3, you are granted additional
171debfc3dSmrg // permissions described in the GCC Runtime Library Exception, version
181debfc3dSmrg // 3.1, as published by the Free Software Foundation.
191debfc3dSmrg
201debfc3dSmrg // You should have received a copy of the GNU General Public License and
211debfc3dSmrg // a copy of the GCC Runtime Library Exception along with this program;
221debfc3dSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
231debfc3dSmrg // <http://www.gnu.org/licenses/>.
241debfc3dSmrg
251debfc3dSmrg /**
261debfc3dSmrg * @file bits/regex_scanner.h
271debfc3dSmrg * This is an internal header file, included by other library headers.
281debfc3dSmrg * Do not attempt to use it directly. @headername{regex}
291debfc3dSmrg */
301debfc3dSmrg
_GLIBCXX_VISIBILITY(default)311debfc3dSmrg namespace std _GLIBCXX_VISIBILITY(default)
321debfc3dSmrg {
331debfc3dSmrg _GLIBCXX_BEGIN_NAMESPACE_VERSION
341debfc3dSmrg
35a2dc1f3fSmrg namespace __detail
36a2dc1f3fSmrg {
371debfc3dSmrg /**
381debfc3dSmrg * @addtogroup regex-detail
391debfc3dSmrg * @{
401debfc3dSmrg */
411debfc3dSmrg
421debfc3dSmrg struct _ScannerBase
431debfc3dSmrg {
441debfc3dSmrg public:
451debfc3dSmrg /// Token types returned from the scanner.
461debfc3dSmrg enum _TokenT : unsigned
471debfc3dSmrg {
481debfc3dSmrg _S_token_anychar,
491debfc3dSmrg _S_token_ord_char,
501debfc3dSmrg _S_token_oct_num,
511debfc3dSmrg _S_token_hex_num,
521debfc3dSmrg _S_token_backref,
531debfc3dSmrg _S_token_subexpr_begin,
541debfc3dSmrg _S_token_subexpr_no_group_begin,
551debfc3dSmrg _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
561debfc3dSmrg _S_token_subexpr_end,
571debfc3dSmrg _S_token_bracket_begin,
581debfc3dSmrg _S_token_bracket_neg_begin,
591debfc3dSmrg _S_token_bracket_end,
601debfc3dSmrg _S_token_interval_begin,
611debfc3dSmrg _S_token_interval_end,
621debfc3dSmrg _S_token_quoted_class,
631debfc3dSmrg _S_token_char_class_name,
641debfc3dSmrg _S_token_collsymbol,
651debfc3dSmrg _S_token_equiv_class_name,
661debfc3dSmrg _S_token_opt,
671debfc3dSmrg _S_token_or,
681debfc3dSmrg _S_token_closure0,
691debfc3dSmrg _S_token_closure1,
701debfc3dSmrg _S_token_line_begin,
711debfc3dSmrg _S_token_line_end,
721debfc3dSmrg _S_token_word_bound, // neg if _M_value[0] == 'n'
731debfc3dSmrg _S_token_comma,
741debfc3dSmrg _S_token_dup_count,
751debfc3dSmrg _S_token_eof,
761debfc3dSmrg _S_token_bracket_dash,
771debfc3dSmrg _S_token_unknown = -1u
781debfc3dSmrg };
791debfc3dSmrg
801debfc3dSmrg protected:
811debfc3dSmrg typedef regex_constants::syntax_option_type _FlagT;
821debfc3dSmrg
831debfc3dSmrg enum _StateT
841debfc3dSmrg {
851debfc3dSmrg _S_state_normal,
861debfc3dSmrg _S_state_in_brace,
871debfc3dSmrg _S_state_in_bracket,
881debfc3dSmrg };
891debfc3dSmrg
901debfc3dSmrg protected:
911debfc3dSmrg _ScannerBase(_FlagT __flags)
921debfc3dSmrg : _M_state(_S_state_normal),
931debfc3dSmrg _M_flags(__flags),
941debfc3dSmrg _M_escape_tbl(_M_is_ecma()
951debfc3dSmrg ? _M_ecma_escape_tbl
961debfc3dSmrg : _M_awk_escape_tbl),
971debfc3dSmrg _M_spec_char(_M_is_ecma()
981debfc3dSmrg ? _M_ecma_spec_char
991debfc3dSmrg : _M_flags & regex_constants::basic
1001debfc3dSmrg ? _M_basic_spec_char
1011debfc3dSmrg : _M_flags & regex_constants::extended
1021debfc3dSmrg ? _M_extended_spec_char
1031debfc3dSmrg : _M_flags & regex_constants::grep
1041debfc3dSmrg ? ".[\\*^$\n"
1051debfc3dSmrg : _M_flags & regex_constants::egrep
1061debfc3dSmrg ? ".[\\()*+?{|^$\n"
1071debfc3dSmrg : _M_flags & regex_constants::awk
1081debfc3dSmrg ? _M_extended_spec_char
1091debfc3dSmrg : nullptr),
1101debfc3dSmrg _M_at_bracket_start(false)
1111debfc3dSmrg { __glibcxx_assert(_M_spec_char); }
1121debfc3dSmrg
1131debfc3dSmrg protected:
1141debfc3dSmrg const char*
1151debfc3dSmrg _M_find_escape(char __c)
1161debfc3dSmrg {
1171debfc3dSmrg auto __it = _M_escape_tbl;
1181debfc3dSmrg for (; __it->first != '\0'; ++__it)
1191debfc3dSmrg if (__it->first == __c)
1201debfc3dSmrg return &__it->second;
1211debfc3dSmrg return nullptr;
1221debfc3dSmrg }
1231debfc3dSmrg
1241debfc3dSmrg bool
1251debfc3dSmrg _M_is_ecma() const
1261debfc3dSmrg { return _M_flags & regex_constants::ECMAScript; }
1271debfc3dSmrg
1281debfc3dSmrg bool
1291debfc3dSmrg _M_is_basic() const
1301debfc3dSmrg { return _M_flags & (regex_constants::basic | regex_constants::grep); }
1311debfc3dSmrg
1321debfc3dSmrg bool
1331debfc3dSmrg _M_is_extended() const
1341debfc3dSmrg {
1351debfc3dSmrg return _M_flags & (regex_constants::extended
1361debfc3dSmrg | regex_constants::egrep
1371debfc3dSmrg | regex_constants::awk);
1381debfc3dSmrg }
1391debfc3dSmrg
1401debfc3dSmrg bool
1411debfc3dSmrg _M_is_grep() const
1421debfc3dSmrg { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
1431debfc3dSmrg
1441debfc3dSmrg bool
1451debfc3dSmrg _M_is_awk() const
1461debfc3dSmrg { return _M_flags & regex_constants::awk; }
1471debfc3dSmrg
1481debfc3dSmrg protected:
1491debfc3dSmrg // TODO: Make them static in the next abi change.
1501debfc3dSmrg const std::pair<char, _TokenT> _M_token_tbl[9] =
1511debfc3dSmrg {
1521debfc3dSmrg {'^', _S_token_line_begin},
1531debfc3dSmrg {'$', _S_token_line_end},
1541debfc3dSmrg {'.', _S_token_anychar},
1551debfc3dSmrg {'*', _S_token_closure0},
1561debfc3dSmrg {'+', _S_token_closure1},
1571debfc3dSmrg {'?', _S_token_opt},
1581debfc3dSmrg {'|', _S_token_or},
1591debfc3dSmrg {'\n', _S_token_or}, // grep and egrep
1601debfc3dSmrg {'\0', _S_token_or},
1611debfc3dSmrg };
1621debfc3dSmrg const std::pair<char, char> _M_ecma_escape_tbl[8] =
1631debfc3dSmrg {
1641debfc3dSmrg {'0', '\0'},
1651debfc3dSmrg {'b', '\b'},
1661debfc3dSmrg {'f', '\f'},
1671debfc3dSmrg {'n', '\n'},
1681debfc3dSmrg {'r', '\r'},
1691debfc3dSmrg {'t', '\t'},
1701debfc3dSmrg {'v', '\v'},
1711debfc3dSmrg {'\0', '\0'},
1721debfc3dSmrg };
1731debfc3dSmrg const std::pair<char, char> _M_awk_escape_tbl[11] =
1741debfc3dSmrg {
1751debfc3dSmrg {'"', '"'},
1761debfc3dSmrg {'/', '/'},
1771debfc3dSmrg {'\\', '\\'},
1781debfc3dSmrg {'a', '\a'},
1791debfc3dSmrg {'b', '\b'},
1801debfc3dSmrg {'f', '\f'},
1811debfc3dSmrg {'n', '\n'},
1821debfc3dSmrg {'r', '\r'},
1831debfc3dSmrg {'t', '\t'},
1841debfc3dSmrg {'v', '\v'},
1851debfc3dSmrg {'\0', '\0'},
1861debfc3dSmrg };
1871debfc3dSmrg const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
1881debfc3dSmrg const char* _M_basic_spec_char = ".[\\*^$";
1891debfc3dSmrg const char* _M_extended_spec_char = ".[\\()*+?{|^$";
1901debfc3dSmrg
1911debfc3dSmrg _StateT _M_state;
1921debfc3dSmrg _FlagT _M_flags;
1931debfc3dSmrg _TokenT _M_token;
1941debfc3dSmrg const std::pair<char, char>* _M_escape_tbl;
1951debfc3dSmrg const char* _M_spec_char;
1961debfc3dSmrg bool _M_at_bracket_start;
1971debfc3dSmrg };
1981debfc3dSmrg
1991debfc3dSmrg /**
2001debfc3dSmrg * @brief Scans an input range for regex tokens.
2011debfc3dSmrg *
2021debfc3dSmrg * The %_Scanner class interprets the regular expression pattern in
2031debfc3dSmrg * the input range passed to its constructor as a sequence of parse
2041debfc3dSmrg * tokens passed to the regular expression compiler. The sequence
2051debfc3dSmrg * of tokens provided depends on the flag settings passed to the
2061debfc3dSmrg * constructor: different regular expression grammars will interpret
2071debfc3dSmrg * the same input pattern in syntactically different ways.
2081debfc3dSmrg */
2091debfc3dSmrg template<typename _CharT>
2101debfc3dSmrg class _Scanner
2111debfc3dSmrg : public _ScannerBase
2121debfc3dSmrg {
2131debfc3dSmrg public:
2141debfc3dSmrg typedef const _CharT* _IterT;
2151debfc3dSmrg typedef std::basic_string<_CharT> _StringT;
2161debfc3dSmrg typedef regex_constants::syntax_option_type _FlagT;
2171debfc3dSmrg typedef const std::ctype<_CharT> _CtypeT;
2181debfc3dSmrg
2191debfc3dSmrg _Scanner(_IterT __begin, _IterT __end,
2201debfc3dSmrg _FlagT __flags, std::locale __loc);
2211debfc3dSmrg
2221debfc3dSmrg void
2231debfc3dSmrg _M_advance();
2241debfc3dSmrg
2251debfc3dSmrg _TokenT
226*23f5f463Smrg _M_get_token() const noexcept
2271debfc3dSmrg { return _M_token; }
2281debfc3dSmrg
2291debfc3dSmrg const _StringT&
230*23f5f463Smrg _M_get_value() const noexcept
2311debfc3dSmrg { return _M_value; }
2321debfc3dSmrg
2331debfc3dSmrg #ifdef _GLIBCXX_DEBUG
2341debfc3dSmrg std::ostream&
2351debfc3dSmrg _M_print(std::ostream&);
2361debfc3dSmrg #endif
2371debfc3dSmrg
2381debfc3dSmrg private:
2391debfc3dSmrg void
2401debfc3dSmrg _M_scan_normal();
2411debfc3dSmrg
2421debfc3dSmrg void
2431debfc3dSmrg _M_scan_in_bracket();
2441debfc3dSmrg
2451debfc3dSmrg void
2461debfc3dSmrg _M_scan_in_brace();
2471debfc3dSmrg
2481debfc3dSmrg void
2491debfc3dSmrg _M_eat_escape_ecma();
2501debfc3dSmrg
2511debfc3dSmrg void
2521debfc3dSmrg _M_eat_escape_posix();
2531debfc3dSmrg
2541debfc3dSmrg void
2551debfc3dSmrg _M_eat_escape_awk();
2561debfc3dSmrg
2571debfc3dSmrg void
2581debfc3dSmrg _M_eat_class(char);
2591debfc3dSmrg
2601debfc3dSmrg _IterT _M_current;
2611debfc3dSmrg _IterT _M_end;
2621debfc3dSmrg _CtypeT& _M_ctype;
2631debfc3dSmrg _StringT _M_value;
2641debfc3dSmrg void (_Scanner::* _M_eat_escape)();
2651debfc3dSmrg };
2661debfc3dSmrg
2678feb0f0bSmrg ///@} regex-detail
2681debfc3dSmrg } // namespace __detail
269a2dc1f3fSmrg _GLIBCXX_END_NAMESPACE_VERSION
2701debfc3dSmrg } // namespace std
2711debfc3dSmrg
2721debfc3dSmrg #include <bits/regex_scanner.tcc>
273