// class template regex -*- C++ -*-

// Copyright (C) 2013-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/**
 * @file bits/regex_scanner.h
 * This is an internal header file, included by other library headers.
 * Do not attempt to use it directly. @headername{regex}
 */
30*38fd1498Szrj
_GLIBCXX_VISIBILITY(default)31*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
32*38fd1498Szrj {
33*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
34*38fd1498Szrj
35*38fd1498Szrj namespace __detail
36*38fd1498Szrj {
37*38fd1498Szrj /**
38*38fd1498Szrj * @addtogroup regex-detail
39*38fd1498Szrj * @{
40*38fd1498Szrj */
41*38fd1498Szrj
/**
 * @brief Character-type independent part of the regex scanner.
 *
 * Holds the token and scanner-state enumerations, the syntax flags, and
 * the lookup tables (single-character tokens, escape sequences, special
 * characters).  The constructor picks the escape table and the
 * special-character set that match the grammar bits found in the flags.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u // initial value of _M_token (set by the ctor)
  };

protected:
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner states; the constructor starts in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Selects the lookup tables for the grammar named in @p __flags.
   *
   * The escape table is the ECMAScript one when the ECMAScript bit is
   * set and the awk one for every other grammar.  The special-character
   * set is chosen by testing the grammar bits in this order: ECMAScript,
   * basic, extended, grep, egrep, awk.  grep and egrep use the basic and
   * extended sets with '\n' appended.  If none of these bits is set the
   * set would be null, which the assertion in the body rejects.
   *
   * @param __flags  Syntax options; must contain one of the grammar bits.
   */
  explicit
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    // Give the current token a defined value: it used to be left
    // indeterminate until something assigned it.
    _M_token(_S_token_unknown),
    _M_escape_tbl(_M_is_ecma()
		  ? _M_ecma_escape_tbl
		  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
		 ? _M_ecma_spec_char
		 : _M_flags & std::regex_constants::basic
		 ? _M_basic_spec_char
		 : _M_flags & std::regex_constants::extended
		 ? _M_extended_spec_char
		 : _M_flags & std::regex_constants::grep
		 ? ".[\\*^$\n"
		 : _M_flags & std::regex_constants::egrep
		 ? ".[\\()*+?{|^$\n"
		 : _M_flags & std::regex_constants::awk
		 ? _M_extended_spec_char
		 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up @p __c in the active escape table.
   *
   * The table is walked up to (not including) its terminating entry,
   * whose key is '\0'; a '\0' argument therefore never matches.
   *
   * @return Pointer to the replacement character stored in the table, or
   *         a null pointer when @p __c has no entry.
   */
  const char*
  _M_find_escape(char __c) const
  {
    for (auto __it = _M_escape_tbl; __it->first != '\0'; ++__it)
      if (__it->first == __c)
	return &__it->second;
    return nullptr;
  }

  /// True when the ECMAScript grammar bit is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True when the basic or grep grammar bit is set.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
		       | std::regex_constants::grep);
  }

  /// True when the extended, egrep or awk grammar bit is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
		       | std::regex_constants::egrep
		       | std::regex_constants::awk);
  }

  /// True when the grep or egrep grammar bit is set.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
		       | std::regex_constants::egrep);
  }

  /// True when the awk grammar bit is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  // Single characters that map directly to a token.  The last entry
  // (key '\0') terminates the table.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  // Escape letter -> character it stands for; used when the ECMAScript
  // bit is set.  The last entry (key '\0') terminates the table.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Escape letter -> character it stands for; used for every grammar
  // other than ECMAScript.  The last entry (key '\0') terminates it.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Special-character sets the constructor chooses from.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT _M_state;
  _FlagT _M_flags;
  _TokenT _M_token;
  // Points at one of this object's own escape tables above.  They are
  // non-static members, so the pointer is only meaningful while the
  // object it was set up in is alive (a copy would keep referring to
  // the source object's table).
  const std::pair<char, char>* _M_escape_tbl;
  const char* _M_spec_char;
  bool _M_at_bracket_start;
};
198*38fd1498Szrj
199*38fd1498Szrj /**
200*38fd1498Szrj * @brief Scans an input range for regex tokens.
201*38fd1498Szrj *
202*38fd1498Szrj * The %_Scanner class interprets the regular expression pattern in
203*38fd1498Szrj * the input range passed to its constructor as a sequence of parse
204*38fd1498Szrj * tokens passed to the regular expression compiler. The sequence
205*38fd1498Szrj * of tokens provided depends on the flag settings passed to the
206*38fd1498Szrj * constructor: different regular expression grammars will interpret
207*38fd1498Szrj * the same input pattern in syntactically different ways.
208*38fd1498Szrj */
209*38fd1498Szrj template<typename _CharT>
210*38fd1498Szrj class _Scanner
211*38fd1498Szrj : public _ScannerBase
212*38fd1498Szrj {
213*38fd1498Szrj public:
214*38fd1498Szrj typedef const _CharT* _IterT;
215*38fd1498Szrj typedef std::basic_string<_CharT> _StringT;
216*38fd1498Szrj typedef regex_constants::syntax_option_type _FlagT;
217*38fd1498Szrj typedef const std::ctype<_CharT> _CtypeT;
218*38fd1498Szrj
219*38fd1498Szrj _Scanner(_IterT __begin, _IterT __end,
220*38fd1498Szrj _FlagT __flags, std::locale __loc);
221*38fd1498Szrj
222*38fd1498Szrj void
223*38fd1498Szrj _M_advance();
224*38fd1498Szrj
225*38fd1498Szrj _TokenT
226*38fd1498Szrj _M_get_token() const
227*38fd1498Szrj { return _M_token; }
228*38fd1498Szrj
229*38fd1498Szrj const _StringT&
230*38fd1498Szrj _M_get_value() const
231*38fd1498Szrj { return _M_value; }
232*38fd1498Szrj
233*38fd1498Szrj #ifdef _GLIBCXX_DEBUG
234*38fd1498Szrj std::ostream&
235*38fd1498Szrj _M_print(std::ostream&);
236*38fd1498Szrj #endif
237*38fd1498Szrj
238*38fd1498Szrj private:
239*38fd1498Szrj void
240*38fd1498Szrj _M_scan_normal();
241*38fd1498Szrj
242*38fd1498Szrj void
243*38fd1498Szrj _M_scan_in_bracket();
244*38fd1498Szrj
245*38fd1498Szrj void
246*38fd1498Szrj _M_scan_in_brace();
247*38fd1498Szrj
248*38fd1498Szrj void
249*38fd1498Szrj _M_eat_escape_ecma();
250*38fd1498Szrj
251*38fd1498Szrj void
252*38fd1498Szrj _M_eat_escape_posix();
253*38fd1498Szrj
254*38fd1498Szrj void
255*38fd1498Szrj _M_eat_escape_awk();
256*38fd1498Szrj
257*38fd1498Szrj void
258*38fd1498Szrj _M_eat_class(char);
259*38fd1498Szrj
260*38fd1498Szrj _IterT _M_current;
261*38fd1498Szrj _IterT _M_end;
262*38fd1498Szrj _CtypeT& _M_ctype;
263*38fd1498Szrj _StringT _M_value;
264*38fd1498Szrj void (_Scanner::* _M_eat_escape)();
265*38fd1498Szrj };
266*38fd1498Szrj
267*38fd1498Szrj //@} regex-detail
268*38fd1498Szrj } // namespace __detail
269*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
270*38fd1498Szrj } // namespace std
271*38fd1498Szrj
272*38fd1498Szrj #include <bits/regex_scanner.tcc>
273