xref: /dflybsd-src/contrib/gcc-8.0/libstdc++-v3/include/bits/regex_scanner.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj // class template regex -*- C++ -*-
2*38fd1498Szrj 
3*38fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc.
4*38fd1498Szrj //
5*38fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
6*38fd1498Szrj // software; you can redistribute it and/or modify it under the
7*38fd1498Szrj // terms of the GNU General Public License as published by the
8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj // any later version.
10*38fd1498Szrj 
11*38fd1498Szrj // This library is distributed in the hope that it will be useful,
12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj // GNU General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
18*38fd1498Szrj // 3.1, as published by the Free Software Foundation.
19*38fd1498Szrj 
20*38fd1498Szrj // You should have received a copy of the GNU General Public License and
21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*38fd1498Szrj // <http://www.gnu.org/licenses/>.
24*38fd1498Szrj 
25*38fd1498Szrj /**
26*38fd1498Szrj  *  @file bits/regex_scanner.h
27*38fd1498Szrj  *  This is an internal header file, included by other library headers.
28*38fd1498Szrj  *  Do not attempt to use it directly. @headername{regex}
29*38fd1498Szrj  */
30*38fd1498Szrj 
_GLIBCXX_VISIBILITY(default)31*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
32*38fd1498Szrj {
33*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
34*38fd1498Szrj 
35*38fd1498Szrj namespace __detail
36*38fd1498Szrj {
37*38fd1498Szrj   /**
38*38fd1498Szrj    * @addtogroup regex-detail
39*38fd1498Szrj    * @{
40*38fd1498Szrj    */
41*38fd1498Szrj 
42*38fd1498Szrj   struct _ScannerBase
43*38fd1498Szrj   {
44*38fd1498Szrj   public:
45*38fd1498Szrj     /// Token types returned from the scanner.
46*38fd1498Szrj     enum _TokenT : unsigned
47*38fd1498Szrj     {
48*38fd1498Szrj       _S_token_anychar,
49*38fd1498Szrj       _S_token_ord_char,
50*38fd1498Szrj       _S_token_oct_num,
51*38fd1498Szrj       _S_token_hex_num,
52*38fd1498Szrj       _S_token_backref,
53*38fd1498Szrj       _S_token_subexpr_begin,
54*38fd1498Szrj       _S_token_subexpr_no_group_begin,
55*38fd1498Szrj       _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
56*38fd1498Szrj       _S_token_subexpr_end,
57*38fd1498Szrj       _S_token_bracket_begin,
58*38fd1498Szrj       _S_token_bracket_neg_begin,
59*38fd1498Szrj       _S_token_bracket_end,
60*38fd1498Szrj       _S_token_interval_begin,
61*38fd1498Szrj       _S_token_interval_end,
62*38fd1498Szrj       _S_token_quoted_class,
63*38fd1498Szrj       _S_token_char_class_name,
64*38fd1498Szrj       _S_token_collsymbol,
65*38fd1498Szrj       _S_token_equiv_class_name,
66*38fd1498Szrj       _S_token_opt,
67*38fd1498Szrj       _S_token_or,
68*38fd1498Szrj       _S_token_closure0,
69*38fd1498Szrj       _S_token_closure1,
70*38fd1498Szrj       _S_token_line_begin,
71*38fd1498Szrj       _S_token_line_end,
72*38fd1498Szrj       _S_token_word_bound, // neg if _M_value[0] == 'n'
73*38fd1498Szrj       _S_token_comma,
74*38fd1498Szrj       _S_token_dup_count,
75*38fd1498Szrj       _S_token_eof,
76*38fd1498Szrj       _S_token_bracket_dash,
77*38fd1498Szrj       _S_token_unknown = -1u
78*38fd1498Szrj     };
79*38fd1498Szrj 
80*38fd1498Szrj   protected:
81*38fd1498Szrj     typedef regex_constants::syntax_option_type _FlagT;
82*38fd1498Szrj 
83*38fd1498Szrj     enum _StateT
84*38fd1498Szrj     {
85*38fd1498Szrj       _S_state_normal,
86*38fd1498Szrj       _S_state_in_brace,
87*38fd1498Szrj       _S_state_in_bracket,
88*38fd1498Szrj     };
89*38fd1498Szrj 
90*38fd1498Szrj   protected:
91*38fd1498Szrj     _ScannerBase(_FlagT __flags)
92*38fd1498Szrj     : _M_state(_S_state_normal),
93*38fd1498Szrj     _M_flags(__flags),
94*38fd1498Szrj     _M_escape_tbl(_M_is_ecma()
95*38fd1498Szrj 		  ? _M_ecma_escape_tbl
96*38fd1498Szrj 		  : _M_awk_escape_tbl),
97*38fd1498Szrj     _M_spec_char(_M_is_ecma()
98*38fd1498Szrj 		 ? _M_ecma_spec_char
99*38fd1498Szrj 		 : _M_flags & regex_constants::basic
100*38fd1498Szrj 		 ? _M_basic_spec_char
101*38fd1498Szrj 		 : _M_flags & regex_constants::extended
102*38fd1498Szrj 		 ? _M_extended_spec_char
103*38fd1498Szrj 		 : _M_flags & regex_constants::grep
104*38fd1498Szrj 		 ?  ".[\\*^$\n"
105*38fd1498Szrj 		 : _M_flags & regex_constants::egrep
106*38fd1498Szrj 		 ? ".[\\()*+?{|^$\n"
107*38fd1498Szrj 		 : _M_flags & regex_constants::awk
108*38fd1498Szrj 		 ? _M_extended_spec_char
109*38fd1498Szrj 		 : nullptr),
110*38fd1498Szrj     _M_at_bracket_start(false)
111*38fd1498Szrj     { __glibcxx_assert(_M_spec_char); }
112*38fd1498Szrj 
113*38fd1498Szrj   protected:
114*38fd1498Szrj     const char*
115*38fd1498Szrj     _M_find_escape(char __c)
116*38fd1498Szrj     {
117*38fd1498Szrj       auto __it = _M_escape_tbl;
118*38fd1498Szrj       for (; __it->first != '\0'; ++__it)
119*38fd1498Szrj 	if (__it->first == __c)
120*38fd1498Szrj 	  return &__it->second;
121*38fd1498Szrj       return nullptr;
122*38fd1498Szrj     }
123*38fd1498Szrj 
124*38fd1498Szrj     bool
125*38fd1498Szrj     _M_is_ecma() const
126*38fd1498Szrj     { return _M_flags & regex_constants::ECMAScript; }
127*38fd1498Szrj 
128*38fd1498Szrj     bool
129*38fd1498Szrj     _M_is_basic() const
130*38fd1498Szrj     { return _M_flags & (regex_constants::basic | regex_constants::grep); }
131*38fd1498Szrj 
132*38fd1498Szrj     bool
133*38fd1498Szrj     _M_is_extended() const
134*38fd1498Szrj     {
135*38fd1498Szrj       return _M_flags & (regex_constants::extended
136*38fd1498Szrj 			 | regex_constants::egrep
137*38fd1498Szrj 			 | regex_constants::awk);
138*38fd1498Szrj     }
139*38fd1498Szrj 
140*38fd1498Szrj     bool
141*38fd1498Szrj     _M_is_grep() const
142*38fd1498Szrj     { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
143*38fd1498Szrj 
144*38fd1498Szrj     bool
145*38fd1498Szrj     _M_is_awk() const
146*38fd1498Szrj     { return _M_flags & regex_constants::awk; }
147*38fd1498Szrj 
148*38fd1498Szrj   protected:
149*38fd1498Szrj     // TODO: Make them static in the next abi change.
150*38fd1498Szrj     const std::pair<char, _TokenT> _M_token_tbl[9] =
151*38fd1498Szrj       {
152*38fd1498Szrj 	{'^', _S_token_line_begin},
153*38fd1498Szrj 	{'$', _S_token_line_end},
154*38fd1498Szrj 	{'.', _S_token_anychar},
155*38fd1498Szrj 	{'*', _S_token_closure0},
156*38fd1498Szrj 	{'+', _S_token_closure1},
157*38fd1498Szrj 	{'?', _S_token_opt},
158*38fd1498Szrj 	{'|', _S_token_or},
159*38fd1498Szrj 	{'\n', _S_token_or}, // grep and egrep
160*38fd1498Szrj 	{'\0', _S_token_or},
161*38fd1498Szrj       };
162*38fd1498Szrj     const std::pair<char, char> _M_ecma_escape_tbl[8] =
163*38fd1498Szrj       {
164*38fd1498Szrj 	{'0', '\0'},
165*38fd1498Szrj 	{'b', '\b'},
166*38fd1498Szrj 	{'f', '\f'},
167*38fd1498Szrj 	{'n', '\n'},
168*38fd1498Szrj 	{'r', '\r'},
169*38fd1498Szrj 	{'t', '\t'},
170*38fd1498Szrj 	{'v', '\v'},
171*38fd1498Szrj 	{'\0', '\0'},
172*38fd1498Szrj       };
173*38fd1498Szrj     const std::pair<char, char> _M_awk_escape_tbl[11] =
174*38fd1498Szrj       {
175*38fd1498Szrj 	{'"', '"'},
176*38fd1498Szrj 	{'/', '/'},
177*38fd1498Szrj 	{'\\', '\\'},
178*38fd1498Szrj 	{'a', '\a'},
179*38fd1498Szrj 	{'b', '\b'},
180*38fd1498Szrj 	{'f', '\f'},
181*38fd1498Szrj 	{'n', '\n'},
182*38fd1498Szrj 	{'r', '\r'},
183*38fd1498Szrj 	{'t', '\t'},
184*38fd1498Szrj 	{'v', '\v'},
185*38fd1498Szrj 	{'\0', '\0'},
186*38fd1498Szrj       };
187*38fd1498Szrj     const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
188*38fd1498Szrj     const char* _M_basic_spec_char = ".[\\*^$";
189*38fd1498Szrj     const char* _M_extended_spec_char = ".[\\()*+?{|^$";
190*38fd1498Szrj 
191*38fd1498Szrj     _StateT                       _M_state;
192*38fd1498Szrj     _FlagT                        _M_flags;
193*38fd1498Szrj     _TokenT                       _M_token;
194*38fd1498Szrj     const std::pair<char, char>*  _M_escape_tbl;
195*38fd1498Szrj     const char*                   _M_spec_char;
196*38fd1498Szrj     bool                          _M_at_bracket_start;
197*38fd1498Szrj   };
198*38fd1498Szrj 
199*38fd1498Szrj   /**
200*38fd1498Szrj    * @brief Scans an input range for regex tokens.
201*38fd1498Szrj    *
202*38fd1498Szrj    * The %_Scanner class interprets the regular expression pattern in
203*38fd1498Szrj    * the input range passed to its constructor as a sequence of parse
204*38fd1498Szrj    * tokens passed to the regular expression compiler.  The sequence
205*38fd1498Szrj    * of tokens provided depends on the flag settings passed to the
206*38fd1498Szrj    * constructor: different regular expression grammars will interpret
207*38fd1498Szrj    * the same input pattern in syntactically different ways.
208*38fd1498Szrj    */
209*38fd1498Szrj   template<typename _CharT>
210*38fd1498Szrj     class _Scanner
211*38fd1498Szrj     : public _ScannerBase
212*38fd1498Szrj     {
213*38fd1498Szrj     public:
214*38fd1498Szrj       typedef const _CharT*                                       _IterT;
215*38fd1498Szrj       typedef std::basic_string<_CharT>                           _StringT;
216*38fd1498Szrj       typedef regex_constants::syntax_option_type                 _FlagT;
217*38fd1498Szrj       typedef const std::ctype<_CharT>                            _CtypeT;
218*38fd1498Szrj 
219*38fd1498Szrj       _Scanner(_IterT __begin, _IterT __end,
220*38fd1498Szrj 	       _FlagT __flags, std::locale __loc);
221*38fd1498Szrj 
222*38fd1498Szrj       void
223*38fd1498Szrj       _M_advance();
224*38fd1498Szrj 
225*38fd1498Szrj       _TokenT
226*38fd1498Szrj       _M_get_token() const
227*38fd1498Szrj       { return _M_token; }
228*38fd1498Szrj 
229*38fd1498Szrj       const _StringT&
230*38fd1498Szrj       _M_get_value() const
231*38fd1498Szrj       { return _M_value; }
232*38fd1498Szrj 
233*38fd1498Szrj #ifdef _GLIBCXX_DEBUG
234*38fd1498Szrj       std::ostream&
235*38fd1498Szrj       _M_print(std::ostream&);
236*38fd1498Szrj #endif
237*38fd1498Szrj 
238*38fd1498Szrj     private:
239*38fd1498Szrj       void
240*38fd1498Szrj       _M_scan_normal();
241*38fd1498Szrj 
242*38fd1498Szrj       void
243*38fd1498Szrj       _M_scan_in_bracket();
244*38fd1498Szrj 
245*38fd1498Szrj       void
246*38fd1498Szrj       _M_scan_in_brace();
247*38fd1498Szrj 
248*38fd1498Szrj       void
249*38fd1498Szrj       _M_eat_escape_ecma();
250*38fd1498Szrj 
251*38fd1498Szrj       void
252*38fd1498Szrj       _M_eat_escape_posix();
253*38fd1498Szrj 
254*38fd1498Szrj       void
255*38fd1498Szrj       _M_eat_escape_awk();
256*38fd1498Szrj 
257*38fd1498Szrj       void
258*38fd1498Szrj       _M_eat_class(char);
259*38fd1498Szrj 
260*38fd1498Szrj       _IterT                        _M_current;
261*38fd1498Szrj       _IterT                        _M_end;
262*38fd1498Szrj       _CtypeT&                      _M_ctype;
263*38fd1498Szrj       _StringT                      _M_value;
264*38fd1498Szrj       void (_Scanner::* _M_eat_escape)();
265*38fd1498Szrj     };
266*38fd1498Szrj 
267*38fd1498Szrj  //@} regex-detail
268*38fd1498Szrj } // namespace __detail
269*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
270*38fd1498Szrj } // namespace std
271*38fd1498Szrj 
272*38fd1498Szrj #include <bits/regex_scanner.tcc>
273