1*e4b17023SJohn Marino // class template regex -*- C++ -*-
2*e4b17023SJohn Marino
3*e4b17023SJohn Marino // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4*e4b17023SJohn Marino //
5*e4b17023SJohn Marino // This file is part of the GNU ISO C++ Library. This library is free
6*e4b17023SJohn Marino // software; you can redistribute it and/or modify it under the
7*e4b17023SJohn Marino // terms of the GNU General Public License as published by the
8*e4b17023SJohn Marino // Free Software Foundation; either version 3, or (at your option)
9*e4b17023SJohn Marino // any later version.
10*e4b17023SJohn Marino
11*e4b17023SJohn Marino // This library is distributed in the hope that it will be useful,
12*e4b17023SJohn Marino // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*e4b17023SJohn Marino // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*e4b17023SJohn Marino // GNU General Public License for more details.
15*e4b17023SJohn Marino
16*e4b17023SJohn Marino // Under Section 7 of GPL version 3, you are granted additional
17*e4b17023SJohn Marino // permissions described in the GCC Runtime Library Exception, version
18*e4b17023SJohn Marino // 3.1, as published by the Free Software Foundation.
19*e4b17023SJohn Marino
20*e4b17023SJohn Marino // You should have received a copy of the GNU General Public License and
21*e4b17023SJohn Marino // a copy of the GCC Runtime Library Exception along with this program;
22*e4b17023SJohn Marino // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23*e4b17023SJohn Marino // <http://www.gnu.org/licenses/>.
24*e4b17023SJohn Marino
25*e4b17023SJohn Marino /**
26*e4b17023SJohn Marino * @file bits/regex_nfa.h
27*e4b17023SJohn Marino * This is an internal header file, included by other library headers.
28*e4b17023SJohn Marino * Do not attempt to use it directly. @headername{regex}
29*e4b17023SJohn Marino */
30*e4b17023SJohn Marino
_GLIBCXX_VISIBILITY(default)31*e4b17023SJohn Marino namespace std _GLIBCXX_VISIBILITY(default)
32*e4b17023SJohn Marino {
33*e4b17023SJohn Marino namespace __regex
34*e4b17023SJohn Marino {
35*e4b17023SJohn Marino _GLIBCXX_BEGIN_NAMESPACE_VERSION
36*e4b17023SJohn Marino
// Abstract root of the automaton hierarchy.  A concrete automaton (an NFA
// or a DFA) derives from this class and is used through this interface.
class _Automaton
{
public:
  // Unsigned type used for counts reported by an automaton.
  using _SizeT = unsigned int;

  // Virtual so a derived automaton is destroyed correctly through a
  // pointer to this base.
  virtual ~_Automaton()
  { }

  // Returns the sub-expression count; supplied by the derived class.
  virtual _SizeT _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
  // Debug builds only: writes a description of the automaton to __ostr
  // (presumably graphviz dot, as for the per-state _M_dot) and returns a
  // stream reference.
  virtual std::ostream& _M_dot(std::ostream& __ostr) const = 0;
#endif
};
55*e4b17023SJohn Marino
56*e4b17023SJohn Marino // Generic shared pointer to an automaton.
57*e4b17023SJohn Marino typedef std::shared_ptr<_Automaton> _AutomatonPtr;
58*e4b17023SJohn Marino
// Kinds of transition found in the base NFA that represents a regular
// expression.  Each NFA state stores one of these values as its opcode.
enum _Opcode
{
  _S_opcode_unknown       =   0,
  _S_opcode_alternative   =   1, // state with two outgoing transitions
  _S_opcode_subexpr_begin =   4, // start of a sub-expression
  _S_opcode_subexpr_end   =   5, // end of a sub-expression
  _S_opcode_match         = 100, // state guarded by a matcher
  _S_opcode_accept        = 255  // accepting state, no outgoing transition
};
70*e4b17023SJohn Marino
71*e4b17023SJohn Marino // Provides a generic facade for a templated match_results.
72*e4b17023SJohn Marino struct _Results
73*e4b17023SJohn Marino {
74*e4b17023SJohn Marino virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
75*e4b17023SJohn Marino virtual void _M_set_matched(int __i, bool __is_matched) = 0;
76*e4b17023SJohn Marino };
77*e4b17023SJohn Marino
78*e4b17023SJohn Marino // Tags current state (for subexpr begin/end).
79*e4b17023SJohn Marino typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
80*e4b17023SJohn Marino
81*e4b17023SJohn Marino template<typename _FwdIterT, typename _TraitsT>
82*e4b17023SJohn Marino struct _StartTagger
83*e4b17023SJohn Marino {
84*e4b17023SJohn Marino explicit
85*e4b17023SJohn Marino _StartTagger(int __i)
86*e4b17023SJohn Marino : _M_index(__i)
87*e4b17023SJohn Marino { }
88*e4b17023SJohn Marino
89*e4b17023SJohn Marino void
90*e4b17023SJohn Marino operator()(const _PatternCursor& __pc, _Results& __r)
91*e4b17023SJohn Marino { __r._M_set_pos(_M_index, 0, __pc); }
92*e4b17023SJohn Marino
93*e4b17023SJohn Marino int _M_index;
94*e4b17023SJohn Marino };
95*e4b17023SJohn Marino
96*e4b17023SJohn Marino template<typename _FwdIterT, typename _TraitsT>
97*e4b17023SJohn Marino struct _EndTagger
98*e4b17023SJohn Marino {
99*e4b17023SJohn Marino explicit
100*e4b17023SJohn Marino _EndTagger(int __i)
101*e4b17023SJohn Marino : _M_index(__i)
102*e4b17023SJohn Marino { }
103*e4b17023SJohn Marino
104*e4b17023SJohn Marino void
105*e4b17023SJohn Marino operator()(const _PatternCursor& __pc, _Results& __r)
106*e4b17023SJohn Marino { __r._M_set_pos(_M_index, 1, __pc); }
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino int _M_index;
109*e4b17023SJohn Marino _FwdIterT _M_pos;
110*e4b17023SJohn Marino };
111*e4b17023SJohn Marino // Indicates if current state matches cursor current.
112*e4b17023SJohn Marino typedef std::function<bool (const _PatternCursor&)> _Matcher;
113*e4b17023SJohn Marino
114*e4b17023SJohn Marino // Matches any character
115*e4b17023SJohn Marino inline bool
116*e4b17023SJohn Marino _AnyMatcher(const _PatternCursor&)
117*e4b17023SJohn Marino { return true; }
118*e4b17023SJohn Marino
119*e4b17023SJohn Marino // Matches a single character
120*e4b17023SJohn Marino template<typename _InIterT, typename _TraitsT>
121*e4b17023SJohn Marino struct _CharMatcher
122*e4b17023SJohn Marino {
123*e4b17023SJohn Marino typedef typename _TraitsT::char_type char_type;
124*e4b17023SJohn Marino
125*e4b17023SJohn Marino explicit
126*e4b17023SJohn Marino _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
127*e4b17023SJohn Marino : _M_traits(__t), _M_c(_M_traits.translate(__c))
128*e4b17023SJohn Marino { }
129*e4b17023SJohn Marino
130*e4b17023SJohn Marino bool
131*e4b17023SJohn Marino operator()(const _PatternCursor& __pc) const
132*e4b17023SJohn Marino {
133*e4b17023SJohn Marino typedef const _SpecializedCursor<_InIterT>& _CursorT;
134*e4b17023SJohn Marino _CursorT __c = static_cast<_CursorT>(__pc);
135*e4b17023SJohn Marino return _M_traits.translate(__c._M_current()) == _M_c;
136*e4b17023SJohn Marino }
137*e4b17023SJohn Marino
138*e4b17023SJohn Marino const _TraitsT& _M_traits;
139*e4b17023SJohn Marino char_type _M_c;
140*e4b17023SJohn Marino };
141*e4b17023SJohn Marino
142*e4b17023SJohn Marino // Matches a character range (bracket expression)
143*e4b17023SJohn Marino template<typename _InIterT, typename _TraitsT>
144*e4b17023SJohn Marino struct _RangeMatcher
145*e4b17023SJohn Marino {
146*e4b17023SJohn Marino typedef typename _TraitsT::char_type _CharT;
147*e4b17023SJohn Marino typedef std::basic_string<_CharT> _StringT;
148*e4b17023SJohn Marino
149*e4b17023SJohn Marino explicit
150*e4b17023SJohn Marino _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
151*e4b17023SJohn Marino : _M_traits(__t), _M_is_non_matching(__is_non_matching)
152*e4b17023SJohn Marino { }
153*e4b17023SJohn Marino
154*e4b17023SJohn Marino bool
155*e4b17023SJohn Marino operator()(const _PatternCursor& __pc) const
156*e4b17023SJohn Marino {
157*e4b17023SJohn Marino typedef const _SpecializedCursor<_InIterT>& _CursorT;
158*e4b17023SJohn Marino _CursorT __c = static_cast<_CursorT>(__pc);
159*e4b17023SJohn Marino return true;
160*e4b17023SJohn Marino }
161*e4b17023SJohn Marino
162*e4b17023SJohn Marino void
163*e4b17023SJohn Marino _M_add_char(_CharT __c)
164*e4b17023SJohn Marino { }
165*e4b17023SJohn Marino
166*e4b17023SJohn Marino void
167*e4b17023SJohn Marino _M_add_collating_element(const _StringT& __s)
168*e4b17023SJohn Marino { }
169*e4b17023SJohn Marino
170*e4b17023SJohn Marino void
171*e4b17023SJohn Marino _M_add_equivalence_class(const _StringT& __s)
172*e4b17023SJohn Marino { }
173*e4b17023SJohn Marino
174*e4b17023SJohn Marino void
175*e4b17023SJohn Marino _M_add_character_class(const _StringT& __s)
176*e4b17023SJohn Marino { }
177*e4b17023SJohn Marino
178*e4b17023SJohn Marino void
179*e4b17023SJohn Marino _M_make_range()
180*e4b17023SJohn Marino { }
181*e4b17023SJohn Marino
182*e4b17023SJohn Marino const _TraitsT& _M_traits;
183*e4b17023SJohn Marino bool _M_is_non_matching;
184*e4b17023SJohn Marino };
185*e4b17023SJohn Marino
// Type used to identify a state in the NFA.
using _StateIdT = int;

// Reserved identifier for the one case in which a state identifier is not
// an index, i.e. it refers to no state.
static const _StateIdT _S_invalid_state_id = -1;
191*e4b17023SJohn Marino
192*e4b17023SJohn Marino
193*e4b17023SJohn Marino // An individual state in an NFA
194*e4b17023SJohn Marino //
195*e4b17023SJohn Marino // In this case a "state" is an entry in the NFA definition coupled with its
196*e4b17023SJohn Marino // outgoing transition(s). All states have a single outgoing transition,
197*e4b17023SJohn Marino // except for accepting states (which have no outgoing transitions) and alt
198*e4b17023SJohn Marino // states, which have two outgoing transitions.
199*e4b17023SJohn Marino //
200*e4b17023SJohn Marino struct _State
201*e4b17023SJohn Marino {
202*e4b17023SJohn Marino typedef int _OpcodeT;
203*e4b17023SJohn Marino
204*e4b17023SJohn Marino _OpcodeT _M_opcode; // type of outgoing transition
205*e4b17023SJohn Marino _StateIdT _M_next; // outgoing transition
206*e4b17023SJohn Marino _StateIdT _M_alt; // for _S_opcode_alternative
207*e4b17023SJohn Marino unsigned int _M_subexpr; // for _S_opcode_subexpr_*
208*e4b17023SJohn Marino _Tagger _M_tagger; // for _S_opcode_subexpr_*
209*e4b17023SJohn Marino _Matcher _M_matches; // for _S_opcode_match
210*e4b17023SJohn Marino
211*e4b17023SJohn Marino explicit _State(_OpcodeT __opcode)
212*e4b17023SJohn Marino : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
213*e4b17023SJohn Marino { }
214*e4b17023SJohn Marino
215*e4b17023SJohn Marino _State(const _Matcher& __m)
216*e4b17023SJohn Marino : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
217*e4b17023SJohn Marino { }
218*e4b17023SJohn Marino
219*e4b17023SJohn Marino _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
220*e4b17023SJohn Marino : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
221*e4b17023SJohn Marino _M_tagger(__t)
222*e4b17023SJohn Marino { }
223*e4b17023SJohn Marino
224*e4b17023SJohn Marino _State(_StateIdT __next, _StateIdT __alt)
225*e4b17023SJohn Marino : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
226*e4b17023SJohn Marino { }
227*e4b17023SJohn Marino
228*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG
229*e4b17023SJohn Marino std::ostream&
230*e4b17023SJohn Marino _M_print(std::ostream& ostr) const;
231*e4b17023SJohn Marino
232*e4b17023SJohn Marino // Prints graphviz dot commands for state.
233*e4b17023SJohn Marino std::ostream&
234*e4b17023SJohn Marino _M_dot(std::ostream& __ostr, _StateIdT __id) const;
235*e4b17023SJohn Marino #endif
236*e4b17023SJohn Marino };
237*e4b17023SJohn Marino
238*e4b17023SJohn Marino
239*e4b17023SJohn Marino // The Grep Matcher works on sets of states. Here are sets of states.
240*e4b17023SJohn Marino typedef std::set<_StateIdT> _StateSet;
241*e4b17023SJohn Marino
242*e4b17023SJohn Marino // A collection of all states making up an NFA
243*e4b17023SJohn Marino //
244*e4b17023SJohn Marino // An NFA is a 4-tuple M = (K, S, s, F), where
245*e4b17023SJohn Marino // K is a finite set of states,
246*e4b17023SJohn Marino // S is the alphabet of the NFA,
247*e4b17023SJohn Marino // s is the initial state,
248*e4b17023SJohn Marino // F is a set of final (accepting) states.
249*e4b17023SJohn Marino //
250*e4b17023SJohn Marino // This NFA class is templated on S, a type that will hold values of the
251*e4b17023SJohn Marino // underlying alphabet (without regard to semantics of that alphabet). The
252*e4b17023SJohn Marino // other elements of the tuple are generated during construction of the NFA
253*e4b17023SJohn Marino // and are available through accessor member functions.
254*e4b17023SJohn Marino //
255*e4b17023SJohn Marino class _Nfa
256*e4b17023SJohn Marino : public _Automaton, public std::vector<_State>
257*e4b17023SJohn Marino {
258*e4b17023SJohn Marino public:
259*e4b17023SJohn Marino typedef _State _StateT;
260*e4b17023SJohn Marino typedef unsigned int _SizeT;
261*e4b17023SJohn Marino typedef regex_constants::syntax_option_type _FlagT;
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino public:
264*e4b17023SJohn Marino _Nfa(_FlagT __f)
265*e4b17023SJohn Marino : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
266*e4b17023SJohn Marino { }
267*e4b17023SJohn Marino
268*e4b17023SJohn Marino ~_Nfa()
269*e4b17023SJohn Marino { }
270*e4b17023SJohn Marino
271*e4b17023SJohn Marino _FlagT
272*e4b17023SJohn Marino _M_options() const
273*e4b17023SJohn Marino { return _M_flags; }
274*e4b17023SJohn Marino
275*e4b17023SJohn Marino _StateIdT
276*e4b17023SJohn Marino _M_start() const
277*e4b17023SJohn Marino { return _M_start_state; }
278*e4b17023SJohn Marino
279*e4b17023SJohn Marino const _StateSet&
280*e4b17023SJohn Marino _M_final_states() const
281*e4b17023SJohn Marino { return _M_accepting_states; }
282*e4b17023SJohn Marino
283*e4b17023SJohn Marino _SizeT
284*e4b17023SJohn Marino _M_sub_count() const
285*e4b17023SJohn Marino { return _M_subexpr_count; }
286*e4b17023SJohn Marino
287*e4b17023SJohn Marino _StateIdT
288*e4b17023SJohn Marino _M_insert_accept()
289*e4b17023SJohn Marino {
290*e4b17023SJohn Marino this->push_back(_StateT(_S_opcode_accept));
291*e4b17023SJohn Marino _M_accepting_states.insert(this->size()-1);
292*e4b17023SJohn Marino return this->size()-1;
293*e4b17023SJohn Marino }
294*e4b17023SJohn Marino
295*e4b17023SJohn Marino _StateIdT
296*e4b17023SJohn Marino _M_insert_alt(_StateIdT __next, _StateIdT __alt)
297*e4b17023SJohn Marino {
298*e4b17023SJohn Marino this->push_back(_StateT(__next, __alt));
299*e4b17023SJohn Marino return this->size()-1;
300*e4b17023SJohn Marino }
301*e4b17023SJohn Marino
302*e4b17023SJohn Marino _StateIdT
303*e4b17023SJohn Marino _M_insert_matcher(_Matcher __m)
304*e4b17023SJohn Marino {
305*e4b17023SJohn Marino this->push_back(_StateT(__m));
306*e4b17023SJohn Marino return this->size()-1;
307*e4b17023SJohn Marino }
308*e4b17023SJohn Marino
309*e4b17023SJohn Marino _StateIdT
310*e4b17023SJohn Marino _M_insert_subexpr_begin(const _Tagger& __t)
311*e4b17023SJohn Marino {
312*e4b17023SJohn Marino this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, __t));
313*e4b17023SJohn Marino return this->size()-1;
314*e4b17023SJohn Marino }
315*e4b17023SJohn Marino
316*e4b17023SJohn Marino _StateIdT
317*e4b17023SJohn Marino _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
318*e4b17023SJohn Marino {
319*e4b17023SJohn Marino this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
320*e4b17023SJohn Marino return this->size()-1;
321*e4b17023SJohn Marino }
322*e4b17023SJohn Marino
323*e4b17023SJohn Marino #ifdef _GLIBCXX_DEBUG
324*e4b17023SJohn Marino std::ostream&
325*e4b17023SJohn Marino _M_dot(std::ostream& __ostr) const;
326*e4b17023SJohn Marino #endif
327*e4b17023SJohn Marino
328*e4b17023SJohn Marino private:
329*e4b17023SJohn Marino _FlagT _M_flags;
330*e4b17023SJohn Marino _StateIdT _M_start_state;
331*e4b17023SJohn Marino _StateSet _M_accepting_states;
332*e4b17023SJohn Marino _SizeT _M_subexpr_count;
333*e4b17023SJohn Marino };
334*e4b17023SJohn Marino
335*e4b17023SJohn Marino // Describes a sequence of one or more %_State, its current start and end(s).
336*e4b17023SJohn Marino //
337*e4b17023SJohn Marino // This structure contains fragments of an NFA during construction.
338*e4b17023SJohn Marino class _StateSeq
339*e4b17023SJohn Marino {
340*e4b17023SJohn Marino public:
341*e4b17023SJohn Marino // Constructs a single-node sequence
342*e4b17023SJohn Marino _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
343*e4b17023SJohn Marino : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
344*e4b17023SJohn Marino { }
345*e4b17023SJohn Marino // Constructs a split sequence from two other sequencces
346*e4b17023SJohn Marino _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
347*e4b17023SJohn Marino : _M_nfa(__e1._M_nfa),
348*e4b17023SJohn Marino _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
349*e4b17023SJohn Marino _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
350*e4b17023SJohn Marino { }
351*e4b17023SJohn Marino
352*e4b17023SJohn Marino // Constructs a split sequence from a single sequence
353*e4b17023SJohn Marino _StateSeq(const _StateSeq& __e, _StateIdT __id)
354*e4b17023SJohn Marino : _M_nfa(__e._M_nfa),
355*e4b17023SJohn Marino _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
356*e4b17023SJohn Marino _M_end1(__id), _M_end2(__e._M_end1)
357*e4b17023SJohn Marino { }
358*e4b17023SJohn Marino
359*e4b17023SJohn Marino // Constructs a copy of a %_StateSeq
360*e4b17023SJohn Marino _StateSeq(const _StateSeq& __rhs)
361*e4b17023SJohn Marino : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
362*e4b17023SJohn Marino _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
363*e4b17023SJohn Marino { }
364*e4b17023SJohn Marino
365*e4b17023SJohn Marino
366*e4b17023SJohn Marino _StateSeq& operator=(const _StateSeq& __rhs);
367*e4b17023SJohn Marino
368*e4b17023SJohn Marino _StateIdT
369*e4b17023SJohn Marino _M_front() const
370*e4b17023SJohn Marino { return _M_start; }
371*e4b17023SJohn Marino
372*e4b17023SJohn Marino // Extends a sequence by one.
373*e4b17023SJohn Marino void
374*e4b17023SJohn Marino _M_push_back(_StateIdT __id);
375*e4b17023SJohn Marino
376*e4b17023SJohn Marino // Extends and maybe joins a sequence.
377*e4b17023SJohn Marino void
378*e4b17023SJohn Marino _M_append(_StateIdT __id);
379*e4b17023SJohn Marino
380*e4b17023SJohn Marino void
381*e4b17023SJohn Marino _M_append(_StateSeq& __rhs);
382*e4b17023SJohn Marino
383*e4b17023SJohn Marino // Clones an entire sequence.
384*e4b17023SJohn Marino _StateIdT
385*e4b17023SJohn Marino _M_clone();
386*e4b17023SJohn Marino
387*e4b17023SJohn Marino private:
388*e4b17023SJohn Marino _Nfa& _M_nfa;
389*e4b17023SJohn Marino _StateIdT _M_start;
390*e4b17023SJohn Marino _StateIdT _M_end1;
391*e4b17023SJohn Marino _StateIdT _M_end2;
392*e4b17023SJohn Marino
393*e4b17023SJohn Marino };
394*e4b17023SJohn Marino
395*e4b17023SJohn Marino _GLIBCXX_END_NAMESPACE_VERSION
396*e4b17023SJohn Marino } // namespace __regex
397*e4b17023SJohn Marino } // namespace std
398*e4b17023SJohn Marino
399*e4b17023SJohn Marino #include <bits/regex_nfa.tcc>
400*e4b17023SJohn Marino
401