xref: /netbsd-src/external/gpl2/xcvs/dist/lib/regex_internal.c (revision 5a6c14c844c4c665da5632061aebde7bb2cb5766)
1a7c91847Schristos /* Extended regular expression matching and search library.
2a7c91847Schristos    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3a7c91847Schristos    This file is part of the GNU C Library.
4a7c91847Schristos    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5a7c91847Schristos 
6a7c91847Schristos    This program is free software; you can redistribute it and/or modify
7a7c91847Schristos    it under the terms of the GNU General Public License as published by
8a7c91847Schristos    the Free Software Foundation; either version 2, or (at your option)
9a7c91847Schristos    any later version.
10a7c91847Schristos 
11a7c91847Schristos    This program is distributed in the hope that it will be useful,
12a7c91847Schristos    but WITHOUT ANY WARRANTY; without even the implied warranty of
13a7c91847Schristos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14a7c91847Schristos    GNU General Public License for more details.
15a7c91847Schristos 
16a7c91847Schristos    You should have received a copy of the GNU General Public License along
17a7c91847Schristos    with this program; if not, write to the Free Software Foundation,
18a7c91847Schristos    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19*5a6c14c8Schristos #include <sys/cdefs.h>
20*5a6c14c8Schristos __RCSID("$NetBSD: regex_internal.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
21*5a6c14c8Schristos 
22a7c91847Schristos 
23a7c91847Schristos static void re_string_construct_common (const char *str, Idx len,
24a7c91847Schristos 					re_string_t *pstr,
25a7c91847Schristos 					REG_TRANSLATE_TYPE trans, bool icase,
26a7c91847Schristos 					const re_dfa_t *dfa) internal_function;
27a7c91847Schristos static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
28a7c91847Schristos 					  const re_node_set *nodes,
29a7c91847Schristos 					  re_hashval_t hash) internal_function;
30a7c91847Schristos static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
31a7c91847Schristos 					  const re_node_set *nodes,
32a7c91847Schristos 					  unsigned int context,
33a7c91847Schristos 					  re_hashval_t hash) internal_function;
34a7c91847Schristos 
35a7c91847Schristos /* Functions for string operation.  */
36a7c91847Schristos 
37a7c91847Schristos /* This function allocate the buffers.  It is necessary to call
38a7c91847Schristos    re_string_reconstruct before using the object.  */
39a7c91847Schristos 
40a7c91847Schristos static reg_errcode_t
41a7c91847Schristos internal_function
re_string_allocate(re_string_t * pstr,const char * str,Idx len,Idx init_len,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)42a7c91847Schristos re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
43a7c91847Schristos 		    REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
44a7c91847Schristos {
45a7c91847Schristos   reg_errcode_t ret;
46a7c91847Schristos   Idx init_buf_len;
47a7c91847Schristos 
48a7c91847Schristos   /* Ensure at least one character fits into the buffers.  */
49a7c91847Schristos   if (init_len < dfa->mb_cur_max)
50a7c91847Schristos     init_len = dfa->mb_cur_max;
51a7c91847Schristos   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
52a7c91847Schristos   re_string_construct_common (str, len, pstr, trans, icase, dfa);
53a7c91847Schristos 
54a7c91847Schristos   ret = re_string_realloc_buffers (pstr, init_buf_len);
55a7c91847Schristos   if (BE (ret != REG_NOERROR, 0))
56a7c91847Schristos     return ret;
57a7c91847Schristos 
58a7c91847Schristos   pstr->word_char = dfa->word_char;
59a7c91847Schristos   pstr->word_ops_used = dfa->word_ops_used;
60a7c91847Schristos   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
61a7c91847Schristos   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
62a7c91847Schristos   pstr->valid_raw_len = pstr->valid_len;
63a7c91847Schristos   return REG_NOERROR;
64a7c91847Schristos }
65a7c91847Schristos 
66a7c91847Schristos /* This function allocate the buffers, and initialize them.  */
67a7c91847Schristos 
68a7c91847Schristos static reg_errcode_t
69a7c91847Schristos internal_function
re_string_construct(re_string_t * pstr,const char * str,Idx len,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)70a7c91847Schristos re_string_construct (re_string_t *pstr, const char *str, Idx len,
71a7c91847Schristos 		     REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
72a7c91847Schristos {
73a7c91847Schristos   reg_errcode_t ret;
74a7c91847Schristos   memset (pstr, '\0', sizeof (re_string_t));
75a7c91847Schristos   re_string_construct_common (str, len, pstr, trans, icase, dfa);
76a7c91847Schristos 
77a7c91847Schristos   if (len > 0)
78a7c91847Schristos     {
79a7c91847Schristos       ret = re_string_realloc_buffers (pstr, len + 1);
80a7c91847Schristos       if (BE (ret != REG_NOERROR, 0))
81a7c91847Schristos 	return ret;
82a7c91847Schristos     }
83a7c91847Schristos   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
84a7c91847Schristos 
85a7c91847Schristos   if (icase)
86a7c91847Schristos     {
87a7c91847Schristos #ifdef RE_ENABLE_I18N
88a7c91847Schristos       if (dfa->mb_cur_max > 1)
89a7c91847Schristos 	{
90a7c91847Schristos 	  while (1)
91a7c91847Schristos 	    {
92a7c91847Schristos 	      ret = build_wcs_upper_buffer (pstr);
93a7c91847Schristos 	      if (BE (ret != REG_NOERROR, 0))
94a7c91847Schristos 		return ret;
95a7c91847Schristos 	      if (pstr->valid_raw_len >= len)
96a7c91847Schristos 		break;
97a7c91847Schristos 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
98a7c91847Schristos 		break;
99a7c91847Schristos 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
100a7c91847Schristos 	      if (BE (ret != REG_NOERROR, 0))
101a7c91847Schristos 		return ret;
102a7c91847Schristos 	    }
103a7c91847Schristos 	}
104a7c91847Schristos       else
105a7c91847Schristos #endif /* RE_ENABLE_I18N  */
106a7c91847Schristos 	build_upper_buffer (pstr);
107a7c91847Schristos     }
108a7c91847Schristos   else
109a7c91847Schristos     {
110a7c91847Schristos #ifdef RE_ENABLE_I18N
111a7c91847Schristos       if (dfa->mb_cur_max > 1)
112a7c91847Schristos 	build_wcs_buffer (pstr);
113a7c91847Schristos       else
114a7c91847Schristos #endif /* RE_ENABLE_I18N  */
115a7c91847Schristos 	{
116a7c91847Schristos 	  if (trans != NULL)
117a7c91847Schristos 	    re_string_translate_buffer (pstr);
118a7c91847Schristos 	  else
119a7c91847Schristos 	    {
120a7c91847Schristos 	      pstr->valid_len = pstr->bufs_len;
121a7c91847Schristos 	      pstr->valid_raw_len = pstr->bufs_len;
122a7c91847Schristos 	    }
123a7c91847Schristos 	}
124a7c91847Schristos     }
125a7c91847Schristos 
126a7c91847Schristos   return REG_NOERROR;
127a7c91847Schristos }
128a7c91847Schristos 
129a7c91847Schristos /* Helper functions for re_string_allocate, and re_string_construct.  */
130a7c91847Schristos 
131a7c91847Schristos static reg_errcode_t
132a7c91847Schristos internal_function
re_string_realloc_buffers(re_string_t * pstr,Idx new_buf_len)133a7c91847Schristos re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
134a7c91847Schristos {
135a7c91847Schristos #ifdef RE_ENABLE_I18N
136a7c91847Schristos   if (pstr->mb_cur_max > 1)
137a7c91847Schristos     {
138a7c91847Schristos       wint_t *new_wcs = re_xrealloc (pstr->wcs, wint_t, new_buf_len);
139a7c91847Schristos       if (BE (new_wcs == NULL, 0))
140a7c91847Schristos 	return REG_ESPACE;
141a7c91847Schristos       pstr->wcs = new_wcs;
142a7c91847Schristos       if (pstr->offsets != NULL)
143a7c91847Schristos 	{
144a7c91847Schristos 	  Idx *new_offsets = re_xrealloc (pstr->offsets, Idx, new_buf_len);
145a7c91847Schristos 	  if (BE (new_offsets == NULL, 0))
146a7c91847Schristos 	    return REG_ESPACE;
147a7c91847Schristos 	  pstr->offsets = new_offsets;
148a7c91847Schristos 	}
149a7c91847Schristos     }
150a7c91847Schristos #endif /* RE_ENABLE_I18N  */
151a7c91847Schristos   if (pstr->mbs_allocated)
152a7c91847Schristos     {
153a7c91847Schristos       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
154a7c91847Schristos 					   new_buf_len);
155a7c91847Schristos       if (BE (new_mbs == NULL, 0))
156a7c91847Schristos 	return REG_ESPACE;
157a7c91847Schristos       pstr->mbs = new_mbs;
158a7c91847Schristos     }
159a7c91847Schristos   pstr->bufs_len = new_buf_len;
160a7c91847Schristos   return REG_NOERROR;
161a7c91847Schristos }
162a7c91847Schristos 
163a7c91847Schristos 
164a7c91847Schristos static void
165a7c91847Schristos internal_function
re_string_construct_common(const char * str,Idx len,re_string_t * pstr,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)166a7c91847Schristos re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
167a7c91847Schristos 			    REG_TRANSLATE_TYPE trans, bool icase,
168a7c91847Schristos 			    const re_dfa_t *dfa)
169a7c91847Schristos {
170a7c91847Schristos   pstr->raw_mbs = (const unsigned char *) str;
171a7c91847Schristos   pstr->len = len;
172a7c91847Schristos   pstr->raw_len = len;
173a7c91847Schristos   pstr->trans = (unsigned REG_TRANSLATE_TYPE) trans;
174a7c91847Schristos   pstr->icase = icase;
175a7c91847Schristos   pstr->mbs_allocated = (trans != NULL || icase);
176a7c91847Schristos   pstr->mb_cur_max = dfa->mb_cur_max;
177a7c91847Schristos   pstr->is_utf8 = dfa->is_utf8;
178a7c91847Schristos   pstr->map_notascii = dfa->map_notascii;
179a7c91847Schristos   pstr->stop = pstr->len;
180a7c91847Schristos   pstr->raw_stop = pstr->stop;
181a7c91847Schristos }
182a7c91847Schristos 
183a7c91847Schristos #ifdef RE_ENABLE_I18N
184a7c91847Schristos 
185a7c91847Schristos /* Build wide character buffer PSTR->WCS.
186a7c91847Schristos    If the byte sequence of the string are:
187a7c91847Schristos      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
188a7c91847Schristos    Then wide character buffer will be:
189a7c91847Schristos      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
190a7c91847Schristos    We use WEOF for padding, they indicate that the position isn't
191a7c91847Schristos    a first byte of a multibyte character.
192a7c91847Schristos 
193a7c91847Schristos    Note that this function assumes PSTR->VALID_LEN elements are already
194a7c91847Schristos    built and starts from PSTR->VALID_LEN.  */
195a7c91847Schristos 
196a7c91847Schristos static void
197a7c91847Schristos internal_function
build_wcs_buffer(re_string_t * pstr)198a7c91847Schristos build_wcs_buffer (re_string_t *pstr)
199a7c91847Schristos {
200a7c91847Schristos #ifdef _LIBC
201a7c91847Schristos   unsigned char buf[MB_LEN_MAX];
202a7c91847Schristos   assert (MB_LEN_MAX >= pstr->mb_cur_max);
203a7c91847Schristos #else
204a7c91847Schristos   unsigned char buf[64];
205a7c91847Schristos #endif
206a7c91847Schristos   mbstate_t prev_st;
207a7c91847Schristos   Idx byte_idx, end_idx, remain_len;
208a7c91847Schristos   size_t mbclen;
209a7c91847Schristos 
210a7c91847Schristos   /* Build the buffers from pstr->valid_len to either pstr->len or
211a7c91847Schristos      pstr->bufs_len.  */
212a7c91847Schristos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
213a7c91847Schristos   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
214a7c91847Schristos     {
215a7c91847Schristos       wchar_t wc;
216a7c91847Schristos       const char *p;
217a7c91847Schristos 
218a7c91847Schristos       remain_len = end_idx - byte_idx;
219a7c91847Schristos       prev_st = pstr->cur_state;
220a7c91847Schristos       /* Apply the translation if we need.  */
221a7c91847Schristos       if (BE (pstr->trans != NULL, 0))
222a7c91847Schristos 	{
223a7c91847Schristos 	  int i, ch;
224a7c91847Schristos 
225a7c91847Schristos 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
226a7c91847Schristos 	    {
227a7c91847Schristos 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
228a7c91847Schristos 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
229a7c91847Schristos 	    }
230a7c91847Schristos 	  p = (const char *) buf;
231a7c91847Schristos 	}
232a7c91847Schristos       else
233a7c91847Schristos 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
234a7c91847Schristos       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
235a7c91847Schristos       if (BE (mbclen == (size_t) -2, 0))
236a7c91847Schristos 	{
237a7c91847Schristos 	  /* The buffer doesn't have enough space, finish to build.  */
238a7c91847Schristos 	  pstr->cur_state = prev_st;
239a7c91847Schristos 	  break;
240a7c91847Schristos 	}
241a7c91847Schristos       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
242a7c91847Schristos 	{
243a7c91847Schristos 	  /* We treat these cases as a singlebyte character.  */
244a7c91847Schristos 	  mbclen = 1;
245a7c91847Schristos 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
246a7c91847Schristos 	  if (BE (pstr->trans != NULL, 0))
247a7c91847Schristos 	    wc = pstr->trans[wc];
248a7c91847Schristos 	  pstr->cur_state = prev_st;
249a7c91847Schristos 	}
250a7c91847Schristos 
251a7c91847Schristos       /* Write wide character and padding.  */
252a7c91847Schristos       pstr->wcs[byte_idx++] = wc;
253a7c91847Schristos       /* Write paddings.  */
254a7c91847Schristos       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
255a7c91847Schristos 	pstr->wcs[byte_idx++] = WEOF;
256a7c91847Schristos     }
257a7c91847Schristos   pstr->valid_len = byte_idx;
258a7c91847Schristos   pstr->valid_raw_len = byte_idx;
259a7c91847Schristos }
260a7c91847Schristos 
261a7c91847Schristos /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
262a7c91847Schristos    but for REG_ICASE.  */
263a7c91847Schristos 
264a7c91847Schristos static reg_errcode_t
265a7c91847Schristos internal_function
build_wcs_upper_buffer(re_string_t * pstr)266a7c91847Schristos build_wcs_upper_buffer (re_string_t *pstr)
267a7c91847Schristos {
268a7c91847Schristos   mbstate_t prev_st;
269a7c91847Schristos   Idx src_idx, byte_idx, end_idx, remain_len;
270a7c91847Schristos   size_t mbclen;
271a7c91847Schristos #ifdef _LIBC
272a7c91847Schristos   char buf[MB_LEN_MAX];
273a7c91847Schristos   assert (MB_LEN_MAX >= pstr->mb_cur_max);
274a7c91847Schristos #else
275a7c91847Schristos   char buf[64];
276a7c91847Schristos #endif
277a7c91847Schristos 
278a7c91847Schristos   byte_idx = pstr->valid_len;
279a7c91847Schristos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
280a7c91847Schristos 
281a7c91847Schristos   /* The following optimization assumes that ASCII characters can be
282a7c91847Schristos      mapped to wide characters with a simple cast.  */
283a7c91847Schristos   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
284a7c91847Schristos     {
285a7c91847Schristos       while (byte_idx < end_idx)
286a7c91847Schristos 	{
287a7c91847Schristos 	  wchar_t wc;
288a7c91847Schristos 
289a7c91847Schristos 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
290a7c91847Schristos 	      && mbsinit (&pstr->cur_state))
291a7c91847Schristos 	    {
292a7c91847Schristos 	      /* In case of a singlebyte character.  */
293a7c91847Schristos 	      pstr->mbs[byte_idx]
294a7c91847Schristos 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
295a7c91847Schristos 	      /* The next step uses the assumption that wchar_t is encoded
296a7c91847Schristos 		 ASCII-safe: all ASCII values can be converted like this.  */
297a7c91847Schristos 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
298a7c91847Schristos 	      ++byte_idx;
299a7c91847Schristos 	      continue;
300a7c91847Schristos 	    }
301a7c91847Schristos 
302a7c91847Schristos 	  remain_len = end_idx - byte_idx;
303a7c91847Schristos 	  prev_st = pstr->cur_state;
304a7c91847Schristos 	  mbclen = mbrtowc (&wc,
305a7c91847Schristos 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
306a7c91847Schristos 			     + byte_idx), remain_len, &pstr->cur_state);
307a7c91847Schristos 	  if (BE ((size_t) (mbclen + 2) > 2, 1))
308a7c91847Schristos 	    {
309a7c91847Schristos 	      wchar_t wcu = wc;
310a7c91847Schristos 	      if (iswlower (wc))
311a7c91847Schristos 		{
312a7c91847Schristos 		  size_t mbcdlen;
313a7c91847Schristos 
314a7c91847Schristos 		  wcu = towupper (wc);
315a7c91847Schristos 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
316a7c91847Schristos 		  if (BE (mbclen == mbcdlen, 1))
317a7c91847Schristos 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
318a7c91847Schristos 		  else
319a7c91847Schristos 		    {
320a7c91847Schristos 		      src_idx = byte_idx;
321a7c91847Schristos 		      goto offsets_needed;
322a7c91847Schristos 		    }
323a7c91847Schristos 		}
324a7c91847Schristos 	      else
325a7c91847Schristos 		memcpy (pstr->mbs + byte_idx,
326a7c91847Schristos 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
327a7c91847Schristos 	      pstr->wcs[byte_idx++] = wcu;
328a7c91847Schristos 	      /* Write paddings.  */
329a7c91847Schristos 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
330a7c91847Schristos 		pstr->wcs[byte_idx++] = WEOF;
331a7c91847Schristos 	    }
332a7c91847Schristos 	  else if (mbclen == (size_t) -1 || mbclen == 0)
333a7c91847Schristos 	    {
334a7c91847Schristos 	      /* It is an invalid character or '\0'.  Just use the byte.  */
335a7c91847Schristos 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
336a7c91847Schristos 	      pstr->mbs[byte_idx] = ch;
337a7c91847Schristos 	      /* And also cast it to wide char.  */
338a7c91847Schristos 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
339a7c91847Schristos 	      if (BE (mbclen == (size_t) -1, 0))
340a7c91847Schristos 		pstr->cur_state = prev_st;
341a7c91847Schristos 	    }
342a7c91847Schristos 	  else
343a7c91847Schristos 	    {
344a7c91847Schristos 	      /* The buffer doesn't have enough space, finish to build.  */
345a7c91847Schristos 	      pstr->cur_state = prev_st;
346a7c91847Schristos 	      break;
347a7c91847Schristos 	    }
348a7c91847Schristos 	}
349a7c91847Schristos       pstr->valid_len = byte_idx;
350a7c91847Schristos       pstr->valid_raw_len = byte_idx;
351a7c91847Schristos       return REG_NOERROR;
352a7c91847Schristos     }
353a7c91847Schristos   else
354a7c91847Schristos     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
355a7c91847Schristos       {
356a7c91847Schristos 	wchar_t wc;
357a7c91847Schristos 	const char *p;
358a7c91847Schristos       offsets_needed:
359a7c91847Schristos 	remain_len = end_idx - byte_idx;
360a7c91847Schristos 	prev_st = pstr->cur_state;
361a7c91847Schristos 	if (BE (pstr->trans != NULL, 0))
362a7c91847Schristos 	  {
363a7c91847Schristos 	    int i, ch;
364a7c91847Schristos 
365a7c91847Schristos 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
366a7c91847Schristos 	      {
367a7c91847Schristos 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
368a7c91847Schristos 		buf[i] = pstr->trans[ch];
369a7c91847Schristos 	      }
370a7c91847Schristos 	    p = (const char *) buf;
371a7c91847Schristos 	  }
372a7c91847Schristos 	else
373a7c91847Schristos 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
374a7c91847Schristos 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
375a7c91847Schristos 	if (BE ((size_t) (mbclen + 2) > 2, 1))
376a7c91847Schristos 	  {
377a7c91847Schristos 	    wchar_t wcu = wc;
378a7c91847Schristos 	    if (iswlower (wc))
379a7c91847Schristos 	      {
380a7c91847Schristos 		size_t mbcdlen;
381a7c91847Schristos 
382a7c91847Schristos 		wcu = towupper (wc);
383a7c91847Schristos 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
384a7c91847Schristos 		if (BE (mbclen == mbcdlen, 1))
385a7c91847Schristos 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
386a7c91847Schristos 		else if (mbcdlen != (size_t) -1)
387a7c91847Schristos 		  {
388a7c91847Schristos 		    size_t i;
389a7c91847Schristos 
390a7c91847Schristos 		    if (byte_idx + mbcdlen > pstr->bufs_len)
391a7c91847Schristos 		      {
392a7c91847Schristos 			pstr->cur_state = prev_st;
393a7c91847Schristos 			break;
394a7c91847Schristos 		      }
395a7c91847Schristos 
396a7c91847Schristos 		    if (pstr->offsets == NULL)
397a7c91847Schristos 		      {
398a7c91847Schristos 			pstr->offsets = re_xmalloc (Idx, pstr->bufs_len);
399a7c91847Schristos 
400a7c91847Schristos 			if (pstr->offsets == NULL)
401a7c91847Schristos 			  return REG_ESPACE;
402a7c91847Schristos 		      }
403a7c91847Schristos 		    if (!pstr->offsets_needed)
404a7c91847Schristos 		      {
405a7c91847Schristos 			for (i = 0; i < (size_t) byte_idx; ++i)
406a7c91847Schristos 			  pstr->offsets[i] = i;
407a7c91847Schristos 			pstr->offsets_needed = 1;
408a7c91847Schristos 		      }
409a7c91847Schristos 
410a7c91847Schristos 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
411a7c91847Schristos 		    pstr->wcs[byte_idx] = wcu;
412a7c91847Schristos 		    pstr->offsets[byte_idx] = src_idx;
413a7c91847Schristos 		    for (i = 1; i < mbcdlen; ++i)
414a7c91847Schristos 		      {
415a7c91847Schristos 			pstr->offsets[byte_idx + i]
416a7c91847Schristos 			  = src_idx + (i < mbclen ? i : mbclen - 1);
417a7c91847Schristos 			pstr->wcs[byte_idx + i] = WEOF;
418a7c91847Schristos 		      }
419a7c91847Schristos 		    pstr->len += mbcdlen - mbclen;
420a7c91847Schristos 		    if (pstr->raw_stop > src_idx)
421a7c91847Schristos 		      pstr->stop += mbcdlen - mbclen;
422a7c91847Schristos 		    end_idx = (pstr->bufs_len > pstr->len)
423a7c91847Schristos 			      ? pstr->len : pstr->bufs_len;
424a7c91847Schristos 		    byte_idx += mbcdlen;
425a7c91847Schristos 		    src_idx += mbclen;
426a7c91847Schristos 		    continue;
427a7c91847Schristos 		  }
428a7c91847Schristos                 else
429a7c91847Schristos                   memcpy (pstr->mbs + byte_idx, p, mbclen);
430a7c91847Schristos 	      }
431a7c91847Schristos 	    else
432a7c91847Schristos 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
433a7c91847Schristos 
434a7c91847Schristos 	    if (BE (pstr->offsets_needed != 0, 0))
435a7c91847Schristos 	      {
436a7c91847Schristos 		size_t i;
437a7c91847Schristos 		for (i = 0; i < mbclen; ++i)
438a7c91847Schristos 		  pstr->offsets[byte_idx + i] = src_idx + i;
439a7c91847Schristos 	      }
440a7c91847Schristos 	    src_idx += mbclen;
441a7c91847Schristos 
442a7c91847Schristos 	    pstr->wcs[byte_idx++] = wcu;
443a7c91847Schristos 	    /* Write paddings.  */
444a7c91847Schristos 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
445a7c91847Schristos 	      pstr->wcs[byte_idx++] = WEOF;
446a7c91847Schristos 	  }
447a7c91847Schristos 	else if (mbclen == (size_t) -1 || mbclen == 0)
448a7c91847Schristos 	  {
449a7c91847Schristos 	    /* It is an invalid character or '\0'.  Just use the byte.  */
450a7c91847Schristos 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
451a7c91847Schristos 
452a7c91847Schristos 	    if (BE (pstr->trans != NULL, 0))
453a7c91847Schristos 	      ch = pstr->trans [ch];
454a7c91847Schristos 	    pstr->mbs[byte_idx] = ch;
455a7c91847Schristos 
456a7c91847Schristos 	    if (BE (pstr->offsets_needed != 0, 0))
457a7c91847Schristos 	      pstr->offsets[byte_idx] = src_idx;
458a7c91847Schristos 	    ++src_idx;
459a7c91847Schristos 
460a7c91847Schristos 	    /* And also cast it to wide char.  */
461a7c91847Schristos 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
462a7c91847Schristos 	    if (BE (mbclen == (size_t) -1, 0))
463a7c91847Schristos 	      pstr->cur_state = prev_st;
464a7c91847Schristos 	  }
465a7c91847Schristos 	else
466a7c91847Schristos 	  {
467a7c91847Schristos 	    /* The buffer doesn't have enough space, finish to build.  */
468a7c91847Schristos 	    pstr->cur_state = prev_st;
469a7c91847Schristos 	    break;
470a7c91847Schristos 	  }
471a7c91847Schristos       }
472a7c91847Schristos   pstr->valid_len = byte_idx;
473a7c91847Schristos   pstr->valid_raw_len = src_idx;
474a7c91847Schristos   return REG_NOERROR;
475a7c91847Schristos }
476a7c91847Schristos 
477a7c91847Schristos /* Skip characters until the index becomes greater than NEW_RAW_IDX.
478a7c91847Schristos    Return the index.  */
479a7c91847Schristos 
480a7c91847Schristos static Idx
481a7c91847Schristos internal_function
re_string_skip_chars(re_string_t * pstr,Idx new_raw_idx,wint_t * last_wc)482a7c91847Schristos re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
483a7c91847Schristos {
484a7c91847Schristos   mbstate_t prev_st;
485a7c91847Schristos   Idx rawbuf_idx;
486a7c91847Schristos   size_t mbclen;
487a7c91847Schristos   wchar_t wc = 0;
488a7c91847Schristos 
489a7c91847Schristos   /* Skip the characters which are not necessary to check.  */
490a7c91847Schristos   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
491a7c91847Schristos        rawbuf_idx < new_raw_idx;)
492a7c91847Schristos     {
493a7c91847Schristos       Idx remain_len;
494a7c91847Schristos       remain_len = pstr->len - rawbuf_idx;
495a7c91847Schristos       prev_st = pstr->cur_state;
496a7c91847Schristos       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
497a7c91847Schristos 			remain_len, &pstr->cur_state);
498a7c91847Schristos       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
499a7c91847Schristos 	{
500a7c91847Schristos 	  /* We treat these cases as a singlebyte character.  */
501a7c91847Schristos 	  mbclen = 1;
502a7c91847Schristos 	  pstr->cur_state = prev_st;
503a7c91847Schristos 	}
504a7c91847Schristos       /* Then proceed the next character.  */
505a7c91847Schristos       rawbuf_idx += mbclen;
506a7c91847Schristos     }
507a7c91847Schristos   *last_wc = (wint_t) wc;
508a7c91847Schristos   return rawbuf_idx;
509a7c91847Schristos }
510a7c91847Schristos #endif /* RE_ENABLE_I18N  */
511a7c91847Schristos 
512a7c91847Schristos /* Build the buffer PSTR->MBS, and apply the translation if we need.
513a7c91847Schristos    This function is used in case of REG_ICASE.  */
514a7c91847Schristos 
515a7c91847Schristos static void
516a7c91847Schristos internal_function
build_upper_buffer(re_string_t * pstr)517a7c91847Schristos build_upper_buffer (re_string_t *pstr)
518a7c91847Schristos {
519a7c91847Schristos   Idx char_idx, end_idx;
520a7c91847Schristos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
521a7c91847Schristos 
522a7c91847Schristos   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
523a7c91847Schristos     {
524a7c91847Schristos       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
525a7c91847Schristos       if (BE (pstr->trans != NULL, 0))
526a7c91847Schristos 	ch = pstr->trans[ch];
527a7c91847Schristos       if (islower (ch))
528a7c91847Schristos 	pstr->mbs[char_idx] = toupper (ch);
529a7c91847Schristos       else
530a7c91847Schristos 	pstr->mbs[char_idx] = ch;
531a7c91847Schristos     }
532a7c91847Schristos   pstr->valid_len = char_idx;
533a7c91847Schristos   pstr->valid_raw_len = char_idx;
534a7c91847Schristos }
535a7c91847Schristos 
536a7c91847Schristos /* Apply TRANS to the buffer in PSTR.  */
537a7c91847Schristos 
538a7c91847Schristos static void
539a7c91847Schristos internal_function
re_string_translate_buffer(re_string_t * pstr)540a7c91847Schristos re_string_translate_buffer (re_string_t *pstr)
541a7c91847Schristos {
542a7c91847Schristos   Idx buf_idx, end_idx;
543a7c91847Schristos   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
544a7c91847Schristos 
545a7c91847Schristos   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
546a7c91847Schristos     {
547a7c91847Schristos       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
548a7c91847Schristos       pstr->mbs[buf_idx] = pstr->trans[ch];
549a7c91847Schristos     }
550a7c91847Schristos 
551a7c91847Schristos   pstr->valid_len = buf_idx;
552a7c91847Schristos   pstr->valid_raw_len = buf_idx;
553a7c91847Schristos }
554a7c91847Schristos 
555a7c91847Schristos /* This function re-construct the buffers.
556a7c91847Schristos    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
557a7c91847Schristos    convert to upper case in case of REG_ICASE, apply translation.  */
558a7c91847Schristos 
559a7c91847Schristos static reg_errcode_t
560a7c91847Schristos internal_function
re_string_reconstruct(re_string_t * pstr,Idx idx,int eflags)561a7c91847Schristos re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
562a7c91847Schristos {
563a7c91847Schristos   Idx offset;
564a7c91847Schristos 
565a7c91847Schristos   if (BE (pstr->raw_mbs_idx <= idx, 0))
566a7c91847Schristos     offset = idx - pstr->raw_mbs_idx;
567a7c91847Schristos   else
568a7c91847Schristos     {
569a7c91847Schristos       /* Reset buffer.  */
570a7c91847Schristos #ifdef RE_ENABLE_I18N
571a7c91847Schristos       if (pstr->mb_cur_max > 1)
572a7c91847Schristos 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
573a7c91847Schristos #endif /* RE_ENABLE_I18N */
574a7c91847Schristos       pstr->len = pstr->raw_len;
575a7c91847Schristos       pstr->stop = pstr->raw_stop;
576a7c91847Schristos       pstr->valid_len = 0;
577a7c91847Schristos       pstr->raw_mbs_idx = 0;
578a7c91847Schristos       pstr->valid_raw_len = 0;
579a7c91847Schristos       pstr->offsets_needed = 0;
580a7c91847Schristos       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
581a7c91847Schristos 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
582a7c91847Schristos       if (!pstr->mbs_allocated)
583a7c91847Schristos 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
584a7c91847Schristos       offset = idx;
585a7c91847Schristos     }
586a7c91847Schristos 
587a7c91847Schristos   if (BE (offset != 0, 1))
588a7c91847Schristos     {
589a7c91847Schristos       /* Are the characters which are already checked remain?  */
590a7c91847Schristos       if (BE (offset < pstr->valid_raw_len, 1)
591a7c91847Schristos #ifdef RE_ENABLE_I18N
592a7c91847Schristos 	  /* Handling this would enlarge the code too much.
593a7c91847Schristos 	     Accept a slowdown in that case.  */
594a7c91847Schristos 	  && pstr->offsets_needed == 0
595a7c91847Schristos #endif
596a7c91847Schristos 	 )
597a7c91847Schristos 	{
598a7c91847Schristos 	  /* Yes, move them to the front of the buffer.  */
599a7c91847Schristos 	  pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
600a7c91847Schristos #ifdef RE_ENABLE_I18N
601a7c91847Schristos 	  if (pstr->mb_cur_max > 1)
602a7c91847Schristos 	    memmove (pstr->wcs, pstr->wcs + offset,
603a7c91847Schristos 		     (pstr->valid_len - offset) * sizeof (wint_t));
604a7c91847Schristos #endif /* RE_ENABLE_I18N */
605a7c91847Schristos 	  if (BE (pstr->mbs_allocated, 0))
606a7c91847Schristos 	    memmove (pstr->mbs, pstr->mbs + offset,
607a7c91847Schristos 		     pstr->valid_len - offset);
608a7c91847Schristos 	  pstr->valid_len -= offset;
609a7c91847Schristos 	  pstr->valid_raw_len -= offset;
610a7c91847Schristos #if DEBUG
611a7c91847Schristos 	  assert (pstr->valid_len > 0);
612a7c91847Schristos #endif
613a7c91847Schristos 	}
614a7c91847Schristos       else
615a7c91847Schristos 	{
616a7c91847Schristos 	  /* No, skip all characters until IDX.  */
617a7c91847Schristos #ifdef RE_ENABLE_I18N
618a7c91847Schristos 	  if (BE (pstr->offsets_needed, 0))
619a7c91847Schristos 	    {
620a7c91847Schristos 	      pstr->len = pstr->raw_len - idx + offset;
621a7c91847Schristos 	      pstr->stop = pstr->raw_stop - idx + offset;
622a7c91847Schristos 	      pstr->offsets_needed = 0;
623a7c91847Schristos 	    }
624a7c91847Schristos #endif
625a7c91847Schristos 	  pstr->valid_len = 0;
626a7c91847Schristos 	  pstr->valid_raw_len = 0;
627a7c91847Schristos #ifdef RE_ENABLE_I18N
628a7c91847Schristos 	  if (pstr->mb_cur_max > 1)
629a7c91847Schristos 	    {
630a7c91847Schristos 	      Idx wcs_idx;
631a7c91847Schristos 	      wint_t wc = WEOF;
632a7c91847Schristos 
633a7c91847Schristos 	      if (pstr->is_utf8)
634a7c91847Schristos 		{
635a7c91847Schristos 		  const unsigned char *raw, *p, *q, *end;
636a7c91847Schristos 
637a7c91847Schristos 		  /* Special case UTF-8.  Multi-byte chars start with any
638a7c91847Schristos 		     byte other than 0x80 - 0xbf.  */
639a7c91847Schristos 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
640a7c91847Schristos 		  end = raw + (offset - pstr->mb_cur_max);
641a7c91847Schristos 		  for (p = raw + offset - 1; p >= end; --p)
642a7c91847Schristos 		    if ((*p & 0xc0) != 0x80)
643a7c91847Schristos 		      {
644a7c91847Schristos 			mbstate_t cur_state;
645a7c91847Schristos 			wchar_t wc2;
646a7c91847Schristos 			Idx mlen = raw + pstr->len - p;
647a7c91847Schristos 			unsigned char buf[6];
648a7c91847Schristos 			size_t mbclen;
649a7c91847Schristos 
650a7c91847Schristos 			q = p;
651a7c91847Schristos 			if (BE (pstr->trans != NULL, 0))
652a7c91847Schristos 			  {
653a7c91847Schristos 			    int i = mlen < 6 ? mlen : 6;
654a7c91847Schristos 			    while (--i >= 0)
655a7c91847Schristos 			      buf[i] = pstr->trans[p[i]];
656a7c91847Schristos 			    q = buf;
657a7c91847Schristos 			  }
658a7c91847Schristos 			/* XXX Don't use mbrtowc, we know which conversion
659a7c91847Schristos 			   to use (UTF-8 -> UCS4).  */
660a7c91847Schristos 			memset (&cur_state, 0, sizeof (cur_state));
661a7c91847Schristos 			mbclen = mbrtowc (&wc2, (const char *) p, mlen,
662a7c91847Schristos 					  &cur_state);
663a7c91847Schristos 			if (raw + offset - p <= mbclen && mbclen < (size_t) -2)
664a7c91847Schristos 			  {
665a7c91847Schristos 			    memset (&pstr->cur_state, '\0',
666a7c91847Schristos 				    sizeof (mbstate_t));
667a7c91847Schristos 			    pstr->valid_len = mbclen - (raw + offset - p);
668a7c91847Schristos 			    wc = wc2;
669a7c91847Schristos 			  }
670a7c91847Schristos 			break;
671a7c91847Schristos 		      }
672a7c91847Schristos 		}
673a7c91847Schristos 
674a7c91847Schristos 	      if (wc == WEOF)
675a7c91847Schristos 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
676a7c91847Schristos 	      if (BE (pstr->valid_len, 0))
677a7c91847Schristos 		{
678a7c91847Schristos 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
679a7c91847Schristos 		    pstr->wcs[wcs_idx] = WEOF;
680a7c91847Schristos 		  if (pstr->mbs_allocated)
681a7c91847Schristos 		    memset (pstr->mbs, -1, pstr->valid_len);
682a7c91847Schristos 		}
683a7c91847Schristos 	      pstr->valid_raw_len = pstr->valid_len;
684a7c91847Schristos 	      pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
685a7c91847Schristos 				    && IS_WIDE_WORD_CHAR (wc))
686a7c91847Schristos 				   ? CONTEXT_WORD
687a7c91847Schristos 				   : ((IS_WIDE_NEWLINE (wc)
688a7c91847Schristos 				       && pstr->newline_anchor)
689a7c91847Schristos 				      ? CONTEXT_NEWLINE : 0));
690a7c91847Schristos 	    }
691a7c91847Schristos 	  else
692a7c91847Schristos #endif /* RE_ENABLE_I18N */
693a7c91847Schristos 	    {
694a7c91847Schristos 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
695a7c91847Schristos 	      if (pstr->trans)
696a7c91847Schristos 		c = pstr->trans[c];
697a7c91847Schristos 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
698a7c91847Schristos 				   ? CONTEXT_WORD
699a7c91847Schristos 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
700a7c91847Schristos 				      ? CONTEXT_NEWLINE : 0));
701a7c91847Schristos 	    }
702a7c91847Schristos 	}
703a7c91847Schristos       if (!BE (pstr->mbs_allocated, 0))
704a7c91847Schristos 	pstr->mbs += offset;
705a7c91847Schristos     }
706a7c91847Schristos   pstr->raw_mbs_idx = idx;
707a7c91847Schristos   pstr->len -= offset;
708a7c91847Schristos   pstr->stop -= offset;
709a7c91847Schristos 
710a7c91847Schristos   /* Then build the buffers.  */
711a7c91847Schristos #ifdef RE_ENABLE_I18N
712a7c91847Schristos   if (pstr->mb_cur_max > 1)
713a7c91847Schristos     {
714a7c91847Schristos       if (pstr->icase)
715a7c91847Schristos 	{
716a7c91847Schristos 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
717a7c91847Schristos 	  if (BE (ret != REG_NOERROR, 0))
718a7c91847Schristos 	    return ret;
719a7c91847Schristos 	}
720a7c91847Schristos       else
721a7c91847Schristos 	build_wcs_buffer (pstr);
722a7c91847Schristos     }
723a7c91847Schristos   else
724a7c91847Schristos #endif /* RE_ENABLE_I18N */
725a7c91847Schristos   if (BE (pstr->mbs_allocated, 0))
726a7c91847Schristos     {
727a7c91847Schristos       if (pstr->icase)
728a7c91847Schristos 	build_upper_buffer (pstr);
729a7c91847Schristos       else if (pstr->trans != NULL)
730a7c91847Schristos 	re_string_translate_buffer (pstr);
731a7c91847Schristos     }
732a7c91847Schristos   else
733a7c91847Schristos     pstr->valid_len = pstr->len;
734a7c91847Schristos 
735a7c91847Schristos   pstr->cur_idx = 0;
736a7c91847Schristos   return REG_NOERROR;
737a7c91847Schristos }
738a7c91847Schristos 
739a7c91847Schristos static unsigned char
internal_function(pure)740a7c91847Schristos internal_function __attribute ((pure))
741a7c91847Schristos re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
742a7c91847Schristos {
743a7c91847Schristos   int ch;
744a7c91847Schristos   Idx off;
745a7c91847Schristos 
746a7c91847Schristos   /* Handle the common (easiest) cases first.  */
747a7c91847Schristos   if (BE (!pstr->mbs_allocated, 1))
748a7c91847Schristos     return re_string_peek_byte (pstr, idx);
749a7c91847Schristos 
750a7c91847Schristos #ifdef RE_ENABLE_I18N
751a7c91847Schristos   if (pstr->mb_cur_max > 1
752a7c91847Schristos       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
753a7c91847Schristos     return re_string_peek_byte (pstr, idx);
754a7c91847Schristos #endif
755a7c91847Schristos 
756a7c91847Schristos   off = pstr->cur_idx + idx;
757a7c91847Schristos #ifdef RE_ENABLE_I18N
758a7c91847Schristos   if (pstr->offsets_needed)
759a7c91847Schristos     off = pstr->offsets[off];
760a7c91847Schristos #endif
761a7c91847Schristos 
762a7c91847Schristos   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
763a7c91847Schristos 
764a7c91847Schristos #ifdef RE_ENABLE_I18N
765a7c91847Schristos   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
766a7c91847Schristos      this function returns CAPITAL LETTER I instead of first byte of
767a7c91847Schristos      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
768a7c91847Schristos      since peek_byte_case doesn't advance cur_idx in any way.  */
769a7c91847Schristos   if (pstr->offsets_needed && !isascii (ch))
770a7c91847Schristos     return re_string_peek_byte (pstr, idx);
771a7c91847Schristos #endif
772a7c91847Schristos 
773a7c91847Schristos   return ch;
774a7c91847Schristos }
775a7c91847Schristos 
776a7c91847Schristos static unsigned char
internal_function(pure)777a7c91847Schristos internal_function __attribute ((pure))
778a7c91847Schristos re_string_fetch_byte_case (re_string_t *pstr)
779a7c91847Schristos {
780a7c91847Schristos   if (BE (!pstr->mbs_allocated, 1))
781a7c91847Schristos     return re_string_fetch_byte (pstr);
782a7c91847Schristos 
783a7c91847Schristos #ifdef RE_ENABLE_I18N
784a7c91847Schristos   if (pstr->offsets_needed)
785a7c91847Schristos     {
786a7c91847Schristos       Idx off;
787a7c91847Schristos       int ch;
788a7c91847Schristos 
789a7c91847Schristos       /* For tr_TR.UTF-8 [[:islower:]] there is
790a7c91847Schristos 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
791a7c91847Schristos 	 in that case the whole multi-byte character and return
792a7c91847Schristos 	 the original letter.  On the other side, with
793a7c91847Schristos 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
794a7c91847Schristos 	 anything else would complicate things too much.  */
795a7c91847Schristos 
796a7c91847Schristos       if (!re_string_first_byte (pstr, pstr->cur_idx))
797a7c91847Schristos 	return re_string_fetch_byte (pstr);
798a7c91847Schristos 
799a7c91847Schristos       off = pstr->offsets[pstr->cur_idx];
800a7c91847Schristos       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
801a7c91847Schristos 
802a7c91847Schristos       if (! isascii (ch))
803a7c91847Schristos 	return re_string_fetch_byte (pstr);
804a7c91847Schristos 
805a7c91847Schristos       re_string_skip_bytes (pstr,
806a7c91847Schristos 			    re_string_char_size_at (pstr, pstr->cur_idx));
807a7c91847Schristos       return ch;
808a7c91847Schristos     }
809a7c91847Schristos #endif
810a7c91847Schristos 
811a7c91847Schristos   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
812a7c91847Schristos }
813a7c91847Schristos 
814a7c91847Schristos static void
815a7c91847Schristos internal_function
re_string_destruct(re_string_t * pstr)816a7c91847Schristos re_string_destruct (re_string_t *pstr)
817a7c91847Schristos {
818a7c91847Schristos #ifdef RE_ENABLE_I18N
819a7c91847Schristos   re_free (pstr->wcs);
820a7c91847Schristos   re_free (pstr->offsets);
821a7c91847Schristos #endif /* RE_ENABLE_I18N  */
822a7c91847Schristos   if (pstr->mbs_allocated)
823a7c91847Schristos     re_free (pstr->mbs);
824a7c91847Schristos }
825a7c91847Schristos 
826a7c91847Schristos /* Return the context at IDX in INPUT.  */
827a7c91847Schristos 
828a7c91847Schristos static unsigned int
829a7c91847Schristos internal_function
re_string_context_at(const re_string_t * input,Idx idx,int eflags)830a7c91847Schristos re_string_context_at (const re_string_t *input, Idx idx, int eflags)
831a7c91847Schristos {
832a7c91847Schristos   int c;
833a7c91847Schristos   if (BE (! REG_VALID_INDEX (idx), 0))
834a7c91847Schristos     /* In this case, we use the value stored in input->tip_context,
835a7c91847Schristos        since we can't know the character in input->mbs[-1] here.  */
836a7c91847Schristos     return input->tip_context;
837a7c91847Schristos   if (BE (idx == input->len, 0))
838a7c91847Schristos     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
839a7c91847Schristos 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
840a7c91847Schristos #ifdef RE_ENABLE_I18N
841a7c91847Schristos   if (input->mb_cur_max > 1)
842a7c91847Schristos     {
843a7c91847Schristos       wint_t wc;
844a7c91847Schristos       Idx wc_idx = idx;
845a7c91847Schristos       while(input->wcs[wc_idx] == WEOF)
846a7c91847Schristos 	{
847a7c91847Schristos #ifdef DEBUG
848a7c91847Schristos 	  /* It must not happen.  */
849a7c91847Schristos 	  assert (REG_VALID_INDEX (wc_idx));
850a7c91847Schristos #endif
851a7c91847Schristos 	  --wc_idx;
852a7c91847Schristos 	  if (! REG_VALID_INDEX (wc_idx))
853a7c91847Schristos 	    return input->tip_context;
854a7c91847Schristos 	}
855a7c91847Schristos       wc = input->wcs[wc_idx];
856a7c91847Schristos       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
857a7c91847Schristos 	return CONTEXT_WORD;
858a7c91847Schristos       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
859a7c91847Schristos 	      ? CONTEXT_NEWLINE : 0);
860a7c91847Schristos     }
861a7c91847Schristos   else
862a7c91847Schristos #endif
863a7c91847Schristos     {
864a7c91847Schristos       c = re_string_byte_at (input, idx);
865a7c91847Schristos       if (bitset_contain (input->word_char, c))
866a7c91847Schristos 	return CONTEXT_WORD;
867a7c91847Schristos       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
868a7c91847Schristos     }
869a7c91847Schristos }
870a7c91847Schristos 
871a7c91847Schristos /* Functions for set operation.  */
872a7c91847Schristos 
873a7c91847Schristos static reg_errcode_t
874a7c91847Schristos internal_function
re_node_set_alloc(re_node_set * set,Idx size)875a7c91847Schristos re_node_set_alloc (re_node_set *set, Idx size)
876a7c91847Schristos {
877a7c91847Schristos   set->alloc = size;
878a7c91847Schristos   set->nelem = 0;
879a7c91847Schristos   set->elems = re_xmalloc (Idx, size);
880a7c91847Schristos   if (BE (set->elems == NULL, 0))
881a7c91847Schristos     return REG_ESPACE;
882a7c91847Schristos   return REG_NOERROR;
883a7c91847Schristos }
884a7c91847Schristos 
885a7c91847Schristos static reg_errcode_t
886a7c91847Schristos internal_function
re_node_set_init_1(re_node_set * set,Idx elem)887a7c91847Schristos re_node_set_init_1 (re_node_set *set, Idx elem)
888a7c91847Schristos {
889a7c91847Schristos   set->alloc = 1;
890a7c91847Schristos   set->nelem = 1;
891a7c91847Schristos   set->elems = re_malloc (Idx, 1);
892a7c91847Schristos   if (BE (set->elems == NULL, 0))
893a7c91847Schristos     {
894a7c91847Schristos       set->alloc = set->nelem = 0;
895a7c91847Schristos       return REG_ESPACE;
896a7c91847Schristos     }
897a7c91847Schristos   set->elems[0] = elem;
898a7c91847Schristos   return REG_NOERROR;
899a7c91847Schristos }
900a7c91847Schristos 
901a7c91847Schristos static reg_errcode_t
902a7c91847Schristos internal_function
re_node_set_init_2(re_node_set * set,Idx elem1,Idx elem2)903a7c91847Schristos re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
904a7c91847Schristos {
905a7c91847Schristos   set->alloc = 2;
906a7c91847Schristos   set->elems = re_malloc (Idx, 2);
907a7c91847Schristos   if (BE (set->elems == NULL, 0))
908a7c91847Schristos     return REG_ESPACE;
909a7c91847Schristos   if (elem1 == elem2)
910a7c91847Schristos     {
911a7c91847Schristos       set->nelem = 1;
912a7c91847Schristos       set->elems[0] = elem1;
913a7c91847Schristos     }
914a7c91847Schristos   else
915a7c91847Schristos     {
916a7c91847Schristos       set->nelem = 2;
917a7c91847Schristos       if (elem1 < elem2)
918a7c91847Schristos 	{
919a7c91847Schristos 	  set->elems[0] = elem1;
920a7c91847Schristos 	  set->elems[1] = elem2;
921a7c91847Schristos 	}
922a7c91847Schristos       else
923a7c91847Schristos 	{
924a7c91847Schristos 	  set->elems[0] = elem2;
925a7c91847Schristos 	  set->elems[1] = elem1;
926a7c91847Schristos 	}
927a7c91847Schristos     }
928a7c91847Schristos   return REG_NOERROR;
929a7c91847Schristos }
930a7c91847Schristos 
931a7c91847Schristos static reg_errcode_t
932a7c91847Schristos internal_function
re_node_set_init_copy(re_node_set * dest,const re_node_set * src)933a7c91847Schristos re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
934a7c91847Schristos {
935a7c91847Schristos   dest->nelem = src->nelem;
936a7c91847Schristos   if (src->nelem > 0)
937a7c91847Schristos     {
938a7c91847Schristos       dest->alloc = dest->nelem;
939a7c91847Schristos       dest->elems = re_malloc (Idx, dest->alloc);
940a7c91847Schristos       if (BE (dest->elems == NULL, 0))
941a7c91847Schristos 	{
942a7c91847Schristos 	  dest->alloc = dest->nelem = 0;
943a7c91847Schristos 	  return REG_ESPACE;
944a7c91847Schristos 	}
945a7c91847Schristos       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
946a7c91847Schristos     }
947a7c91847Schristos   else
948a7c91847Schristos     re_node_set_init_empty (dest);
949a7c91847Schristos   return REG_NOERROR;
950a7c91847Schristos }
951a7c91847Schristos 
952a7c91847Schristos /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
953a7c91847Schristos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
954a7c91847Schristos    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
955a7c91847Schristos 
956a7c91847Schristos static reg_errcode_t
957a7c91847Schristos internal_function
re_node_set_add_intersect(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)958a7c91847Schristos re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
959a7c91847Schristos 			   const re_node_set *src2)
960a7c91847Schristos {
961a7c91847Schristos   Idx i1, i2, is, id, delta, sbase;
962a7c91847Schristos   if (src1->nelem == 0 || src2->nelem == 0)
963a7c91847Schristos     return REG_NOERROR;
964a7c91847Schristos 
965a7c91847Schristos   /* We need dest->nelem + 2 * elems_in_intersection; this is a
966a7c91847Schristos      conservative estimate.  */
967a7c91847Schristos   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
968a7c91847Schristos     {
969a7c91847Schristos       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
970a7c91847Schristos       Idx *new_elems;
971a7c91847Schristos       if (sizeof (Idx) < 3
972a7c91847Schristos 	  && (new_alloc < dest->alloc
973a7c91847Schristos 	      || ((Idx) (src1->nelem + src2->nelem) < src1->nelem)))
974a7c91847Schristos 	return REG_ESPACE;
975a7c91847Schristos       new_elems = re_xrealloc (dest->elems, Idx, new_alloc);
976a7c91847Schristos       if (BE (new_elems == NULL, 0))
977a7c91847Schristos         return REG_ESPACE;
978a7c91847Schristos       dest->elems = new_elems;
979a7c91847Schristos       dest->alloc = new_alloc;
980a7c91847Schristos     }
981a7c91847Schristos 
982a7c91847Schristos   /* Find the items in the intersection of SRC1 and SRC2, and copy
983a7c91847Schristos      into the top of DEST those that are not already in DEST itself.  */
984a7c91847Schristos   sbase = dest->nelem + src1->nelem + src2->nelem;
985a7c91847Schristos   i1 = src1->nelem - 1;
986a7c91847Schristos   i2 = src2->nelem - 1;
987a7c91847Schristos   id = dest->nelem - 1;
988a7c91847Schristos   for (;;)
989a7c91847Schristos     {
990a7c91847Schristos       if (src1->elems[i1] == src2->elems[i2])
991a7c91847Schristos 	{
992a7c91847Schristos 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
993a7c91847Schristos 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
994a7c91847Schristos 	    --id;
995a7c91847Schristos 
996a7c91847Schristos           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
997a7c91847Schristos             dest->elems[--sbase] = src1->elems[i1];
998a7c91847Schristos 
999a7c91847Schristos 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1000a7c91847Schristos 	    break;
1001a7c91847Schristos 	}
1002a7c91847Schristos 
1003a7c91847Schristos       /* Lower the highest of the two items.  */
1004a7c91847Schristos       else if (src1->elems[i1] < src2->elems[i2])
1005a7c91847Schristos 	{
1006a7c91847Schristos 	  if (! REG_VALID_INDEX (--i2))
1007a7c91847Schristos 	    break;
1008a7c91847Schristos 	}
1009a7c91847Schristos       else
1010a7c91847Schristos 	{
1011a7c91847Schristos 	  if (! REG_VALID_INDEX (--i1))
1012a7c91847Schristos 	    break;
1013a7c91847Schristos 	}
1014a7c91847Schristos     }
1015a7c91847Schristos 
1016a7c91847Schristos   id = dest->nelem - 1;
1017a7c91847Schristos   is = dest->nelem + src1->nelem + src2->nelem - 1;
1018a7c91847Schristos   delta = is - sbase + 1;
1019a7c91847Schristos 
1020a7c91847Schristos   /* Now copy.  When DELTA becomes zero, the remaining
1021a7c91847Schristos      DEST elements are already in place; this is more or
1022a7c91847Schristos      less the same loop that is in re_node_set_merge.  */
1023a7c91847Schristos   dest->nelem += delta;
1024a7c91847Schristos   if (delta > 0 && REG_VALID_INDEX (id))
1025a7c91847Schristos     for (;;)
1026a7c91847Schristos       {
1027a7c91847Schristos         if (dest->elems[is] > dest->elems[id])
1028a7c91847Schristos           {
1029a7c91847Schristos             /* Copy from the top.  */
1030a7c91847Schristos             dest->elems[id + delta--] = dest->elems[is--];
1031a7c91847Schristos             if (delta == 0)
1032a7c91847Schristos               break;
1033a7c91847Schristos           }
1034a7c91847Schristos         else
1035a7c91847Schristos           {
1036a7c91847Schristos             /* Slide from the bottom.  */
1037a7c91847Schristos             dest->elems[id + delta] = dest->elems[id];
1038a7c91847Schristos             if (! REG_VALID_INDEX (--id))
1039a7c91847Schristos               break;
1040a7c91847Schristos           }
1041a7c91847Schristos       }
1042a7c91847Schristos 
1043a7c91847Schristos   /* Copy remaining SRC elements.  */
1044a7c91847Schristos   memcpy (dest->elems, dest->elems + sbase, delta * sizeof dest->elems[0]);
1045a7c91847Schristos 
1046a7c91847Schristos   return REG_NOERROR;
1047a7c91847Schristos }
1048a7c91847Schristos 
1049a7c91847Schristos /* Calculate the union set of the sets SRC1 and SRC2. And store it to
1050a7c91847Schristos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1051a7c91847Schristos 
1052a7c91847Schristos static reg_errcode_t
1053a7c91847Schristos internal_function
re_node_set_init_union(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)1054a7c91847Schristos re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1055a7c91847Schristos 			const re_node_set *src2)
1056a7c91847Schristos {
1057a7c91847Schristos   Idx i1, i2, id;
1058a7c91847Schristos   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1059a7c91847Schristos     {
1060a7c91847Schristos       dest->alloc = src1->nelem + src2->nelem;
1061a7c91847Schristos       if (sizeof (Idx) < 2 && dest->alloc < src1->nelem)
1062a7c91847Schristos 	return REG_ESPACE;
1063a7c91847Schristos       dest->elems = re_xmalloc (Idx, dest->alloc);
1064a7c91847Schristos       if (BE (dest->elems == NULL, 0))
1065a7c91847Schristos 	return REG_ESPACE;
1066a7c91847Schristos     }
1067a7c91847Schristos   else
1068a7c91847Schristos     {
1069a7c91847Schristos       if (src1 != NULL && src1->nelem > 0)
1070a7c91847Schristos 	return re_node_set_init_copy (dest, src1);
1071a7c91847Schristos       else if (src2 != NULL && src2->nelem > 0)
1072a7c91847Schristos 	return re_node_set_init_copy (dest, src2);
1073a7c91847Schristos       else
1074a7c91847Schristos 	re_node_set_init_empty (dest);
1075a7c91847Schristos       return REG_NOERROR;
1076a7c91847Schristos     }
1077a7c91847Schristos   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1078a7c91847Schristos     {
1079a7c91847Schristos       if (src1->elems[i1] > src2->elems[i2])
1080a7c91847Schristos 	{
1081a7c91847Schristos 	  dest->elems[id++] = src2->elems[i2++];
1082a7c91847Schristos 	  continue;
1083a7c91847Schristos 	}
1084a7c91847Schristos       if (src1->elems[i1] == src2->elems[i2])
1085a7c91847Schristos 	++i2;
1086a7c91847Schristos       dest->elems[id++] = src1->elems[i1++];
1087a7c91847Schristos     }
1088a7c91847Schristos   if (i1 < src1->nelem)
1089a7c91847Schristos     {
1090a7c91847Schristos       memcpy (dest->elems + id, src1->elems + i1,
1091a7c91847Schristos 	     (src1->nelem - i1) * sizeof dest->elems[0]);
1092a7c91847Schristos       id += src1->nelem - i1;
1093a7c91847Schristos     }
1094a7c91847Schristos   else if (i2 < src2->nelem)
1095a7c91847Schristos     {
1096a7c91847Schristos       memcpy (dest->elems + id, src2->elems + i2,
1097a7c91847Schristos 	     (src2->nelem - i2) * sizeof dest->elems[0]);
1098a7c91847Schristos       id += src2->nelem - i2;
1099a7c91847Schristos     }
1100a7c91847Schristos   dest->nelem = id;
1101a7c91847Schristos   return REG_NOERROR;
1102a7c91847Schristos }
1103a7c91847Schristos 
1104a7c91847Schristos /* Calculate the union set of the sets DEST and SRC. And store it to
1105a7c91847Schristos    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
1106a7c91847Schristos 
1107a7c91847Schristos static reg_errcode_t
1108a7c91847Schristos internal_function
re_node_set_merge(re_node_set * dest,const re_node_set * src)1109a7c91847Schristos re_node_set_merge (re_node_set *dest, const re_node_set *src)
1110a7c91847Schristos {
1111a7c91847Schristos   Idx is, id, sbase, delta;
1112a7c91847Schristos   if (src == NULL || src->nelem == 0)
1113a7c91847Schristos     return REG_NOERROR;
1114a7c91847Schristos   if (sizeof (Idx) < 3
1115a7c91847Schristos       && ((Idx) (2 * src->nelem) < src->nelem
1116a7c91847Schristos 	  || (Idx) (2 * src->nelem + dest->nelem) < dest->nelem))
1117a7c91847Schristos     return REG_ESPACE;
1118a7c91847Schristos   if (dest->alloc < 2 * src->nelem + dest->nelem)
1119a7c91847Schristos     {
1120a7c91847Schristos       Idx new_alloc = src->nelem + dest->alloc;
1121a7c91847Schristos       Idx *new_buffer;
1122a7c91847Schristos       if (sizeof (Idx) < 4 && new_alloc < dest->alloc)
1123a7c91847Schristos 	return REG_ESPACE;
1124a7c91847Schristos       new_buffer = re_x2realloc (dest->elems, Idx, &new_alloc);
1125a7c91847Schristos       if (BE (new_buffer == NULL, 0))
1126a7c91847Schristos 	return REG_ESPACE;
1127a7c91847Schristos       dest->elems = new_buffer;
1128a7c91847Schristos       dest->alloc = new_alloc;
1129a7c91847Schristos     }
1130a7c91847Schristos 
1131a7c91847Schristos   if (BE (dest->nelem == 0, 0))
1132a7c91847Schristos     {
1133a7c91847Schristos       dest->nelem = src->nelem;
1134a7c91847Schristos       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
1135a7c91847Schristos       return REG_NOERROR;
1136a7c91847Schristos     }
1137a7c91847Schristos 
1138a7c91847Schristos   /* Copy into the top of DEST the items of SRC that are not
1139a7c91847Schristos      found in DEST.  Maybe we could binary search in DEST?  */
1140a7c91847Schristos   for (sbase = dest->nelem + 2 * src->nelem,
1141a7c91847Schristos        is = src->nelem - 1, id = dest->nelem - 1;
1142a7c91847Schristos        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1143a7c91847Schristos     {
1144a7c91847Schristos       if (dest->elems[id] == src->elems[is])
1145a7c91847Schristos         is--, id--;
1146a7c91847Schristos       else if (dest->elems[id] < src->elems[is])
1147a7c91847Schristos         dest->elems[--sbase] = src->elems[is--];
1148a7c91847Schristos       else /* if (dest->elems[id] > src->elems[is]) */
1149a7c91847Schristos         --id;
1150a7c91847Schristos     }
1151a7c91847Schristos 
1152a7c91847Schristos   if (REG_VALID_INDEX (is))
1153a7c91847Schristos     {
1154a7c91847Schristos       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
1155a7c91847Schristos       sbase -= is + 1;
1156a7c91847Schristos       memcpy (dest->elems + sbase, src->elems,
1157a7c91847Schristos 	      (is + 1) * sizeof dest->elems[0]);
1158a7c91847Schristos     }
1159a7c91847Schristos 
1160a7c91847Schristos   id = dest->nelem - 1;
1161a7c91847Schristos   is = dest->nelem + 2 * src->nelem - 1;
1162a7c91847Schristos   delta = is - sbase + 1;
1163a7c91847Schristos   if (delta == 0)
1164a7c91847Schristos     return REG_NOERROR;
1165a7c91847Schristos 
1166a7c91847Schristos   /* Now copy.  When DELTA becomes zero, the remaining
1167a7c91847Schristos      DEST elements are already in place.  */
1168a7c91847Schristos   dest->nelem += delta;
1169a7c91847Schristos   for (;;)
1170a7c91847Schristos     {
1171a7c91847Schristos       if (dest->elems[is] > dest->elems[id])
1172a7c91847Schristos         {
1173a7c91847Schristos 	  /* Copy from the top.  */
1174a7c91847Schristos           dest->elems[id + delta--] = dest->elems[is--];
1175a7c91847Schristos 	  if (delta == 0)
1176a7c91847Schristos 	    break;
1177a7c91847Schristos 	}
1178a7c91847Schristos       else
1179a7c91847Schristos         {
1180a7c91847Schristos           /* Slide from the bottom.  */
1181a7c91847Schristos           dest->elems[id + delta] = dest->elems[id];
1182a7c91847Schristos 	  if (! REG_VALID_INDEX (--id))
1183a7c91847Schristos 	    {
1184a7c91847Schristos 	      /* Copy remaining SRC elements.  */
1185a7c91847Schristos 	      memcpy (dest->elems, dest->elems + sbase,
1186a7c91847Schristos 	              delta * sizeof dest->elems[0]);
1187a7c91847Schristos 	      break;
1188a7c91847Schristos 	    }
1189a7c91847Schristos 	}
1190a7c91847Schristos     }
1191a7c91847Schristos 
1192a7c91847Schristos   return REG_NOERROR;
1193a7c91847Schristos }
1194a7c91847Schristos 
1195a7c91847Schristos /* Insert the new element ELEM to the re_node_set* SET.
1196a7c91847Schristos    SET should not already have ELEM.
1197a7c91847Schristos    Return true if successful.  */
1198a7c91847Schristos 
1199a7c91847Schristos static bool
1200a7c91847Schristos internal_function
re_node_set_insert(re_node_set * set,Idx elem)1201a7c91847Schristos re_node_set_insert (re_node_set *set, Idx elem)
1202a7c91847Schristos {
1203a7c91847Schristos   Idx idx;
1204a7c91847Schristos   /* In case the set is empty.  */
1205a7c91847Schristos   if (set->alloc == 0)
1206a7c91847Schristos     return re_node_set_init_1 (set, elem) == REG_NOERROR;
1207a7c91847Schristos 
1208a7c91847Schristos   if (BE (set->nelem, 0) == 0)
1209a7c91847Schristos     {
1210a7c91847Schristos       /* We already guaranteed above that set->alloc != 0.  */
1211a7c91847Schristos       set->elems[0] = elem;
1212a7c91847Schristos       ++set->nelem;
1213a7c91847Schristos       return true;
1214a7c91847Schristos     }
1215a7c91847Schristos 
1216a7c91847Schristos   /* Realloc if we need.  */
1217a7c91847Schristos   if (set->alloc == set->nelem)
1218a7c91847Schristos     {
1219a7c91847Schristos       Idx *new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
1220a7c91847Schristos       if (BE (new_elems == NULL, 0))
1221a7c91847Schristos 	return false;
1222a7c91847Schristos       set->elems = new_elems;
1223a7c91847Schristos     }
1224a7c91847Schristos 
1225a7c91847Schristos   /* Move the elements which follows the new element.  Test the
1226a7c91847Schristos      first element separately to skip a check in the inner loop.  */
1227a7c91847Schristos   if (elem < set->elems[0])
1228a7c91847Schristos     {
1229a7c91847Schristos       idx = 0;
1230a7c91847Schristos       for (idx = set->nelem; idx > 0; idx--)
1231a7c91847Schristos         set->elems[idx] = set->elems[idx - 1];
1232a7c91847Schristos     }
1233a7c91847Schristos   else
1234a7c91847Schristos     {
1235a7c91847Schristos       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1236a7c91847Schristos         set->elems[idx] = set->elems[idx - 1];
1237a7c91847Schristos     }
1238a7c91847Schristos 
1239a7c91847Schristos   /* Insert the new element.  */
1240a7c91847Schristos   set->elems[idx] = elem;
1241a7c91847Schristos   ++set->nelem;
1242a7c91847Schristos   return true;
1243a7c91847Schristos }
1244a7c91847Schristos 
1245a7c91847Schristos /* Insert the new element ELEM to the re_node_set* SET.
1246a7c91847Schristos    SET should not already have any element greater than or equal to ELEM.
1247a7c91847Schristos    Return true if successful.  */
1248a7c91847Schristos 
1249a7c91847Schristos static bool
1250a7c91847Schristos internal_function
re_node_set_insert_last(re_node_set * set,Idx elem)1251a7c91847Schristos re_node_set_insert_last (re_node_set *set, Idx elem)
1252a7c91847Schristos {
1253a7c91847Schristos   /* Realloc if we need.  */
1254a7c91847Schristos   if (set->alloc == set->nelem)
1255a7c91847Schristos     {
1256a7c91847Schristos       Idx *new_elems;
1257a7c91847Schristos       new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
1258a7c91847Schristos       if (BE (new_elems == NULL, 0))
1259a7c91847Schristos 	return false;
1260a7c91847Schristos       set->elems = new_elems;
1261a7c91847Schristos     }
1262a7c91847Schristos 
1263a7c91847Schristos   /* Insert the new element.  */
1264a7c91847Schristos   set->elems[set->nelem++] = elem;
1265a7c91847Schristos   return true;
1266a7c91847Schristos }
1267a7c91847Schristos 
1268a7c91847Schristos /* Compare two node sets SET1 and SET2.
1269a7c91847Schristos    Return true if SET1 and SET2 are equivalent.  */
1270a7c91847Schristos 
1271a7c91847Schristos static bool
internal_function(pure)1272a7c91847Schristos internal_function __attribute ((pure))
1273a7c91847Schristos re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1274a7c91847Schristos {
1275a7c91847Schristos   Idx i;
1276a7c91847Schristos   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1277a7c91847Schristos     return false;
1278a7c91847Schristos   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1279a7c91847Schristos     if (set1->elems[i] != set2->elems[i])
1280a7c91847Schristos       return false;
1281a7c91847Schristos   return true;
1282a7c91847Schristos }
1283a7c91847Schristos 
1284a7c91847Schristos /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
1285a7c91847Schristos 
1286a7c91847Schristos static Idx
internal_function(pure)1287a7c91847Schristos internal_function __attribute ((pure))
1288a7c91847Schristos re_node_set_contains (const re_node_set *set, Idx elem)
1289a7c91847Schristos {
1290a7c91847Schristos   __re_size_t idx, right, mid;
1291a7c91847Schristos   if (! REG_VALID_NONZERO_INDEX (set->nelem))
1292a7c91847Schristos     return 0;
1293a7c91847Schristos 
1294a7c91847Schristos   /* Binary search the element.  */
1295a7c91847Schristos   idx = 0;
1296a7c91847Schristos   right = set->nelem - 1;
1297a7c91847Schristos   while (idx < right)
1298a7c91847Schristos     {
1299a7c91847Schristos       mid = (idx + right) / 2;
1300a7c91847Schristos       if (set->elems[mid] < elem)
1301a7c91847Schristos 	idx = mid + 1;
1302a7c91847Schristos       else
1303a7c91847Schristos 	right = mid;
1304a7c91847Schristos     }
1305a7c91847Schristos   return set->elems[idx] == elem ? idx + 1 : 0;
1306a7c91847Schristos }
1307a7c91847Schristos 
1308a7c91847Schristos static void
1309a7c91847Schristos internal_function
re_node_set_remove_at(re_node_set * set,Idx idx)1310a7c91847Schristos re_node_set_remove_at (re_node_set *set, Idx idx)
1311a7c91847Schristos {
1312a7c91847Schristos   if (idx < 0 || idx >= set->nelem)
1313a7c91847Schristos     return;
1314a7c91847Schristos   --set->nelem;
1315a7c91847Schristos   for (; idx < set->nelem; idx++)
1316a7c91847Schristos     set->elems[idx] = set->elems[idx + 1];
1317a7c91847Schristos }
1318a7c91847Schristos 
1319a7c91847Schristos 
1320a7c91847Schristos /* Add the token TOKEN to dfa->nodes, and return the index of the token.
1321a7c91847Schristos    Or return REG_MISSING if an error occurred.  */
1322a7c91847Schristos 
1323a7c91847Schristos static Idx
1324a7c91847Schristos internal_function
re_dfa_add_node(re_dfa_t * dfa,re_token_t token)1325a7c91847Schristos re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1326a7c91847Schristos {
1327a7c91847Schristos   int type = token.type;
1328a7c91847Schristos   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1329a7c91847Schristos     {
1330a7c91847Schristos       Idx new_nodes_alloc = dfa->nodes_alloc;
1331a7c91847Schristos       Idx *new_nexts, *new_indices;
1332a7c91847Schristos       re_node_set *new_edests, *new_eclosures;
1333a7c91847Schristos 
1334a7c91847Schristos       re_token_t *new_nodes = re_x2realloc (dfa->nodes, re_token_t,
1335a7c91847Schristos 					    &new_nodes_alloc);
1336a7c91847Schristos       if (BE (new_nodes == NULL, 0))
1337a7c91847Schristos 	return REG_MISSING;
1338a7c91847Schristos       dfa->nodes = new_nodes;
1339a7c91847Schristos       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1340a7c91847Schristos       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1341a7c91847Schristos       new_edests = re_xrealloc (dfa->edests, re_node_set, new_nodes_alloc);
1342a7c91847Schristos       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1343a7c91847Schristos       if (BE (new_nexts == NULL || new_indices == NULL
1344a7c91847Schristos 	      || new_edests == NULL || new_eclosures == NULL, 0))
1345a7c91847Schristos 	return REG_MISSING;
1346a7c91847Schristos       dfa->nexts = new_nexts;
1347a7c91847Schristos       dfa->org_indices = new_indices;
1348a7c91847Schristos       dfa->edests = new_edests;
1349a7c91847Schristos       dfa->eclosures = new_eclosures;
1350a7c91847Schristos       dfa->nodes_alloc = new_nodes_alloc;
1351a7c91847Schristos     }
1352a7c91847Schristos   dfa->nodes[dfa->nodes_len] = token;
1353a7c91847Schristos   dfa->nodes[dfa->nodes_len].constraint = 0;
1354a7c91847Schristos #ifdef RE_ENABLE_I18N
1355a7c91847Schristos   dfa->nodes[dfa->nodes_len].accept_mb =
1356a7c91847Schristos     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1357a7c91847Schristos #endif
1358a7c91847Schristos   dfa->nexts[dfa->nodes_len] = REG_MISSING;
1359a7c91847Schristos   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1360a7c91847Schristos   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1361a7c91847Schristos   return dfa->nodes_len++;
1362a7c91847Schristos }
1363a7c91847Schristos 
1364a7c91847Schristos static inline re_hashval_t
1365a7c91847Schristos internal_function
calc_state_hash(const re_node_set * nodes,unsigned int context)1366a7c91847Schristos calc_state_hash (const re_node_set *nodes, unsigned int context)
1367a7c91847Schristos {
1368a7c91847Schristos   re_hashval_t hash = nodes->nelem + context;
1369a7c91847Schristos   Idx i;
1370a7c91847Schristos   for (i = 0 ; i < nodes->nelem ; i++)
1371a7c91847Schristos     hash += nodes->elems[i];
1372a7c91847Schristos   return hash;
1373a7c91847Schristos }
1374a7c91847Schristos 
1375a7c91847Schristos /* Search for the state whose node_set is equivalent to NODES.
1376a7c91847Schristos    Return the pointer to the state, if we found it in the DFA.
1377a7c91847Schristos    Otherwise create the new one and return it.  In case of an error
1378a7c91847Schristos    return NULL and set the error code in ERR.
1379a7c91847Schristos    Note: - We assume NULL as the invalid state, then it is possible that
1380a7c91847Schristos 	   return value is NULL and ERR is REG_NOERROR.
1381a7c91847Schristos 	 - We never return non-NULL value in case of any errors, it is for
1382a7c91847Schristos 	   optimization.  */
1383a7c91847Schristos 
1384a7c91847Schristos static re_dfastate_t*
1385a7c91847Schristos internal_function
re_acquire_state(reg_errcode_t * err,re_dfa_t * dfa,const re_node_set * nodes)1386a7c91847Schristos re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes)
1387a7c91847Schristos {
1388a7c91847Schristos   re_hashval_t hash;
1389a7c91847Schristos   re_dfastate_t *new_state;
1390a7c91847Schristos   struct re_state_table_entry *spot;
1391a7c91847Schristos   Idx i;
1392a7c91847Schristos #ifdef lint
1393a7c91847Schristos   /* Suppress bogus uninitialized-variable warnings.  */
1394a7c91847Schristos   *err = REG_NOERROR;
1395a7c91847Schristos #endif
1396a7c91847Schristos   if (BE (nodes->nelem == 0, 0))
1397a7c91847Schristos     {
1398a7c91847Schristos       *err = REG_NOERROR;
1399a7c91847Schristos       return NULL;
1400a7c91847Schristos     }
1401a7c91847Schristos   hash = calc_state_hash (nodes, 0);
1402a7c91847Schristos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1403a7c91847Schristos 
1404a7c91847Schristos   for (i = 0 ; i < spot->num ; i++)
1405a7c91847Schristos     {
1406a7c91847Schristos       re_dfastate_t *state = spot->array[i];
1407a7c91847Schristos       if (hash != state->hash)
1408a7c91847Schristos 	continue;
1409a7c91847Schristos       if (re_node_set_compare (&state->nodes, nodes))
1410a7c91847Schristos 	return state;
1411a7c91847Schristos     }
1412a7c91847Schristos 
1413a7c91847Schristos   /* There are no appropriate state in the dfa, create the new one.  */
1414a7c91847Schristos   new_state = create_ci_newstate (dfa, nodes, hash);
1415a7c91847Schristos   if (BE (new_state != NULL, 1))
1416a7c91847Schristos     return new_state;
1417a7c91847Schristos   else
1418a7c91847Schristos     {
1419a7c91847Schristos       *err = REG_ESPACE;
1420a7c91847Schristos       return NULL;
1421a7c91847Schristos     }
1422a7c91847Schristos }
1423a7c91847Schristos 
1424a7c91847Schristos /* Search for the state whose node_set is equivalent to NODES and
1425a7c91847Schristos    whose context is equivalent to CONTEXT.
1426a7c91847Schristos    Return the pointer to the state, if we found it in the DFA.
1427a7c91847Schristos    Otherwise create the new one and return it.  In case of an error
1428a7c91847Schristos    return NULL and set the error code in ERR.
1429a7c91847Schristos    Note: - We assume NULL as the invalid state, then it is possible that
1430a7c91847Schristos 	   return value is NULL and ERR is REG_NOERROR.
1431a7c91847Schristos 	 - We never return non-NULL value in case of any errors, it is for
1432a7c91847Schristos 	   optimization.  */
1433a7c91847Schristos 
1434a7c91847Schristos static re_dfastate_t*
1435a7c91847Schristos internal_function
re_acquire_state_context(reg_errcode_t * err,re_dfa_t * dfa,const re_node_set * nodes,unsigned int context)1436a7c91847Schristos re_acquire_state_context (reg_errcode_t *err, re_dfa_t *dfa,
1437a7c91847Schristos 			  const re_node_set *nodes, unsigned int context)
1438a7c91847Schristos {
1439a7c91847Schristos   re_hashval_t hash;
1440a7c91847Schristos   re_dfastate_t *new_state;
1441a7c91847Schristos   struct re_state_table_entry *spot;
1442a7c91847Schristos   Idx i;
1443a7c91847Schristos #ifdef lint
1444a7c91847Schristos   /* Suppress bogus uninitialized-variable warnings.  */
1445a7c91847Schristos   *err = REG_NOERROR;
1446a7c91847Schristos #endif
1447a7c91847Schristos   if (nodes->nelem == 0)
1448a7c91847Schristos     {
1449a7c91847Schristos       *err = REG_NOERROR;
1450a7c91847Schristos       return NULL;
1451a7c91847Schristos     }
1452a7c91847Schristos   hash = calc_state_hash (nodes, context);
1453a7c91847Schristos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1454a7c91847Schristos 
1455a7c91847Schristos   for (i = 0 ; i < spot->num ; i++)
1456a7c91847Schristos     {
1457a7c91847Schristos       re_dfastate_t *state = spot->array[i];
1458a7c91847Schristos       if (state->hash == hash
1459a7c91847Schristos 	  && state->context == context
1460a7c91847Schristos 	  && re_node_set_compare (state->entrance_nodes, nodes))
1461a7c91847Schristos 	return state;
1462a7c91847Schristos     }
1463a7c91847Schristos   /* There are no appropriate state in `dfa', create the new one.  */
1464a7c91847Schristos   new_state = create_cd_newstate (dfa, nodes, context, hash);
1465a7c91847Schristos   if (BE (new_state != NULL, 1))
1466a7c91847Schristos     return new_state;
1467a7c91847Schristos   else
1468a7c91847Schristos     {
1469a7c91847Schristos       *err = REG_ESPACE;
1470a7c91847Schristos       return NULL;
1471a7c91847Schristos     }
1472a7c91847Schristos }
1473a7c91847Schristos 
1474a7c91847Schristos /* Finish initialization of the new state NEWSTATE, and using its hash value
1475a7c91847Schristos    HASH put in the appropriate bucket of DFA's state table.  Return value
1476a7c91847Schristos    indicates the error code if failed.  */
1477a7c91847Schristos 
1478a7c91847Schristos static reg_errcode_t
1479a7c91847Schristos internal_function
register_state(const re_dfa_t * dfa,re_dfastate_t * newstate,re_hashval_t hash)1480a7c91847Schristos register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash)
1481a7c91847Schristos {
1482a7c91847Schristos   struct re_state_table_entry *spot;
1483a7c91847Schristos   reg_errcode_t err;
1484a7c91847Schristos   Idx i;
1485a7c91847Schristos 
1486a7c91847Schristos   newstate->hash = hash;
1487a7c91847Schristos   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1488a7c91847Schristos   if (BE (err != REG_NOERROR, 0))
1489a7c91847Schristos     return REG_ESPACE;
1490a7c91847Schristos   for (i = 0; i < newstate->nodes.nelem; i++)
1491a7c91847Schristos     {
1492a7c91847Schristos       Idx elem = newstate->nodes.elems[i];
1493a7c91847Schristos       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1494a7c91847Schristos 	{
1495a7c91847Schristos 	  bool ok = re_node_set_insert_last (&newstate->non_eps_nodes, elem);
1496a7c91847Schristos 	  if (BE (! ok, 0))
1497a7c91847Schristos 	    return REG_ESPACE;
1498a7c91847Schristos 	}
1499a7c91847Schristos     }
1500a7c91847Schristos 
1501a7c91847Schristos   spot = dfa->state_table + (hash & dfa->state_hash_mask);
1502a7c91847Schristos   if (BE (spot->alloc <= spot->num, 0))
1503a7c91847Schristos     {
1504a7c91847Schristos       Idx new_alloc = spot->num;
1505a7c91847Schristos       re_dfastate_t **new_array = re_x2realloc (spot->array, re_dfastate_t *,
1506a7c91847Schristos 						&new_alloc);
1507a7c91847Schristos       if (BE (new_array == NULL, 0))
1508a7c91847Schristos 	return REG_ESPACE;
1509a7c91847Schristos       spot->array = new_array;
1510a7c91847Schristos       spot->alloc = new_alloc;
1511a7c91847Schristos     }
1512a7c91847Schristos   spot->array[spot->num++] = newstate;
1513a7c91847Schristos   return REG_NOERROR;
1514a7c91847Schristos }
1515a7c91847Schristos 
1516a7c91847Schristos /* Create the new state which is independ of contexts.
1517a7c91847Schristos    Return the new state if succeeded, otherwise return NULL.  */
1518a7c91847Schristos 
1519a7c91847Schristos static re_dfastate_t *
1520a7c91847Schristos internal_function
create_ci_newstate(const re_dfa_t * dfa,const re_node_set * nodes,re_hashval_t hash)1521a7c91847Schristos create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1522a7c91847Schristos 		    re_hashval_t hash)
1523a7c91847Schristos {
1524a7c91847Schristos   Idx i;
1525a7c91847Schristos   reg_errcode_t err;
1526a7c91847Schristos   re_dfastate_t *newstate;
1527a7c91847Schristos 
1528a7c91847Schristos   newstate = re_calloc (re_dfastate_t, 1);
1529a7c91847Schristos   if (BE (newstate == NULL, 0))
1530a7c91847Schristos     return NULL;
1531a7c91847Schristos   err = re_node_set_init_copy (&newstate->nodes, nodes);
1532a7c91847Schristos   if (BE (err != REG_NOERROR, 0))
1533a7c91847Schristos     {
1534a7c91847Schristos       re_free (newstate);
1535a7c91847Schristos       return NULL;
1536a7c91847Schristos     }
1537a7c91847Schristos 
1538a7c91847Schristos   newstate->entrance_nodes = &newstate->nodes;
1539a7c91847Schristos   for (i = 0 ; i < nodes->nelem ; i++)
1540a7c91847Schristos     {
1541a7c91847Schristos       re_token_t *node = dfa->nodes + nodes->elems[i];
1542a7c91847Schristos       re_token_type_t type = node->type;
1543a7c91847Schristos       if (type == CHARACTER && !node->constraint)
1544a7c91847Schristos 	continue;
1545a7c91847Schristos #ifdef RE_ENABLE_I18N
1546a7c91847Schristos       newstate->accept_mb |= node->accept_mb;
1547a7c91847Schristos #endif /* RE_ENABLE_I18N */
1548a7c91847Schristos 
1549a7c91847Schristos       /* If the state has the halt node, the state is a halt state.  */
1550a7c91847Schristos       if (type == END_OF_RE)
1551a7c91847Schristos 	newstate->halt = 1;
1552a7c91847Schristos       else if (type == OP_BACK_REF)
1553a7c91847Schristos 	newstate->has_backref = 1;
1554a7c91847Schristos       else if (type == ANCHOR || node->constraint)
1555a7c91847Schristos 	newstate->has_constraint = 1;
1556a7c91847Schristos     }
1557a7c91847Schristos   err = register_state (dfa, newstate, hash);
1558a7c91847Schristos   if (BE (err != REG_NOERROR, 0))
1559a7c91847Schristos     {
1560a7c91847Schristos       free_state (newstate);
1561a7c91847Schristos       newstate = NULL;
1562a7c91847Schristos     }
1563a7c91847Schristos   return newstate;
1564a7c91847Schristos }
1565a7c91847Schristos 
1566a7c91847Schristos /* Create the new state which is depend on the context CONTEXT.
1567a7c91847Schristos    Return the new state if succeeded, otherwise return NULL.  */
1568a7c91847Schristos 
1569a7c91847Schristos static re_dfastate_t *
1570a7c91847Schristos internal_function
create_cd_newstate(const re_dfa_t * dfa,const re_node_set * nodes,unsigned int context,re_hashval_t hash)1571a7c91847Schristos create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1572a7c91847Schristos 		    unsigned int context, re_hashval_t hash)
1573a7c91847Schristos {
1574a7c91847Schristos   Idx i, nctx_nodes = 0;
1575a7c91847Schristos   reg_errcode_t err;
1576a7c91847Schristos   re_dfastate_t *newstate;
1577a7c91847Schristos 
1578a7c91847Schristos   newstate = re_calloc (re_dfastate_t, 1);
1579a7c91847Schristos   if (BE (newstate == NULL, 0))
1580a7c91847Schristos     return NULL;
1581a7c91847Schristos   err = re_node_set_init_copy (&newstate->nodes, nodes);
1582a7c91847Schristos   if (BE (err != REG_NOERROR, 0))
1583a7c91847Schristos     {
1584a7c91847Schristos       re_free (newstate);
1585a7c91847Schristos       return NULL;
1586a7c91847Schristos     }
1587a7c91847Schristos 
1588a7c91847Schristos   newstate->context = context;
1589a7c91847Schristos   newstate->entrance_nodes = &newstate->nodes;
1590a7c91847Schristos 
1591a7c91847Schristos   for (i = 0 ; i < nodes->nelem ; i++)
1592a7c91847Schristos     {
1593a7c91847Schristos       unsigned int constraint = 0;
1594a7c91847Schristos       re_token_t *node = dfa->nodes + nodes->elems[i];
1595a7c91847Schristos       re_token_type_t type = node->type;
1596a7c91847Schristos       if (node->constraint)
1597a7c91847Schristos 	constraint = node->constraint;
1598a7c91847Schristos 
1599a7c91847Schristos       if (type == CHARACTER && !constraint)
1600a7c91847Schristos 	continue;
1601a7c91847Schristos #ifdef RE_ENABLE_I18N
1602a7c91847Schristos       newstate->accept_mb |= node->accept_mb;
1603a7c91847Schristos #endif /* RE_ENABLE_I18N */
1604a7c91847Schristos 
1605a7c91847Schristos       /* If the state has the halt node, the state is a halt state.  */
1606a7c91847Schristos       if (type == END_OF_RE)
1607a7c91847Schristos 	newstate->halt = 1;
1608a7c91847Schristos       else if (type == OP_BACK_REF)
1609a7c91847Schristos 	newstate->has_backref = 1;
1610a7c91847Schristos       else if (type == ANCHOR)
1611a7c91847Schristos 	constraint = node->opr.ctx_type;
1612a7c91847Schristos 
1613a7c91847Schristos       if (constraint)
1614a7c91847Schristos 	{
1615a7c91847Schristos 	  if (newstate->entrance_nodes == &newstate->nodes)
1616a7c91847Schristos 	    {
1617a7c91847Schristos 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
1618a7c91847Schristos 	      if (BE (newstate->entrance_nodes == NULL, 0))
1619a7c91847Schristos 		{
1620a7c91847Schristos 		  free_state (newstate);
1621a7c91847Schristos 		  return NULL;
1622a7c91847Schristos 		}
1623a7c91847Schristos 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
1624a7c91847Schristos 	      nctx_nodes = 0;
1625a7c91847Schristos 	      newstate->has_constraint = 1;
1626a7c91847Schristos 	    }
1627a7c91847Schristos 
1628a7c91847Schristos 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1629a7c91847Schristos 	    {
1630a7c91847Schristos 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1631a7c91847Schristos 	      ++nctx_nodes;
1632a7c91847Schristos 	    }
1633a7c91847Schristos 	}
1634a7c91847Schristos     }
1635a7c91847Schristos   err = register_state (dfa, newstate, hash);
1636a7c91847Schristos   if (BE (err != REG_NOERROR, 0))
1637a7c91847Schristos     {
1638a7c91847Schristos       free_state (newstate);
1639a7c91847Schristos       newstate = NULL;
1640a7c91847Schristos     }
1641a7c91847Schristos   return  newstate;
1642a7c91847Schristos }
1643a7c91847Schristos 
1644a7c91847Schristos static void
1645a7c91847Schristos internal_function
free_state(re_dfastate_t * state)1646a7c91847Schristos free_state (re_dfastate_t *state)
1647a7c91847Schristos {
1648a7c91847Schristos   re_node_set_free (&state->non_eps_nodes);
1649a7c91847Schristos   re_node_set_free (&state->inveclosure);
1650a7c91847Schristos   if (state->entrance_nodes != &state->nodes)
1651a7c91847Schristos     {
1652a7c91847Schristos       re_node_set_free (state->entrance_nodes);
1653a7c91847Schristos       re_free (state->entrance_nodes);
1654a7c91847Schristos     }
1655a7c91847Schristos   re_node_set_free (&state->nodes);
1656a7c91847Schristos   re_free (state->word_trtable);
1657a7c91847Schristos   re_free (state->trtable);
1658a7c91847Schristos   re_free (state);
1659a7c91847Schristos }
1660