xref: /dflybsd-src/contrib/grep/lib/regex_internal.c (revision cf28ed851948818eef148d616edca961f756ade8)
195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*cf28ed85SJohn Marino    Copyright (C) 2002-2012 Free Software Foundation, Inc.
395b7b453SJohn Marino    This file is part of the GNU C Library.
495b7b453SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino 
695b7b453SJohn Marino    This program is free software; you can redistribute it and/or modify
795b7b453SJohn Marino    it under the terms of the GNU General Public License as published by
895b7b453SJohn Marino    the Free Software Foundation; either version 3, or (at your option)
995b7b453SJohn Marino    any later version.
1095b7b453SJohn Marino 
1195b7b453SJohn Marino    This program is distributed in the hope that it will be useful,
1295b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
1395b7b453SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1495b7b453SJohn Marino    GNU General Public License for more details.
1595b7b453SJohn Marino 
1695b7b453SJohn Marino    You should have received a copy of the GNU General Public License along
17*cf28ed85SJohn Marino    with this program; if not, see <http://www.gnu.org/licenses/>.  */
1895b7b453SJohn Marino 
1995b7b453SJohn Marino #include "verify.h"
2095b7b453SJohn Marino #include "intprops.h"
2195b7b453SJohn Marino static void re_string_construct_common (const char *str, Idx len,
2295b7b453SJohn Marino 					re_string_t *pstr,
2395b7b453SJohn Marino 					RE_TRANSLATE_TYPE trans, bool icase,
2495b7b453SJohn Marino 					const re_dfa_t *dfa) internal_function;
2595b7b453SJohn Marino static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
2695b7b453SJohn Marino 					  const re_node_set *nodes,
2795b7b453SJohn Marino 					  re_hashval_t hash) internal_function;
2895b7b453SJohn Marino static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
2995b7b453SJohn Marino 					  const re_node_set *nodes,
3095b7b453SJohn Marino 					  unsigned int context,
3195b7b453SJohn Marino 					  re_hashval_t hash) internal_function;
3295b7b453SJohn Marino 
3395b7b453SJohn Marino /* Functions for string operation.  */
3495b7b453SJohn Marino 
3595b7b453SJohn Marino /* This function allocate the buffers.  It is necessary to call
3695b7b453SJohn Marino    re_string_reconstruct before using the object.  */
3795b7b453SJohn Marino 
3895b7b453SJohn Marino static reg_errcode_t
3995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
4095b7b453SJohn Marino re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
4195b7b453SJohn Marino 		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
4295b7b453SJohn Marino {
4395b7b453SJohn Marino   reg_errcode_t ret;
4495b7b453SJohn Marino   Idx init_buf_len;
4595b7b453SJohn Marino 
4695b7b453SJohn Marino   /* Ensure at least one character fits into the buffers.  */
4795b7b453SJohn Marino   if (init_len < dfa->mb_cur_max)
4895b7b453SJohn Marino     init_len = dfa->mb_cur_max;
4995b7b453SJohn Marino   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
5095b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
5195b7b453SJohn Marino 
5295b7b453SJohn Marino   ret = re_string_realloc_buffers (pstr, init_buf_len);
5395b7b453SJohn Marino   if (BE (ret != REG_NOERROR, 0))
5495b7b453SJohn Marino     return ret;
5595b7b453SJohn Marino 
5695b7b453SJohn Marino   pstr->word_char = dfa->word_char;
5795b7b453SJohn Marino   pstr->word_ops_used = dfa->word_ops_used;
5895b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
5995b7b453SJohn Marino   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
6095b7b453SJohn Marino   pstr->valid_raw_len = pstr->valid_len;
6195b7b453SJohn Marino   return REG_NOERROR;
6295b7b453SJohn Marino }
6395b7b453SJohn Marino 
6495b7b453SJohn Marino /* This function allocate the buffers, and initialize them.  */
6595b7b453SJohn Marino 
6695b7b453SJohn Marino static reg_errcode_t
6795b7b453SJohn Marino internal_function __attribute_warn_unused_result__
6895b7b453SJohn Marino re_string_construct (re_string_t *pstr, const char *str, Idx len,
6995b7b453SJohn Marino 		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
7095b7b453SJohn Marino {
7195b7b453SJohn Marino   reg_errcode_t ret;
7295b7b453SJohn Marino   memset (pstr, '\0', sizeof (re_string_t));
7395b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
7495b7b453SJohn Marino 
7595b7b453SJohn Marino   if (len > 0)
7695b7b453SJohn Marino     {
7795b7b453SJohn Marino       ret = re_string_realloc_buffers (pstr, len + 1);
7895b7b453SJohn Marino       if (BE (ret != REG_NOERROR, 0))
7995b7b453SJohn Marino 	return ret;
8095b7b453SJohn Marino     }
8195b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
8295b7b453SJohn Marino 
8395b7b453SJohn Marino   if (icase)
8495b7b453SJohn Marino     {
8595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
8695b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
8795b7b453SJohn Marino 	{
8895b7b453SJohn Marino 	  while (1)
8995b7b453SJohn Marino 	    {
9095b7b453SJohn Marino 	      ret = build_wcs_upper_buffer (pstr);
9195b7b453SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9295b7b453SJohn Marino 		return ret;
9395b7b453SJohn Marino 	      if (pstr->valid_raw_len >= len)
9495b7b453SJohn Marino 		break;
9595b7b453SJohn Marino 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
9695b7b453SJohn Marino 		break;
9795b7b453SJohn Marino 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
9895b7b453SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9995b7b453SJohn Marino 		return ret;
10095b7b453SJohn Marino 	    }
10195b7b453SJohn Marino 	}
10295b7b453SJohn Marino       else
10395b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
10495b7b453SJohn Marino 	build_upper_buffer (pstr);
10595b7b453SJohn Marino     }
10695b7b453SJohn Marino   else
10795b7b453SJohn Marino     {
10895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
10995b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
11095b7b453SJohn Marino 	build_wcs_buffer (pstr);
11195b7b453SJohn Marino       else
11295b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
11395b7b453SJohn Marino 	{
11495b7b453SJohn Marino 	  if (trans != NULL)
11595b7b453SJohn Marino 	    re_string_translate_buffer (pstr);
11695b7b453SJohn Marino 	  else
11795b7b453SJohn Marino 	    {
11895b7b453SJohn Marino 	      pstr->valid_len = pstr->bufs_len;
11995b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->bufs_len;
12095b7b453SJohn Marino 	    }
12195b7b453SJohn Marino 	}
12295b7b453SJohn Marino     }
12395b7b453SJohn Marino 
12495b7b453SJohn Marino   return REG_NOERROR;
12595b7b453SJohn Marino }
12695b7b453SJohn Marino 
12795b7b453SJohn Marino /* Helper functions for re_string_allocate, and re_string_construct.  */
12895b7b453SJohn Marino 
12995b7b453SJohn Marino static reg_errcode_t
13095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
13195b7b453SJohn Marino re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
13295b7b453SJohn Marino {
13395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
13495b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
13595b7b453SJohn Marino     {
13695b7b453SJohn Marino       wint_t *new_wcs;
13795b7b453SJohn Marino 
138*cf28ed85SJohn Marino       /* Avoid overflow in realloc.  */
139*cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140*cf28ed85SJohn Marino       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
14195b7b453SJohn Marino 	return REG_ESPACE;
14295b7b453SJohn Marino 
14395b7b453SJohn Marino       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
14495b7b453SJohn Marino       if (BE (new_wcs == NULL, 0))
14595b7b453SJohn Marino 	return REG_ESPACE;
14695b7b453SJohn Marino       pstr->wcs = new_wcs;
14795b7b453SJohn Marino       if (pstr->offsets != NULL)
14895b7b453SJohn Marino 	{
14995b7b453SJohn Marino 	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
15095b7b453SJohn Marino 	  if (BE (new_offsets == NULL, 0))
15195b7b453SJohn Marino 	    return REG_ESPACE;
15295b7b453SJohn Marino 	  pstr->offsets = new_offsets;
15395b7b453SJohn Marino 	}
15495b7b453SJohn Marino     }
15595b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
15695b7b453SJohn Marino   if (pstr->mbs_allocated)
15795b7b453SJohn Marino     {
15895b7b453SJohn Marino       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
15995b7b453SJohn Marino 					   new_buf_len);
16095b7b453SJohn Marino       if (BE (new_mbs == NULL, 0))
16195b7b453SJohn Marino 	return REG_ESPACE;
16295b7b453SJohn Marino       pstr->mbs = new_mbs;
16395b7b453SJohn Marino     }
16495b7b453SJohn Marino   pstr->bufs_len = new_buf_len;
16595b7b453SJohn Marino   return REG_NOERROR;
16695b7b453SJohn Marino }
16795b7b453SJohn Marino 
16895b7b453SJohn Marino 
16995b7b453SJohn Marino static void
17095b7b453SJohn Marino internal_function
17195b7b453SJohn Marino re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
17295b7b453SJohn Marino 			    RE_TRANSLATE_TYPE trans, bool icase,
17395b7b453SJohn Marino 			    const re_dfa_t *dfa)
17495b7b453SJohn Marino {
17595b7b453SJohn Marino   pstr->raw_mbs = (const unsigned char *) str;
17695b7b453SJohn Marino   pstr->len = len;
17795b7b453SJohn Marino   pstr->raw_len = len;
17895b7b453SJohn Marino   pstr->trans = trans;
17995b7b453SJohn Marino   pstr->icase = icase;
18095b7b453SJohn Marino   pstr->mbs_allocated = (trans != NULL || icase);
18195b7b453SJohn Marino   pstr->mb_cur_max = dfa->mb_cur_max;
18295b7b453SJohn Marino   pstr->is_utf8 = dfa->is_utf8;
18395b7b453SJohn Marino   pstr->map_notascii = dfa->map_notascii;
18495b7b453SJohn Marino   pstr->stop = pstr->len;
18595b7b453SJohn Marino   pstr->raw_stop = pstr->stop;
18695b7b453SJohn Marino }
18795b7b453SJohn Marino 
18895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
18995b7b453SJohn Marino 
19095b7b453SJohn Marino /* Build wide character buffer PSTR->WCS.
19195b7b453SJohn Marino    If the byte sequence of the string are:
19295b7b453SJohn Marino      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
19395b7b453SJohn Marino    Then wide character buffer will be:
19495b7b453SJohn Marino      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
19595b7b453SJohn Marino    We use WEOF for padding, they indicate that the position isn't
19695b7b453SJohn Marino    a first byte of a multibyte character.
19795b7b453SJohn Marino 
19895b7b453SJohn Marino    Note that this function assumes PSTR->VALID_LEN elements are already
19995b7b453SJohn Marino    built and starts from PSTR->VALID_LEN.  */
20095b7b453SJohn Marino 
20195b7b453SJohn Marino static void
20295b7b453SJohn Marino internal_function
20395b7b453SJohn Marino build_wcs_buffer (re_string_t *pstr)
20495b7b453SJohn Marino {
20595b7b453SJohn Marino #ifdef _LIBC
20695b7b453SJohn Marino   unsigned char buf[MB_LEN_MAX];
20795b7b453SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
20895b7b453SJohn Marino #else
20995b7b453SJohn Marino   unsigned char buf[64];
21095b7b453SJohn Marino #endif
21195b7b453SJohn Marino   mbstate_t prev_st;
21295b7b453SJohn Marino   Idx byte_idx, end_idx, remain_len;
21395b7b453SJohn Marino   size_t mbclen;
21495b7b453SJohn Marino 
21595b7b453SJohn Marino   /* Build the buffers from pstr->valid_len to either pstr->len or
21695b7b453SJohn Marino      pstr->bufs_len.  */
21795b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
21895b7b453SJohn Marino   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
21995b7b453SJohn Marino     {
22095b7b453SJohn Marino       wchar_t wc;
22195b7b453SJohn Marino       const char *p;
22295b7b453SJohn Marino 
22395b7b453SJohn Marino       remain_len = end_idx - byte_idx;
22495b7b453SJohn Marino       prev_st = pstr->cur_state;
22595b7b453SJohn Marino       /* Apply the translation if we need.  */
22695b7b453SJohn Marino       if (BE (pstr->trans != NULL, 0))
22795b7b453SJohn Marino 	{
22895b7b453SJohn Marino 	  int i, ch;
22995b7b453SJohn Marino 
23095b7b453SJohn Marino 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
23195b7b453SJohn Marino 	    {
23295b7b453SJohn Marino 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
23395b7b453SJohn Marino 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
23495b7b453SJohn Marino 	    }
23595b7b453SJohn Marino 	  p = (const char *) buf;
23695b7b453SJohn Marino 	}
23795b7b453SJohn Marino       else
23895b7b453SJohn Marino 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
23995b7b453SJohn Marino       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240*cf28ed85SJohn Marino       if (BE (mbclen == (size_t) -1 || mbclen == 0
241*cf28ed85SJohn Marino 	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
24295b7b453SJohn Marino 	{
24395b7b453SJohn Marino 	  /* We treat these cases as a singlebyte character.  */
24495b7b453SJohn Marino 	  mbclen = 1;
24595b7b453SJohn Marino 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
24695b7b453SJohn Marino 	  if (BE (pstr->trans != NULL, 0))
24795b7b453SJohn Marino 	    wc = pstr->trans[wc];
24895b7b453SJohn Marino 	  pstr->cur_state = prev_st;
24995b7b453SJohn Marino 	}
250*cf28ed85SJohn Marino       else if (BE (mbclen == (size_t) -2, 0))
251*cf28ed85SJohn Marino 	{
252*cf28ed85SJohn Marino 	  /* The buffer doesn't have enough space, finish to build.  */
253*cf28ed85SJohn Marino 	  pstr->cur_state = prev_st;
254*cf28ed85SJohn Marino 	  break;
255*cf28ed85SJohn Marino 	}
25695b7b453SJohn Marino 
25795b7b453SJohn Marino       /* Write wide character and padding.  */
25895b7b453SJohn Marino       pstr->wcs[byte_idx++] = wc;
25995b7b453SJohn Marino       /* Write paddings.  */
26095b7b453SJohn Marino       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
26195b7b453SJohn Marino 	pstr->wcs[byte_idx++] = WEOF;
26295b7b453SJohn Marino     }
26395b7b453SJohn Marino   pstr->valid_len = byte_idx;
26495b7b453SJohn Marino   pstr->valid_raw_len = byte_idx;
26595b7b453SJohn Marino }
26695b7b453SJohn Marino 
26795b7b453SJohn Marino /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
26895b7b453SJohn Marino    but for REG_ICASE.  */
26995b7b453SJohn Marino 
27095b7b453SJohn Marino static reg_errcode_t
27195b7b453SJohn Marino internal_function __attribute_warn_unused_result__
27295b7b453SJohn Marino build_wcs_upper_buffer (re_string_t *pstr)
27395b7b453SJohn Marino {
27495b7b453SJohn Marino   mbstate_t prev_st;
27595b7b453SJohn Marino   Idx src_idx, byte_idx, end_idx, remain_len;
27695b7b453SJohn Marino   size_t mbclen;
27795b7b453SJohn Marino #ifdef _LIBC
27895b7b453SJohn Marino   char buf[MB_LEN_MAX];
27995b7b453SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
28095b7b453SJohn Marino #else
28195b7b453SJohn Marino   char buf[64];
28295b7b453SJohn Marino #endif
28395b7b453SJohn Marino 
28495b7b453SJohn Marino   byte_idx = pstr->valid_len;
28595b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
28695b7b453SJohn Marino 
28795b7b453SJohn Marino   /* The following optimization assumes that ASCII characters can be
28895b7b453SJohn Marino      mapped to wide characters with a simple cast.  */
28995b7b453SJohn Marino   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
29095b7b453SJohn Marino     {
29195b7b453SJohn Marino       while (byte_idx < end_idx)
29295b7b453SJohn Marino 	{
29395b7b453SJohn Marino 	  wchar_t wc;
29495b7b453SJohn Marino 
29595b7b453SJohn Marino 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
29695b7b453SJohn Marino 	      && mbsinit (&pstr->cur_state))
29795b7b453SJohn Marino 	    {
29895b7b453SJohn Marino 	      /* In case of a singlebyte character.  */
29995b7b453SJohn Marino 	      pstr->mbs[byte_idx]
30095b7b453SJohn Marino 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
30195b7b453SJohn Marino 	      /* The next step uses the assumption that wchar_t is encoded
30295b7b453SJohn Marino 		 ASCII-safe: all ASCII values can be converted like this.  */
30395b7b453SJohn Marino 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
30495b7b453SJohn Marino 	      ++byte_idx;
30595b7b453SJohn Marino 	      continue;
30695b7b453SJohn Marino 	    }
30795b7b453SJohn Marino 
30895b7b453SJohn Marino 	  remain_len = end_idx - byte_idx;
30995b7b453SJohn Marino 	  prev_st = pstr->cur_state;
31095b7b453SJohn Marino 	  mbclen = __mbrtowc (&wc,
31195b7b453SJohn Marino 			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
31295b7b453SJohn Marino 			       + byte_idx), remain_len, &pstr->cur_state);
31395b7b453SJohn Marino 	  if (BE (mbclen < (size_t) -2, 1))
31495b7b453SJohn Marino 	    {
31595b7b453SJohn Marino 	      wchar_t wcu = wc;
31695b7b453SJohn Marino 	      if (iswlower (wc))
31795b7b453SJohn Marino 		{
31895b7b453SJohn Marino 		  size_t mbcdlen;
31995b7b453SJohn Marino 
32095b7b453SJohn Marino 		  wcu = towupper (wc);
32195b7b453SJohn Marino 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
32295b7b453SJohn Marino 		  if (BE (mbclen == mbcdlen, 1))
32395b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
32495b7b453SJohn Marino 		  else
32595b7b453SJohn Marino 		    {
32695b7b453SJohn Marino 		      src_idx = byte_idx;
32795b7b453SJohn Marino 		      goto offsets_needed;
32895b7b453SJohn Marino 		    }
32995b7b453SJohn Marino 		}
33095b7b453SJohn Marino 	      else
33195b7b453SJohn Marino 		memcpy (pstr->mbs + byte_idx,
33295b7b453SJohn Marino 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
33395b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = wcu;
33495b7b453SJohn Marino 	      /* Write paddings.  */
33595b7b453SJohn Marino 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
33695b7b453SJohn Marino 		pstr->wcs[byte_idx++] = WEOF;
33795b7b453SJohn Marino 	    }
338*cf28ed85SJohn Marino 	  else if (mbclen == (size_t) -1 || mbclen == 0
339*cf28ed85SJohn Marino 		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
34095b7b453SJohn Marino 	    {
341*cf28ed85SJohn Marino 	      /* It is an invalid character, an incomplete character
342*cf28ed85SJohn Marino 		 at the end of the string, or '\0'.  Just use the byte.  */
34395b7b453SJohn Marino 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
34495b7b453SJohn Marino 	      pstr->mbs[byte_idx] = ch;
34595b7b453SJohn Marino 	      /* And also cast it to wide char.  */
34695b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
34795b7b453SJohn Marino 	      if (BE (mbclen == (size_t) -1, 0))
34895b7b453SJohn Marino 		pstr->cur_state = prev_st;
34995b7b453SJohn Marino 	    }
35095b7b453SJohn Marino 	  else
35195b7b453SJohn Marino 	    {
35295b7b453SJohn Marino 	      /* The buffer doesn't have enough space, finish to build.  */
35395b7b453SJohn Marino 	      pstr->cur_state = prev_st;
35495b7b453SJohn Marino 	      break;
35595b7b453SJohn Marino 	    }
35695b7b453SJohn Marino 	}
35795b7b453SJohn Marino       pstr->valid_len = byte_idx;
35895b7b453SJohn Marino       pstr->valid_raw_len = byte_idx;
35995b7b453SJohn Marino       return REG_NOERROR;
36095b7b453SJohn Marino     }
36195b7b453SJohn Marino   else
36295b7b453SJohn Marino     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
36395b7b453SJohn Marino       {
36495b7b453SJohn Marino 	wchar_t wc;
36595b7b453SJohn Marino 	const char *p;
36695b7b453SJohn Marino       offsets_needed:
36795b7b453SJohn Marino 	remain_len = end_idx - byte_idx;
36895b7b453SJohn Marino 	prev_st = pstr->cur_state;
36995b7b453SJohn Marino 	if (BE (pstr->trans != NULL, 0))
37095b7b453SJohn Marino 	  {
37195b7b453SJohn Marino 	    int i, ch;
37295b7b453SJohn Marino 
37395b7b453SJohn Marino 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
37495b7b453SJohn Marino 	      {
37595b7b453SJohn Marino 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
37695b7b453SJohn Marino 		buf[i] = pstr->trans[ch];
37795b7b453SJohn Marino 	      }
37895b7b453SJohn Marino 	    p = (const char *) buf;
37995b7b453SJohn Marino 	  }
38095b7b453SJohn Marino 	else
38195b7b453SJohn Marino 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
38295b7b453SJohn Marino 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
38395b7b453SJohn Marino 	if (BE (mbclen < (size_t) -2, 1))
38495b7b453SJohn Marino 	  {
38595b7b453SJohn Marino 	    wchar_t wcu = wc;
38695b7b453SJohn Marino 	    if (iswlower (wc))
38795b7b453SJohn Marino 	      {
38895b7b453SJohn Marino 		size_t mbcdlen;
38995b7b453SJohn Marino 
39095b7b453SJohn Marino 		wcu = towupper (wc);
39195b7b453SJohn Marino 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
39295b7b453SJohn Marino 		if (BE (mbclen == mbcdlen, 1))
39395b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
39495b7b453SJohn Marino 		else if (mbcdlen != (size_t) -1)
39595b7b453SJohn Marino 		  {
39695b7b453SJohn Marino 		    size_t i;
39795b7b453SJohn Marino 
39895b7b453SJohn Marino 		    if (byte_idx + mbcdlen > pstr->bufs_len)
39995b7b453SJohn Marino 		      {
40095b7b453SJohn Marino 			pstr->cur_state = prev_st;
40195b7b453SJohn Marino 			break;
40295b7b453SJohn Marino 		      }
40395b7b453SJohn Marino 
40495b7b453SJohn Marino 		    if (pstr->offsets == NULL)
40595b7b453SJohn Marino 		      {
40695b7b453SJohn Marino 			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
40795b7b453SJohn Marino 
40895b7b453SJohn Marino 			if (pstr->offsets == NULL)
40995b7b453SJohn Marino 			  return REG_ESPACE;
41095b7b453SJohn Marino 		      }
41195b7b453SJohn Marino 		    if (!pstr->offsets_needed)
41295b7b453SJohn Marino 		      {
41395b7b453SJohn Marino 			for (i = 0; i < (size_t) byte_idx; ++i)
41495b7b453SJohn Marino 			  pstr->offsets[i] = i;
41595b7b453SJohn Marino 			pstr->offsets_needed = 1;
41695b7b453SJohn Marino 		      }
41795b7b453SJohn Marino 
41895b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
41995b7b453SJohn Marino 		    pstr->wcs[byte_idx] = wcu;
42095b7b453SJohn Marino 		    pstr->offsets[byte_idx] = src_idx;
42195b7b453SJohn Marino 		    for (i = 1; i < mbcdlen; ++i)
42295b7b453SJohn Marino 		      {
42395b7b453SJohn Marino 			pstr->offsets[byte_idx + i]
42495b7b453SJohn Marino 			  = src_idx + (i < mbclen ? i : mbclen - 1);
42595b7b453SJohn Marino 			pstr->wcs[byte_idx + i] = WEOF;
42695b7b453SJohn Marino 		      }
42795b7b453SJohn Marino 		    pstr->len += mbcdlen - mbclen;
42895b7b453SJohn Marino 		    if (pstr->raw_stop > src_idx)
42995b7b453SJohn Marino 		      pstr->stop += mbcdlen - mbclen;
43095b7b453SJohn Marino 		    end_idx = (pstr->bufs_len > pstr->len)
43195b7b453SJohn Marino 			      ? pstr->len : pstr->bufs_len;
43295b7b453SJohn Marino 		    byte_idx += mbcdlen;
43395b7b453SJohn Marino 		    src_idx += mbclen;
43495b7b453SJohn Marino 		    continue;
43595b7b453SJohn Marino 		  }
43695b7b453SJohn Marino 		else
43795b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, p, mbclen);
43895b7b453SJohn Marino 	      }
43995b7b453SJohn Marino 	    else
44095b7b453SJohn Marino 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
44195b7b453SJohn Marino 
44295b7b453SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
44395b7b453SJohn Marino 	      {
44495b7b453SJohn Marino 		size_t i;
44595b7b453SJohn Marino 		for (i = 0; i < mbclen; ++i)
44695b7b453SJohn Marino 		  pstr->offsets[byte_idx + i] = src_idx + i;
44795b7b453SJohn Marino 	      }
44895b7b453SJohn Marino 	    src_idx += mbclen;
44995b7b453SJohn Marino 
45095b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = wcu;
45195b7b453SJohn Marino 	    /* Write paddings.  */
45295b7b453SJohn Marino 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
45395b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = WEOF;
45495b7b453SJohn Marino 	  }
455*cf28ed85SJohn Marino 	else if (mbclen == (size_t) -1 || mbclen == 0
456*cf28ed85SJohn Marino 		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
45795b7b453SJohn Marino 	  {
45895b7b453SJohn Marino 	    /* It is an invalid character or '\0'.  Just use the byte.  */
45995b7b453SJohn Marino 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
46095b7b453SJohn Marino 
46195b7b453SJohn Marino 	    if (BE (pstr->trans != NULL, 0))
46295b7b453SJohn Marino 	      ch = pstr->trans [ch];
46395b7b453SJohn Marino 	    pstr->mbs[byte_idx] = ch;
46495b7b453SJohn Marino 
46595b7b453SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
46695b7b453SJohn Marino 	      pstr->offsets[byte_idx] = src_idx;
46795b7b453SJohn Marino 	    ++src_idx;
46895b7b453SJohn Marino 
46995b7b453SJohn Marino 	    /* And also cast it to wide char.  */
47095b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
47195b7b453SJohn Marino 	    if (BE (mbclen == (size_t) -1, 0))
47295b7b453SJohn Marino 	      pstr->cur_state = prev_st;
47395b7b453SJohn Marino 	  }
47495b7b453SJohn Marino 	else
47595b7b453SJohn Marino 	  {
47695b7b453SJohn Marino 	    /* The buffer doesn't have enough space, finish to build.  */
47795b7b453SJohn Marino 	    pstr->cur_state = prev_st;
47895b7b453SJohn Marino 	    break;
47995b7b453SJohn Marino 	  }
48095b7b453SJohn Marino       }
48195b7b453SJohn Marino   pstr->valid_len = byte_idx;
48295b7b453SJohn Marino   pstr->valid_raw_len = src_idx;
48395b7b453SJohn Marino   return REG_NOERROR;
48495b7b453SJohn Marino }
48595b7b453SJohn Marino 
48695b7b453SJohn Marino /* Skip characters until the index becomes greater than NEW_RAW_IDX.
48795b7b453SJohn Marino    Return the index.  */
48895b7b453SJohn Marino 
48995b7b453SJohn Marino static Idx
49095b7b453SJohn Marino internal_function
49195b7b453SJohn Marino re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
49295b7b453SJohn Marino {
49395b7b453SJohn Marino   mbstate_t prev_st;
49495b7b453SJohn Marino   Idx rawbuf_idx;
49595b7b453SJohn Marino   size_t mbclen;
49695b7b453SJohn Marino   wint_t wc = WEOF;
49795b7b453SJohn Marino 
49895b7b453SJohn Marino   /* Skip the characters which are not necessary to check.  */
49995b7b453SJohn Marino   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
50095b7b453SJohn Marino        rawbuf_idx < new_raw_idx;)
50195b7b453SJohn Marino     {
50295b7b453SJohn Marino       wchar_t wc2;
503*cf28ed85SJohn Marino       Idx remain_len = pstr->raw_len - rawbuf_idx;
50495b7b453SJohn Marino       prev_st = pstr->cur_state;
50595b7b453SJohn Marino       mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
50695b7b453SJohn Marino 			  remain_len, &pstr->cur_state);
50795b7b453SJohn Marino       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
50895b7b453SJohn Marino 	{
50995b7b453SJohn Marino 	  /* We treat these cases as a single byte character.  */
51095b7b453SJohn Marino 	  if (mbclen == 0 || remain_len == 0)
51195b7b453SJohn Marino 	    wc = L'\0';
51295b7b453SJohn Marino 	  else
51395b7b453SJohn Marino 	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
51495b7b453SJohn Marino 	  mbclen = 1;
51595b7b453SJohn Marino 	  pstr->cur_state = prev_st;
51695b7b453SJohn Marino 	}
51795b7b453SJohn Marino       else
51895b7b453SJohn Marino 	wc = wc2;
51995b7b453SJohn Marino       /* Then proceed the next character.  */
52095b7b453SJohn Marino       rawbuf_idx += mbclen;
52195b7b453SJohn Marino     }
52295b7b453SJohn Marino   *last_wc = wc;
52395b7b453SJohn Marino   return rawbuf_idx;
52495b7b453SJohn Marino }
52595b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
52695b7b453SJohn Marino 
52795b7b453SJohn Marino /* Build the buffer PSTR->MBS, and apply the translation if we need.
52895b7b453SJohn Marino    This function is used in case of REG_ICASE.  */
52995b7b453SJohn Marino 
53095b7b453SJohn Marino static void
53195b7b453SJohn Marino internal_function
53295b7b453SJohn Marino build_upper_buffer (re_string_t *pstr)
53395b7b453SJohn Marino {
53495b7b453SJohn Marino   Idx char_idx, end_idx;
53595b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
53695b7b453SJohn Marino 
53795b7b453SJohn Marino   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
53895b7b453SJohn Marino     {
53995b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
54095b7b453SJohn Marino       if (BE (pstr->trans != NULL, 0))
54195b7b453SJohn Marino 	ch = pstr->trans[ch];
54295b7b453SJohn Marino       if (islower (ch))
54395b7b453SJohn Marino 	pstr->mbs[char_idx] = toupper (ch);
54495b7b453SJohn Marino       else
54595b7b453SJohn Marino 	pstr->mbs[char_idx] = ch;
54695b7b453SJohn Marino     }
54795b7b453SJohn Marino   pstr->valid_len = char_idx;
54895b7b453SJohn Marino   pstr->valid_raw_len = char_idx;
54995b7b453SJohn Marino }
55095b7b453SJohn Marino 
55195b7b453SJohn Marino /* Apply TRANS to the buffer in PSTR.  */
55295b7b453SJohn Marino 
55395b7b453SJohn Marino static void
55495b7b453SJohn Marino internal_function
55595b7b453SJohn Marino re_string_translate_buffer (re_string_t *pstr)
55695b7b453SJohn Marino {
55795b7b453SJohn Marino   Idx buf_idx, end_idx;
55895b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
55995b7b453SJohn Marino 
56095b7b453SJohn Marino   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
56195b7b453SJohn Marino     {
56295b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
56395b7b453SJohn Marino       pstr->mbs[buf_idx] = pstr->trans[ch];
56495b7b453SJohn Marino     }
56595b7b453SJohn Marino 
56695b7b453SJohn Marino   pstr->valid_len = buf_idx;
56795b7b453SJohn Marino   pstr->valid_raw_len = buf_idx;
56895b7b453SJohn Marino }
56995b7b453SJohn Marino 
57095b7b453SJohn Marino /* This function re-construct the buffers.
57195b7b453SJohn Marino    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
57295b7b453SJohn Marino    convert to upper case in case of REG_ICASE, apply translation.  */
57395b7b453SJohn Marino 
57495b7b453SJohn Marino static reg_errcode_t
57595b7b453SJohn Marino internal_function __attribute_warn_unused_result__
57695b7b453SJohn Marino re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
57795b7b453SJohn Marino {
57895b7b453SJohn Marino   Idx offset;
57995b7b453SJohn Marino 
58095b7b453SJohn Marino   if (BE (pstr->raw_mbs_idx <= idx, 0))
58195b7b453SJohn Marino     offset = idx - pstr->raw_mbs_idx;
58295b7b453SJohn Marino   else
58395b7b453SJohn Marino     {
58495b7b453SJohn Marino       /* Reset buffer.  */
58595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
58695b7b453SJohn Marino       if (pstr->mb_cur_max > 1)
58795b7b453SJohn Marino 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
58895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
58995b7b453SJohn Marino       pstr->len = pstr->raw_len;
59095b7b453SJohn Marino       pstr->stop = pstr->raw_stop;
59195b7b453SJohn Marino       pstr->valid_len = 0;
59295b7b453SJohn Marino       pstr->raw_mbs_idx = 0;
59395b7b453SJohn Marino       pstr->valid_raw_len = 0;
59495b7b453SJohn Marino       pstr->offsets_needed = 0;
59595b7b453SJohn Marino       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
59695b7b453SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
59795b7b453SJohn Marino       if (!pstr->mbs_allocated)
59895b7b453SJohn Marino 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
59995b7b453SJohn Marino       offset = idx;
60095b7b453SJohn Marino     }
60195b7b453SJohn Marino 
60295b7b453SJohn Marino   if (BE (offset != 0, 1))
60395b7b453SJohn Marino     {
60495b7b453SJohn Marino       /* Should the already checked characters be kept?  */
60595b7b453SJohn Marino       if (BE (offset < pstr->valid_raw_len, 1))
60695b7b453SJohn Marino 	{
60795b7b453SJohn Marino 	  /* Yes, move them to the front of the buffer.  */
60895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
60995b7b453SJohn Marino 	  if (BE (pstr->offsets_needed, 0))
61095b7b453SJohn Marino 	    {
61195b7b453SJohn Marino 	      Idx low = 0, high = pstr->valid_len, mid;
61295b7b453SJohn Marino 	      do
61395b7b453SJohn Marino 		{
61495b7b453SJohn Marino 		  mid = (high + low) / 2;
61595b7b453SJohn Marino 		  if (pstr->offsets[mid] > offset)
61695b7b453SJohn Marino 		    high = mid;
61795b7b453SJohn Marino 		  else if (pstr->offsets[mid] < offset)
61895b7b453SJohn Marino 		    low = mid + 1;
61995b7b453SJohn Marino 		  else
62095b7b453SJohn Marino 		    break;
62195b7b453SJohn Marino 		}
62295b7b453SJohn Marino 	      while (low < high);
62395b7b453SJohn Marino 	      if (pstr->offsets[mid] < offset)
62495b7b453SJohn Marino 		++mid;
62595b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
62695b7b453SJohn Marino 							eflags);
62795b7b453SJohn Marino 	      /* This can be quite complicated, so handle specially
62895b7b453SJohn Marino 		 only the common and easy case where the character with
62995b7b453SJohn Marino 		 different length representation of lower and upper
63095b7b453SJohn Marino 		 case is present at or after offset.  */
63195b7b453SJohn Marino 	      if (pstr->valid_len > offset
63295b7b453SJohn Marino 		  && mid == offset && pstr->offsets[mid] == offset)
63395b7b453SJohn Marino 		{
63495b7b453SJohn Marino 		  memmove (pstr->wcs, pstr->wcs + offset,
63595b7b453SJohn Marino 			   (pstr->valid_len - offset) * sizeof (wint_t));
63695b7b453SJohn Marino 		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
63795b7b453SJohn Marino 		  pstr->valid_len -= offset;
63895b7b453SJohn Marino 		  pstr->valid_raw_len -= offset;
63995b7b453SJohn Marino 		  for (low = 0; low < pstr->valid_len; low++)
64095b7b453SJohn Marino 		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
64195b7b453SJohn Marino 		}
64295b7b453SJohn Marino 	      else
64395b7b453SJohn Marino 		{
64495b7b453SJohn Marino 		  /* Otherwise, just find out how long the partial multibyte
64595b7b453SJohn Marino 		     character at offset is and fill it with WEOF/255.  */
64695b7b453SJohn Marino 		  pstr->len = pstr->raw_len - idx + offset;
64795b7b453SJohn Marino 		  pstr->stop = pstr->raw_stop - idx + offset;
64895b7b453SJohn Marino 		  pstr->offsets_needed = 0;
64995b7b453SJohn Marino 		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
65095b7b453SJohn Marino 		    --mid;
65195b7b453SJohn Marino 		  while (mid < pstr->valid_len)
65295b7b453SJohn Marino 		    if (pstr->wcs[mid] != WEOF)
65395b7b453SJohn Marino 		      break;
65495b7b453SJohn Marino 		    else
65595b7b453SJohn Marino 		      ++mid;
65695b7b453SJohn Marino 		  if (mid == pstr->valid_len)
65795b7b453SJohn Marino 		    pstr->valid_len = 0;
65895b7b453SJohn Marino 		  else
65995b7b453SJohn Marino 		    {
66095b7b453SJohn Marino 		      pstr->valid_len = pstr->offsets[mid] - offset;
66195b7b453SJohn Marino 		      if (pstr->valid_len)
66295b7b453SJohn Marino 			{
66395b7b453SJohn Marino 			  for (low = 0; low < pstr->valid_len; ++low)
66495b7b453SJohn Marino 			    pstr->wcs[low] = WEOF;
66595b7b453SJohn Marino 			  memset (pstr->mbs, 255, pstr->valid_len);
66695b7b453SJohn Marino 			}
66795b7b453SJohn Marino 		    }
66895b7b453SJohn Marino 		  pstr->valid_raw_len = pstr->valid_len;
66995b7b453SJohn Marino 		}
67095b7b453SJohn Marino 	    }
67195b7b453SJohn Marino 	  else
67295b7b453SJohn Marino #endif
67395b7b453SJohn Marino 	    {
67495b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
67595b7b453SJohn Marino 							eflags);
67695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
67795b7b453SJohn Marino 	      if (pstr->mb_cur_max > 1)
67895b7b453SJohn Marino 		memmove (pstr->wcs, pstr->wcs + offset,
67995b7b453SJohn Marino 			 (pstr->valid_len - offset) * sizeof (wint_t));
68095b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
68195b7b453SJohn Marino 	      if (BE (pstr->mbs_allocated, 0))
68295b7b453SJohn Marino 		memmove (pstr->mbs, pstr->mbs + offset,
68395b7b453SJohn Marino 			 pstr->valid_len - offset);
68495b7b453SJohn Marino 	      pstr->valid_len -= offset;
68595b7b453SJohn Marino 	      pstr->valid_raw_len -= offset;
68695b7b453SJohn Marino #if DEBUG
68795b7b453SJohn Marino 	      assert (pstr->valid_len > 0);
68895b7b453SJohn Marino #endif
68995b7b453SJohn Marino 	    }
69095b7b453SJohn Marino 	}
69195b7b453SJohn Marino       else
69295b7b453SJohn Marino 	{
69395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
69495b7b453SJohn Marino 	  /* No, skip all characters until IDX.  */
69595b7b453SJohn Marino 	  Idx prev_valid_len = pstr->valid_len;
69695b7b453SJohn Marino 
69795b7b453SJohn Marino 	  if (BE (pstr->offsets_needed, 0))
69895b7b453SJohn Marino 	    {
69995b7b453SJohn Marino 	      pstr->len = pstr->raw_len - idx + offset;
70095b7b453SJohn Marino 	      pstr->stop = pstr->raw_stop - idx + offset;
70195b7b453SJohn Marino 	      pstr->offsets_needed = 0;
70295b7b453SJohn Marino 	    }
70395b7b453SJohn Marino #endif
70495b7b453SJohn Marino 	  pstr->valid_len = 0;
70595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
70695b7b453SJohn Marino 	  if (pstr->mb_cur_max > 1)
70795b7b453SJohn Marino 	    {
70895b7b453SJohn Marino 	      Idx wcs_idx;
70995b7b453SJohn Marino 	      wint_t wc = WEOF;
71095b7b453SJohn Marino 
71195b7b453SJohn Marino 	      if (pstr->is_utf8)
71295b7b453SJohn Marino 		{
71395b7b453SJohn Marino 		  const unsigned char *raw, *p, *end;
71495b7b453SJohn Marino 
71595b7b453SJohn Marino 		  /* Special case UTF-8.  Multi-byte chars start with any
71695b7b453SJohn Marino 		     byte other than 0x80 - 0xbf.  */
71795b7b453SJohn Marino 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
71895b7b453SJohn Marino 		  end = raw + (offset - pstr->mb_cur_max);
71995b7b453SJohn Marino 		  if (end < pstr->raw_mbs)
72095b7b453SJohn Marino 		    end = pstr->raw_mbs;
72195b7b453SJohn Marino 		  p = raw + offset - 1;
72295b7b453SJohn Marino #ifdef _LIBC
72395b7b453SJohn Marino 		  /* We know the wchar_t encoding is UCS4, so for the simple
72495b7b453SJohn Marino 		     case, ASCII characters, skip the conversion step.  */
72595b7b453SJohn Marino 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
72695b7b453SJohn Marino 		    {
72795b7b453SJohn Marino 		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
72895b7b453SJohn Marino 		      /* pstr->valid_len = 0; */
72995b7b453SJohn Marino 		      wc = (wchar_t) *p;
73095b7b453SJohn Marino 		    }
73195b7b453SJohn Marino 		  else
73295b7b453SJohn Marino #endif
73395b7b453SJohn Marino 		    for (; p >= end; --p)
73495b7b453SJohn Marino 		      if ((*p & 0xc0) != 0x80)
73595b7b453SJohn Marino 			{
73695b7b453SJohn Marino 			  mbstate_t cur_state;
73795b7b453SJohn Marino 			  wchar_t wc2;
73895b7b453SJohn Marino 			  Idx mlen = raw + pstr->len - p;
739*cf28ed85SJohn Marino 			  unsigned char buf[6];
74095b7b453SJohn Marino 			  size_t mbclen;
74195b7b453SJohn Marino 
742*cf28ed85SJohn Marino 			  const unsigned char *pp = p;
74395b7b453SJohn Marino 			  if (BE (pstr->trans != NULL, 0))
74495b7b453SJohn Marino 			    {
74595b7b453SJohn Marino 			      int i = mlen < 6 ? mlen : 6;
74695b7b453SJohn Marino 			      while (--i >= 0)
74795b7b453SJohn Marino 				buf[i] = pstr->trans[p[i]];
748*cf28ed85SJohn Marino 			      pp = buf;
74995b7b453SJohn Marino 			    }
75095b7b453SJohn Marino 			  /* XXX Don't use mbrtowc, we know which conversion
75195b7b453SJohn Marino 			     to use (UTF-8 -> UCS4).  */
75295b7b453SJohn Marino 			  memset (&cur_state, 0, sizeof (cur_state));
753*cf28ed85SJohn Marino 			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
75495b7b453SJohn Marino 					      &cur_state);
75595b7b453SJohn Marino 			  if (raw + offset - p <= mbclen
75695b7b453SJohn Marino 			      && mbclen < (size_t) -2)
75795b7b453SJohn Marino 			    {
75895b7b453SJohn Marino 			      memset (&pstr->cur_state, '\0',
75995b7b453SJohn Marino 				      sizeof (mbstate_t));
76095b7b453SJohn Marino 			      pstr->valid_len = mbclen - (raw + offset - p);
76195b7b453SJohn Marino 			      wc = wc2;
76295b7b453SJohn Marino 			    }
76395b7b453SJohn Marino 			  break;
76495b7b453SJohn Marino 			}
76595b7b453SJohn Marino 		}
76695b7b453SJohn Marino 
76795b7b453SJohn Marino 	      if (wc == WEOF)
76895b7b453SJohn Marino 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
76995b7b453SJohn Marino 	      if (wc == WEOF)
77095b7b453SJohn Marino 		pstr->tip_context
77195b7b453SJohn Marino 		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
77295b7b453SJohn Marino 	      else
77395b7b453SJohn Marino 		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
77495b7b453SJohn Marino 				      && IS_WIDE_WORD_CHAR (wc))
77595b7b453SJohn Marino 				     ? CONTEXT_WORD
77695b7b453SJohn Marino 				     : ((IS_WIDE_NEWLINE (wc)
77795b7b453SJohn Marino 					 && pstr->newline_anchor)
77895b7b453SJohn Marino 					? CONTEXT_NEWLINE : 0));
77995b7b453SJohn Marino 	      if (BE (pstr->valid_len, 0))
78095b7b453SJohn Marino 		{
78195b7b453SJohn Marino 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
78295b7b453SJohn Marino 		    pstr->wcs[wcs_idx] = WEOF;
78395b7b453SJohn Marino 		  if (pstr->mbs_allocated)
78495b7b453SJohn Marino 		    memset (pstr->mbs, 255, pstr->valid_len);
78595b7b453SJohn Marino 		}
78695b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->valid_len;
78795b7b453SJohn Marino 	    }
78895b7b453SJohn Marino 	  else
78995b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
79095b7b453SJohn Marino 	    {
79195b7b453SJohn Marino 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
79295b7b453SJohn Marino 	      pstr->valid_raw_len = 0;
79395b7b453SJohn Marino 	      if (pstr->trans)
79495b7b453SJohn Marino 		c = pstr->trans[c];
79595b7b453SJohn Marino 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
79695b7b453SJohn Marino 				   ? CONTEXT_WORD
79795b7b453SJohn Marino 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
79895b7b453SJohn Marino 				      ? CONTEXT_NEWLINE : 0));
79995b7b453SJohn Marino 	    }
80095b7b453SJohn Marino 	}
80195b7b453SJohn Marino       if (!BE (pstr->mbs_allocated, 0))
80295b7b453SJohn Marino 	pstr->mbs += offset;
80395b7b453SJohn Marino     }
80495b7b453SJohn Marino   pstr->raw_mbs_idx = idx;
80595b7b453SJohn Marino   pstr->len -= offset;
80695b7b453SJohn Marino   pstr->stop -= offset;
80795b7b453SJohn Marino 
80895b7b453SJohn Marino   /* Then build the buffers.  */
80995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
81095b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
81195b7b453SJohn Marino     {
81295b7b453SJohn Marino       if (pstr->icase)
81395b7b453SJohn Marino 	{
81495b7b453SJohn Marino 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
81595b7b453SJohn Marino 	  if (BE (ret != REG_NOERROR, 0))
81695b7b453SJohn Marino 	    return ret;
81795b7b453SJohn Marino 	}
81895b7b453SJohn Marino       else
81995b7b453SJohn Marino 	build_wcs_buffer (pstr);
82095b7b453SJohn Marino     }
82195b7b453SJohn Marino   else
82295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
82395b7b453SJohn Marino     if (BE (pstr->mbs_allocated, 0))
82495b7b453SJohn Marino       {
82595b7b453SJohn Marino 	if (pstr->icase)
82695b7b453SJohn Marino 	  build_upper_buffer (pstr);
82795b7b453SJohn Marino 	else if (pstr->trans != NULL)
82895b7b453SJohn Marino 	  re_string_translate_buffer (pstr);
82995b7b453SJohn Marino       }
83095b7b453SJohn Marino     else
83195b7b453SJohn Marino       pstr->valid_len = pstr->len;
83295b7b453SJohn Marino 
83395b7b453SJohn Marino   pstr->cur_idx = 0;
83495b7b453SJohn Marino   return REG_NOERROR;
83595b7b453SJohn Marino }
83695b7b453SJohn Marino 
83795b7b453SJohn Marino static unsigned char
83895b7b453SJohn Marino internal_function __attribute ((pure))
83995b7b453SJohn Marino re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
84095b7b453SJohn Marino {
84195b7b453SJohn Marino   int ch;
84295b7b453SJohn Marino   Idx off;
84395b7b453SJohn Marino 
84495b7b453SJohn Marino   /* Handle the common (easiest) cases first.  */
84595b7b453SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
84695b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
84795b7b453SJohn Marino 
84895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
84995b7b453SJohn Marino   if (pstr->mb_cur_max > 1
85095b7b453SJohn Marino       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
85195b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
85295b7b453SJohn Marino #endif
85395b7b453SJohn Marino 
85495b7b453SJohn Marino   off = pstr->cur_idx + idx;
85595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
85695b7b453SJohn Marino   if (pstr->offsets_needed)
85795b7b453SJohn Marino     off = pstr->offsets[off];
85895b7b453SJohn Marino #endif
85995b7b453SJohn Marino 
86095b7b453SJohn Marino   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
86195b7b453SJohn Marino 
86295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
86395b7b453SJohn Marino   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
86495b7b453SJohn Marino      this function returns CAPITAL LETTER I instead of first byte of
86595b7b453SJohn Marino      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
86695b7b453SJohn Marino      since peek_byte_case doesn't advance cur_idx in any way.  */
86795b7b453SJohn Marino   if (pstr->offsets_needed && !isascii (ch))
86895b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
86995b7b453SJohn Marino #endif
87095b7b453SJohn Marino 
87195b7b453SJohn Marino   return ch;
87295b7b453SJohn Marino }
87395b7b453SJohn Marino 
87495b7b453SJohn Marino static unsigned char
875*cf28ed85SJohn Marino internal_function
87695b7b453SJohn Marino re_string_fetch_byte_case (re_string_t *pstr)
87795b7b453SJohn Marino {
87895b7b453SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
87995b7b453SJohn Marino     return re_string_fetch_byte (pstr);
88095b7b453SJohn Marino 
88195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
88295b7b453SJohn Marino   if (pstr->offsets_needed)
88395b7b453SJohn Marino     {
88495b7b453SJohn Marino       Idx off;
88595b7b453SJohn Marino       int ch;
88695b7b453SJohn Marino 
88795b7b453SJohn Marino       /* For tr_TR.UTF-8 [[:islower:]] there is
88895b7b453SJohn Marino 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
88995b7b453SJohn Marino 	 in that case the whole multi-byte character and return
89095b7b453SJohn Marino 	 the original letter.  On the other side, with
89195b7b453SJohn Marino 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
89295b7b453SJohn Marino 	 anything else would complicate things too much.  */
89395b7b453SJohn Marino 
89495b7b453SJohn Marino       if (!re_string_first_byte (pstr, pstr->cur_idx))
89595b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
89695b7b453SJohn Marino 
89795b7b453SJohn Marino       off = pstr->offsets[pstr->cur_idx];
89895b7b453SJohn Marino       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
89995b7b453SJohn Marino 
90095b7b453SJohn Marino       if (! isascii (ch))
90195b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
90295b7b453SJohn Marino 
90395b7b453SJohn Marino       re_string_skip_bytes (pstr,
90495b7b453SJohn Marino 			    re_string_char_size_at (pstr, pstr->cur_idx));
90595b7b453SJohn Marino       return ch;
90695b7b453SJohn Marino     }
90795b7b453SJohn Marino #endif
90895b7b453SJohn Marino 
90995b7b453SJohn Marino   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
91095b7b453SJohn Marino }
91195b7b453SJohn Marino 
91295b7b453SJohn Marino static void
91395b7b453SJohn Marino internal_function
91495b7b453SJohn Marino re_string_destruct (re_string_t *pstr)
91595b7b453SJohn Marino {
91695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
91795b7b453SJohn Marino   re_free (pstr->wcs);
91895b7b453SJohn Marino   re_free (pstr->offsets);
91995b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
92095b7b453SJohn Marino   if (pstr->mbs_allocated)
92195b7b453SJohn Marino     re_free (pstr->mbs);
92295b7b453SJohn Marino }
92395b7b453SJohn Marino 
92495b7b453SJohn Marino /* Return the context at IDX in INPUT.  */
92595b7b453SJohn Marino 
92695b7b453SJohn Marino static unsigned int
92795b7b453SJohn Marino internal_function
92895b7b453SJohn Marino re_string_context_at (const re_string_t *input, Idx idx, int eflags)
92995b7b453SJohn Marino {
93095b7b453SJohn Marino   int c;
93195b7b453SJohn Marino   if (BE (! REG_VALID_INDEX (idx), 0))
93295b7b453SJohn Marino     /* In this case, we use the value stored in input->tip_context,
93395b7b453SJohn Marino        since we can't know the character in input->mbs[-1] here.  */
93495b7b453SJohn Marino     return input->tip_context;
93595b7b453SJohn Marino   if (BE (idx == input->len, 0))
93695b7b453SJohn Marino     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
93795b7b453SJohn Marino 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
93895b7b453SJohn Marino #ifdef RE_ENABLE_I18N
93995b7b453SJohn Marino   if (input->mb_cur_max > 1)
94095b7b453SJohn Marino     {
94195b7b453SJohn Marino       wint_t wc;
94295b7b453SJohn Marino       Idx wc_idx = idx;
94395b7b453SJohn Marino       while(input->wcs[wc_idx] == WEOF)
94495b7b453SJohn Marino 	{
94595b7b453SJohn Marino #ifdef DEBUG
94695b7b453SJohn Marino 	  /* It must not happen.  */
94795b7b453SJohn Marino 	  assert (REG_VALID_INDEX (wc_idx));
94895b7b453SJohn Marino #endif
94995b7b453SJohn Marino 	  --wc_idx;
95095b7b453SJohn Marino 	  if (! REG_VALID_INDEX (wc_idx))
95195b7b453SJohn Marino 	    return input->tip_context;
95295b7b453SJohn Marino 	}
95395b7b453SJohn Marino       wc = input->wcs[wc_idx];
95495b7b453SJohn Marino       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
95595b7b453SJohn Marino 	return CONTEXT_WORD;
95695b7b453SJohn Marino       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
95795b7b453SJohn Marino 	      ? CONTEXT_NEWLINE : 0);
95895b7b453SJohn Marino     }
95995b7b453SJohn Marino   else
96095b7b453SJohn Marino #endif
96195b7b453SJohn Marino     {
96295b7b453SJohn Marino       c = re_string_byte_at (input, idx);
96395b7b453SJohn Marino       if (bitset_contain (input->word_char, c))
96495b7b453SJohn Marino 	return CONTEXT_WORD;
96595b7b453SJohn Marino       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
96695b7b453SJohn Marino     }
96795b7b453SJohn Marino }
96895b7b453SJohn Marino 
96995b7b453SJohn Marino /* Functions for set operation.  */
97095b7b453SJohn Marino 
97195b7b453SJohn Marino static reg_errcode_t
97295b7b453SJohn Marino internal_function __attribute_warn_unused_result__
97395b7b453SJohn Marino re_node_set_alloc (re_node_set *set, Idx size)
97495b7b453SJohn Marino {
97595b7b453SJohn Marino   set->alloc = size;
97695b7b453SJohn Marino   set->nelem = 0;
97795b7b453SJohn Marino   set->elems = re_malloc (Idx, size);
97895b7b453SJohn Marino   if (BE (set->elems == NULL, 0))
97995b7b453SJohn Marino     return REG_ESPACE;
98095b7b453SJohn Marino   return REG_NOERROR;
98195b7b453SJohn Marino }
98295b7b453SJohn Marino 
98395b7b453SJohn Marino static reg_errcode_t
98495b7b453SJohn Marino internal_function __attribute_warn_unused_result__
98595b7b453SJohn Marino re_node_set_init_1 (re_node_set *set, Idx elem)
98695b7b453SJohn Marino {
98795b7b453SJohn Marino   set->alloc = 1;
98895b7b453SJohn Marino   set->nelem = 1;
98995b7b453SJohn Marino   set->elems = re_malloc (Idx, 1);
99095b7b453SJohn Marino   if (BE (set->elems == NULL, 0))
99195b7b453SJohn Marino     {
99295b7b453SJohn Marino       set->alloc = set->nelem = 0;
99395b7b453SJohn Marino       return REG_ESPACE;
99495b7b453SJohn Marino     }
99595b7b453SJohn Marino   set->elems[0] = elem;
99695b7b453SJohn Marino   return REG_NOERROR;
99795b7b453SJohn Marino }
99895b7b453SJohn Marino 
99995b7b453SJohn Marino static reg_errcode_t
100095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
100195b7b453SJohn Marino re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
100295b7b453SJohn Marino {
100395b7b453SJohn Marino   set->alloc = 2;
100495b7b453SJohn Marino   set->elems = re_malloc (Idx, 2);
100595b7b453SJohn Marino   if (BE (set->elems == NULL, 0))
100695b7b453SJohn Marino     return REG_ESPACE;
100795b7b453SJohn Marino   if (elem1 == elem2)
100895b7b453SJohn Marino     {
100995b7b453SJohn Marino       set->nelem = 1;
101095b7b453SJohn Marino       set->elems[0] = elem1;
101195b7b453SJohn Marino     }
101295b7b453SJohn Marino   else
101395b7b453SJohn Marino     {
101495b7b453SJohn Marino       set->nelem = 2;
101595b7b453SJohn Marino       if (elem1 < elem2)
101695b7b453SJohn Marino 	{
101795b7b453SJohn Marino 	  set->elems[0] = elem1;
101895b7b453SJohn Marino 	  set->elems[1] = elem2;
101995b7b453SJohn Marino 	}
102095b7b453SJohn Marino       else
102195b7b453SJohn Marino 	{
102295b7b453SJohn Marino 	  set->elems[0] = elem2;
102395b7b453SJohn Marino 	  set->elems[1] = elem1;
102495b7b453SJohn Marino 	}
102595b7b453SJohn Marino     }
102695b7b453SJohn Marino   return REG_NOERROR;
102795b7b453SJohn Marino }
102895b7b453SJohn Marino 
102995b7b453SJohn Marino static reg_errcode_t
103095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
103195b7b453SJohn Marino re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
103295b7b453SJohn Marino {
103395b7b453SJohn Marino   dest->nelem = src->nelem;
103495b7b453SJohn Marino   if (src->nelem > 0)
103595b7b453SJohn Marino     {
103695b7b453SJohn Marino       dest->alloc = dest->nelem;
103795b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
103895b7b453SJohn Marino       if (BE (dest->elems == NULL, 0))
103995b7b453SJohn Marino 	{
104095b7b453SJohn Marino 	  dest->alloc = dest->nelem = 0;
104195b7b453SJohn Marino 	  return REG_ESPACE;
104295b7b453SJohn Marino 	}
104395b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
104495b7b453SJohn Marino     }
104595b7b453SJohn Marino   else
104695b7b453SJohn Marino     re_node_set_init_empty (dest);
104795b7b453SJohn Marino   return REG_NOERROR;
104895b7b453SJohn Marino }
104995b7b453SJohn Marino 
105095b7b453SJohn Marino /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
105195b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
105295b7b453SJohn Marino    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
105395b7b453SJohn Marino 
105495b7b453SJohn Marino static reg_errcode_t
105595b7b453SJohn Marino internal_function __attribute_warn_unused_result__
105695b7b453SJohn Marino re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
105795b7b453SJohn Marino 			   const re_node_set *src2)
105895b7b453SJohn Marino {
105995b7b453SJohn Marino   Idx i1, i2, is, id, delta, sbase;
106095b7b453SJohn Marino   if (src1->nelem == 0 || src2->nelem == 0)
106195b7b453SJohn Marino     return REG_NOERROR;
106295b7b453SJohn Marino 
106395b7b453SJohn Marino   /* We need dest->nelem + 2 * elems_in_intersection; this is a
106495b7b453SJohn Marino      conservative estimate.  */
106595b7b453SJohn Marino   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
106695b7b453SJohn Marino     {
106795b7b453SJohn Marino       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
106895b7b453SJohn Marino       Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
106995b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
107095b7b453SJohn Marino 	return REG_ESPACE;
107195b7b453SJohn Marino       dest->elems = new_elems;
107295b7b453SJohn Marino       dest->alloc = new_alloc;
107395b7b453SJohn Marino     }
107495b7b453SJohn Marino 
107595b7b453SJohn Marino   /* Find the items in the intersection of SRC1 and SRC2, and copy
107695b7b453SJohn Marino      into the top of DEST those that are not already in DEST itself.  */
107795b7b453SJohn Marino   sbase = dest->nelem + src1->nelem + src2->nelem;
107895b7b453SJohn Marino   i1 = src1->nelem - 1;
107995b7b453SJohn Marino   i2 = src2->nelem - 1;
108095b7b453SJohn Marino   id = dest->nelem - 1;
108195b7b453SJohn Marino   for (;;)
108295b7b453SJohn Marino     {
108395b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
108495b7b453SJohn Marino 	{
108595b7b453SJohn Marino 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
108695b7b453SJohn Marino 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
108795b7b453SJohn Marino 	    --id;
108895b7b453SJohn Marino 
108995b7b453SJohn Marino           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
109095b7b453SJohn Marino             dest->elems[--sbase] = src1->elems[i1];
109195b7b453SJohn Marino 
109295b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
109395b7b453SJohn Marino 	    break;
109495b7b453SJohn Marino 	}
109595b7b453SJohn Marino 
109695b7b453SJohn Marino       /* Lower the highest of the two items.  */
109795b7b453SJohn Marino       else if (src1->elems[i1] < src2->elems[i2])
109895b7b453SJohn Marino 	{
109995b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i2))
110095b7b453SJohn Marino 	    break;
110195b7b453SJohn Marino 	}
110295b7b453SJohn Marino       else
110395b7b453SJohn Marino 	{
110495b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i1))
110595b7b453SJohn Marino 	    break;
110695b7b453SJohn Marino 	}
110795b7b453SJohn Marino     }
110895b7b453SJohn Marino 
110995b7b453SJohn Marino   id = dest->nelem - 1;
111095b7b453SJohn Marino   is = dest->nelem + src1->nelem + src2->nelem - 1;
111195b7b453SJohn Marino   delta = is - sbase + 1;
111295b7b453SJohn Marino 
111395b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
111495b7b453SJohn Marino      DEST elements are already in place; this is more or
111595b7b453SJohn Marino      less the same loop that is in re_node_set_merge.  */
111695b7b453SJohn Marino   dest->nelem += delta;
111795b7b453SJohn Marino   if (delta > 0 && REG_VALID_INDEX (id))
111895b7b453SJohn Marino     for (;;)
111995b7b453SJohn Marino       {
112095b7b453SJohn Marino 	if (dest->elems[is] > dest->elems[id])
112195b7b453SJohn Marino 	  {
112295b7b453SJohn Marino 	    /* Copy from the top.  */
112395b7b453SJohn Marino 	    dest->elems[id + delta--] = dest->elems[is--];
112495b7b453SJohn Marino 	    if (delta == 0)
112595b7b453SJohn Marino 	      break;
112695b7b453SJohn Marino 	  }
112795b7b453SJohn Marino 	else
112895b7b453SJohn Marino 	  {
112995b7b453SJohn Marino 	    /* Slide from the bottom.  */
113095b7b453SJohn Marino 	    dest->elems[id + delta] = dest->elems[id];
113195b7b453SJohn Marino 	    if (! REG_VALID_INDEX (--id))
113295b7b453SJohn Marino 	      break;
113395b7b453SJohn Marino 	  }
113495b7b453SJohn Marino       }
113595b7b453SJohn Marino 
113695b7b453SJohn Marino   /* Copy remaining SRC elements.  */
113795b7b453SJohn Marino   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
113895b7b453SJohn Marino 
113995b7b453SJohn Marino   return REG_NOERROR;
114095b7b453SJohn Marino }
114195b7b453SJohn Marino 
114295b7b453SJohn Marino /* Calculate the union set of the sets SRC1 and SRC2. And store it to
114395b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
114495b7b453SJohn Marino 
114595b7b453SJohn Marino static reg_errcode_t
114695b7b453SJohn Marino internal_function __attribute_warn_unused_result__
114795b7b453SJohn Marino re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
114895b7b453SJohn Marino 			const re_node_set *src2)
114995b7b453SJohn Marino {
115095b7b453SJohn Marino   Idx i1, i2, id;
115195b7b453SJohn Marino   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
115295b7b453SJohn Marino     {
115395b7b453SJohn Marino       dest->alloc = src1->nelem + src2->nelem;
115495b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
115595b7b453SJohn Marino       if (BE (dest->elems == NULL, 0))
115695b7b453SJohn Marino 	return REG_ESPACE;
115795b7b453SJohn Marino     }
115895b7b453SJohn Marino   else
115995b7b453SJohn Marino     {
116095b7b453SJohn Marino       if (src1 != NULL && src1->nelem > 0)
116195b7b453SJohn Marino 	return re_node_set_init_copy (dest, src1);
116295b7b453SJohn Marino       else if (src2 != NULL && src2->nelem > 0)
116395b7b453SJohn Marino 	return re_node_set_init_copy (dest, src2);
116495b7b453SJohn Marino       else
116595b7b453SJohn Marino 	re_node_set_init_empty (dest);
116695b7b453SJohn Marino       return REG_NOERROR;
116795b7b453SJohn Marino     }
116895b7b453SJohn Marino   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
116995b7b453SJohn Marino     {
117095b7b453SJohn Marino       if (src1->elems[i1] > src2->elems[i2])
117195b7b453SJohn Marino 	{
117295b7b453SJohn Marino 	  dest->elems[id++] = src2->elems[i2++];
117395b7b453SJohn Marino 	  continue;
117495b7b453SJohn Marino 	}
117595b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
117695b7b453SJohn Marino 	++i2;
117795b7b453SJohn Marino       dest->elems[id++] = src1->elems[i1++];
117895b7b453SJohn Marino     }
117995b7b453SJohn Marino   if (i1 < src1->nelem)
118095b7b453SJohn Marino     {
118195b7b453SJohn Marino       memcpy (dest->elems + id, src1->elems + i1,
118295b7b453SJohn Marino 	     (src1->nelem - i1) * sizeof (Idx));
118395b7b453SJohn Marino       id += src1->nelem - i1;
118495b7b453SJohn Marino     }
118595b7b453SJohn Marino   else if (i2 < src2->nelem)
118695b7b453SJohn Marino     {
118795b7b453SJohn Marino       memcpy (dest->elems + id, src2->elems + i2,
118895b7b453SJohn Marino 	     (src2->nelem - i2) * sizeof (Idx));
118995b7b453SJohn Marino       id += src2->nelem - i2;
119095b7b453SJohn Marino     }
119195b7b453SJohn Marino   dest->nelem = id;
119295b7b453SJohn Marino   return REG_NOERROR;
119395b7b453SJohn Marino }
119495b7b453SJohn Marino 
119595b7b453SJohn Marino /* Calculate the union set of the sets DEST and SRC. And store it to
119695b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
119795b7b453SJohn Marino 
119895b7b453SJohn Marino static reg_errcode_t
119995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
120095b7b453SJohn Marino re_node_set_merge (re_node_set *dest, const re_node_set *src)
120195b7b453SJohn Marino {
120295b7b453SJohn Marino   Idx is, id, sbase, delta;
120395b7b453SJohn Marino   if (src == NULL || src->nelem == 0)
120495b7b453SJohn Marino     return REG_NOERROR;
120595b7b453SJohn Marino   if (dest->alloc < 2 * src->nelem + dest->nelem)
120695b7b453SJohn Marino     {
120795b7b453SJohn Marino       Idx new_alloc = 2 * (src->nelem + dest->alloc);
120895b7b453SJohn Marino       Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
120995b7b453SJohn Marino       if (BE (new_buffer == NULL, 0))
121095b7b453SJohn Marino 	return REG_ESPACE;
121195b7b453SJohn Marino       dest->elems = new_buffer;
121295b7b453SJohn Marino       dest->alloc = new_alloc;
121395b7b453SJohn Marino     }
121495b7b453SJohn Marino 
121595b7b453SJohn Marino   if (BE (dest->nelem == 0, 0))
121695b7b453SJohn Marino     {
121795b7b453SJohn Marino       dest->nelem = src->nelem;
121895b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
121995b7b453SJohn Marino       return REG_NOERROR;
122095b7b453SJohn Marino     }
122195b7b453SJohn Marino 
122295b7b453SJohn Marino   /* Copy into the top of DEST the items of SRC that are not
122395b7b453SJohn Marino      found in DEST.  Maybe we could binary search in DEST?  */
122495b7b453SJohn Marino   for (sbase = dest->nelem + 2 * src->nelem,
122595b7b453SJohn Marino        is = src->nelem - 1, id = dest->nelem - 1;
122695b7b453SJohn Marino        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
122795b7b453SJohn Marino     {
122895b7b453SJohn Marino       if (dest->elems[id] == src->elems[is])
122995b7b453SJohn Marino 	is--, id--;
123095b7b453SJohn Marino       else if (dest->elems[id] < src->elems[is])
123195b7b453SJohn Marino 	dest->elems[--sbase] = src->elems[is--];
123295b7b453SJohn Marino       else /* if (dest->elems[id] > src->elems[is]) */
123395b7b453SJohn Marino 	--id;
123495b7b453SJohn Marino     }
123595b7b453SJohn Marino 
123695b7b453SJohn Marino   if (REG_VALID_INDEX (is))
123795b7b453SJohn Marino     {
123895b7b453SJohn Marino       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
123995b7b453SJohn Marino       sbase -= is + 1;
124095b7b453SJohn Marino       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
124195b7b453SJohn Marino     }
124295b7b453SJohn Marino 
124395b7b453SJohn Marino   id = dest->nelem - 1;
124495b7b453SJohn Marino   is = dest->nelem + 2 * src->nelem - 1;
124595b7b453SJohn Marino   delta = is - sbase + 1;
124695b7b453SJohn Marino   if (delta == 0)
124795b7b453SJohn Marino     return REG_NOERROR;
124895b7b453SJohn Marino 
124995b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
125095b7b453SJohn Marino      DEST elements are already in place.  */
125195b7b453SJohn Marino   dest->nelem += delta;
125295b7b453SJohn Marino   for (;;)
125395b7b453SJohn Marino     {
125495b7b453SJohn Marino       if (dest->elems[is] > dest->elems[id])
125595b7b453SJohn Marino 	{
125695b7b453SJohn Marino 	  /* Copy from the top.  */
125795b7b453SJohn Marino 	  dest->elems[id + delta--] = dest->elems[is--];
125895b7b453SJohn Marino 	  if (delta == 0)
125995b7b453SJohn Marino 	    break;
126095b7b453SJohn Marino 	}
126195b7b453SJohn Marino       else
126295b7b453SJohn Marino 	{
126395b7b453SJohn Marino 	  /* Slide from the bottom.  */
126495b7b453SJohn Marino 	  dest->elems[id + delta] = dest->elems[id];
126595b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--id))
126695b7b453SJohn Marino 	    {
126795b7b453SJohn Marino 	      /* Copy remaining SRC elements.  */
126895b7b453SJohn Marino 	      memcpy (dest->elems, dest->elems + sbase,
126995b7b453SJohn Marino 		      delta * sizeof (Idx));
127095b7b453SJohn Marino 	      break;
127195b7b453SJohn Marino 	    }
127295b7b453SJohn Marino 	}
127395b7b453SJohn Marino     }
127495b7b453SJohn Marino 
127595b7b453SJohn Marino   return REG_NOERROR;
127695b7b453SJohn Marino }
127795b7b453SJohn Marino 
127895b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
127995b7b453SJohn Marino    SET should not already have ELEM.
128095b7b453SJohn Marino    Return true if successful.  */
128195b7b453SJohn Marino 
128295b7b453SJohn Marino static bool
128395b7b453SJohn Marino internal_function __attribute_warn_unused_result__
128495b7b453SJohn Marino re_node_set_insert (re_node_set *set, Idx elem)
128595b7b453SJohn Marino {
128695b7b453SJohn Marino   Idx idx;
128795b7b453SJohn Marino   /* In case the set is empty.  */
128895b7b453SJohn Marino   if (set->alloc == 0)
128995b7b453SJohn Marino     return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
129095b7b453SJohn Marino 
129195b7b453SJohn Marino   if (BE (set->nelem, 0) == 0)
129295b7b453SJohn Marino     {
129395b7b453SJohn Marino       /* We already guaranteed above that set->alloc != 0.  */
129495b7b453SJohn Marino       set->elems[0] = elem;
129595b7b453SJohn Marino       ++set->nelem;
129695b7b453SJohn Marino       return true;
129795b7b453SJohn Marino     }
129895b7b453SJohn Marino 
129995b7b453SJohn Marino   /* Realloc if we need.  */
130095b7b453SJohn Marino   if (set->alloc == set->nelem)
130195b7b453SJohn Marino     {
130295b7b453SJohn Marino       Idx *new_elems;
130395b7b453SJohn Marino       set->alloc = set->alloc * 2;
130495b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
130595b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
130695b7b453SJohn Marino 	return false;
130795b7b453SJohn Marino       set->elems = new_elems;
130895b7b453SJohn Marino     }
130995b7b453SJohn Marino 
131095b7b453SJohn Marino   /* Move the elements which follows the new element.  Test the
131195b7b453SJohn Marino      first element separately to skip a check in the inner loop.  */
131295b7b453SJohn Marino   if (elem < set->elems[0])
131395b7b453SJohn Marino     {
131495b7b453SJohn Marino       idx = 0;
131595b7b453SJohn Marino       for (idx = set->nelem; idx > 0; idx--)
131695b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
131795b7b453SJohn Marino     }
131895b7b453SJohn Marino   else
131995b7b453SJohn Marino     {
132095b7b453SJohn Marino       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
132195b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
132295b7b453SJohn Marino     }
132395b7b453SJohn Marino 
132495b7b453SJohn Marino   /* Insert the new element.  */
132595b7b453SJohn Marino   set->elems[idx] = elem;
132695b7b453SJohn Marino   ++set->nelem;
132795b7b453SJohn Marino   return true;
132895b7b453SJohn Marino }
132995b7b453SJohn Marino 
133095b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
133195b7b453SJohn Marino    SET should not already have any element greater than or equal to ELEM.
133295b7b453SJohn Marino    Return true if successful.  */
133395b7b453SJohn Marino 
133495b7b453SJohn Marino static bool
133595b7b453SJohn Marino internal_function __attribute_warn_unused_result__
133695b7b453SJohn Marino re_node_set_insert_last (re_node_set *set, Idx elem)
133795b7b453SJohn Marino {
133895b7b453SJohn Marino   /* Realloc if we need.  */
133995b7b453SJohn Marino   if (set->alloc == set->nelem)
134095b7b453SJohn Marino     {
134195b7b453SJohn Marino       Idx *new_elems;
134295b7b453SJohn Marino       set->alloc = (set->alloc + 1) * 2;
134395b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
134495b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
134595b7b453SJohn Marino 	return false;
134695b7b453SJohn Marino       set->elems = new_elems;
134795b7b453SJohn Marino     }
134895b7b453SJohn Marino 
134995b7b453SJohn Marino   /* Insert the new element.  */
135095b7b453SJohn Marino   set->elems[set->nelem++] = elem;
135195b7b453SJohn Marino   return true;
135295b7b453SJohn Marino }
135395b7b453SJohn Marino 
135495b7b453SJohn Marino /* Compare two node sets SET1 and SET2.
135595b7b453SJohn Marino    Return true if SET1 and SET2 are equivalent.  */
135695b7b453SJohn Marino 
135795b7b453SJohn Marino static bool
135895b7b453SJohn Marino internal_function __attribute ((pure))
135995b7b453SJohn Marino re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
136095b7b453SJohn Marino {
136195b7b453SJohn Marino   Idx i;
136295b7b453SJohn Marino   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
136395b7b453SJohn Marino     return false;
136495b7b453SJohn Marino   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
136595b7b453SJohn Marino     if (set1->elems[i] != set2->elems[i])
136695b7b453SJohn Marino       return false;
136795b7b453SJohn Marino   return true;
136895b7b453SJohn Marino }
136995b7b453SJohn Marino 
137095b7b453SJohn Marino /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
137195b7b453SJohn Marino 
137295b7b453SJohn Marino static Idx
137395b7b453SJohn Marino internal_function __attribute ((pure))
137495b7b453SJohn Marino re_node_set_contains (const re_node_set *set, Idx elem)
137595b7b453SJohn Marino {
137695b7b453SJohn Marino   __re_size_t idx, right, mid;
137795b7b453SJohn Marino   if (! REG_VALID_NONZERO_INDEX (set->nelem))
137895b7b453SJohn Marino     return 0;
137995b7b453SJohn Marino 
138095b7b453SJohn Marino   /* Binary search the element.  */
138195b7b453SJohn Marino   idx = 0;
138295b7b453SJohn Marino   right = set->nelem - 1;
138395b7b453SJohn Marino   while (idx < right)
138495b7b453SJohn Marino     {
138595b7b453SJohn Marino       mid = (idx + right) / 2;
138695b7b453SJohn Marino       if (set->elems[mid] < elem)
138795b7b453SJohn Marino 	idx = mid + 1;
138895b7b453SJohn Marino       else
138995b7b453SJohn Marino 	right = mid;
139095b7b453SJohn Marino     }
139195b7b453SJohn Marino   return set->elems[idx] == elem ? idx + 1 : 0;
139295b7b453SJohn Marino }
139395b7b453SJohn Marino 
139495b7b453SJohn Marino static void
139595b7b453SJohn Marino internal_function
139695b7b453SJohn Marino re_node_set_remove_at (re_node_set *set, Idx idx)
139795b7b453SJohn Marino {
139895b7b453SJohn Marino   verify (! TYPE_SIGNED (Idx));
139995b7b453SJohn Marino   /* if (idx < 0)
140095b7b453SJohn Marino      return; */
140195b7b453SJohn Marino   if (idx >= set->nelem)
140295b7b453SJohn Marino     return;
140395b7b453SJohn Marino   --set->nelem;
140495b7b453SJohn Marino   for (; idx < set->nelem; idx++)
140595b7b453SJohn Marino     set->elems[idx] = set->elems[idx + 1];
140695b7b453SJohn Marino }
140795b7b453SJohn Marino 
140895b7b453SJohn Marino 
140995b7b453SJohn Marino /* Add the token TOKEN to dfa->nodes, and return the index of the token.
141095b7b453SJohn Marino    Or return REG_MISSING if an error occurred.  */
141195b7b453SJohn Marino 
141295b7b453SJohn Marino static Idx
141395b7b453SJohn Marino internal_function
141495b7b453SJohn Marino re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
141595b7b453SJohn Marino {
141695b7b453SJohn Marino   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
141795b7b453SJohn Marino     {
141895b7b453SJohn Marino       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
141995b7b453SJohn Marino       Idx *new_nexts, *new_indices;
142095b7b453SJohn Marino       re_node_set *new_edests, *new_eclosures;
142195b7b453SJohn Marino       re_token_t *new_nodes;
1422*cf28ed85SJohn Marino 
1423*cf28ed85SJohn Marino       /* Avoid overflows in realloc.  */
1424*cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (re_token_t),
142595b7b453SJohn Marino 					  MAX (sizeof (re_node_set),
142695b7b453SJohn Marino 					       sizeof (Idx)));
1427*cf28ed85SJohn Marino       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
142895b7b453SJohn Marino 	return REG_MISSING;
142995b7b453SJohn Marino 
143095b7b453SJohn Marino       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
143195b7b453SJohn Marino       if (BE (new_nodes == NULL, 0))
143295b7b453SJohn Marino 	return REG_MISSING;
143395b7b453SJohn Marino       dfa->nodes = new_nodes;
143495b7b453SJohn Marino       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
143595b7b453SJohn Marino       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
143695b7b453SJohn Marino       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
143795b7b453SJohn Marino       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
143895b7b453SJohn Marino       if (BE (new_nexts == NULL || new_indices == NULL
143995b7b453SJohn Marino 	      || new_edests == NULL || new_eclosures == NULL, 0))
144095b7b453SJohn Marino 	return REG_MISSING;
144195b7b453SJohn Marino       dfa->nexts = new_nexts;
144295b7b453SJohn Marino       dfa->org_indices = new_indices;
144395b7b453SJohn Marino       dfa->edests = new_edests;
144495b7b453SJohn Marino       dfa->eclosures = new_eclosures;
144595b7b453SJohn Marino       dfa->nodes_alloc = new_nodes_alloc;
144695b7b453SJohn Marino     }
144795b7b453SJohn Marino   dfa->nodes[dfa->nodes_len] = token;
144895b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].constraint = 0;
144995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
145095b7b453SJohn Marino   {
145195b7b453SJohn Marino   int type = token.type;
145295b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].accept_mb =
145395b7b453SJohn Marino     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
145495b7b453SJohn Marino   }
145595b7b453SJohn Marino #endif
145695b7b453SJohn Marino   dfa->nexts[dfa->nodes_len] = REG_MISSING;
145795b7b453SJohn Marino   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
145895b7b453SJohn Marino   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
145995b7b453SJohn Marino   return dfa->nodes_len++;
146095b7b453SJohn Marino }
146195b7b453SJohn Marino 
146295b7b453SJohn Marino static inline re_hashval_t
146395b7b453SJohn Marino internal_function
146495b7b453SJohn Marino calc_state_hash (const re_node_set *nodes, unsigned int context)
146595b7b453SJohn Marino {
146695b7b453SJohn Marino   re_hashval_t hash = nodes->nelem + context;
146795b7b453SJohn Marino   Idx i;
146895b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
146995b7b453SJohn Marino     hash += nodes->elems[i];
147095b7b453SJohn Marino   return hash;
147195b7b453SJohn Marino }
147295b7b453SJohn Marino 
147395b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES.
147495b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
147595b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
147695b7b453SJohn Marino    return NULL and set the error code in ERR.
147795b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
147895b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
147995b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
148095b7b453SJohn Marino 	   optimization.  */
148195b7b453SJohn Marino 
148295b7b453SJohn Marino static re_dfastate_t *
148395b7b453SJohn Marino internal_function __attribute_warn_unused_result__
148495b7b453SJohn Marino re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
148595b7b453SJohn Marino 		  const re_node_set *nodes)
148695b7b453SJohn Marino {
148795b7b453SJohn Marino   re_hashval_t hash;
148895b7b453SJohn Marino   re_dfastate_t *new_state;
148995b7b453SJohn Marino   struct re_state_table_entry *spot;
149095b7b453SJohn Marino   Idx i;
149195b7b453SJohn Marino #ifdef lint
149295b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
149395b7b453SJohn Marino   *err = REG_NOERROR;
149495b7b453SJohn Marino #endif
149595b7b453SJohn Marino   if (BE (nodes->nelem == 0, 0))
149695b7b453SJohn Marino     {
149795b7b453SJohn Marino       *err = REG_NOERROR;
149895b7b453SJohn Marino       return NULL;
149995b7b453SJohn Marino     }
150095b7b453SJohn Marino   hash = calc_state_hash (nodes, 0);
150195b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
150295b7b453SJohn Marino 
150395b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
150495b7b453SJohn Marino     {
150595b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
150695b7b453SJohn Marino       if (hash != state->hash)
150795b7b453SJohn Marino 	continue;
150895b7b453SJohn Marino       if (re_node_set_compare (&state->nodes, nodes))
150995b7b453SJohn Marino 	return state;
151095b7b453SJohn Marino     }
151195b7b453SJohn Marino 
151295b7b453SJohn Marino   /* There are no appropriate state in the dfa, create the new one.  */
151395b7b453SJohn Marino   new_state = create_ci_newstate (dfa, nodes, hash);
151495b7b453SJohn Marino   if (BE (new_state == NULL, 0))
151595b7b453SJohn Marino     *err = REG_ESPACE;
151695b7b453SJohn Marino 
151795b7b453SJohn Marino   return new_state;
151895b7b453SJohn Marino }
151995b7b453SJohn Marino 
152095b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES and
152195b7b453SJohn Marino    whose context is equivalent to CONTEXT.
152295b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
152395b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
152495b7b453SJohn Marino    return NULL and set the error code in ERR.
152595b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
152695b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
152795b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
152895b7b453SJohn Marino 	   optimization.  */
152995b7b453SJohn Marino 
153095b7b453SJohn Marino static re_dfastate_t *
153195b7b453SJohn Marino internal_function __attribute_warn_unused_result__
153295b7b453SJohn Marino re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
153395b7b453SJohn Marino 			  const re_node_set *nodes, unsigned int context)
153495b7b453SJohn Marino {
153595b7b453SJohn Marino   re_hashval_t hash;
153695b7b453SJohn Marino   re_dfastate_t *new_state;
153795b7b453SJohn Marino   struct re_state_table_entry *spot;
153895b7b453SJohn Marino   Idx i;
153995b7b453SJohn Marino #ifdef lint
154095b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
154195b7b453SJohn Marino   *err = REG_NOERROR;
154295b7b453SJohn Marino #endif
154395b7b453SJohn Marino   if (nodes->nelem == 0)
154495b7b453SJohn Marino     {
154595b7b453SJohn Marino       *err = REG_NOERROR;
154695b7b453SJohn Marino       return NULL;
154795b7b453SJohn Marino     }
154895b7b453SJohn Marino   hash = calc_state_hash (nodes, context);
154995b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
155095b7b453SJohn Marino 
155195b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
155295b7b453SJohn Marino     {
155395b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
155495b7b453SJohn Marino       if (state->hash == hash
155595b7b453SJohn Marino 	  && state->context == context
155695b7b453SJohn Marino 	  && re_node_set_compare (state->entrance_nodes, nodes))
155795b7b453SJohn Marino 	return state;
155895b7b453SJohn Marino     }
1559*cf28ed85SJohn Marino   /* There are no appropriate state in 'dfa', create the new one.  */
156095b7b453SJohn Marino   new_state = create_cd_newstate (dfa, nodes, context, hash);
156195b7b453SJohn Marino   if (BE (new_state == NULL, 0))
156295b7b453SJohn Marino     *err = REG_ESPACE;
156395b7b453SJohn Marino 
156495b7b453SJohn Marino   return new_state;
156595b7b453SJohn Marino }
156695b7b453SJohn Marino 
156795b7b453SJohn Marino /* Finish initialization of the new state NEWSTATE, and using its hash value
156895b7b453SJohn Marino    HASH put in the appropriate bucket of DFA's state table.  Return value
156995b7b453SJohn Marino    indicates the error code if failed.  */
157095b7b453SJohn Marino 
157195b7b453SJohn Marino static reg_errcode_t
157295b7b453SJohn Marino __attribute_warn_unused_result__
157395b7b453SJohn Marino register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
157495b7b453SJohn Marino 		re_hashval_t hash)
157595b7b453SJohn Marino {
157695b7b453SJohn Marino   struct re_state_table_entry *spot;
157795b7b453SJohn Marino   reg_errcode_t err;
157895b7b453SJohn Marino   Idx i;
157995b7b453SJohn Marino 
158095b7b453SJohn Marino   newstate->hash = hash;
158195b7b453SJohn Marino   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
158295b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
158395b7b453SJohn Marino     return REG_ESPACE;
158495b7b453SJohn Marino   for (i = 0; i < newstate->nodes.nelem; i++)
158595b7b453SJohn Marino     {
158695b7b453SJohn Marino       Idx elem = newstate->nodes.elems[i];
158795b7b453SJohn Marino       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1588*cf28ed85SJohn Marino 	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
158995b7b453SJohn Marino 	  return REG_ESPACE;
159095b7b453SJohn Marino     }
159195b7b453SJohn Marino 
159295b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
159395b7b453SJohn Marino   if (BE (spot->alloc <= spot->num, 0))
159495b7b453SJohn Marino     {
159595b7b453SJohn Marino       Idx new_alloc = 2 * spot->num + 2;
159695b7b453SJohn Marino       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
159795b7b453SJohn Marino 					      new_alloc);
159895b7b453SJohn Marino       if (BE (new_array == NULL, 0))
159995b7b453SJohn Marino 	return REG_ESPACE;
160095b7b453SJohn Marino       spot->array = new_array;
160195b7b453SJohn Marino       spot->alloc = new_alloc;
160295b7b453SJohn Marino     }
160395b7b453SJohn Marino   spot->array[spot->num++] = newstate;
160495b7b453SJohn Marino   return REG_NOERROR;
160595b7b453SJohn Marino }
160695b7b453SJohn Marino 
160795b7b453SJohn Marino static void
160895b7b453SJohn Marino free_state (re_dfastate_t *state)
160995b7b453SJohn Marino {
161095b7b453SJohn Marino   re_node_set_free (&state->non_eps_nodes);
161195b7b453SJohn Marino   re_node_set_free (&state->inveclosure);
161295b7b453SJohn Marino   if (state->entrance_nodes != &state->nodes)
161395b7b453SJohn Marino     {
161495b7b453SJohn Marino       re_node_set_free (state->entrance_nodes);
161595b7b453SJohn Marino       re_free (state->entrance_nodes);
161695b7b453SJohn Marino     }
161795b7b453SJohn Marino   re_node_set_free (&state->nodes);
161895b7b453SJohn Marino   re_free (state->word_trtable);
161995b7b453SJohn Marino   re_free (state->trtable);
162095b7b453SJohn Marino   re_free (state);
162195b7b453SJohn Marino }
162295b7b453SJohn Marino 
1623*cf28ed85SJohn Marino /* Create the new state which is independent of contexts.
162495b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
162595b7b453SJohn Marino 
162695b7b453SJohn Marino static re_dfastate_t *
162795b7b453SJohn Marino internal_function __attribute_warn_unused_result__
162895b7b453SJohn Marino create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
162995b7b453SJohn Marino 		    re_hashval_t hash)
163095b7b453SJohn Marino {
163195b7b453SJohn Marino   Idx i;
163295b7b453SJohn Marino   reg_errcode_t err;
163395b7b453SJohn Marino   re_dfastate_t *newstate;
163495b7b453SJohn Marino 
163595b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
163695b7b453SJohn Marino   if (BE (newstate == NULL, 0))
163795b7b453SJohn Marino     return NULL;
163895b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
163995b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
164095b7b453SJohn Marino     {
164195b7b453SJohn Marino       re_free (newstate);
164295b7b453SJohn Marino       return NULL;
164395b7b453SJohn Marino     }
164495b7b453SJohn Marino 
164595b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
164695b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
164795b7b453SJohn Marino     {
164895b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
164995b7b453SJohn Marino       re_token_type_t type = node->type;
165095b7b453SJohn Marino       if (type == CHARACTER && !node->constraint)
165195b7b453SJohn Marino 	continue;
165295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
165395b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
165495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
165595b7b453SJohn Marino 
165695b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
165795b7b453SJohn Marino       if (type == END_OF_RE)
165895b7b453SJohn Marino 	newstate->halt = 1;
165995b7b453SJohn Marino       else if (type == OP_BACK_REF)
166095b7b453SJohn Marino 	newstate->has_backref = 1;
166195b7b453SJohn Marino       else if (type == ANCHOR || node->constraint)
166295b7b453SJohn Marino 	newstate->has_constraint = 1;
166395b7b453SJohn Marino     }
166495b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
166595b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
166695b7b453SJohn Marino     {
166795b7b453SJohn Marino       free_state (newstate);
166895b7b453SJohn Marino       newstate = NULL;
166995b7b453SJohn Marino     }
167095b7b453SJohn Marino   return newstate;
167195b7b453SJohn Marino }
167295b7b453SJohn Marino 
167395b7b453SJohn Marino /* Create the new state which is depend on the context CONTEXT.
167495b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
167595b7b453SJohn Marino 
167695b7b453SJohn Marino static re_dfastate_t *
167795b7b453SJohn Marino internal_function __attribute_warn_unused_result__
167895b7b453SJohn Marino create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
167995b7b453SJohn Marino 		    unsigned int context, re_hashval_t hash)
168095b7b453SJohn Marino {
168195b7b453SJohn Marino   Idx i, nctx_nodes = 0;
168295b7b453SJohn Marino   reg_errcode_t err;
168395b7b453SJohn Marino   re_dfastate_t *newstate;
168495b7b453SJohn Marino 
168595b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
168695b7b453SJohn Marino   if (BE (newstate == NULL, 0))
168795b7b453SJohn Marino     return NULL;
168895b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
168995b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
169095b7b453SJohn Marino     {
169195b7b453SJohn Marino       re_free (newstate);
169295b7b453SJohn Marino       return NULL;
169395b7b453SJohn Marino     }
169495b7b453SJohn Marino 
169595b7b453SJohn Marino   newstate->context = context;
169695b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
169795b7b453SJohn Marino 
169895b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
169995b7b453SJohn Marino     {
170095b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
170195b7b453SJohn Marino       re_token_type_t type = node->type;
170295b7b453SJohn Marino       unsigned int constraint = node->constraint;
170395b7b453SJohn Marino 
170495b7b453SJohn Marino       if (type == CHARACTER && !constraint)
170595b7b453SJohn Marino 	continue;
170695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
170795b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
170895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
170995b7b453SJohn Marino 
171095b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
171195b7b453SJohn Marino       if (type == END_OF_RE)
171295b7b453SJohn Marino 	newstate->halt = 1;
171395b7b453SJohn Marino       else if (type == OP_BACK_REF)
171495b7b453SJohn Marino 	newstate->has_backref = 1;
171595b7b453SJohn Marino 
171695b7b453SJohn Marino       if (constraint)
171795b7b453SJohn Marino 	{
171895b7b453SJohn Marino 	  if (newstate->entrance_nodes == &newstate->nodes)
171995b7b453SJohn Marino 	    {
172095b7b453SJohn Marino 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
172195b7b453SJohn Marino 	      if (BE (newstate->entrance_nodes == NULL, 0))
172295b7b453SJohn Marino 		{
172395b7b453SJohn Marino 		  free_state (newstate);
172495b7b453SJohn Marino 		  return NULL;
172595b7b453SJohn Marino 		}
172695b7b453SJohn Marino 	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
172795b7b453SJohn Marino 		  != REG_NOERROR)
172895b7b453SJohn Marino 		return NULL;
172995b7b453SJohn Marino 	      nctx_nodes = 0;
173095b7b453SJohn Marino 	      newstate->has_constraint = 1;
173195b7b453SJohn Marino 	    }
173295b7b453SJohn Marino 
173395b7b453SJohn Marino 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
173495b7b453SJohn Marino 	    {
173595b7b453SJohn Marino 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
173695b7b453SJohn Marino 	      ++nctx_nodes;
173795b7b453SJohn Marino 	    }
173895b7b453SJohn Marino 	}
173995b7b453SJohn Marino     }
174095b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
174195b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
174295b7b453SJohn Marino     {
174395b7b453SJohn Marino       free_state (newstate);
174495b7b453SJohn Marino       newstate = NULL;
174595b7b453SJohn Marino     }
174695b7b453SJohn Marino   return  newstate;
174795b7b453SJohn Marino }
1748