xref: /dflybsd-src/contrib/grep/lib/regex_internal.c (revision 680a9cb8d724bb1128d1b284fa92c474e5602a13)
195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*680a9cb8SJohn Marino    Copyright (C) 2002-2014 Free Software Foundation, Inc.
395b7b453SJohn Marino    This file is part of the GNU C Library.
495b7b453SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino 
6*680a9cb8SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
7*680a9cb8SJohn Marino    modify it under the terms of the GNU General Public
8*680a9cb8SJohn Marino    License as published by the Free Software Foundation; either
9*680a9cb8SJohn Marino    version 3 of the License, or (at your option) any later version.
1095b7b453SJohn Marino 
11*680a9cb8SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1295b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
13*680a9cb8SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14*680a9cb8SJohn Marino    General Public License for more details.
1595b7b453SJohn Marino 
16*680a9cb8SJohn Marino    You should have received a copy of the GNU General Public
17*680a9cb8SJohn Marino    License along with the GNU C Library; if not, see
18*680a9cb8SJohn Marino    <http://www.gnu.org/licenses/>.  */
1995b7b453SJohn Marino 
2095b7b453SJohn Marino static void re_string_construct_common (const char *str, Idx len,
2195b7b453SJohn Marino 					re_string_t *pstr,
2295b7b453SJohn Marino 					RE_TRANSLATE_TYPE trans, bool icase,
2395b7b453SJohn Marino 					const re_dfa_t *dfa) internal_function;
2495b7b453SJohn Marino static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
2595b7b453SJohn Marino 					  const re_node_set *nodes,
2695b7b453SJohn Marino 					  re_hashval_t hash) internal_function;
2795b7b453SJohn Marino static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
2895b7b453SJohn Marino 					  const re_node_set *nodes,
2995b7b453SJohn Marino 					  unsigned int context,
3095b7b453SJohn Marino 					  re_hashval_t hash) internal_function;
3195b7b453SJohn Marino 
3295b7b453SJohn Marino /* Functions for string operation.  */
3395b7b453SJohn Marino 
3495b7b453SJohn Marino /* This function allocate the buffers.  It is necessary to call
3595b7b453SJohn Marino    re_string_reconstruct before using the object.  */
3695b7b453SJohn Marino 
3795b7b453SJohn Marino static reg_errcode_t
3895b7b453SJohn Marino internal_function __attribute_warn_unused_result__
3995b7b453SJohn Marino re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
4095b7b453SJohn Marino 		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
4195b7b453SJohn Marino {
4295b7b453SJohn Marino   reg_errcode_t ret;
4395b7b453SJohn Marino   Idx init_buf_len;
4495b7b453SJohn Marino 
4595b7b453SJohn Marino   /* Ensure at least one character fits into the buffers.  */
4695b7b453SJohn Marino   if (init_len < dfa->mb_cur_max)
4795b7b453SJohn Marino     init_len = dfa->mb_cur_max;
4895b7b453SJohn Marino   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
4995b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
5095b7b453SJohn Marino 
5195b7b453SJohn Marino   ret = re_string_realloc_buffers (pstr, init_buf_len);
5295b7b453SJohn Marino   if (BE (ret != REG_NOERROR, 0))
5395b7b453SJohn Marino     return ret;
5495b7b453SJohn Marino 
5595b7b453SJohn Marino   pstr->word_char = dfa->word_char;
5695b7b453SJohn Marino   pstr->word_ops_used = dfa->word_ops_used;
5795b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
5895b7b453SJohn Marino   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
5995b7b453SJohn Marino   pstr->valid_raw_len = pstr->valid_len;
6095b7b453SJohn Marino   return REG_NOERROR;
6195b7b453SJohn Marino }
6295b7b453SJohn Marino 
6395b7b453SJohn Marino /* This function allocate the buffers, and initialize them.  */
6495b7b453SJohn Marino 
6595b7b453SJohn Marino static reg_errcode_t
6695b7b453SJohn Marino internal_function __attribute_warn_unused_result__
6795b7b453SJohn Marino re_string_construct (re_string_t *pstr, const char *str, Idx len,
6895b7b453SJohn Marino 		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
6995b7b453SJohn Marino {
7095b7b453SJohn Marino   reg_errcode_t ret;
7195b7b453SJohn Marino   memset (pstr, '\0', sizeof (re_string_t));
7295b7b453SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
7395b7b453SJohn Marino 
7495b7b453SJohn Marino   if (len > 0)
7595b7b453SJohn Marino     {
7695b7b453SJohn Marino       ret = re_string_realloc_buffers (pstr, len + 1);
7795b7b453SJohn Marino       if (BE (ret != REG_NOERROR, 0))
7895b7b453SJohn Marino 	return ret;
7995b7b453SJohn Marino     }
8095b7b453SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
8195b7b453SJohn Marino 
8295b7b453SJohn Marino   if (icase)
8395b7b453SJohn Marino     {
8495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
8595b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
8695b7b453SJohn Marino 	{
8795b7b453SJohn Marino 	  while (1)
8895b7b453SJohn Marino 	    {
8995b7b453SJohn Marino 	      ret = build_wcs_upper_buffer (pstr);
9095b7b453SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9195b7b453SJohn Marino 		return ret;
9295b7b453SJohn Marino 	      if (pstr->valid_raw_len >= len)
9395b7b453SJohn Marino 		break;
9495b7b453SJohn Marino 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
9595b7b453SJohn Marino 		break;
9695b7b453SJohn Marino 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
9795b7b453SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9895b7b453SJohn Marino 		return ret;
9995b7b453SJohn Marino 	    }
10095b7b453SJohn Marino 	}
10195b7b453SJohn Marino       else
10295b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
10395b7b453SJohn Marino 	build_upper_buffer (pstr);
10495b7b453SJohn Marino     }
10595b7b453SJohn Marino   else
10695b7b453SJohn Marino     {
10795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
10895b7b453SJohn Marino       if (dfa->mb_cur_max > 1)
10995b7b453SJohn Marino 	build_wcs_buffer (pstr);
11095b7b453SJohn Marino       else
11195b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
11295b7b453SJohn Marino 	{
11395b7b453SJohn Marino 	  if (trans != NULL)
11495b7b453SJohn Marino 	    re_string_translate_buffer (pstr);
11595b7b453SJohn Marino 	  else
11695b7b453SJohn Marino 	    {
11795b7b453SJohn Marino 	      pstr->valid_len = pstr->bufs_len;
11895b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->bufs_len;
11995b7b453SJohn Marino 	    }
12095b7b453SJohn Marino 	}
12195b7b453SJohn Marino     }
12295b7b453SJohn Marino 
12395b7b453SJohn Marino   return REG_NOERROR;
12495b7b453SJohn Marino }
12595b7b453SJohn Marino 
12695b7b453SJohn Marino /* Helper functions for re_string_allocate, and re_string_construct.  */
12795b7b453SJohn Marino 
12895b7b453SJohn Marino static reg_errcode_t
12995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
13095b7b453SJohn Marino re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
13195b7b453SJohn Marino {
13295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
13395b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
13495b7b453SJohn Marino     {
13595b7b453SJohn Marino       wint_t *new_wcs;
13695b7b453SJohn Marino 
137cf28ed85SJohn Marino       /* Avoid overflow in realloc.  */
138cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
139cf28ed85SJohn Marino       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
14095b7b453SJohn Marino 	return REG_ESPACE;
14195b7b453SJohn Marino 
14295b7b453SJohn Marino       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
14395b7b453SJohn Marino       if (BE (new_wcs == NULL, 0))
14495b7b453SJohn Marino 	return REG_ESPACE;
14595b7b453SJohn Marino       pstr->wcs = new_wcs;
14695b7b453SJohn Marino       if (pstr->offsets != NULL)
14795b7b453SJohn Marino 	{
14895b7b453SJohn Marino 	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
14995b7b453SJohn Marino 	  if (BE (new_offsets == NULL, 0))
15095b7b453SJohn Marino 	    return REG_ESPACE;
15195b7b453SJohn Marino 	  pstr->offsets = new_offsets;
15295b7b453SJohn Marino 	}
15395b7b453SJohn Marino     }
15495b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
15595b7b453SJohn Marino   if (pstr->mbs_allocated)
15695b7b453SJohn Marino     {
15795b7b453SJohn Marino       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
15895b7b453SJohn Marino 					   new_buf_len);
15995b7b453SJohn Marino       if (BE (new_mbs == NULL, 0))
16095b7b453SJohn Marino 	return REG_ESPACE;
16195b7b453SJohn Marino       pstr->mbs = new_mbs;
16295b7b453SJohn Marino     }
16395b7b453SJohn Marino   pstr->bufs_len = new_buf_len;
16495b7b453SJohn Marino   return REG_NOERROR;
16595b7b453SJohn Marino }
16695b7b453SJohn Marino 
16795b7b453SJohn Marino 
16895b7b453SJohn Marino static void
16995b7b453SJohn Marino internal_function
17095b7b453SJohn Marino re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
17195b7b453SJohn Marino 			    RE_TRANSLATE_TYPE trans, bool icase,
17295b7b453SJohn Marino 			    const re_dfa_t *dfa)
17395b7b453SJohn Marino {
17495b7b453SJohn Marino   pstr->raw_mbs = (const unsigned char *) str;
17595b7b453SJohn Marino   pstr->len = len;
17695b7b453SJohn Marino   pstr->raw_len = len;
17795b7b453SJohn Marino   pstr->trans = trans;
17895b7b453SJohn Marino   pstr->icase = icase;
17995b7b453SJohn Marino   pstr->mbs_allocated = (trans != NULL || icase);
18095b7b453SJohn Marino   pstr->mb_cur_max = dfa->mb_cur_max;
18195b7b453SJohn Marino   pstr->is_utf8 = dfa->is_utf8;
18295b7b453SJohn Marino   pstr->map_notascii = dfa->map_notascii;
18395b7b453SJohn Marino   pstr->stop = pstr->len;
18495b7b453SJohn Marino   pstr->raw_stop = pstr->stop;
18595b7b453SJohn Marino }
18695b7b453SJohn Marino 
18795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
18895b7b453SJohn Marino 
18995b7b453SJohn Marino /* Build wide character buffer PSTR->WCS.
19095b7b453SJohn Marino    If the byte sequence of the string are:
19195b7b453SJohn Marino      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
19295b7b453SJohn Marino    Then wide character buffer will be:
19395b7b453SJohn Marino      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
19495b7b453SJohn Marino    We use WEOF for padding, they indicate that the position isn't
19595b7b453SJohn Marino    a first byte of a multibyte character.
19695b7b453SJohn Marino 
19795b7b453SJohn Marino    Note that this function assumes PSTR->VALID_LEN elements are already
19895b7b453SJohn Marino    built and starts from PSTR->VALID_LEN.  */
19995b7b453SJohn Marino 
20095b7b453SJohn Marino static void
20195b7b453SJohn Marino internal_function
20295b7b453SJohn Marino build_wcs_buffer (re_string_t *pstr)
20395b7b453SJohn Marino {
20495b7b453SJohn Marino #ifdef _LIBC
20595b7b453SJohn Marino   unsigned char buf[MB_LEN_MAX];
20695b7b453SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
20795b7b453SJohn Marino #else
20895b7b453SJohn Marino   unsigned char buf[64];
20995b7b453SJohn Marino #endif
21095b7b453SJohn Marino   mbstate_t prev_st;
21195b7b453SJohn Marino   Idx byte_idx, end_idx, remain_len;
21295b7b453SJohn Marino   size_t mbclen;
21395b7b453SJohn Marino 
21495b7b453SJohn Marino   /* Build the buffers from pstr->valid_len to either pstr->len or
21595b7b453SJohn Marino      pstr->bufs_len.  */
21695b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
21795b7b453SJohn Marino   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
21895b7b453SJohn Marino     {
21995b7b453SJohn Marino       wchar_t wc;
22095b7b453SJohn Marino       const char *p;
22195b7b453SJohn Marino 
22295b7b453SJohn Marino       remain_len = end_idx - byte_idx;
22395b7b453SJohn Marino       prev_st = pstr->cur_state;
22495b7b453SJohn Marino       /* Apply the translation if we need.  */
22595b7b453SJohn Marino       if (BE (pstr->trans != NULL, 0))
22695b7b453SJohn Marino 	{
22795b7b453SJohn Marino 	  int i, ch;
22895b7b453SJohn Marino 
22995b7b453SJohn Marino 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
23095b7b453SJohn Marino 	    {
23195b7b453SJohn Marino 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
23295b7b453SJohn Marino 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
23395b7b453SJohn Marino 	    }
23495b7b453SJohn Marino 	  p = (const char *) buf;
23595b7b453SJohn Marino 	}
23695b7b453SJohn Marino       else
23795b7b453SJohn Marino 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
23895b7b453SJohn Marino       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
239cf28ed85SJohn Marino       if (BE (mbclen == (size_t) -1 || mbclen == 0
240cf28ed85SJohn Marino 	      || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
24195b7b453SJohn Marino 	{
24295b7b453SJohn Marino 	  /* We treat these cases as a singlebyte character.  */
24395b7b453SJohn Marino 	  mbclen = 1;
24495b7b453SJohn Marino 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
24595b7b453SJohn Marino 	  if (BE (pstr->trans != NULL, 0))
24695b7b453SJohn Marino 	    wc = pstr->trans[wc];
24795b7b453SJohn Marino 	  pstr->cur_state = prev_st;
24895b7b453SJohn Marino 	}
249cf28ed85SJohn Marino       else if (BE (mbclen == (size_t) -2, 0))
250cf28ed85SJohn Marino 	{
251cf28ed85SJohn Marino 	  /* The buffer doesn't have enough space, finish to build.  */
252cf28ed85SJohn Marino 	  pstr->cur_state = prev_st;
253cf28ed85SJohn Marino 	  break;
254cf28ed85SJohn Marino 	}
25595b7b453SJohn Marino 
25695b7b453SJohn Marino       /* Write wide character and padding.  */
25795b7b453SJohn Marino       pstr->wcs[byte_idx++] = wc;
25895b7b453SJohn Marino       /* Write paddings.  */
25995b7b453SJohn Marino       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
26095b7b453SJohn Marino 	pstr->wcs[byte_idx++] = WEOF;
26195b7b453SJohn Marino     }
26295b7b453SJohn Marino   pstr->valid_len = byte_idx;
26395b7b453SJohn Marino   pstr->valid_raw_len = byte_idx;
26495b7b453SJohn Marino }
26595b7b453SJohn Marino 
26695b7b453SJohn Marino /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
26795b7b453SJohn Marino    but for REG_ICASE.  */
26895b7b453SJohn Marino 
26995b7b453SJohn Marino static reg_errcode_t
27095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
27195b7b453SJohn Marino build_wcs_upper_buffer (re_string_t *pstr)
27295b7b453SJohn Marino {
27395b7b453SJohn Marino   mbstate_t prev_st;
27495b7b453SJohn Marino   Idx src_idx, byte_idx, end_idx, remain_len;
27595b7b453SJohn Marino   size_t mbclen;
27695b7b453SJohn Marino #ifdef _LIBC
27795b7b453SJohn Marino   char buf[MB_LEN_MAX];
27895b7b453SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
27995b7b453SJohn Marino #else
28095b7b453SJohn Marino   char buf[64];
28195b7b453SJohn Marino #endif
28295b7b453SJohn Marino 
28395b7b453SJohn Marino   byte_idx = pstr->valid_len;
28495b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
28595b7b453SJohn Marino 
28695b7b453SJohn Marino   /* The following optimization assumes that ASCII characters can be
28795b7b453SJohn Marino      mapped to wide characters with a simple cast.  */
28895b7b453SJohn Marino   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
28995b7b453SJohn Marino     {
29095b7b453SJohn Marino       while (byte_idx < end_idx)
29195b7b453SJohn Marino 	{
29295b7b453SJohn Marino 	  wchar_t wc;
29395b7b453SJohn Marino 
29495b7b453SJohn Marino 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
29595b7b453SJohn Marino 	      && mbsinit (&pstr->cur_state))
29695b7b453SJohn Marino 	    {
29795b7b453SJohn Marino 	      /* In case of a singlebyte character.  */
29895b7b453SJohn Marino 	      pstr->mbs[byte_idx]
29995b7b453SJohn Marino 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
30095b7b453SJohn Marino 	      /* The next step uses the assumption that wchar_t is encoded
30195b7b453SJohn Marino 		 ASCII-safe: all ASCII values can be converted like this.  */
30295b7b453SJohn Marino 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
30395b7b453SJohn Marino 	      ++byte_idx;
30495b7b453SJohn Marino 	      continue;
30595b7b453SJohn Marino 	    }
30695b7b453SJohn Marino 
30795b7b453SJohn Marino 	  remain_len = end_idx - byte_idx;
30895b7b453SJohn Marino 	  prev_st = pstr->cur_state;
30995b7b453SJohn Marino 	  mbclen = __mbrtowc (&wc,
31095b7b453SJohn Marino 			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
31195b7b453SJohn Marino 			       + byte_idx), remain_len, &pstr->cur_state);
31295b7b453SJohn Marino 	  if (BE (mbclen < (size_t) -2, 1))
31395b7b453SJohn Marino 	    {
314*680a9cb8SJohn Marino 	      wchar_t wcu = towupper (wc);
315*680a9cb8SJohn Marino 	      if (wcu != wc)
31695b7b453SJohn Marino 		{
31795b7b453SJohn Marino 		  size_t mbcdlen;
31895b7b453SJohn Marino 
31995b7b453SJohn Marino 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
32095b7b453SJohn Marino 		  if (BE (mbclen == mbcdlen, 1))
32195b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
32295b7b453SJohn Marino 		  else
32395b7b453SJohn Marino 		    {
32495b7b453SJohn Marino 		      src_idx = byte_idx;
32595b7b453SJohn Marino 		      goto offsets_needed;
32695b7b453SJohn Marino 		    }
32795b7b453SJohn Marino 		}
32895b7b453SJohn Marino 	      else
32995b7b453SJohn Marino 		memcpy (pstr->mbs + byte_idx,
33095b7b453SJohn Marino 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
33195b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = wcu;
33295b7b453SJohn Marino 	      /* Write paddings.  */
33395b7b453SJohn Marino 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
33495b7b453SJohn Marino 		pstr->wcs[byte_idx++] = WEOF;
33595b7b453SJohn Marino 	    }
336cf28ed85SJohn Marino 	  else if (mbclen == (size_t) -1 || mbclen == 0
337cf28ed85SJohn Marino 		   || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
33895b7b453SJohn Marino 	    {
339cf28ed85SJohn Marino 	      /* It is an invalid character, an incomplete character
340cf28ed85SJohn Marino 		 at the end of the string, or '\0'.  Just use the byte.  */
34195b7b453SJohn Marino 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
34295b7b453SJohn Marino 	      pstr->mbs[byte_idx] = ch;
34395b7b453SJohn Marino 	      /* And also cast it to wide char.  */
34495b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
34595b7b453SJohn Marino 	      if (BE (mbclen == (size_t) -1, 0))
34695b7b453SJohn Marino 		pstr->cur_state = prev_st;
34795b7b453SJohn Marino 	    }
34895b7b453SJohn Marino 	  else
34995b7b453SJohn Marino 	    {
35095b7b453SJohn Marino 	      /* The buffer doesn't have enough space, finish to build.  */
35195b7b453SJohn Marino 	      pstr->cur_state = prev_st;
35295b7b453SJohn Marino 	      break;
35395b7b453SJohn Marino 	    }
35495b7b453SJohn Marino 	}
35595b7b453SJohn Marino       pstr->valid_len = byte_idx;
35695b7b453SJohn Marino       pstr->valid_raw_len = byte_idx;
35795b7b453SJohn Marino       return REG_NOERROR;
35895b7b453SJohn Marino     }
35995b7b453SJohn Marino   else
36095b7b453SJohn Marino     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
36195b7b453SJohn Marino       {
36295b7b453SJohn Marino 	wchar_t wc;
36395b7b453SJohn Marino 	const char *p;
36495b7b453SJohn Marino       offsets_needed:
36595b7b453SJohn Marino 	remain_len = end_idx - byte_idx;
36695b7b453SJohn Marino 	prev_st = pstr->cur_state;
36795b7b453SJohn Marino 	if (BE (pstr->trans != NULL, 0))
36895b7b453SJohn Marino 	  {
36995b7b453SJohn Marino 	    int i, ch;
37095b7b453SJohn Marino 
37195b7b453SJohn Marino 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
37295b7b453SJohn Marino 	      {
37395b7b453SJohn Marino 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
37495b7b453SJohn Marino 		buf[i] = pstr->trans[ch];
37595b7b453SJohn Marino 	      }
37695b7b453SJohn Marino 	    p = (const char *) buf;
37795b7b453SJohn Marino 	  }
37895b7b453SJohn Marino 	else
37995b7b453SJohn Marino 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
38095b7b453SJohn Marino 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
38195b7b453SJohn Marino 	if (BE (mbclen < (size_t) -2, 1))
38295b7b453SJohn Marino 	  {
383*680a9cb8SJohn Marino 	    wchar_t wcu = towupper (wc);
384*680a9cb8SJohn Marino 	    if (wcu != wc)
38595b7b453SJohn Marino 	      {
38695b7b453SJohn Marino 		size_t mbcdlen;
38795b7b453SJohn Marino 
38895b7b453SJohn Marino 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
38995b7b453SJohn Marino 		if (BE (mbclen == mbcdlen, 1))
39095b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
39195b7b453SJohn Marino 		else if (mbcdlen != (size_t) -1)
39295b7b453SJohn Marino 		  {
39395b7b453SJohn Marino 		    size_t i;
39495b7b453SJohn Marino 
39595b7b453SJohn Marino 		    if (byte_idx + mbcdlen > pstr->bufs_len)
39695b7b453SJohn Marino 		      {
39795b7b453SJohn Marino 			pstr->cur_state = prev_st;
39895b7b453SJohn Marino 			break;
39995b7b453SJohn Marino 		      }
40095b7b453SJohn Marino 
40195b7b453SJohn Marino 		    if (pstr->offsets == NULL)
40295b7b453SJohn Marino 		      {
40395b7b453SJohn Marino 			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
40495b7b453SJohn Marino 
40595b7b453SJohn Marino 			if (pstr->offsets == NULL)
40695b7b453SJohn Marino 			  return REG_ESPACE;
40795b7b453SJohn Marino 		      }
40895b7b453SJohn Marino 		    if (!pstr->offsets_needed)
40995b7b453SJohn Marino 		      {
41095b7b453SJohn Marino 			for (i = 0; i < (size_t) byte_idx; ++i)
41195b7b453SJohn Marino 			  pstr->offsets[i] = i;
41295b7b453SJohn Marino 			pstr->offsets_needed = 1;
41395b7b453SJohn Marino 		      }
41495b7b453SJohn Marino 
41595b7b453SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
41695b7b453SJohn Marino 		    pstr->wcs[byte_idx] = wcu;
41795b7b453SJohn Marino 		    pstr->offsets[byte_idx] = src_idx;
41895b7b453SJohn Marino 		    for (i = 1; i < mbcdlen; ++i)
41995b7b453SJohn Marino 		      {
42095b7b453SJohn Marino 			pstr->offsets[byte_idx + i]
42195b7b453SJohn Marino 			  = src_idx + (i < mbclen ? i : mbclen - 1);
42295b7b453SJohn Marino 			pstr->wcs[byte_idx + i] = WEOF;
42395b7b453SJohn Marino 		      }
42495b7b453SJohn Marino 		    pstr->len += mbcdlen - mbclen;
42595b7b453SJohn Marino 		    if (pstr->raw_stop > src_idx)
42695b7b453SJohn Marino 		      pstr->stop += mbcdlen - mbclen;
42795b7b453SJohn Marino 		    end_idx = (pstr->bufs_len > pstr->len)
42895b7b453SJohn Marino 			      ? pstr->len : pstr->bufs_len;
42995b7b453SJohn Marino 		    byte_idx += mbcdlen;
43095b7b453SJohn Marino 		    src_idx += mbclen;
43195b7b453SJohn Marino 		    continue;
43295b7b453SJohn Marino 		  }
43395b7b453SJohn Marino 		else
43495b7b453SJohn Marino 		  memcpy (pstr->mbs + byte_idx, p, mbclen);
43595b7b453SJohn Marino 	      }
43695b7b453SJohn Marino 	    else
43795b7b453SJohn Marino 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
43895b7b453SJohn Marino 
43995b7b453SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
44095b7b453SJohn Marino 	      {
44195b7b453SJohn Marino 		size_t i;
44295b7b453SJohn Marino 		for (i = 0; i < mbclen; ++i)
44395b7b453SJohn Marino 		  pstr->offsets[byte_idx + i] = src_idx + i;
44495b7b453SJohn Marino 	      }
44595b7b453SJohn Marino 	    src_idx += mbclen;
44695b7b453SJohn Marino 
44795b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = wcu;
44895b7b453SJohn Marino 	    /* Write paddings.  */
44995b7b453SJohn Marino 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
45095b7b453SJohn Marino 	      pstr->wcs[byte_idx++] = WEOF;
45195b7b453SJohn Marino 	  }
452cf28ed85SJohn Marino 	else if (mbclen == (size_t) -1 || mbclen == 0
453cf28ed85SJohn Marino 		 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
45495b7b453SJohn Marino 	  {
45595b7b453SJohn Marino 	    /* It is an invalid character or '\0'.  Just use the byte.  */
45695b7b453SJohn Marino 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
45795b7b453SJohn Marino 
45895b7b453SJohn Marino 	    if (BE (pstr->trans != NULL, 0))
45995b7b453SJohn Marino 	      ch = pstr->trans [ch];
46095b7b453SJohn Marino 	    pstr->mbs[byte_idx] = ch;
46195b7b453SJohn Marino 
46295b7b453SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
46395b7b453SJohn Marino 	      pstr->offsets[byte_idx] = src_idx;
46495b7b453SJohn Marino 	    ++src_idx;
46595b7b453SJohn Marino 
46695b7b453SJohn Marino 	    /* And also cast it to wide char.  */
46795b7b453SJohn Marino 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
46895b7b453SJohn Marino 	    if (BE (mbclen == (size_t) -1, 0))
46995b7b453SJohn Marino 	      pstr->cur_state = prev_st;
47095b7b453SJohn Marino 	  }
47195b7b453SJohn Marino 	else
47295b7b453SJohn Marino 	  {
47395b7b453SJohn Marino 	    /* The buffer doesn't have enough space, finish to build.  */
47495b7b453SJohn Marino 	    pstr->cur_state = prev_st;
47595b7b453SJohn Marino 	    break;
47695b7b453SJohn Marino 	  }
47795b7b453SJohn Marino       }
47895b7b453SJohn Marino   pstr->valid_len = byte_idx;
47995b7b453SJohn Marino   pstr->valid_raw_len = src_idx;
48095b7b453SJohn Marino   return REG_NOERROR;
48195b7b453SJohn Marino }
48295b7b453SJohn Marino 
48395b7b453SJohn Marino /* Skip characters until the index becomes greater than NEW_RAW_IDX.
48495b7b453SJohn Marino    Return the index.  */
48595b7b453SJohn Marino 
48695b7b453SJohn Marino static Idx
48795b7b453SJohn Marino internal_function
48895b7b453SJohn Marino re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
48995b7b453SJohn Marino {
49095b7b453SJohn Marino   mbstate_t prev_st;
49195b7b453SJohn Marino   Idx rawbuf_idx;
49295b7b453SJohn Marino   size_t mbclen;
49395b7b453SJohn Marino   wint_t wc = WEOF;
49495b7b453SJohn Marino 
49595b7b453SJohn Marino   /* Skip the characters which are not necessary to check.  */
49695b7b453SJohn Marino   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
49795b7b453SJohn Marino        rawbuf_idx < new_raw_idx;)
49895b7b453SJohn Marino     {
49995b7b453SJohn Marino       wchar_t wc2;
500cf28ed85SJohn Marino       Idx remain_len = pstr->raw_len - rawbuf_idx;
50195b7b453SJohn Marino       prev_st = pstr->cur_state;
50295b7b453SJohn Marino       mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
50395b7b453SJohn Marino 			  remain_len, &pstr->cur_state);
50495b7b453SJohn Marino       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
50595b7b453SJohn Marino 	{
50695b7b453SJohn Marino 	  /* We treat these cases as a single byte character.  */
50795b7b453SJohn Marino 	  if (mbclen == 0 || remain_len == 0)
50895b7b453SJohn Marino 	    wc = L'\0';
50995b7b453SJohn Marino 	  else
51095b7b453SJohn Marino 	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
51195b7b453SJohn Marino 	  mbclen = 1;
51295b7b453SJohn Marino 	  pstr->cur_state = prev_st;
51395b7b453SJohn Marino 	}
51495b7b453SJohn Marino       else
51595b7b453SJohn Marino 	wc = wc2;
51695b7b453SJohn Marino       /* Then proceed the next character.  */
51795b7b453SJohn Marino       rawbuf_idx += mbclen;
51895b7b453SJohn Marino     }
51995b7b453SJohn Marino   *last_wc = wc;
52095b7b453SJohn Marino   return rawbuf_idx;
52195b7b453SJohn Marino }
52295b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
52395b7b453SJohn Marino 
52495b7b453SJohn Marino /* Build the buffer PSTR->MBS, and apply the translation if we need.
52595b7b453SJohn Marino    This function is used in case of REG_ICASE.  */
52695b7b453SJohn Marino 
52795b7b453SJohn Marino static void
52895b7b453SJohn Marino internal_function
52995b7b453SJohn Marino build_upper_buffer (re_string_t *pstr)
53095b7b453SJohn Marino {
53195b7b453SJohn Marino   Idx char_idx, end_idx;
53295b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
53395b7b453SJohn Marino 
53495b7b453SJohn Marino   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
53595b7b453SJohn Marino     {
53695b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
53795b7b453SJohn Marino       if (BE (pstr->trans != NULL, 0))
53895b7b453SJohn Marino 	ch = pstr->trans[ch];
53995b7b453SJohn Marino       pstr->mbs[char_idx] = toupper (ch);
54095b7b453SJohn Marino     }
54195b7b453SJohn Marino   pstr->valid_len = char_idx;
54295b7b453SJohn Marino   pstr->valid_raw_len = char_idx;
54395b7b453SJohn Marino }
54495b7b453SJohn Marino 
54595b7b453SJohn Marino /* Apply TRANS to the buffer in PSTR.  */
54695b7b453SJohn Marino 
54795b7b453SJohn Marino static void
54895b7b453SJohn Marino internal_function
54995b7b453SJohn Marino re_string_translate_buffer (re_string_t *pstr)
55095b7b453SJohn Marino {
55195b7b453SJohn Marino   Idx buf_idx, end_idx;
55295b7b453SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
55395b7b453SJohn Marino 
55495b7b453SJohn Marino   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
55595b7b453SJohn Marino     {
55695b7b453SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
55795b7b453SJohn Marino       pstr->mbs[buf_idx] = pstr->trans[ch];
55895b7b453SJohn Marino     }
55995b7b453SJohn Marino 
56095b7b453SJohn Marino   pstr->valid_len = buf_idx;
56195b7b453SJohn Marino   pstr->valid_raw_len = buf_idx;
56295b7b453SJohn Marino }
56395b7b453SJohn Marino 
56495b7b453SJohn Marino /* This function re-construct the buffers.
56595b7b453SJohn Marino    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
56695b7b453SJohn Marino    convert to upper case in case of REG_ICASE, apply translation.  */
56795b7b453SJohn Marino 
56895b7b453SJohn Marino static reg_errcode_t
56995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
57095b7b453SJohn Marino re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
57195b7b453SJohn Marino {
57295b7b453SJohn Marino   Idx offset;
57395b7b453SJohn Marino 
57495b7b453SJohn Marino   if (BE (pstr->raw_mbs_idx <= idx, 0))
57595b7b453SJohn Marino     offset = idx - pstr->raw_mbs_idx;
57695b7b453SJohn Marino   else
57795b7b453SJohn Marino     {
57895b7b453SJohn Marino       /* Reset buffer.  */
57995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
58095b7b453SJohn Marino       if (pstr->mb_cur_max > 1)
58195b7b453SJohn Marino 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
58295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
58395b7b453SJohn Marino       pstr->len = pstr->raw_len;
58495b7b453SJohn Marino       pstr->stop = pstr->raw_stop;
58595b7b453SJohn Marino       pstr->valid_len = 0;
58695b7b453SJohn Marino       pstr->raw_mbs_idx = 0;
58795b7b453SJohn Marino       pstr->valid_raw_len = 0;
58895b7b453SJohn Marino       pstr->offsets_needed = 0;
58995b7b453SJohn Marino       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
59095b7b453SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
59195b7b453SJohn Marino       if (!pstr->mbs_allocated)
59295b7b453SJohn Marino 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
59395b7b453SJohn Marino       offset = idx;
59495b7b453SJohn Marino     }
59595b7b453SJohn Marino 
59695b7b453SJohn Marino   if (BE (offset != 0, 1))
59795b7b453SJohn Marino     {
59895b7b453SJohn Marino       /* Should the already checked characters be kept?  */
59995b7b453SJohn Marino       if (BE (offset < pstr->valid_raw_len, 1))
60095b7b453SJohn Marino 	{
60195b7b453SJohn Marino 	  /* Yes, move them to the front of the buffer.  */
60295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
60395b7b453SJohn Marino 	  if (BE (pstr->offsets_needed, 0))
60495b7b453SJohn Marino 	    {
60595b7b453SJohn Marino 	      Idx low = 0, high = pstr->valid_len, mid;
60695b7b453SJohn Marino 	      do
60795b7b453SJohn Marino 		{
60895b7b453SJohn Marino 		  mid = (high + low) / 2;
60995b7b453SJohn Marino 		  if (pstr->offsets[mid] > offset)
61095b7b453SJohn Marino 		    high = mid;
61195b7b453SJohn Marino 		  else if (pstr->offsets[mid] < offset)
61295b7b453SJohn Marino 		    low = mid + 1;
61395b7b453SJohn Marino 		  else
61495b7b453SJohn Marino 		    break;
61595b7b453SJohn Marino 		}
61695b7b453SJohn Marino 	      while (low < high);
61795b7b453SJohn Marino 	      if (pstr->offsets[mid] < offset)
61895b7b453SJohn Marino 		++mid;
61995b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
62095b7b453SJohn Marino 							eflags);
62195b7b453SJohn Marino 	      /* This can be quite complicated, so handle specially
62295b7b453SJohn Marino 		 only the common and easy case where the character with
62395b7b453SJohn Marino 		 different length representation of lower and upper
62495b7b453SJohn Marino 		 case is present at or after offset.  */
62595b7b453SJohn Marino 	      if (pstr->valid_len > offset
62695b7b453SJohn Marino 		  && mid == offset && pstr->offsets[mid] == offset)
62795b7b453SJohn Marino 		{
62895b7b453SJohn Marino 		  memmove (pstr->wcs, pstr->wcs + offset,
62995b7b453SJohn Marino 			   (pstr->valid_len - offset) * sizeof (wint_t));
63095b7b453SJohn Marino 		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
63195b7b453SJohn Marino 		  pstr->valid_len -= offset;
63295b7b453SJohn Marino 		  pstr->valid_raw_len -= offset;
63395b7b453SJohn Marino 		  for (low = 0; low < pstr->valid_len; low++)
63495b7b453SJohn Marino 		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
63595b7b453SJohn Marino 		}
63695b7b453SJohn Marino 	      else
63795b7b453SJohn Marino 		{
63895b7b453SJohn Marino 		  /* Otherwise, just find out how long the partial multibyte
63995b7b453SJohn Marino 		     character at offset is and fill it with WEOF/255.  */
64095b7b453SJohn Marino 		  pstr->len = pstr->raw_len - idx + offset;
64195b7b453SJohn Marino 		  pstr->stop = pstr->raw_stop - idx + offset;
64295b7b453SJohn Marino 		  pstr->offsets_needed = 0;
64395b7b453SJohn Marino 		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
64495b7b453SJohn Marino 		    --mid;
64595b7b453SJohn Marino 		  while (mid < pstr->valid_len)
64695b7b453SJohn Marino 		    if (pstr->wcs[mid] != WEOF)
64795b7b453SJohn Marino 		      break;
64895b7b453SJohn Marino 		    else
64995b7b453SJohn Marino 		      ++mid;
65095b7b453SJohn Marino 		  if (mid == pstr->valid_len)
65195b7b453SJohn Marino 		    pstr->valid_len = 0;
65295b7b453SJohn Marino 		  else
65395b7b453SJohn Marino 		    {
65495b7b453SJohn Marino 		      pstr->valid_len = pstr->offsets[mid] - offset;
65595b7b453SJohn Marino 		      if (pstr->valid_len)
65695b7b453SJohn Marino 			{
65795b7b453SJohn Marino 			  for (low = 0; low < pstr->valid_len; ++low)
65895b7b453SJohn Marino 			    pstr->wcs[low] = WEOF;
65995b7b453SJohn Marino 			  memset (pstr->mbs, 255, pstr->valid_len);
66095b7b453SJohn Marino 			}
66195b7b453SJohn Marino 		    }
66295b7b453SJohn Marino 		  pstr->valid_raw_len = pstr->valid_len;
66395b7b453SJohn Marino 		}
66495b7b453SJohn Marino 	    }
66595b7b453SJohn Marino 	  else
66695b7b453SJohn Marino #endif
66795b7b453SJohn Marino 	    {
66895b7b453SJohn Marino 	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
66995b7b453SJohn Marino 							eflags);
67095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
67195b7b453SJohn Marino 	      if (pstr->mb_cur_max > 1)
67295b7b453SJohn Marino 		memmove (pstr->wcs, pstr->wcs + offset,
67395b7b453SJohn Marino 			 (pstr->valid_len - offset) * sizeof (wint_t));
67495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
67595b7b453SJohn Marino 	      if (BE (pstr->mbs_allocated, 0))
67695b7b453SJohn Marino 		memmove (pstr->mbs, pstr->mbs + offset,
67795b7b453SJohn Marino 			 pstr->valid_len - offset);
67895b7b453SJohn Marino 	      pstr->valid_len -= offset;
67995b7b453SJohn Marino 	      pstr->valid_raw_len -= offset;
68095b7b453SJohn Marino #if DEBUG
68195b7b453SJohn Marino 	      assert (pstr->valid_len > 0);
68295b7b453SJohn Marino #endif
68395b7b453SJohn Marino 	    }
68495b7b453SJohn Marino 	}
68595b7b453SJohn Marino       else
68695b7b453SJohn Marino 	{
68795b7b453SJohn Marino #ifdef RE_ENABLE_I18N
68895b7b453SJohn Marino 	  /* No, skip all characters until IDX.  */
68995b7b453SJohn Marino 	  Idx prev_valid_len = pstr->valid_len;
69095b7b453SJohn Marino 
69195b7b453SJohn Marino 	  if (BE (pstr->offsets_needed, 0))
69295b7b453SJohn Marino 	    {
69395b7b453SJohn Marino 	      pstr->len = pstr->raw_len - idx + offset;
69495b7b453SJohn Marino 	      pstr->stop = pstr->raw_stop - idx + offset;
69595b7b453SJohn Marino 	      pstr->offsets_needed = 0;
69695b7b453SJohn Marino 	    }
69795b7b453SJohn Marino #endif
69895b7b453SJohn Marino 	  pstr->valid_len = 0;
69995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
70095b7b453SJohn Marino 	  if (pstr->mb_cur_max > 1)
70195b7b453SJohn Marino 	    {
70295b7b453SJohn Marino 	      Idx wcs_idx;
70395b7b453SJohn Marino 	      wint_t wc = WEOF;
70495b7b453SJohn Marino 
70595b7b453SJohn Marino 	      if (pstr->is_utf8)
70695b7b453SJohn Marino 		{
70795b7b453SJohn Marino 		  const unsigned char *raw, *p, *end;
70895b7b453SJohn Marino 
70995b7b453SJohn Marino 		  /* Special case UTF-8.  Multi-byte chars start with any
71095b7b453SJohn Marino 		     byte other than 0x80 - 0xbf.  */
71195b7b453SJohn Marino 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
71295b7b453SJohn Marino 		  end = raw + (offset - pstr->mb_cur_max);
71395b7b453SJohn Marino 		  if (end < pstr->raw_mbs)
71495b7b453SJohn Marino 		    end = pstr->raw_mbs;
71595b7b453SJohn Marino 		  p = raw + offset - 1;
71695b7b453SJohn Marino #ifdef _LIBC
71795b7b453SJohn Marino 		  /* We know the wchar_t encoding is UCS4, so for the simple
71895b7b453SJohn Marino 		     case, ASCII characters, skip the conversion step.  */
71995b7b453SJohn Marino 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
72095b7b453SJohn Marino 		    {
72195b7b453SJohn Marino 		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
72295b7b453SJohn Marino 		      /* pstr->valid_len = 0; */
72395b7b453SJohn Marino 		      wc = (wchar_t) *p;
72495b7b453SJohn Marino 		    }
72595b7b453SJohn Marino 		  else
72695b7b453SJohn Marino #endif
72795b7b453SJohn Marino 		    for (; p >= end; --p)
72895b7b453SJohn Marino 		      if ((*p & 0xc0) != 0x80)
72995b7b453SJohn Marino 			{
73095b7b453SJohn Marino 			  mbstate_t cur_state;
73195b7b453SJohn Marino 			  wchar_t wc2;
73295b7b453SJohn Marino 			  Idx mlen = raw + pstr->len - p;
733cf28ed85SJohn Marino 			  unsigned char buf[6];
73495b7b453SJohn Marino 			  size_t mbclen;
73595b7b453SJohn Marino 
736cf28ed85SJohn Marino 			  const unsigned char *pp = p;
73795b7b453SJohn Marino 			  if (BE (pstr->trans != NULL, 0))
73895b7b453SJohn Marino 			    {
73995b7b453SJohn Marino 			      int i = mlen < 6 ? mlen : 6;
74095b7b453SJohn Marino 			      while (--i >= 0)
74195b7b453SJohn Marino 				buf[i] = pstr->trans[p[i]];
742cf28ed85SJohn Marino 			      pp = buf;
74395b7b453SJohn Marino 			    }
74495b7b453SJohn Marino 			  /* XXX Don't use mbrtowc, we know which conversion
74595b7b453SJohn Marino 			     to use (UTF-8 -> UCS4).  */
74695b7b453SJohn Marino 			  memset (&cur_state, 0, sizeof (cur_state));
747cf28ed85SJohn Marino 			  mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
74895b7b453SJohn Marino 					      &cur_state);
74995b7b453SJohn Marino 			  if (raw + offset - p <= mbclen
75095b7b453SJohn Marino 			      && mbclen < (size_t) -2)
75195b7b453SJohn Marino 			    {
75295b7b453SJohn Marino 			      memset (&pstr->cur_state, '\0',
75395b7b453SJohn Marino 				      sizeof (mbstate_t));
75495b7b453SJohn Marino 			      pstr->valid_len = mbclen - (raw + offset - p);
75595b7b453SJohn Marino 			      wc = wc2;
75695b7b453SJohn Marino 			    }
75795b7b453SJohn Marino 			  break;
75895b7b453SJohn Marino 			}
75995b7b453SJohn Marino 		}
76095b7b453SJohn Marino 
76195b7b453SJohn Marino 	      if (wc == WEOF)
76295b7b453SJohn Marino 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
76395b7b453SJohn Marino 	      if (wc == WEOF)
76495b7b453SJohn Marino 		pstr->tip_context
76595b7b453SJohn Marino 		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
76695b7b453SJohn Marino 	      else
76795b7b453SJohn Marino 		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
76895b7b453SJohn Marino 				      && IS_WIDE_WORD_CHAR (wc))
76995b7b453SJohn Marino 				     ? CONTEXT_WORD
77095b7b453SJohn Marino 				     : ((IS_WIDE_NEWLINE (wc)
77195b7b453SJohn Marino 					 && pstr->newline_anchor)
77295b7b453SJohn Marino 					? CONTEXT_NEWLINE : 0));
77395b7b453SJohn Marino 	      if (BE (pstr->valid_len, 0))
77495b7b453SJohn Marino 		{
77595b7b453SJohn Marino 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
77695b7b453SJohn Marino 		    pstr->wcs[wcs_idx] = WEOF;
77795b7b453SJohn Marino 		  if (pstr->mbs_allocated)
77895b7b453SJohn Marino 		    memset (pstr->mbs, 255, pstr->valid_len);
77995b7b453SJohn Marino 		}
78095b7b453SJohn Marino 	      pstr->valid_raw_len = pstr->valid_len;
78195b7b453SJohn Marino 	    }
78295b7b453SJohn Marino 	  else
78395b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
78495b7b453SJohn Marino 	    {
78595b7b453SJohn Marino 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
78695b7b453SJohn Marino 	      pstr->valid_raw_len = 0;
78795b7b453SJohn Marino 	      if (pstr->trans)
78895b7b453SJohn Marino 		c = pstr->trans[c];
78995b7b453SJohn Marino 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
79095b7b453SJohn Marino 				   ? CONTEXT_WORD
79195b7b453SJohn Marino 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
79295b7b453SJohn Marino 				      ? CONTEXT_NEWLINE : 0));
79395b7b453SJohn Marino 	    }
79495b7b453SJohn Marino 	}
79595b7b453SJohn Marino       if (!BE (pstr->mbs_allocated, 0))
79695b7b453SJohn Marino 	pstr->mbs += offset;
79795b7b453SJohn Marino     }
79895b7b453SJohn Marino   pstr->raw_mbs_idx = idx;
79995b7b453SJohn Marino   pstr->len -= offset;
80095b7b453SJohn Marino   pstr->stop -= offset;
80195b7b453SJohn Marino 
80295b7b453SJohn Marino   /* Then build the buffers.  */
80395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
80495b7b453SJohn Marino   if (pstr->mb_cur_max > 1)
80595b7b453SJohn Marino     {
80695b7b453SJohn Marino       if (pstr->icase)
80795b7b453SJohn Marino 	{
80895b7b453SJohn Marino 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
80995b7b453SJohn Marino 	  if (BE (ret != REG_NOERROR, 0))
81095b7b453SJohn Marino 	    return ret;
81195b7b453SJohn Marino 	}
81295b7b453SJohn Marino       else
81395b7b453SJohn Marino 	build_wcs_buffer (pstr);
81495b7b453SJohn Marino     }
81595b7b453SJohn Marino   else
81695b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
81795b7b453SJohn Marino     if (BE (pstr->mbs_allocated, 0))
81895b7b453SJohn Marino       {
81995b7b453SJohn Marino 	if (pstr->icase)
82095b7b453SJohn Marino 	  build_upper_buffer (pstr);
82195b7b453SJohn Marino 	else if (pstr->trans != NULL)
82295b7b453SJohn Marino 	  re_string_translate_buffer (pstr);
82395b7b453SJohn Marino       }
82495b7b453SJohn Marino     else
82595b7b453SJohn Marino       pstr->valid_len = pstr->len;
82695b7b453SJohn Marino 
82795b7b453SJohn Marino   pstr->cur_idx = 0;
82895b7b453SJohn Marino   return REG_NOERROR;
82995b7b453SJohn Marino }
83095b7b453SJohn Marino 
83195b7b453SJohn Marino static unsigned char
832*680a9cb8SJohn Marino internal_function __attribute__ ((pure))
83395b7b453SJohn Marino re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
83495b7b453SJohn Marino {
83595b7b453SJohn Marino   int ch;
83695b7b453SJohn Marino   Idx off;
83795b7b453SJohn Marino 
83895b7b453SJohn Marino   /* Handle the common (easiest) cases first.  */
83995b7b453SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
84095b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
84195b7b453SJohn Marino 
84295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
84395b7b453SJohn Marino   if (pstr->mb_cur_max > 1
84495b7b453SJohn Marino       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
84595b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
84695b7b453SJohn Marino #endif
84795b7b453SJohn Marino 
84895b7b453SJohn Marino   off = pstr->cur_idx + idx;
84995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
85095b7b453SJohn Marino   if (pstr->offsets_needed)
85195b7b453SJohn Marino     off = pstr->offsets[off];
85295b7b453SJohn Marino #endif
85395b7b453SJohn Marino 
85495b7b453SJohn Marino   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
85595b7b453SJohn Marino 
85695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
85795b7b453SJohn Marino   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
85895b7b453SJohn Marino      this function returns CAPITAL LETTER I instead of first byte of
85995b7b453SJohn Marino      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
86095b7b453SJohn Marino      since peek_byte_case doesn't advance cur_idx in any way.  */
86195b7b453SJohn Marino   if (pstr->offsets_needed && !isascii (ch))
86295b7b453SJohn Marino     return re_string_peek_byte (pstr, idx);
86395b7b453SJohn Marino #endif
86495b7b453SJohn Marino 
86595b7b453SJohn Marino   return ch;
86695b7b453SJohn Marino }
86795b7b453SJohn Marino 
86895b7b453SJohn Marino static unsigned char
869cf28ed85SJohn Marino internal_function
87095b7b453SJohn Marino re_string_fetch_byte_case (re_string_t *pstr)
87195b7b453SJohn Marino {
87295b7b453SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
87395b7b453SJohn Marino     return re_string_fetch_byte (pstr);
87495b7b453SJohn Marino 
87595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
87695b7b453SJohn Marino   if (pstr->offsets_needed)
87795b7b453SJohn Marino     {
87895b7b453SJohn Marino       Idx off;
87995b7b453SJohn Marino       int ch;
88095b7b453SJohn Marino 
88195b7b453SJohn Marino       /* For tr_TR.UTF-8 [[:islower:]] there is
88295b7b453SJohn Marino 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
88395b7b453SJohn Marino 	 in that case the whole multi-byte character and return
88495b7b453SJohn Marino 	 the original letter.  On the other side, with
88595b7b453SJohn Marino 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
88695b7b453SJohn Marino 	 anything else would complicate things too much.  */
88795b7b453SJohn Marino 
88895b7b453SJohn Marino       if (!re_string_first_byte (pstr, pstr->cur_idx))
88995b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
89095b7b453SJohn Marino 
89195b7b453SJohn Marino       off = pstr->offsets[pstr->cur_idx];
89295b7b453SJohn Marino       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
89395b7b453SJohn Marino 
89495b7b453SJohn Marino       if (! isascii (ch))
89595b7b453SJohn Marino 	return re_string_fetch_byte (pstr);
89695b7b453SJohn Marino 
89795b7b453SJohn Marino       re_string_skip_bytes (pstr,
89895b7b453SJohn Marino 			    re_string_char_size_at (pstr, pstr->cur_idx));
89995b7b453SJohn Marino       return ch;
90095b7b453SJohn Marino     }
90195b7b453SJohn Marino #endif
90295b7b453SJohn Marino 
90395b7b453SJohn Marino   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
90495b7b453SJohn Marino }
90595b7b453SJohn Marino 
90695b7b453SJohn Marino static void
90795b7b453SJohn Marino internal_function
90895b7b453SJohn Marino re_string_destruct (re_string_t *pstr)
90995b7b453SJohn Marino {
91095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
91195b7b453SJohn Marino   re_free (pstr->wcs);
91295b7b453SJohn Marino   re_free (pstr->offsets);
91395b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
91495b7b453SJohn Marino   if (pstr->mbs_allocated)
91595b7b453SJohn Marino     re_free (pstr->mbs);
91695b7b453SJohn Marino }
91795b7b453SJohn Marino 
91895b7b453SJohn Marino /* Return the context at IDX in INPUT.  */
91995b7b453SJohn Marino 
92095b7b453SJohn Marino static unsigned int
92195b7b453SJohn Marino internal_function
92295b7b453SJohn Marino re_string_context_at (const re_string_t *input, Idx idx, int eflags)
92395b7b453SJohn Marino {
92495b7b453SJohn Marino   int c;
92595b7b453SJohn Marino   if (BE (! REG_VALID_INDEX (idx), 0))
92695b7b453SJohn Marino     /* In this case, we use the value stored in input->tip_context,
92795b7b453SJohn Marino        since we can't know the character in input->mbs[-1] here.  */
92895b7b453SJohn Marino     return input->tip_context;
92995b7b453SJohn Marino   if (BE (idx == input->len, 0))
93095b7b453SJohn Marino     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
93195b7b453SJohn Marino 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
93295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
93395b7b453SJohn Marino   if (input->mb_cur_max > 1)
93495b7b453SJohn Marino     {
93595b7b453SJohn Marino       wint_t wc;
93695b7b453SJohn Marino       Idx wc_idx = idx;
93795b7b453SJohn Marino       while(input->wcs[wc_idx] == WEOF)
93895b7b453SJohn Marino 	{
93995b7b453SJohn Marino #ifdef DEBUG
94095b7b453SJohn Marino 	  /* It must not happen.  */
94195b7b453SJohn Marino 	  assert (REG_VALID_INDEX (wc_idx));
94295b7b453SJohn Marino #endif
94395b7b453SJohn Marino 	  --wc_idx;
94495b7b453SJohn Marino 	  if (! REG_VALID_INDEX (wc_idx))
94595b7b453SJohn Marino 	    return input->tip_context;
94695b7b453SJohn Marino 	}
94795b7b453SJohn Marino       wc = input->wcs[wc_idx];
94895b7b453SJohn Marino       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
94995b7b453SJohn Marino 	return CONTEXT_WORD;
95095b7b453SJohn Marino       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
95195b7b453SJohn Marino 	      ? CONTEXT_NEWLINE : 0);
95295b7b453SJohn Marino     }
95395b7b453SJohn Marino   else
95495b7b453SJohn Marino #endif
95595b7b453SJohn Marino     {
95695b7b453SJohn Marino       c = re_string_byte_at (input, idx);
95795b7b453SJohn Marino       if (bitset_contain (input->word_char, c))
95895b7b453SJohn Marino 	return CONTEXT_WORD;
95995b7b453SJohn Marino       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
96095b7b453SJohn Marino     }
96195b7b453SJohn Marino }
96295b7b453SJohn Marino 
96395b7b453SJohn Marino /* Functions for set operation.  */
96495b7b453SJohn Marino 
96595b7b453SJohn Marino static reg_errcode_t
96695b7b453SJohn Marino internal_function __attribute_warn_unused_result__
96795b7b453SJohn Marino re_node_set_alloc (re_node_set *set, Idx size)
96895b7b453SJohn Marino {
96995b7b453SJohn Marino   set->alloc = size;
97095b7b453SJohn Marino   set->nelem = 0;
97195b7b453SJohn Marino   set->elems = re_malloc (Idx, size);
972*680a9cb8SJohn Marino   if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0))
97395b7b453SJohn Marino     return REG_ESPACE;
97495b7b453SJohn Marino   return REG_NOERROR;
97595b7b453SJohn Marino }
97695b7b453SJohn Marino 
97795b7b453SJohn Marino static reg_errcode_t
97895b7b453SJohn Marino internal_function __attribute_warn_unused_result__
97995b7b453SJohn Marino re_node_set_init_1 (re_node_set *set, Idx elem)
98095b7b453SJohn Marino {
98195b7b453SJohn Marino   set->alloc = 1;
98295b7b453SJohn Marino   set->nelem = 1;
98395b7b453SJohn Marino   set->elems = re_malloc (Idx, 1);
98495b7b453SJohn Marino   if (BE (set->elems == NULL, 0))
98595b7b453SJohn Marino     {
98695b7b453SJohn Marino       set->alloc = set->nelem = 0;
98795b7b453SJohn Marino       return REG_ESPACE;
98895b7b453SJohn Marino     }
98995b7b453SJohn Marino   set->elems[0] = elem;
99095b7b453SJohn Marino   return REG_NOERROR;
99195b7b453SJohn Marino }
99295b7b453SJohn Marino 
99395b7b453SJohn Marino static reg_errcode_t
99495b7b453SJohn Marino internal_function __attribute_warn_unused_result__
99595b7b453SJohn Marino re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
99695b7b453SJohn Marino {
99795b7b453SJohn Marino   set->alloc = 2;
99895b7b453SJohn Marino   set->elems = re_malloc (Idx, 2);
99995b7b453SJohn Marino   if (BE (set->elems == NULL, 0))
100095b7b453SJohn Marino     return REG_ESPACE;
100195b7b453SJohn Marino   if (elem1 == elem2)
100295b7b453SJohn Marino     {
100395b7b453SJohn Marino       set->nelem = 1;
100495b7b453SJohn Marino       set->elems[0] = elem1;
100595b7b453SJohn Marino     }
100695b7b453SJohn Marino   else
100795b7b453SJohn Marino     {
100895b7b453SJohn Marino       set->nelem = 2;
100995b7b453SJohn Marino       if (elem1 < elem2)
101095b7b453SJohn Marino 	{
101195b7b453SJohn Marino 	  set->elems[0] = elem1;
101295b7b453SJohn Marino 	  set->elems[1] = elem2;
101395b7b453SJohn Marino 	}
101495b7b453SJohn Marino       else
101595b7b453SJohn Marino 	{
101695b7b453SJohn Marino 	  set->elems[0] = elem2;
101795b7b453SJohn Marino 	  set->elems[1] = elem1;
101895b7b453SJohn Marino 	}
101995b7b453SJohn Marino     }
102095b7b453SJohn Marino   return REG_NOERROR;
102195b7b453SJohn Marino }
102295b7b453SJohn Marino 
102395b7b453SJohn Marino static reg_errcode_t
102495b7b453SJohn Marino internal_function __attribute_warn_unused_result__
102595b7b453SJohn Marino re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
102695b7b453SJohn Marino {
102795b7b453SJohn Marino   dest->nelem = src->nelem;
102895b7b453SJohn Marino   if (src->nelem > 0)
102995b7b453SJohn Marino     {
103095b7b453SJohn Marino       dest->alloc = dest->nelem;
103195b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
103295b7b453SJohn Marino       if (BE (dest->elems == NULL, 0))
103395b7b453SJohn Marino 	{
103495b7b453SJohn Marino 	  dest->alloc = dest->nelem = 0;
103595b7b453SJohn Marino 	  return REG_ESPACE;
103695b7b453SJohn Marino 	}
103795b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
103895b7b453SJohn Marino     }
103995b7b453SJohn Marino   else
104095b7b453SJohn Marino     re_node_set_init_empty (dest);
104195b7b453SJohn Marino   return REG_NOERROR;
104295b7b453SJohn Marino }
104395b7b453SJohn Marino 
104495b7b453SJohn Marino /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
104595b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
104695b7b453SJohn Marino    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
104795b7b453SJohn Marino 
104895b7b453SJohn Marino static reg_errcode_t
104995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
105095b7b453SJohn Marino re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
105195b7b453SJohn Marino 			   const re_node_set *src2)
105295b7b453SJohn Marino {
105395b7b453SJohn Marino   Idx i1, i2, is, id, delta, sbase;
105495b7b453SJohn Marino   if (src1->nelem == 0 || src2->nelem == 0)
105595b7b453SJohn Marino     return REG_NOERROR;
105695b7b453SJohn Marino 
105795b7b453SJohn Marino   /* We need dest->nelem + 2 * elems_in_intersection; this is a
105895b7b453SJohn Marino      conservative estimate.  */
105995b7b453SJohn Marino   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
106095b7b453SJohn Marino     {
106195b7b453SJohn Marino       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
106295b7b453SJohn Marino       Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
106395b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
106495b7b453SJohn Marino 	return REG_ESPACE;
106595b7b453SJohn Marino       dest->elems = new_elems;
106695b7b453SJohn Marino       dest->alloc = new_alloc;
106795b7b453SJohn Marino     }
106895b7b453SJohn Marino 
106995b7b453SJohn Marino   /* Find the items in the intersection of SRC1 and SRC2, and copy
107095b7b453SJohn Marino      into the top of DEST those that are not already in DEST itself.  */
107195b7b453SJohn Marino   sbase = dest->nelem + src1->nelem + src2->nelem;
107295b7b453SJohn Marino   i1 = src1->nelem - 1;
107395b7b453SJohn Marino   i2 = src2->nelem - 1;
107495b7b453SJohn Marino   id = dest->nelem - 1;
107595b7b453SJohn Marino   for (;;)
107695b7b453SJohn Marino     {
107795b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
107895b7b453SJohn Marino 	{
107995b7b453SJohn Marino 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
108095b7b453SJohn Marino 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
108195b7b453SJohn Marino 	    --id;
108295b7b453SJohn Marino 
108395b7b453SJohn Marino           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
108495b7b453SJohn Marino             dest->elems[--sbase] = src1->elems[i1];
108595b7b453SJohn Marino 
108695b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
108795b7b453SJohn Marino 	    break;
108895b7b453SJohn Marino 	}
108995b7b453SJohn Marino 
109095b7b453SJohn Marino       /* Lower the highest of the two items.  */
109195b7b453SJohn Marino       else if (src1->elems[i1] < src2->elems[i2])
109295b7b453SJohn Marino 	{
109395b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i2))
109495b7b453SJohn Marino 	    break;
109595b7b453SJohn Marino 	}
109695b7b453SJohn Marino       else
109795b7b453SJohn Marino 	{
109895b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--i1))
109995b7b453SJohn Marino 	    break;
110095b7b453SJohn Marino 	}
110195b7b453SJohn Marino     }
110295b7b453SJohn Marino 
110395b7b453SJohn Marino   id = dest->nelem - 1;
110495b7b453SJohn Marino   is = dest->nelem + src1->nelem + src2->nelem - 1;
110595b7b453SJohn Marino   delta = is - sbase + 1;
110695b7b453SJohn Marino 
110795b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
110895b7b453SJohn Marino      DEST elements are already in place; this is more or
110995b7b453SJohn Marino      less the same loop that is in re_node_set_merge.  */
111095b7b453SJohn Marino   dest->nelem += delta;
111195b7b453SJohn Marino   if (delta > 0 && REG_VALID_INDEX (id))
111295b7b453SJohn Marino     for (;;)
111395b7b453SJohn Marino       {
111495b7b453SJohn Marino 	if (dest->elems[is] > dest->elems[id])
111595b7b453SJohn Marino 	  {
111695b7b453SJohn Marino 	    /* Copy from the top.  */
111795b7b453SJohn Marino 	    dest->elems[id + delta--] = dest->elems[is--];
111895b7b453SJohn Marino 	    if (delta == 0)
111995b7b453SJohn Marino 	      break;
112095b7b453SJohn Marino 	  }
112195b7b453SJohn Marino 	else
112295b7b453SJohn Marino 	  {
112395b7b453SJohn Marino 	    /* Slide from the bottom.  */
112495b7b453SJohn Marino 	    dest->elems[id + delta] = dest->elems[id];
112595b7b453SJohn Marino 	    if (! REG_VALID_INDEX (--id))
112695b7b453SJohn Marino 	      break;
112795b7b453SJohn Marino 	  }
112895b7b453SJohn Marino       }
112995b7b453SJohn Marino 
113095b7b453SJohn Marino   /* Copy remaining SRC elements.  */
113195b7b453SJohn Marino   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
113295b7b453SJohn Marino 
113395b7b453SJohn Marino   return REG_NOERROR;
113495b7b453SJohn Marino }
113595b7b453SJohn Marino 
113695b7b453SJohn Marino /* Calculate the union set of the sets SRC1 and SRC2. And store it to
113795b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
113895b7b453SJohn Marino 
113995b7b453SJohn Marino static reg_errcode_t
114095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
114195b7b453SJohn Marino re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
114295b7b453SJohn Marino 			const re_node_set *src2)
114395b7b453SJohn Marino {
114495b7b453SJohn Marino   Idx i1, i2, id;
114595b7b453SJohn Marino   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
114695b7b453SJohn Marino     {
114795b7b453SJohn Marino       dest->alloc = src1->nelem + src2->nelem;
114895b7b453SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
114995b7b453SJohn Marino       if (BE (dest->elems == NULL, 0))
115095b7b453SJohn Marino 	return REG_ESPACE;
115195b7b453SJohn Marino     }
115295b7b453SJohn Marino   else
115395b7b453SJohn Marino     {
115495b7b453SJohn Marino       if (src1 != NULL && src1->nelem > 0)
115595b7b453SJohn Marino 	return re_node_set_init_copy (dest, src1);
115695b7b453SJohn Marino       else if (src2 != NULL && src2->nelem > 0)
115795b7b453SJohn Marino 	return re_node_set_init_copy (dest, src2);
115895b7b453SJohn Marino       else
115995b7b453SJohn Marino 	re_node_set_init_empty (dest);
116095b7b453SJohn Marino       return REG_NOERROR;
116195b7b453SJohn Marino     }
116295b7b453SJohn Marino   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
116395b7b453SJohn Marino     {
116495b7b453SJohn Marino       if (src1->elems[i1] > src2->elems[i2])
116595b7b453SJohn Marino 	{
116695b7b453SJohn Marino 	  dest->elems[id++] = src2->elems[i2++];
116795b7b453SJohn Marino 	  continue;
116895b7b453SJohn Marino 	}
116995b7b453SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
117095b7b453SJohn Marino 	++i2;
117195b7b453SJohn Marino       dest->elems[id++] = src1->elems[i1++];
117295b7b453SJohn Marino     }
117395b7b453SJohn Marino   if (i1 < src1->nelem)
117495b7b453SJohn Marino     {
117595b7b453SJohn Marino       memcpy (dest->elems + id, src1->elems + i1,
117695b7b453SJohn Marino 	     (src1->nelem - i1) * sizeof (Idx));
117795b7b453SJohn Marino       id += src1->nelem - i1;
117895b7b453SJohn Marino     }
117995b7b453SJohn Marino   else if (i2 < src2->nelem)
118095b7b453SJohn Marino     {
118195b7b453SJohn Marino       memcpy (dest->elems + id, src2->elems + i2,
118295b7b453SJohn Marino 	     (src2->nelem - i2) * sizeof (Idx));
118395b7b453SJohn Marino       id += src2->nelem - i2;
118495b7b453SJohn Marino     }
118595b7b453SJohn Marino   dest->nelem = id;
118695b7b453SJohn Marino   return REG_NOERROR;
118795b7b453SJohn Marino }
118895b7b453SJohn Marino 
118995b7b453SJohn Marino /* Calculate the union set of the sets DEST and SRC. And store it to
119095b7b453SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
119195b7b453SJohn Marino 
119295b7b453SJohn Marino static reg_errcode_t
119395b7b453SJohn Marino internal_function __attribute_warn_unused_result__
119495b7b453SJohn Marino re_node_set_merge (re_node_set *dest, const re_node_set *src)
119595b7b453SJohn Marino {
119695b7b453SJohn Marino   Idx is, id, sbase, delta;
119795b7b453SJohn Marino   if (src == NULL || src->nelem == 0)
119895b7b453SJohn Marino     return REG_NOERROR;
119995b7b453SJohn Marino   if (dest->alloc < 2 * src->nelem + dest->nelem)
120095b7b453SJohn Marino     {
120195b7b453SJohn Marino       Idx new_alloc = 2 * (src->nelem + dest->alloc);
120295b7b453SJohn Marino       Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
120395b7b453SJohn Marino       if (BE (new_buffer == NULL, 0))
120495b7b453SJohn Marino 	return REG_ESPACE;
120595b7b453SJohn Marino       dest->elems = new_buffer;
120695b7b453SJohn Marino       dest->alloc = new_alloc;
120795b7b453SJohn Marino     }
120895b7b453SJohn Marino 
120995b7b453SJohn Marino   if (BE (dest->nelem == 0, 0))
121095b7b453SJohn Marino     {
121195b7b453SJohn Marino       dest->nelem = src->nelem;
121295b7b453SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
121395b7b453SJohn Marino       return REG_NOERROR;
121495b7b453SJohn Marino     }
121595b7b453SJohn Marino 
121695b7b453SJohn Marino   /* Copy into the top of DEST the items of SRC that are not
121795b7b453SJohn Marino      found in DEST.  Maybe we could binary search in DEST?  */
121895b7b453SJohn Marino   for (sbase = dest->nelem + 2 * src->nelem,
121995b7b453SJohn Marino        is = src->nelem - 1, id = dest->nelem - 1;
122095b7b453SJohn Marino        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
122195b7b453SJohn Marino     {
122295b7b453SJohn Marino       if (dest->elems[id] == src->elems[is])
122395b7b453SJohn Marino 	is--, id--;
122495b7b453SJohn Marino       else if (dest->elems[id] < src->elems[is])
122595b7b453SJohn Marino 	dest->elems[--sbase] = src->elems[is--];
122695b7b453SJohn Marino       else /* if (dest->elems[id] > src->elems[is]) */
122795b7b453SJohn Marino 	--id;
122895b7b453SJohn Marino     }
122995b7b453SJohn Marino 
123095b7b453SJohn Marino   if (REG_VALID_INDEX (is))
123195b7b453SJohn Marino     {
123295b7b453SJohn Marino       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
123395b7b453SJohn Marino       sbase -= is + 1;
123495b7b453SJohn Marino       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
123595b7b453SJohn Marino     }
123695b7b453SJohn Marino 
123795b7b453SJohn Marino   id = dest->nelem - 1;
123895b7b453SJohn Marino   is = dest->nelem + 2 * src->nelem - 1;
123995b7b453SJohn Marino   delta = is - sbase + 1;
124095b7b453SJohn Marino   if (delta == 0)
124195b7b453SJohn Marino     return REG_NOERROR;
124295b7b453SJohn Marino 
124395b7b453SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
124495b7b453SJohn Marino      DEST elements are already in place.  */
124595b7b453SJohn Marino   dest->nelem += delta;
124695b7b453SJohn Marino   for (;;)
124795b7b453SJohn Marino     {
124895b7b453SJohn Marino       if (dest->elems[is] > dest->elems[id])
124995b7b453SJohn Marino 	{
125095b7b453SJohn Marino 	  /* Copy from the top.  */
125195b7b453SJohn Marino 	  dest->elems[id + delta--] = dest->elems[is--];
125295b7b453SJohn Marino 	  if (delta == 0)
125395b7b453SJohn Marino 	    break;
125495b7b453SJohn Marino 	}
125595b7b453SJohn Marino       else
125695b7b453SJohn Marino 	{
125795b7b453SJohn Marino 	  /* Slide from the bottom.  */
125895b7b453SJohn Marino 	  dest->elems[id + delta] = dest->elems[id];
125995b7b453SJohn Marino 	  if (! REG_VALID_INDEX (--id))
126095b7b453SJohn Marino 	    {
126195b7b453SJohn Marino 	      /* Copy remaining SRC elements.  */
126295b7b453SJohn Marino 	      memcpy (dest->elems, dest->elems + sbase,
126395b7b453SJohn Marino 		      delta * sizeof (Idx));
126495b7b453SJohn Marino 	      break;
126595b7b453SJohn Marino 	    }
126695b7b453SJohn Marino 	}
126795b7b453SJohn Marino     }
126895b7b453SJohn Marino 
126995b7b453SJohn Marino   return REG_NOERROR;
127095b7b453SJohn Marino }
127195b7b453SJohn Marino 
127295b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
127395b7b453SJohn Marino    SET should not already have ELEM.
127495b7b453SJohn Marino    Return true if successful.  */
127595b7b453SJohn Marino 
127695b7b453SJohn Marino static bool
127795b7b453SJohn Marino internal_function __attribute_warn_unused_result__
127895b7b453SJohn Marino re_node_set_insert (re_node_set *set, Idx elem)
127995b7b453SJohn Marino {
128095b7b453SJohn Marino   Idx idx;
128195b7b453SJohn Marino   /* In case the set is empty.  */
128295b7b453SJohn Marino   if (set->alloc == 0)
128395b7b453SJohn Marino     return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
128495b7b453SJohn Marino 
128595b7b453SJohn Marino   if (BE (set->nelem, 0) == 0)
128695b7b453SJohn Marino     {
128795b7b453SJohn Marino       /* We already guaranteed above that set->alloc != 0.  */
128895b7b453SJohn Marino       set->elems[0] = elem;
128995b7b453SJohn Marino       ++set->nelem;
129095b7b453SJohn Marino       return true;
129195b7b453SJohn Marino     }
129295b7b453SJohn Marino 
129395b7b453SJohn Marino   /* Realloc if we need.  */
129495b7b453SJohn Marino   if (set->alloc == set->nelem)
129595b7b453SJohn Marino     {
129695b7b453SJohn Marino       Idx *new_elems;
129795b7b453SJohn Marino       set->alloc = set->alloc * 2;
129895b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
129995b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
130095b7b453SJohn Marino 	return false;
130195b7b453SJohn Marino       set->elems = new_elems;
130295b7b453SJohn Marino     }
130395b7b453SJohn Marino 
130495b7b453SJohn Marino   /* Move the elements which follows the new element.  Test the
130595b7b453SJohn Marino      first element separately to skip a check in the inner loop.  */
130695b7b453SJohn Marino   if (elem < set->elems[0])
130795b7b453SJohn Marino     {
130895b7b453SJohn Marino       idx = 0;
130995b7b453SJohn Marino       for (idx = set->nelem; idx > 0; idx--)
131095b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
131195b7b453SJohn Marino     }
131295b7b453SJohn Marino   else
131395b7b453SJohn Marino     {
131495b7b453SJohn Marino       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
131595b7b453SJohn Marino 	set->elems[idx] = set->elems[idx - 1];
131695b7b453SJohn Marino     }
131795b7b453SJohn Marino 
131895b7b453SJohn Marino   /* Insert the new element.  */
131995b7b453SJohn Marino   set->elems[idx] = elem;
132095b7b453SJohn Marino   ++set->nelem;
132195b7b453SJohn Marino   return true;
132295b7b453SJohn Marino }
132395b7b453SJohn Marino 
132495b7b453SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
132595b7b453SJohn Marino    SET should not already have any element greater than or equal to ELEM.
132695b7b453SJohn Marino    Return true if successful.  */
132795b7b453SJohn Marino 
132895b7b453SJohn Marino static bool
132995b7b453SJohn Marino internal_function __attribute_warn_unused_result__
133095b7b453SJohn Marino re_node_set_insert_last (re_node_set *set, Idx elem)
133195b7b453SJohn Marino {
133295b7b453SJohn Marino   /* Realloc if we need.  */
133395b7b453SJohn Marino   if (set->alloc == set->nelem)
133495b7b453SJohn Marino     {
133595b7b453SJohn Marino       Idx *new_elems;
133695b7b453SJohn Marino       set->alloc = (set->alloc + 1) * 2;
133795b7b453SJohn Marino       new_elems = re_realloc (set->elems, Idx, set->alloc);
133895b7b453SJohn Marino       if (BE (new_elems == NULL, 0))
133995b7b453SJohn Marino 	return false;
134095b7b453SJohn Marino       set->elems = new_elems;
134195b7b453SJohn Marino     }
134295b7b453SJohn Marino 
134395b7b453SJohn Marino   /* Insert the new element.  */
134495b7b453SJohn Marino   set->elems[set->nelem++] = elem;
134595b7b453SJohn Marino   return true;
134695b7b453SJohn Marino }
134795b7b453SJohn Marino 
134895b7b453SJohn Marino /* Compare two node sets SET1 and SET2.
134995b7b453SJohn Marino    Return true if SET1 and SET2 are equivalent.  */
135095b7b453SJohn Marino 
135195b7b453SJohn Marino static bool
1352*680a9cb8SJohn Marino internal_function __attribute__ ((pure))
135395b7b453SJohn Marino re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
135495b7b453SJohn Marino {
135595b7b453SJohn Marino   Idx i;
135695b7b453SJohn Marino   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
135795b7b453SJohn Marino     return false;
135895b7b453SJohn Marino   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
135995b7b453SJohn Marino     if (set1->elems[i] != set2->elems[i])
136095b7b453SJohn Marino       return false;
136195b7b453SJohn Marino   return true;
136295b7b453SJohn Marino }
136395b7b453SJohn Marino 
136495b7b453SJohn Marino /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
136595b7b453SJohn Marino 
136695b7b453SJohn Marino static Idx
1367*680a9cb8SJohn Marino internal_function __attribute__ ((pure))
136895b7b453SJohn Marino re_node_set_contains (const re_node_set *set, Idx elem)
136995b7b453SJohn Marino {
137095b7b453SJohn Marino   __re_size_t idx, right, mid;
137195b7b453SJohn Marino   if (! REG_VALID_NONZERO_INDEX (set->nelem))
137295b7b453SJohn Marino     return 0;
137395b7b453SJohn Marino 
137495b7b453SJohn Marino   /* Binary search the element.  */
137595b7b453SJohn Marino   idx = 0;
137695b7b453SJohn Marino   right = set->nelem - 1;
137795b7b453SJohn Marino   while (idx < right)
137895b7b453SJohn Marino     {
137995b7b453SJohn Marino       mid = (idx + right) / 2;
138095b7b453SJohn Marino       if (set->elems[mid] < elem)
138195b7b453SJohn Marino 	idx = mid + 1;
138295b7b453SJohn Marino       else
138395b7b453SJohn Marino 	right = mid;
138495b7b453SJohn Marino     }
138595b7b453SJohn Marino   return set->elems[idx] == elem ? idx + 1 : 0;
138695b7b453SJohn Marino }
138795b7b453SJohn Marino 
138895b7b453SJohn Marino static void
138995b7b453SJohn Marino internal_function
139095b7b453SJohn Marino re_node_set_remove_at (re_node_set *set, Idx idx)
139195b7b453SJohn Marino {
1392*680a9cb8SJohn Marino   if (idx < 0 || idx >= set->nelem)
139395b7b453SJohn Marino     return;
139495b7b453SJohn Marino   --set->nelem;
139595b7b453SJohn Marino   for (; idx < set->nelem; idx++)
139695b7b453SJohn Marino     set->elems[idx] = set->elems[idx + 1];
139795b7b453SJohn Marino }
139895b7b453SJohn Marino 
139995b7b453SJohn Marino 
140095b7b453SJohn Marino /* Add the token TOKEN to dfa->nodes, and return the index of the token.
140195b7b453SJohn Marino    Or return REG_MISSING if an error occurred.  */
140295b7b453SJohn Marino 
140395b7b453SJohn Marino static Idx
140495b7b453SJohn Marino internal_function
140595b7b453SJohn Marino re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
140695b7b453SJohn Marino {
140795b7b453SJohn Marino   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
140895b7b453SJohn Marino     {
140995b7b453SJohn Marino       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
141095b7b453SJohn Marino       Idx *new_nexts, *new_indices;
141195b7b453SJohn Marino       re_node_set *new_edests, *new_eclosures;
141295b7b453SJohn Marino       re_token_t *new_nodes;
1413cf28ed85SJohn Marino 
1414cf28ed85SJohn Marino       /* Avoid overflows in realloc.  */
1415cf28ed85SJohn Marino       const size_t max_object_size = MAX (sizeof (re_token_t),
141695b7b453SJohn Marino 					  MAX (sizeof (re_node_set),
141795b7b453SJohn Marino 					       sizeof (Idx)));
1418cf28ed85SJohn Marino       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
141995b7b453SJohn Marino 	return REG_MISSING;
142095b7b453SJohn Marino 
142195b7b453SJohn Marino       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
142295b7b453SJohn Marino       if (BE (new_nodes == NULL, 0))
142395b7b453SJohn Marino 	return REG_MISSING;
142495b7b453SJohn Marino       dfa->nodes = new_nodes;
142595b7b453SJohn Marino       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
142695b7b453SJohn Marino       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
142795b7b453SJohn Marino       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
142895b7b453SJohn Marino       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
142995b7b453SJohn Marino       if (BE (new_nexts == NULL || new_indices == NULL
143095b7b453SJohn Marino 	      || new_edests == NULL || new_eclosures == NULL, 0))
143195b7b453SJohn Marino 	return REG_MISSING;
143295b7b453SJohn Marino       dfa->nexts = new_nexts;
143395b7b453SJohn Marino       dfa->org_indices = new_indices;
143495b7b453SJohn Marino       dfa->edests = new_edests;
143595b7b453SJohn Marino       dfa->eclosures = new_eclosures;
143695b7b453SJohn Marino       dfa->nodes_alloc = new_nodes_alloc;
143795b7b453SJohn Marino     }
143895b7b453SJohn Marino   dfa->nodes[dfa->nodes_len] = token;
143995b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].constraint = 0;
144095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
144195b7b453SJohn Marino   dfa->nodes[dfa->nodes_len].accept_mb =
1442*680a9cb8SJohn Marino     ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1443*680a9cb8SJohn Marino      || token.type == COMPLEX_BRACKET);
144495b7b453SJohn Marino #endif
144595b7b453SJohn Marino   dfa->nexts[dfa->nodes_len] = REG_MISSING;
144695b7b453SJohn Marino   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
144795b7b453SJohn Marino   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
144895b7b453SJohn Marino   return dfa->nodes_len++;
144995b7b453SJohn Marino }
145095b7b453SJohn Marino 
1451*680a9cb8SJohn Marino static re_hashval_t
145295b7b453SJohn Marino internal_function
145395b7b453SJohn Marino calc_state_hash (const re_node_set *nodes, unsigned int context)
145495b7b453SJohn Marino {
145595b7b453SJohn Marino   re_hashval_t hash = nodes->nelem + context;
145695b7b453SJohn Marino   Idx i;
145795b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
145895b7b453SJohn Marino     hash += nodes->elems[i];
145995b7b453SJohn Marino   return hash;
146095b7b453SJohn Marino }
146195b7b453SJohn Marino 
146295b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES.
146395b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
146495b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
146595b7b453SJohn Marino    return NULL and set the error code in ERR.
146695b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
146795b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
146895b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
146995b7b453SJohn Marino 	   optimization.  */
147095b7b453SJohn Marino 
147195b7b453SJohn Marino static re_dfastate_t *
147295b7b453SJohn Marino internal_function __attribute_warn_unused_result__
147395b7b453SJohn Marino re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
147495b7b453SJohn Marino 		  const re_node_set *nodes)
147595b7b453SJohn Marino {
147695b7b453SJohn Marino   re_hashval_t hash;
147795b7b453SJohn Marino   re_dfastate_t *new_state;
147895b7b453SJohn Marino   struct re_state_table_entry *spot;
147995b7b453SJohn Marino   Idx i;
148095b7b453SJohn Marino #ifdef lint
148195b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
148295b7b453SJohn Marino   *err = REG_NOERROR;
148395b7b453SJohn Marino #endif
148495b7b453SJohn Marino   if (BE (nodes->nelem == 0, 0))
148595b7b453SJohn Marino     {
148695b7b453SJohn Marino       *err = REG_NOERROR;
148795b7b453SJohn Marino       return NULL;
148895b7b453SJohn Marino     }
148995b7b453SJohn Marino   hash = calc_state_hash (nodes, 0);
149095b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
149195b7b453SJohn Marino 
149295b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
149395b7b453SJohn Marino     {
149495b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
149595b7b453SJohn Marino       if (hash != state->hash)
149695b7b453SJohn Marino 	continue;
149795b7b453SJohn Marino       if (re_node_set_compare (&state->nodes, nodes))
149895b7b453SJohn Marino 	return state;
149995b7b453SJohn Marino     }
150095b7b453SJohn Marino 
150195b7b453SJohn Marino   /* There are no appropriate state in the dfa, create the new one.  */
150295b7b453SJohn Marino   new_state = create_ci_newstate (dfa, nodes, hash);
150395b7b453SJohn Marino   if (BE (new_state == NULL, 0))
150495b7b453SJohn Marino     *err = REG_ESPACE;
150595b7b453SJohn Marino 
150695b7b453SJohn Marino   return new_state;
150795b7b453SJohn Marino }
150895b7b453SJohn Marino 
150995b7b453SJohn Marino /* Search for the state whose node_set is equivalent to NODES and
151095b7b453SJohn Marino    whose context is equivalent to CONTEXT.
151195b7b453SJohn Marino    Return the pointer to the state, if we found it in the DFA.
151295b7b453SJohn Marino    Otherwise create the new one and return it.  In case of an error
151395b7b453SJohn Marino    return NULL and set the error code in ERR.
151495b7b453SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
151595b7b453SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
151695b7b453SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
151795b7b453SJohn Marino 	   optimization.  */
151895b7b453SJohn Marino 
151995b7b453SJohn Marino static re_dfastate_t *
152095b7b453SJohn Marino internal_function __attribute_warn_unused_result__
152195b7b453SJohn Marino re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
152295b7b453SJohn Marino 			  const re_node_set *nodes, unsigned int context)
152395b7b453SJohn Marino {
152495b7b453SJohn Marino   re_hashval_t hash;
152595b7b453SJohn Marino   re_dfastate_t *new_state;
152695b7b453SJohn Marino   struct re_state_table_entry *spot;
152795b7b453SJohn Marino   Idx i;
152895b7b453SJohn Marino #ifdef lint
152995b7b453SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
153095b7b453SJohn Marino   *err = REG_NOERROR;
153195b7b453SJohn Marino #endif
153295b7b453SJohn Marino   if (nodes->nelem == 0)
153395b7b453SJohn Marino     {
153495b7b453SJohn Marino       *err = REG_NOERROR;
153595b7b453SJohn Marino       return NULL;
153695b7b453SJohn Marino     }
153795b7b453SJohn Marino   hash = calc_state_hash (nodes, context);
153895b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
153995b7b453SJohn Marino 
154095b7b453SJohn Marino   for (i = 0 ; i < spot->num ; i++)
154195b7b453SJohn Marino     {
154295b7b453SJohn Marino       re_dfastate_t *state = spot->array[i];
154395b7b453SJohn Marino       if (state->hash == hash
154495b7b453SJohn Marino 	  && state->context == context
154595b7b453SJohn Marino 	  && re_node_set_compare (state->entrance_nodes, nodes))
154695b7b453SJohn Marino 	return state;
154795b7b453SJohn Marino     }
1548cf28ed85SJohn Marino   /* There are no appropriate state in 'dfa', create the new one.  */
154995b7b453SJohn Marino   new_state = create_cd_newstate (dfa, nodes, context, hash);
155095b7b453SJohn Marino   if (BE (new_state == NULL, 0))
155195b7b453SJohn Marino     *err = REG_ESPACE;
155295b7b453SJohn Marino 
155395b7b453SJohn Marino   return new_state;
155495b7b453SJohn Marino }
155595b7b453SJohn Marino 
155695b7b453SJohn Marino /* Finish initialization of the new state NEWSTATE, and using its hash value
155795b7b453SJohn Marino    HASH put in the appropriate bucket of DFA's state table.  Return value
155895b7b453SJohn Marino    indicates the error code if failed.  */
155995b7b453SJohn Marino 
156095b7b453SJohn Marino static reg_errcode_t
156195b7b453SJohn Marino __attribute_warn_unused_result__
156295b7b453SJohn Marino register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
156395b7b453SJohn Marino 		re_hashval_t hash)
156495b7b453SJohn Marino {
156595b7b453SJohn Marino   struct re_state_table_entry *spot;
156695b7b453SJohn Marino   reg_errcode_t err;
156795b7b453SJohn Marino   Idx i;
156895b7b453SJohn Marino 
156995b7b453SJohn Marino   newstate->hash = hash;
157095b7b453SJohn Marino   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
157195b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
157295b7b453SJohn Marino     return REG_ESPACE;
157395b7b453SJohn Marino   for (i = 0; i < newstate->nodes.nelem; i++)
157495b7b453SJohn Marino     {
157595b7b453SJohn Marino       Idx elem = newstate->nodes.elems[i];
157695b7b453SJohn Marino       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1577cf28ed85SJohn Marino 	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
157895b7b453SJohn Marino 	  return REG_ESPACE;
157995b7b453SJohn Marino     }
158095b7b453SJohn Marino 
158195b7b453SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
158295b7b453SJohn Marino   if (BE (spot->alloc <= spot->num, 0))
158395b7b453SJohn Marino     {
158495b7b453SJohn Marino       Idx new_alloc = 2 * spot->num + 2;
158595b7b453SJohn Marino       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
158695b7b453SJohn Marino 					      new_alloc);
158795b7b453SJohn Marino       if (BE (new_array == NULL, 0))
158895b7b453SJohn Marino 	return REG_ESPACE;
158995b7b453SJohn Marino       spot->array = new_array;
159095b7b453SJohn Marino       spot->alloc = new_alloc;
159195b7b453SJohn Marino     }
159295b7b453SJohn Marino   spot->array[spot->num++] = newstate;
159395b7b453SJohn Marino   return REG_NOERROR;
159495b7b453SJohn Marino }
159595b7b453SJohn Marino 
159695b7b453SJohn Marino static void
159795b7b453SJohn Marino free_state (re_dfastate_t *state)
159895b7b453SJohn Marino {
159995b7b453SJohn Marino   re_node_set_free (&state->non_eps_nodes);
160095b7b453SJohn Marino   re_node_set_free (&state->inveclosure);
160195b7b453SJohn Marino   if (state->entrance_nodes != &state->nodes)
160295b7b453SJohn Marino     {
160395b7b453SJohn Marino       re_node_set_free (state->entrance_nodes);
160495b7b453SJohn Marino       re_free (state->entrance_nodes);
160595b7b453SJohn Marino     }
160695b7b453SJohn Marino   re_node_set_free (&state->nodes);
160795b7b453SJohn Marino   re_free (state->word_trtable);
160895b7b453SJohn Marino   re_free (state->trtable);
160995b7b453SJohn Marino   re_free (state);
161095b7b453SJohn Marino }
161195b7b453SJohn Marino 
1612cf28ed85SJohn Marino /* Create the new state which is independent of contexts.
161395b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
161495b7b453SJohn Marino 
161595b7b453SJohn Marino static re_dfastate_t *
161695b7b453SJohn Marino internal_function __attribute_warn_unused_result__
161795b7b453SJohn Marino create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
161895b7b453SJohn Marino 		    re_hashval_t hash)
161995b7b453SJohn Marino {
162095b7b453SJohn Marino   Idx i;
162195b7b453SJohn Marino   reg_errcode_t err;
162295b7b453SJohn Marino   re_dfastate_t *newstate;
162395b7b453SJohn Marino 
162495b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
162595b7b453SJohn Marino   if (BE (newstate == NULL, 0))
162695b7b453SJohn Marino     return NULL;
162795b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
162895b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
162995b7b453SJohn Marino     {
163095b7b453SJohn Marino       re_free (newstate);
163195b7b453SJohn Marino       return NULL;
163295b7b453SJohn Marino     }
163395b7b453SJohn Marino 
163495b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
163595b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
163695b7b453SJohn Marino     {
163795b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
163895b7b453SJohn Marino       re_token_type_t type = node->type;
163995b7b453SJohn Marino       if (type == CHARACTER && !node->constraint)
164095b7b453SJohn Marino 	continue;
164195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
164295b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
164395b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
164495b7b453SJohn Marino 
164595b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
164695b7b453SJohn Marino       if (type == END_OF_RE)
164795b7b453SJohn Marino 	newstate->halt = 1;
164895b7b453SJohn Marino       else if (type == OP_BACK_REF)
164995b7b453SJohn Marino 	newstate->has_backref = 1;
165095b7b453SJohn Marino       else if (type == ANCHOR || node->constraint)
165195b7b453SJohn Marino 	newstate->has_constraint = 1;
165295b7b453SJohn Marino     }
165395b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
165495b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
165595b7b453SJohn Marino     {
165695b7b453SJohn Marino       free_state (newstate);
165795b7b453SJohn Marino       newstate = NULL;
165895b7b453SJohn Marino     }
165995b7b453SJohn Marino   return newstate;
166095b7b453SJohn Marino }
166195b7b453SJohn Marino 
166295b7b453SJohn Marino /* Create the new state which is depend on the context CONTEXT.
166395b7b453SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
166495b7b453SJohn Marino 
166595b7b453SJohn Marino static re_dfastate_t *
166695b7b453SJohn Marino internal_function __attribute_warn_unused_result__
166795b7b453SJohn Marino create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
166895b7b453SJohn Marino 		    unsigned int context, re_hashval_t hash)
166995b7b453SJohn Marino {
167095b7b453SJohn Marino   Idx i, nctx_nodes = 0;
167195b7b453SJohn Marino   reg_errcode_t err;
167295b7b453SJohn Marino   re_dfastate_t *newstate;
167395b7b453SJohn Marino 
167495b7b453SJohn Marino   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
167595b7b453SJohn Marino   if (BE (newstate == NULL, 0))
167695b7b453SJohn Marino     return NULL;
167795b7b453SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
167895b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
167995b7b453SJohn Marino     {
168095b7b453SJohn Marino       re_free (newstate);
168195b7b453SJohn Marino       return NULL;
168295b7b453SJohn Marino     }
168395b7b453SJohn Marino 
168495b7b453SJohn Marino   newstate->context = context;
168595b7b453SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
168695b7b453SJohn Marino 
168795b7b453SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
168895b7b453SJohn Marino     {
168995b7b453SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
169095b7b453SJohn Marino       re_token_type_t type = node->type;
169195b7b453SJohn Marino       unsigned int constraint = node->constraint;
169295b7b453SJohn Marino 
169395b7b453SJohn Marino       if (type == CHARACTER && !constraint)
169495b7b453SJohn Marino 	continue;
169595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
169695b7b453SJohn Marino       newstate->accept_mb |= node->accept_mb;
169795b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
169895b7b453SJohn Marino 
169995b7b453SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
170095b7b453SJohn Marino       if (type == END_OF_RE)
170195b7b453SJohn Marino 	newstate->halt = 1;
170295b7b453SJohn Marino       else if (type == OP_BACK_REF)
170395b7b453SJohn Marino 	newstate->has_backref = 1;
170495b7b453SJohn Marino 
170595b7b453SJohn Marino       if (constraint)
170695b7b453SJohn Marino 	{
170795b7b453SJohn Marino 	  if (newstate->entrance_nodes == &newstate->nodes)
170895b7b453SJohn Marino 	    {
170995b7b453SJohn Marino 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
171095b7b453SJohn Marino 	      if (BE (newstate->entrance_nodes == NULL, 0))
171195b7b453SJohn Marino 		{
171295b7b453SJohn Marino 		  free_state (newstate);
171395b7b453SJohn Marino 		  return NULL;
171495b7b453SJohn Marino 		}
171595b7b453SJohn Marino 	      if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
171695b7b453SJohn Marino 		  != REG_NOERROR)
171795b7b453SJohn Marino 		return NULL;
171895b7b453SJohn Marino 	      nctx_nodes = 0;
171995b7b453SJohn Marino 	      newstate->has_constraint = 1;
172095b7b453SJohn Marino 	    }
172195b7b453SJohn Marino 
172295b7b453SJohn Marino 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
172395b7b453SJohn Marino 	    {
172495b7b453SJohn Marino 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
172595b7b453SJohn Marino 	      ++nctx_nodes;
172695b7b453SJohn Marino 	    }
172795b7b453SJohn Marino 	}
172895b7b453SJohn Marino     }
172995b7b453SJohn Marino   err = register_state (dfa, newstate, hash);
173095b7b453SJohn Marino   if (BE (err != REG_NOERROR, 0))
173195b7b453SJohn Marino     {
173295b7b453SJohn Marino       free_state (newstate);
173395b7b453SJohn Marino       newstate = NULL;
173495b7b453SJohn Marino     }
173595b7b453SJohn Marino   return  newstate;
173695b7b453SJohn Marino }
1737