xref: /dflybsd-src/contrib/cvs-1.12/lib/regex_internal.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* Extended regular expression matching and search library.
286d7f5d3SJohn Marino    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
386d7f5d3SJohn Marino    This file is part of the GNU C Library.
486d7f5d3SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
586d7f5d3SJohn Marino 
686d7f5d3SJohn Marino    This program is free software; you can redistribute it and/or modify
786d7f5d3SJohn Marino    it under the terms of the GNU General Public License as published by
886d7f5d3SJohn Marino    the Free Software Foundation; either version 2, or (at your option)
986d7f5d3SJohn Marino    any later version.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino    This program is distributed in the hope that it will be useful,
1286d7f5d3SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
1386d7f5d3SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1486d7f5d3SJohn Marino    GNU General Public License for more details.
1586d7f5d3SJohn Marino 
1686d7f5d3SJohn Marino    You should have received a copy of the GNU General Public License along
1786d7f5d3SJohn Marino    with this program; if not, write to the Free Software Foundation,
1886d7f5d3SJohn Marino    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
1986d7f5d3SJohn Marino 
2086d7f5d3SJohn Marino static void re_string_construct_common (const char *str, Idx len,
2186d7f5d3SJohn Marino 					re_string_t *pstr,
2286d7f5d3SJohn Marino 					REG_TRANSLATE_TYPE trans, bool icase,
2386d7f5d3SJohn Marino 					const re_dfa_t *dfa) internal_function;
2486d7f5d3SJohn Marino static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
2586d7f5d3SJohn Marino 					  const re_node_set *nodes,
2686d7f5d3SJohn Marino 					  re_hashval_t hash) internal_function;
2786d7f5d3SJohn Marino static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
2886d7f5d3SJohn Marino 					  const re_node_set *nodes,
2986d7f5d3SJohn Marino 					  unsigned int context,
3086d7f5d3SJohn Marino 					  re_hashval_t hash) internal_function;
3186d7f5d3SJohn Marino 
3286d7f5d3SJohn Marino /* Functions for string operation.  */
3386d7f5d3SJohn Marino 
3486d7f5d3SJohn Marino /* This function allocate the buffers.  It is necessary to call
3586d7f5d3SJohn Marino    re_string_reconstruct before using the object.  */
3686d7f5d3SJohn Marino 
3786d7f5d3SJohn Marino static reg_errcode_t
3886d7f5d3SJohn Marino internal_function
re_string_allocate(re_string_t * pstr,const char * str,Idx len,Idx init_len,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)3986d7f5d3SJohn Marino re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
4086d7f5d3SJohn Marino 		    REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
4186d7f5d3SJohn Marino {
4286d7f5d3SJohn Marino   reg_errcode_t ret;
4386d7f5d3SJohn Marino   Idx init_buf_len;
4486d7f5d3SJohn Marino 
4586d7f5d3SJohn Marino   /* Ensure at least one character fits into the buffers.  */
4686d7f5d3SJohn Marino   if (init_len < dfa->mb_cur_max)
4786d7f5d3SJohn Marino     init_len = dfa->mb_cur_max;
4886d7f5d3SJohn Marino   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
4986d7f5d3SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
5086d7f5d3SJohn Marino 
5186d7f5d3SJohn Marino   ret = re_string_realloc_buffers (pstr, init_buf_len);
5286d7f5d3SJohn Marino   if (BE (ret != REG_NOERROR, 0))
5386d7f5d3SJohn Marino     return ret;
5486d7f5d3SJohn Marino 
5586d7f5d3SJohn Marino   pstr->word_char = dfa->word_char;
5686d7f5d3SJohn Marino   pstr->word_ops_used = dfa->word_ops_used;
5786d7f5d3SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
5886d7f5d3SJohn Marino   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
5986d7f5d3SJohn Marino   pstr->valid_raw_len = pstr->valid_len;
6086d7f5d3SJohn Marino   return REG_NOERROR;
6186d7f5d3SJohn Marino }
6286d7f5d3SJohn Marino 
6386d7f5d3SJohn Marino /* This function allocate the buffers, and initialize them.  */
6486d7f5d3SJohn Marino 
6586d7f5d3SJohn Marino static reg_errcode_t
6686d7f5d3SJohn Marino internal_function
re_string_construct(re_string_t * pstr,const char * str,Idx len,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)6786d7f5d3SJohn Marino re_string_construct (re_string_t *pstr, const char *str, Idx len,
6886d7f5d3SJohn Marino 		     REG_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
6986d7f5d3SJohn Marino {
7086d7f5d3SJohn Marino   reg_errcode_t ret;
7186d7f5d3SJohn Marino   memset (pstr, '\0', sizeof (re_string_t));
7286d7f5d3SJohn Marino   re_string_construct_common (str, len, pstr, trans, icase, dfa);
7386d7f5d3SJohn Marino 
7486d7f5d3SJohn Marino   if (len > 0)
7586d7f5d3SJohn Marino     {
7686d7f5d3SJohn Marino       ret = re_string_realloc_buffers (pstr, len + 1);
7786d7f5d3SJohn Marino       if (BE (ret != REG_NOERROR, 0))
7886d7f5d3SJohn Marino 	return ret;
7986d7f5d3SJohn Marino     }
8086d7f5d3SJohn Marino   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
8186d7f5d3SJohn Marino 
8286d7f5d3SJohn Marino   if (icase)
8386d7f5d3SJohn Marino     {
8486d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
8586d7f5d3SJohn Marino       if (dfa->mb_cur_max > 1)
8686d7f5d3SJohn Marino 	{
8786d7f5d3SJohn Marino 	  while (1)
8886d7f5d3SJohn Marino 	    {
8986d7f5d3SJohn Marino 	      ret = build_wcs_upper_buffer (pstr);
9086d7f5d3SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9186d7f5d3SJohn Marino 		return ret;
9286d7f5d3SJohn Marino 	      if (pstr->valid_raw_len >= len)
9386d7f5d3SJohn Marino 		break;
9486d7f5d3SJohn Marino 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
9586d7f5d3SJohn Marino 		break;
9686d7f5d3SJohn Marino 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
9786d7f5d3SJohn Marino 	      if (BE (ret != REG_NOERROR, 0))
9886d7f5d3SJohn Marino 		return ret;
9986d7f5d3SJohn Marino 	    }
10086d7f5d3SJohn Marino 	}
10186d7f5d3SJohn Marino       else
10286d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N  */
10386d7f5d3SJohn Marino 	build_upper_buffer (pstr);
10486d7f5d3SJohn Marino     }
10586d7f5d3SJohn Marino   else
10686d7f5d3SJohn Marino     {
10786d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
10886d7f5d3SJohn Marino       if (dfa->mb_cur_max > 1)
10986d7f5d3SJohn Marino 	build_wcs_buffer (pstr);
11086d7f5d3SJohn Marino       else
11186d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N  */
11286d7f5d3SJohn Marino 	{
11386d7f5d3SJohn Marino 	  if (trans != NULL)
11486d7f5d3SJohn Marino 	    re_string_translate_buffer (pstr);
11586d7f5d3SJohn Marino 	  else
11686d7f5d3SJohn Marino 	    {
11786d7f5d3SJohn Marino 	      pstr->valid_len = pstr->bufs_len;
11886d7f5d3SJohn Marino 	      pstr->valid_raw_len = pstr->bufs_len;
11986d7f5d3SJohn Marino 	    }
12086d7f5d3SJohn Marino 	}
12186d7f5d3SJohn Marino     }
12286d7f5d3SJohn Marino 
12386d7f5d3SJohn Marino   return REG_NOERROR;
12486d7f5d3SJohn Marino }
12586d7f5d3SJohn Marino 
12686d7f5d3SJohn Marino /* Helper functions for re_string_allocate, and re_string_construct.  */
12786d7f5d3SJohn Marino 
12886d7f5d3SJohn Marino static reg_errcode_t
12986d7f5d3SJohn Marino internal_function
re_string_realloc_buffers(re_string_t * pstr,Idx new_buf_len)13086d7f5d3SJohn Marino re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
13186d7f5d3SJohn Marino {
13286d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
13386d7f5d3SJohn Marino   if (pstr->mb_cur_max > 1)
13486d7f5d3SJohn Marino     {
13586d7f5d3SJohn Marino       wint_t *new_wcs = re_xrealloc (pstr->wcs, wint_t, new_buf_len);
13686d7f5d3SJohn Marino       if (BE (new_wcs == NULL, 0))
13786d7f5d3SJohn Marino 	return REG_ESPACE;
13886d7f5d3SJohn Marino       pstr->wcs = new_wcs;
13986d7f5d3SJohn Marino       if (pstr->offsets != NULL)
14086d7f5d3SJohn Marino 	{
14186d7f5d3SJohn Marino 	  Idx *new_offsets = re_xrealloc (pstr->offsets, Idx, new_buf_len);
14286d7f5d3SJohn Marino 	  if (BE (new_offsets == NULL, 0))
14386d7f5d3SJohn Marino 	    return REG_ESPACE;
14486d7f5d3SJohn Marino 	  pstr->offsets = new_offsets;
14586d7f5d3SJohn Marino 	}
14686d7f5d3SJohn Marino     }
14786d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N  */
14886d7f5d3SJohn Marino   if (pstr->mbs_allocated)
14986d7f5d3SJohn Marino     {
15086d7f5d3SJohn Marino       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
15186d7f5d3SJohn Marino 					   new_buf_len);
15286d7f5d3SJohn Marino       if (BE (new_mbs == NULL, 0))
15386d7f5d3SJohn Marino 	return REG_ESPACE;
15486d7f5d3SJohn Marino       pstr->mbs = new_mbs;
15586d7f5d3SJohn Marino     }
15686d7f5d3SJohn Marino   pstr->bufs_len = new_buf_len;
15786d7f5d3SJohn Marino   return REG_NOERROR;
15886d7f5d3SJohn Marino }
15986d7f5d3SJohn Marino 
16086d7f5d3SJohn Marino 
16186d7f5d3SJohn Marino static void
16286d7f5d3SJohn Marino internal_function
re_string_construct_common(const char * str,Idx len,re_string_t * pstr,REG_TRANSLATE_TYPE trans,bool icase,const re_dfa_t * dfa)16386d7f5d3SJohn Marino re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
16486d7f5d3SJohn Marino 			    REG_TRANSLATE_TYPE trans, bool icase,
16586d7f5d3SJohn Marino 			    const re_dfa_t *dfa)
16686d7f5d3SJohn Marino {
16786d7f5d3SJohn Marino   pstr->raw_mbs = (const unsigned char *) str;
16886d7f5d3SJohn Marino   pstr->len = len;
16986d7f5d3SJohn Marino   pstr->raw_len = len;
17086d7f5d3SJohn Marino   pstr->trans = (unsigned REG_TRANSLATE_TYPE) trans;
17186d7f5d3SJohn Marino   pstr->icase = icase;
17286d7f5d3SJohn Marino   pstr->mbs_allocated = (trans != NULL || icase);
17386d7f5d3SJohn Marino   pstr->mb_cur_max = dfa->mb_cur_max;
17486d7f5d3SJohn Marino   pstr->is_utf8 = dfa->is_utf8;
17586d7f5d3SJohn Marino   pstr->map_notascii = dfa->map_notascii;
17686d7f5d3SJohn Marino   pstr->stop = pstr->len;
17786d7f5d3SJohn Marino   pstr->raw_stop = pstr->stop;
17886d7f5d3SJohn Marino }
17986d7f5d3SJohn Marino 
18086d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
18186d7f5d3SJohn Marino 
18286d7f5d3SJohn Marino /* Build wide character buffer PSTR->WCS.
18386d7f5d3SJohn Marino    If the byte sequence of the string are:
18486d7f5d3SJohn Marino      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
18586d7f5d3SJohn Marino    Then wide character buffer will be:
18686d7f5d3SJohn Marino      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
18786d7f5d3SJohn Marino    We use WEOF for padding, they indicate that the position isn't
18886d7f5d3SJohn Marino    a first byte of a multibyte character.
18986d7f5d3SJohn Marino 
19086d7f5d3SJohn Marino    Note that this function assumes PSTR->VALID_LEN elements are already
19186d7f5d3SJohn Marino    built and starts from PSTR->VALID_LEN.  */
19286d7f5d3SJohn Marino 
19386d7f5d3SJohn Marino static void
19486d7f5d3SJohn Marino internal_function
build_wcs_buffer(re_string_t * pstr)19586d7f5d3SJohn Marino build_wcs_buffer (re_string_t *pstr)
19686d7f5d3SJohn Marino {
19786d7f5d3SJohn Marino #ifdef _LIBC
19886d7f5d3SJohn Marino   unsigned char buf[MB_LEN_MAX];
19986d7f5d3SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
20086d7f5d3SJohn Marino #else
20186d7f5d3SJohn Marino   unsigned char buf[64];
20286d7f5d3SJohn Marino #endif
20386d7f5d3SJohn Marino   mbstate_t prev_st;
20486d7f5d3SJohn Marino   Idx byte_idx, end_idx, remain_len;
20586d7f5d3SJohn Marino   size_t mbclen;
20686d7f5d3SJohn Marino 
20786d7f5d3SJohn Marino   /* Build the buffers from pstr->valid_len to either pstr->len or
20886d7f5d3SJohn Marino      pstr->bufs_len.  */
20986d7f5d3SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
21086d7f5d3SJohn Marino   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
21186d7f5d3SJohn Marino     {
21286d7f5d3SJohn Marino       wchar_t wc;
21386d7f5d3SJohn Marino       const char *p;
21486d7f5d3SJohn Marino 
21586d7f5d3SJohn Marino       remain_len = end_idx - byte_idx;
21686d7f5d3SJohn Marino       prev_st = pstr->cur_state;
21786d7f5d3SJohn Marino       /* Apply the translation if we need.  */
21886d7f5d3SJohn Marino       if (BE (pstr->trans != NULL, 0))
21986d7f5d3SJohn Marino 	{
22086d7f5d3SJohn Marino 	  int i, ch;
22186d7f5d3SJohn Marino 
22286d7f5d3SJohn Marino 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
22386d7f5d3SJohn Marino 	    {
22486d7f5d3SJohn Marino 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
22586d7f5d3SJohn Marino 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
22686d7f5d3SJohn Marino 	    }
22786d7f5d3SJohn Marino 	  p = (const char *) buf;
22886d7f5d3SJohn Marino 	}
22986d7f5d3SJohn Marino       else
23086d7f5d3SJohn Marino 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
23186d7f5d3SJohn Marino       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
23286d7f5d3SJohn Marino       if (BE (mbclen == (size_t) -2, 0))
23386d7f5d3SJohn Marino 	{
23486d7f5d3SJohn Marino 	  /* The buffer doesn't have enough space, finish to build.  */
23586d7f5d3SJohn Marino 	  pstr->cur_state = prev_st;
23686d7f5d3SJohn Marino 	  break;
23786d7f5d3SJohn Marino 	}
23886d7f5d3SJohn Marino       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
23986d7f5d3SJohn Marino 	{
24086d7f5d3SJohn Marino 	  /* We treat these cases as a singlebyte character.  */
24186d7f5d3SJohn Marino 	  mbclen = 1;
24286d7f5d3SJohn Marino 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
24386d7f5d3SJohn Marino 	  if (BE (pstr->trans != NULL, 0))
24486d7f5d3SJohn Marino 	    wc = pstr->trans[wc];
24586d7f5d3SJohn Marino 	  pstr->cur_state = prev_st;
24686d7f5d3SJohn Marino 	}
24786d7f5d3SJohn Marino 
24886d7f5d3SJohn Marino       /* Write wide character and padding.  */
24986d7f5d3SJohn Marino       pstr->wcs[byte_idx++] = wc;
25086d7f5d3SJohn Marino       /* Write paddings.  */
25186d7f5d3SJohn Marino       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
25286d7f5d3SJohn Marino 	pstr->wcs[byte_idx++] = WEOF;
25386d7f5d3SJohn Marino     }
25486d7f5d3SJohn Marino   pstr->valid_len = byte_idx;
25586d7f5d3SJohn Marino   pstr->valid_raw_len = byte_idx;
25686d7f5d3SJohn Marino }
25786d7f5d3SJohn Marino 
25886d7f5d3SJohn Marino /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
25986d7f5d3SJohn Marino    but for REG_ICASE.  */
26086d7f5d3SJohn Marino 
26186d7f5d3SJohn Marino static reg_errcode_t
26286d7f5d3SJohn Marino internal_function
build_wcs_upper_buffer(re_string_t * pstr)26386d7f5d3SJohn Marino build_wcs_upper_buffer (re_string_t *pstr)
26486d7f5d3SJohn Marino {
26586d7f5d3SJohn Marino   mbstate_t prev_st;
26686d7f5d3SJohn Marino   Idx src_idx, byte_idx, end_idx, remain_len;
26786d7f5d3SJohn Marino   size_t mbclen;
26886d7f5d3SJohn Marino #ifdef _LIBC
26986d7f5d3SJohn Marino   char buf[MB_LEN_MAX];
27086d7f5d3SJohn Marino   assert (MB_LEN_MAX >= pstr->mb_cur_max);
27186d7f5d3SJohn Marino #else
27286d7f5d3SJohn Marino   char buf[64];
27386d7f5d3SJohn Marino #endif
27486d7f5d3SJohn Marino 
27586d7f5d3SJohn Marino   byte_idx = pstr->valid_len;
27686d7f5d3SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
27786d7f5d3SJohn Marino 
27886d7f5d3SJohn Marino   /* The following optimization assumes that ASCII characters can be
27986d7f5d3SJohn Marino      mapped to wide characters with a simple cast.  */
28086d7f5d3SJohn Marino   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
28186d7f5d3SJohn Marino     {
28286d7f5d3SJohn Marino       while (byte_idx < end_idx)
28386d7f5d3SJohn Marino 	{
28486d7f5d3SJohn Marino 	  wchar_t wc;
28586d7f5d3SJohn Marino 
28686d7f5d3SJohn Marino 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
28786d7f5d3SJohn Marino 	      && mbsinit (&pstr->cur_state))
28886d7f5d3SJohn Marino 	    {
28986d7f5d3SJohn Marino 	      /* In case of a singlebyte character.  */
29086d7f5d3SJohn Marino 	      pstr->mbs[byte_idx]
29186d7f5d3SJohn Marino 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
29286d7f5d3SJohn Marino 	      /* The next step uses the assumption that wchar_t is encoded
29386d7f5d3SJohn Marino 		 ASCII-safe: all ASCII values can be converted like this.  */
29486d7f5d3SJohn Marino 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
29586d7f5d3SJohn Marino 	      ++byte_idx;
29686d7f5d3SJohn Marino 	      continue;
29786d7f5d3SJohn Marino 	    }
29886d7f5d3SJohn Marino 
29986d7f5d3SJohn Marino 	  remain_len = end_idx - byte_idx;
30086d7f5d3SJohn Marino 	  prev_st = pstr->cur_state;
30186d7f5d3SJohn Marino 	  mbclen = mbrtowc (&wc,
30286d7f5d3SJohn Marino 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
30386d7f5d3SJohn Marino 			     + byte_idx), remain_len, &pstr->cur_state);
30486d7f5d3SJohn Marino 	  if (BE ((size_t) (mbclen + 2) > 2, 1))
30586d7f5d3SJohn Marino 	    {
30686d7f5d3SJohn Marino 	      wchar_t wcu = wc;
30786d7f5d3SJohn Marino 	      if (iswlower (wc))
30886d7f5d3SJohn Marino 		{
30986d7f5d3SJohn Marino 		  size_t mbcdlen;
31086d7f5d3SJohn Marino 
31186d7f5d3SJohn Marino 		  wcu = towupper (wc);
31286d7f5d3SJohn Marino 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
31386d7f5d3SJohn Marino 		  if (BE (mbclen == mbcdlen, 1))
31486d7f5d3SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
31586d7f5d3SJohn Marino 		  else
31686d7f5d3SJohn Marino 		    {
31786d7f5d3SJohn Marino 		      src_idx = byte_idx;
31886d7f5d3SJohn Marino 		      goto offsets_needed;
31986d7f5d3SJohn Marino 		    }
32086d7f5d3SJohn Marino 		}
32186d7f5d3SJohn Marino 	      else
32286d7f5d3SJohn Marino 		memcpy (pstr->mbs + byte_idx,
32386d7f5d3SJohn Marino 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
32486d7f5d3SJohn Marino 	      pstr->wcs[byte_idx++] = wcu;
32586d7f5d3SJohn Marino 	      /* Write paddings.  */
32686d7f5d3SJohn Marino 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
32786d7f5d3SJohn Marino 		pstr->wcs[byte_idx++] = WEOF;
32886d7f5d3SJohn Marino 	    }
32986d7f5d3SJohn Marino 	  else if (mbclen == (size_t) -1 || mbclen == 0)
33086d7f5d3SJohn Marino 	    {
33186d7f5d3SJohn Marino 	      /* It is an invalid character or '\0'.  Just use the byte.  */
33286d7f5d3SJohn Marino 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
33386d7f5d3SJohn Marino 	      pstr->mbs[byte_idx] = ch;
33486d7f5d3SJohn Marino 	      /* And also cast it to wide char.  */
33586d7f5d3SJohn Marino 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
33686d7f5d3SJohn Marino 	      if (BE (mbclen == (size_t) -1, 0))
33786d7f5d3SJohn Marino 		pstr->cur_state = prev_st;
33886d7f5d3SJohn Marino 	    }
33986d7f5d3SJohn Marino 	  else
34086d7f5d3SJohn Marino 	    {
34186d7f5d3SJohn Marino 	      /* The buffer doesn't have enough space, finish to build.  */
34286d7f5d3SJohn Marino 	      pstr->cur_state = prev_st;
34386d7f5d3SJohn Marino 	      break;
34486d7f5d3SJohn Marino 	    }
34586d7f5d3SJohn Marino 	}
34686d7f5d3SJohn Marino       pstr->valid_len = byte_idx;
34786d7f5d3SJohn Marino       pstr->valid_raw_len = byte_idx;
34886d7f5d3SJohn Marino       return REG_NOERROR;
34986d7f5d3SJohn Marino     }
35086d7f5d3SJohn Marino   else
35186d7f5d3SJohn Marino     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
35286d7f5d3SJohn Marino       {
35386d7f5d3SJohn Marino 	wchar_t wc;
35486d7f5d3SJohn Marino 	const char *p;
35586d7f5d3SJohn Marino       offsets_needed:
35686d7f5d3SJohn Marino 	remain_len = end_idx - byte_idx;
35786d7f5d3SJohn Marino 	prev_st = pstr->cur_state;
35886d7f5d3SJohn Marino 	if (BE (pstr->trans != NULL, 0))
35986d7f5d3SJohn Marino 	  {
36086d7f5d3SJohn Marino 	    int i, ch;
36186d7f5d3SJohn Marino 
36286d7f5d3SJohn Marino 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
36386d7f5d3SJohn Marino 	      {
36486d7f5d3SJohn Marino 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
36586d7f5d3SJohn Marino 		buf[i] = pstr->trans[ch];
36686d7f5d3SJohn Marino 	      }
36786d7f5d3SJohn Marino 	    p = (const char *) buf;
36886d7f5d3SJohn Marino 	  }
36986d7f5d3SJohn Marino 	else
37086d7f5d3SJohn Marino 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
37186d7f5d3SJohn Marino 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
37286d7f5d3SJohn Marino 	if (BE ((size_t) (mbclen + 2) > 2, 1))
37386d7f5d3SJohn Marino 	  {
37486d7f5d3SJohn Marino 	    wchar_t wcu = wc;
37586d7f5d3SJohn Marino 	    if (iswlower (wc))
37686d7f5d3SJohn Marino 	      {
37786d7f5d3SJohn Marino 		size_t mbcdlen;
37886d7f5d3SJohn Marino 
37986d7f5d3SJohn Marino 		wcu = towupper (wc);
38086d7f5d3SJohn Marino 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
38186d7f5d3SJohn Marino 		if (BE (mbclen == mbcdlen, 1))
38286d7f5d3SJohn Marino 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
38386d7f5d3SJohn Marino 		else if (mbcdlen != (size_t) -1)
38486d7f5d3SJohn Marino 		  {
38586d7f5d3SJohn Marino 		    size_t i;
38686d7f5d3SJohn Marino 
38786d7f5d3SJohn Marino 		    if (byte_idx + mbcdlen > pstr->bufs_len)
38886d7f5d3SJohn Marino 		      {
38986d7f5d3SJohn Marino 			pstr->cur_state = prev_st;
39086d7f5d3SJohn Marino 			break;
39186d7f5d3SJohn Marino 		      }
39286d7f5d3SJohn Marino 
39386d7f5d3SJohn Marino 		    if (pstr->offsets == NULL)
39486d7f5d3SJohn Marino 		      {
39586d7f5d3SJohn Marino 			pstr->offsets = re_xmalloc (Idx, pstr->bufs_len);
39686d7f5d3SJohn Marino 
39786d7f5d3SJohn Marino 			if (pstr->offsets == NULL)
39886d7f5d3SJohn Marino 			  return REG_ESPACE;
39986d7f5d3SJohn Marino 		      }
40086d7f5d3SJohn Marino 		    if (!pstr->offsets_needed)
40186d7f5d3SJohn Marino 		      {
40286d7f5d3SJohn Marino 			for (i = 0; i < (size_t) byte_idx; ++i)
40386d7f5d3SJohn Marino 			  pstr->offsets[i] = i;
40486d7f5d3SJohn Marino 			pstr->offsets_needed = 1;
40586d7f5d3SJohn Marino 		      }
40686d7f5d3SJohn Marino 
40786d7f5d3SJohn Marino 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
40886d7f5d3SJohn Marino 		    pstr->wcs[byte_idx] = wcu;
40986d7f5d3SJohn Marino 		    pstr->offsets[byte_idx] = src_idx;
41086d7f5d3SJohn Marino 		    for (i = 1; i < mbcdlen; ++i)
41186d7f5d3SJohn Marino 		      {
41286d7f5d3SJohn Marino 			pstr->offsets[byte_idx + i]
41386d7f5d3SJohn Marino 			  = src_idx + (i < mbclen ? i : mbclen - 1);
41486d7f5d3SJohn Marino 			pstr->wcs[byte_idx + i] = WEOF;
41586d7f5d3SJohn Marino 		      }
41686d7f5d3SJohn Marino 		    pstr->len += mbcdlen - mbclen;
41786d7f5d3SJohn Marino 		    if (pstr->raw_stop > src_idx)
41886d7f5d3SJohn Marino 		      pstr->stop += mbcdlen - mbclen;
41986d7f5d3SJohn Marino 		    end_idx = (pstr->bufs_len > pstr->len)
42086d7f5d3SJohn Marino 			      ? pstr->len : pstr->bufs_len;
42186d7f5d3SJohn Marino 		    byte_idx += mbcdlen;
42286d7f5d3SJohn Marino 		    src_idx += mbclen;
42386d7f5d3SJohn Marino 		    continue;
42486d7f5d3SJohn Marino 		  }
42586d7f5d3SJohn Marino                 else
42686d7f5d3SJohn Marino                   memcpy (pstr->mbs + byte_idx, p, mbclen);
42786d7f5d3SJohn Marino 	      }
42886d7f5d3SJohn Marino 	    else
42986d7f5d3SJohn Marino 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
43086d7f5d3SJohn Marino 
43186d7f5d3SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
43286d7f5d3SJohn Marino 	      {
43386d7f5d3SJohn Marino 		size_t i;
43486d7f5d3SJohn Marino 		for (i = 0; i < mbclen; ++i)
43586d7f5d3SJohn Marino 		  pstr->offsets[byte_idx + i] = src_idx + i;
43686d7f5d3SJohn Marino 	      }
43786d7f5d3SJohn Marino 	    src_idx += mbclen;
43886d7f5d3SJohn Marino 
43986d7f5d3SJohn Marino 	    pstr->wcs[byte_idx++] = wcu;
44086d7f5d3SJohn Marino 	    /* Write paddings.  */
44186d7f5d3SJohn Marino 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
44286d7f5d3SJohn Marino 	      pstr->wcs[byte_idx++] = WEOF;
44386d7f5d3SJohn Marino 	  }
44486d7f5d3SJohn Marino 	else if (mbclen == (size_t) -1 || mbclen == 0)
44586d7f5d3SJohn Marino 	  {
44686d7f5d3SJohn Marino 	    /* It is an invalid character or '\0'.  Just use the byte.  */
44786d7f5d3SJohn Marino 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
44886d7f5d3SJohn Marino 
44986d7f5d3SJohn Marino 	    if (BE (pstr->trans != NULL, 0))
45086d7f5d3SJohn Marino 	      ch = pstr->trans [ch];
45186d7f5d3SJohn Marino 	    pstr->mbs[byte_idx] = ch;
45286d7f5d3SJohn Marino 
45386d7f5d3SJohn Marino 	    if (BE (pstr->offsets_needed != 0, 0))
45486d7f5d3SJohn Marino 	      pstr->offsets[byte_idx] = src_idx;
45586d7f5d3SJohn Marino 	    ++src_idx;
45686d7f5d3SJohn Marino 
45786d7f5d3SJohn Marino 	    /* And also cast it to wide char.  */
45886d7f5d3SJohn Marino 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
45986d7f5d3SJohn Marino 	    if (BE (mbclen == (size_t) -1, 0))
46086d7f5d3SJohn Marino 	      pstr->cur_state = prev_st;
46186d7f5d3SJohn Marino 	  }
46286d7f5d3SJohn Marino 	else
46386d7f5d3SJohn Marino 	  {
46486d7f5d3SJohn Marino 	    /* The buffer doesn't have enough space, finish to build.  */
46586d7f5d3SJohn Marino 	    pstr->cur_state = prev_st;
46686d7f5d3SJohn Marino 	    break;
46786d7f5d3SJohn Marino 	  }
46886d7f5d3SJohn Marino       }
46986d7f5d3SJohn Marino   pstr->valid_len = byte_idx;
47086d7f5d3SJohn Marino   pstr->valid_raw_len = src_idx;
47186d7f5d3SJohn Marino   return REG_NOERROR;
47286d7f5d3SJohn Marino }
47386d7f5d3SJohn Marino 
47486d7f5d3SJohn Marino /* Skip characters until the index becomes greater than NEW_RAW_IDX.
47586d7f5d3SJohn Marino    Return the index.  */
47686d7f5d3SJohn Marino 
47786d7f5d3SJohn Marino static Idx
47886d7f5d3SJohn Marino internal_function
re_string_skip_chars(re_string_t * pstr,Idx new_raw_idx,wint_t * last_wc)47986d7f5d3SJohn Marino re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
48086d7f5d3SJohn Marino {
48186d7f5d3SJohn Marino   mbstate_t prev_st;
48286d7f5d3SJohn Marino   Idx rawbuf_idx;
48386d7f5d3SJohn Marino   size_t mbclen;
48486d7f5d3SJohn Marino   wchar_t wc = 0;
48586d7f5d3SJohn Marino 
48686d7f5d3SJohn Marino   /* Skip the characters which are not necessary to check.  */
48786d7f5d3SJohn Marino   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
48886d7f5d3SJohn Marino        rawbuf_idx < new_raw_idx;)
48986d7f5d3SJohn Marino     {
49086d7f5d3SJohn Marino       Idx remain_len;
49186d7f5d3SJohn Marino       remain_len = pstr->len - rawbuf_idx;
49286d7f5d3SJohn Marino       prev_st = pstr->cur_state;
49386d7f5d3SJohn Marino       mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx,
49486d7f5d3SJohn Marino 			remain_len, &pstr->cur_state);
49586d7f5d3SJohn Marino       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
49686d7f5d3SJohn Marino 	{
49786d7f5d3SJohn Marino 	  /* We treat these cases as a singlebyte character.  */
49886d7f5d3SJohn Marino 	  mbclen = 1;
49986d7f5d3SJohn Marino 	  pstr->cur_state = prev_st;
50086d7f5d3SJohn Marino 	}
50186d7f5d3SJohn Marino       /* Then proceed the next character.  */
50286d7f5d3SJohn Marino       rawbuf_idx += mbclen;
50386d7f5d3SJohn Marino     }
50486d7f5d3SJohn Marino   *last_wc = (wint_t) wc;
50586d7f5d3SJohn Marino   return rawbuf_idx;
50686d7f5d3SJohn Marino }
50786d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N  */
50886d7f5d3SJohn Marino 
50986d7f5d3SJohn Marino /* Build the buffer PSTR->MBS, and apply the translation if we need.
51086d7f5d3SJohn Marino    This function is used in case of REG_ICASE.  */
51186d7f5d3SJohn Marino 
51286d7f5d3SJohn Marino static void
51386d7f5d3SJohn Marino internal_function
build_upper_buffer(re_string_t * pstr)51486d7f5d3SJohn Marino build_upper_buffer (re_string_t *pstr)
51586d7f5d3SJohn Marino {
51686d7f5d3SJohn Marino   Idx char_idx, end_idx;
51786d7f5d3SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
51886d7f5d3SJohn Marino 
51986d7f5d3SJohn Marino   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
52086d7f5d3SJohn Marino     {
52186d7f5d3SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
52286d7f5d3SJohn Marino       if (BE (pstr->trans != NULL, 0))
52386d7f5d3SJohn Marino 	ch = pstr->trans[ch];
52486d7f5d3SJohn Marino       if (islower (ch))
52586d7f5d3SJohn Marino 	pstr->mbs[char_idx] = toupper (ch);
52686d7f5d3SJohn Marino       else
52786d7f5d3SJohn Marino 	pstr->mbs[char_idx] = ch;
52886d7f5d3SJohn Marino     }
52986d7f5d3SJohn Marino   pstr->valid_len = char_idx;
53086d7f5d3SJohn Marino   pstr->valid_raw_len = char_idx;
53186d7f5d3SJohn Marino }
53286d7f5d3SJohn Marino 
53386d7f5d3SJohn Marino /* Apply TRANS to the buffer in PSTR.  */
53486d7f5d3SJohn Marino 
53586d7f5d3SJohn Marino static void
53686d7f5d3SJohn Marino internal_function
re_string_translate_buffer(re_string_t * pstr)53786d7f5d3SJohn Marino re_string_translate_buffer (re_string_t *pstr)
53886d7f5d3SJohn Marino {
53986d7f5d3SJohn Marino   Idx buf_idx, end_idx;
54086d7f5d3SJohn Marino   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
54186d7f5d3SJohn Marino 
54286d7f5d3SJohn Marino   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
54386d7f5d3SJohn Marino     {
54486d7f5d3SJohn Marino       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
54586d7f5d3SJohn Marino       pstr->mbs[buf_idx] = pstr->trans[ch];
54686d7f5d3SJohn Marino     }
54786d7f5d3SJohn Marino 
54886d7f5d3SJohn Marino   pstr->valid_len = buf_idx;
54986d7f5d3SJohn Marino   pstr->valid_raw_len = buf_idx;
55086d7f5d3SJohn Marino }
55186d7f5d3SJohn Marino 
55286d7f5d3SJohn Marino /* This function re-construct the buffers.
55386d7f5d3SJohn Marino    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
55486d7f5d3SJohn Marino    convert to upper case in case of REG_ICASE, apply translation.  */
55586d7f5d3SJohn Marino 
55686d7f5d3SJohn Marino static reg_errcode_t
55786d7f5d3SJohn Marino internal_function
re_string_reconstruct(re_string_t * pstr,Idx idx,int eflags)55886d7f5d3SJohn Marino re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
55986d7f5d3SJohn Marino {
56086d7f5d3SJohn Marino   Idx offset;
56186d7f5d3SJohn Marino 
56286d7f5d3SJohn Marino   if (BE (pstr->raw_mbs_idx <= idx, 0))
56386d7f5d3SJohn Marino     offset = idx - pstr->raw_mbs_idx;
56486d7f5d3SJohn Marino   else
56586d7f5d3SJohn Marino     {
56686d7f5d3SJohn Marino       /* Reset buffer.  */
56786d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
56886d7f5d3SJohn Marino       if (pstr->mb_cur_max > 1)
56986d7f5d3SJohn Marino 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
57086d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
57186d7f5d3SJohn Marino       pstr->len = pstr->raw_len;
57286d7f5d3SJohn Marino       pstr->stop = pstr->raw_stop;
57386d7f5d3SJohn Marino       pstr->valid_len = 0;
57486d7f5d3SJohn Marino       pstr->raw_mbs_idx = 0;
57586d7f5d3SJohn Marino       pstr->valid_raw_len = 0;
57686d7f5d3SJohn Marino       pstr->offsets_needed = 0;
57786d7f5d3SJohn Marino       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
57886d7f5d3SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
57986d7f5d3SJohn Marino       if (!pstr->mbs_allocated)
58086d7f5d3SJohn Marino 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
58186d7f5d3SJohn Marino       offset = idx;
58286d7f5d3SJohn Marino     }
58386d7f5d3SJohn Marino 
58486d7f5d3SJohn Marino   if (BE (offset != 0, 1))
58586d7f5d3SJohn Marino     {
58686d7f5d3SJohn Marino       /* Are the characters which are already checked remain?  */
58786d7f5d3SJohn Marino       if (BE (offset < pstr->valid_raw_len, 1)
58886d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
58986d7f5d3SJohn Marino 	  /* Handling this would enlarge the code too much.
59086d7f5d3SJohn Marino 	     Accept a slowdown in that case.  */
59186d7f5d3SJohn Marino 	  && pstr->offsets_needed == 0
59286d7f5d3SJohn Marino #endif
59386d7f5d3SJohn Marino 	 )
59486d7f5d3SJohn Marino 	{
59586d7f5d3SJohn Marino 	  /* Yes, move them to the front of the buffer.  */
59686d7f5d3SJohn Marino 	  pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags);
59786d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
59886d7f5d3SJohn Marino 	  if (pstr->mb_cur_max > 1)
59986d7f5d3SJohn Marino 	    memmove (pstr->wcs, pstr->wcs + offset,
60086d7f5d3SJohn Marino 		     (pstr->valid_len - offset) * sizeof (wint_t));
60186d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
60286d7f5d3SJohn Marino 	  if (BE (pstr->mbs_allocated, 0))
60386d7f5d3SJohn Marino 	    memmove (pstr->mbs, pstr->mbs + offset,
60486d7f5d3SJohn Marino 		     pstr->valid_len - offset);
60586d7f5d3SJohn Marino 	  pstr->valid_len -= offset;
60686d7f5d3SJohn Marino 	  pstr->valid_raw_len -= offset;
60786d7f5d3SJohn Marino #if DEBUG
60886d7f5d3SJohn Marino 	  assert (pstr->valid_len > 0);
60986d7f5d3SJohn Marino #endif
61086d7f5d3SJohn Marino 	}
61186d7f5d3SJohn Marino       else
61286d7f5d3SJohn Marino 	{
61386d7f5d3SJohn Marino 	  /* No, skip all characters until IDX.  */
61486d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
61586d7f5d3SJohn Marino 	  if (BE (pstr->offsets_needed, 0))
61686d7f5d3SJohn Marino 	    {
61786d7f5d3SJohn Marino 	      pstr->len = pstr->raw_len - idx + offset;
61886d7f5d3SJohn Marino 	      pstr->stop = pstr->raw_stop - idx + offset;
61986d7f5d3SJohn Marino 	      pstr->offsets_needed = 0;
62086d7f5d3SJohn Marino 	    }
62186d7f5d3SJohn Marino #endif
62286d7f5d3SJohn Marino 	  pstr->valid_len = 0;
62386d7f5d3SJohn Marino 	  pstr->valid_raw_len = 0;
62486d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
62586d7f5d3SJohn Marino 	  if (pstr->mb_cur_max > 1)
62686d7f5d3SJohn Marino 	    {
62786d7f5d3SJohn Marino 	      Idx wcs_idx;
62886d7f5d3SJohn Marino 	      wint_t wc = WEOF;
62986d7f5d3SJohn Marino 
63086d7f5d3SJohn Marino 	      if (pstr->is_utf8)
63186d7f5d3SJohn Marino 		{
63286d7f5d3SJohn Marino 		  const unsigned char *raw, *p, *q, *end;
63386d7f5d3SJohn Marino 
63486d7f5d3SJohn Marino 		  /* Special case UTF-8.  Multi-byte chars start with any
63586d7f5d3SJohn Marino 		     byte other than 0x80 - 0xbf.  */
63686d7f5d3SJohn Marino 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
63786d7f5d3SJohn Marino 		  end = raw + (offset - pstr->mb_cur_max);
63886d7f5d3SJohn Marino 		  for (p = raw + offset - 1; p >= end; --p)
63986d7f5d3SJohn Marino 		    if ((*p & 0xc0) != 0x80)
64086d7f5d3SJohn Marino 		      {
64186d7f5d3SJohn Marino 			mbstate_t cur_state;
64286d7f5d3SJohn Marino 			wchar_t wc2;
64386d7f5d3SJohn Marino 			Idx mlen = raw + pstr->len - p;
64486d7f5d3SJohn Marino 			unsigned char buf[6];
64586d7f5d3SJohn Marino 			size_t mbclen;
64686d7f5d3SJohn Marino 
64786d7f5d3SJohn Marino 			q = p;
64886d7f5d3SJohn Marino 			if (BE (pstr->trans != NULL, 0))
64986d7f5d3SJohn Marino 			  {
65086d7f5d3SJohn Marino 			    int i = mlen < 6 ? mlen : 6;
65186d7f5d3SJohn Marino 			    while (--i >= 0)
65286d7f5d3SJohn Marino 			      buf[i] = pstr->trans[p[i]];
65386d7f5d3SJohn Marino 			    q = buf;
65486d7f5d3SJohn Marino 			  }
65586d7f5d3SJohn Marino 			/* XXX Don't use mbrtowc, we know which conversion
65686d7f5d3SJohn Marino 			   to use (UTF-8 -> UCS4).  */
65786d7f5d3SJohn Marino 			memset (&cur_state, 0, sizeof (cur_state));
65886d7f5d3SJohn Marino 			mbclen = mbrtowc (&wc2, (const char *) p, mlen,
65986d7f5d3SJohn Marino 					  &cur_state);
66086d7f5d3SJohn Marino 			if (raw + offset - p <= mbclen && mbclen < (size_t) -2)
66186d7f5d3SJohn Marino 			  {
66286d7f5d3SJohn Marino 			    memset (&pstr->cur_state, '\0',
66386d7f5d3SJohn Marino 				    sizeof (mbstate_t));
66486d7f5d3SJohn Marino 			    pstr->valid_len = mbclen - (raw + offset - p);
66586d7f5d3SJohn Marino 			    wc = wc2;
66686d7f5d3SJohn Marino 			  }
66786d7f5d3SJohn Marino 			break;
66886d7f5d3SJohn Marino 		      }
66986d7f5d3SJohn Marino 		}
67086d7f5d3SJohn Marino 
67186d7f5d3SJohn Marino 	      if (wc == WEOF)
67286d7f5d3SJohn Marino 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
67386d7f5d3SJohn Marino 	      if (BE (pstr->valid_len, 0))
67486d7f5d3SJohn Marino 		{
67586d7f5d3SJohn Marino 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
67686d7f5d3SJohn Marino 		    pstr->wcs[wcs_idx] = WEOF;
67786d7f5d3SJohn Marino 		  if (pstr->mbs_allocated)
67886d7f5d3SJohn Marino 		    memset (pstr->mbs, -1, pstr->valid_len);
67986d7f5d3SJohn Marino 		}
68086d7f5d3SJohn Marino 	      pstr->valid_raw_len = pstr->valid_len;
68186d7f5d3SJohn Marino 	      pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
68286d7f5d3SJohn Marino 				    && IS_WIDE_WORD_CHAR (wc))
68386d7f5d3SJohn Marino 				   ? CONTEXT_WORD
68486d7f5d3SJohn Marino 				   : ((IS_WIDE_NEWLINE (wc)
68586d7f5d3SJohn Marino 				       && pstr->newline_anchor)
68686d7f5d3SJohn Marino 				      ? CONTEXT_NEWLINE : 0));
68786d7f5d3SJohn Marino 	    }
68886d7f5d3SJohn Marino 	  else
68986d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
69086d7f5d3SJohn Marino 	    {
69186d7f5d3SJohn Marino 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
69286d7f5d3SJohn Marino 	      if (pstr->trans)
69386d7f5d3SJohn Marino 		c = pstr->trans[c];
69486d7f5d3SJohn Marino 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
69586d7f5d3SJohn Marino 				   ? CONTEXT_WORD
69686d7f5d3SJohn Marino 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
69786d7f5d3SJohn Marino 				      ? CONTEXT_NEWLINE : 0));
69886d7f5d3SJohn Marino 	    }
69986d7f5d3SJohn Marino 	}
70086d7f5d3SJohn Marino       if (!BE (pstr->mbs_allocated, 0))
70186d7f5d3SJohn Marino 	pstr->mbs += offset;
70286d7f5d3SJohn Marino     }
70386d7f5d3SJohn Marino   pstr->raw_mbs_idx = idx;
70486d7f5d3SJohn Marino   pstr->len -= offset;
70586d7f5d3SJohn Marino   pstr->stop -= offset;
70686d7f5d3SJohn Marino 
70786d7f5d3SJohn Marino   /* Then build the buffers.  */
70886d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
70986d7f5d3SJohn Marino   if (pstr->mb_cur_max > 1)
71086d7f5d3SJohn Marino     {
71186d7f5d3SJohn Marino       if (pstr->icase)
71286d7f5d3SJohn Marino 	{
71386d7f5d3SJohn Marino 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
71486d7f5d3SJohn Marino 	  if (BE (ret != REG_NOERROR, 0))
71586d7f5d3SJohn Marino 	    return ret;
71686d7f5d3SJohn Marino 	}
71786d7f5d3SJohn Marino       else
71886d7f5d3SJohn Marino 	build_wcs_buffer (pstr);
71986d7f5d3SJohn Marino     }
72086d7f5d3SJohn Marino   else
72186d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
72286d7f5d3SJohn Marino   if (BE (pstr->mbs_allocated, 0))
72386d7f5d3SJohn Marino     {
72486d7f5d3SJohn Marino       if (pstr->icase)
72586d7f5d3SJohn Marino 	build_upper_buffer (pstr);
72686d7f5d3SJohn Marino       else if (pstr->trans != NULL)
72786d7f5d3SJohn Marino 	re_string_translate_buffer (pstr);
72886d7f5d3SJohn Marino     }
72986d7f5d3SJohn Marino   else
73086d7f5d3SJohn Marino     pstr->valid_len = pstr->len;
73186d7f5d3SJohn Marino 
73286d7f5d3SJohn Marino   pstr->cur_idx = 0;
73386d7f5d3SJohn Marino   return REG_NOERROR;
73486d7f5d3SJohn Marino }
73586d7f5d3SJohn Marino 
73686d7f5d3SJohn Marino static unsigned char
internal_function(pure)73786d7f5d3SJohn Marino internal_function __attribute ((pure))
73886d7f5d3SJohn Marino re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
73986d7f5d3SJohn Marino {
74086d7f5d3SJohn Marino   int ch;
74186d7f5d3SJohn Marino   Idx off;
74286d7f5d3SJohn Marino 
74386d7f5d3SJohn Marino   /* Handle the common (easiest) cases first.  */
74486d7f5d3SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
74586d7f5d3SJohn Marino     return re_string_peek_byte (pstr, idx);
74686d7f5d3SJohn Marino 
74786d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
74886d7f5d3SJohn Marino   if (pstr->mb_cur_max > 1
74986d7f5d3SJohn Marino       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
75086d7f5d3SJohn Marino     return re_string_peek_byte (pstr, idx);
75186d7f5d3SJohn Marino #endif
75286d7f5d3SJohn Marino 
75386d7f5d3SJohn Marino   off = pstr->cur_idx + idx;
75486d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
75586d7f5d3SJohn Marino   if (pstr->offsets_needed)
75686d7f5d3SJohn Marino     off = pstr->offsets[off];
75786d7f5d3SJohn Marino #endif
75886d7f5d3SJohn Marino 
75986d7f5d3SJohn Marino   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
76086d7f5d3SJohn Marino 
76186d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
76286d7f5d3SJohn Marino   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
76386d7f5d3SJohn Marino      this function returns CAPITAL LETTER I instead of first byte of
76486d7f5d3SJohn Marino      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
76586d7f5d3SJohn Marino      since peek_byte_case doesn't advance cur_idx in any way.  */
76686d7f5d3SJohn Marino   if (pstr->offsets_needed && !isascii (ch))
76786d7f5d3SJohn Marino     return re_string_peek_byte (pstr, idx);
76886d7f5d3SJohn Marino #endif
76986d7f5d3SJohn Marino 
77086d7f5d3SJohn Marino   return ch;
77186d7f5d3SJohn Marino }
77286d7f5d3SJohn Marino 
77386d7f5d3SJohn Marino static unsigned char
internal_function(pure)77486d7f5d3SJohn Marino internal_function __attribute ((pure))
77586d7f5d3SJohn Marino re_string_fetch_byte_case (re_string_t *pstr)
77686d7f5d3SJohn Marino {
77786d7f5d3SJohn Marino   if (BE (!pstr->mbs_allocated, 1))
77886d7f5d3SJohn Marino     return re_string_fetch_byte (pstr);
77986d7f5d3SJohn Marino 
78086d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
78186d7f5d3SJohn Marino   if (pstr->offsets_needed)
78286d7f5d3SJohn Marino     {
78386d7f5d3SJohn Marino       Idx off;
78486d7f5d3SJohn Marino       int ch;
78586d7f5d3SJohn Marino 
78686d7f5d3SJohn Marino       /* For tr_TR.UTF-8 [[:islower:]] there is
78786d7f5d3SJohn Marino 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
78886d7f5d3SJohn Marino 	 in that case the whole multi-byte character and return
78986d7f5d3SJohn Marino 	 the original letter.  On the other side, with
79086d7f5d3SJohn Marino 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
79186d7f5d3SJohn Marino 	 anything else would complicate things too much.  */
79286d7f5d3SJohn Marino 
79386d7f5d3SJohn Marino       if (!re_string_first_byte (pstr, pstr->cur_idx))
79486d7f5d3SJohn Marino 	return re_string_fetch_byte (pstr);
79586d7f5d3SJohn Marino 
79686d7f5d3SJohn Marino       off = pstr->offsets[pstr->cur_idx];
79786d7f5d3SJohn Marino       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
79886d7f5d3SJohn Marino 
79986d7f5d3SJohn Marino       if (! isascii (ch))
80086d7f5d3SJohn Marino 	return re_string_fetch_byte (pstr);
80186d7f5d3SJohn Marino 
80286d7f5d3SJohn Marino       re_string_skip_bytes (pstr,
80386d7f5d3SJohn Marino 			    re_string_char_size_at (pstr, pstr->cur_idx));
80486d7f5d3SJohn Marino       return ch;
80586d7f5d3SJohn Marino     }
80686d7f5d3SJohn Marino #endif
80786d7f5d3SJohn Marino 
80886d7f5d3SJohn Marino   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
80986d7f5d3SJohn Marino }
81086d7f5d3SJohn Marino 
81186d7f5d3SJohn Marino static void
81286d7f5d3SJohn Marino internal_function
re_string_destruct(re_string_t * pstr)81386d7f5d3SJohn Marino re_string_destruct (re_string_t *pstr)
81486d7f5d3SJohn Marino {
81586d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
81686d7f5d3SJohn Marino   re_free (pstr->wcs);
81786d7f5d3SJohn Marino   re_free (pstr->offsets);
81886d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N  */
81986d7f5d3SJohn Marino   if (pstr->mbs_allocated)
82086d7f5d3SJohn Marino     re_free (pstr->mbs);
82186d7f5d3SJohn Marino }
82286d7f5d3SJohn Marino 
82386d7f5d3SJohn Marino /* Return the context at IDX in INPUT.  */
82486d7f5d3SJohn Marino 
82586d7f5d3SJohn Marino static unsigned int
82686d7f5d3SJohn Marino internal_function
re_string_context_at(const re_string_t * input,Idx idx,int eflags)82786d7f5d3SJohn Marino re_string_context_at (const re_string_t *input, Idx idx, int eflags)
82886d7f5d3SJohn Marino {
82986d7f5d3SJohn Marino   int c;
83086d7f5d3SJohn Marino   if (BE (! REG_VALID_INDEX (idx), 0))
83186d7f5d3SJohn Marino     /* In this case, we use the value stored in input->tip_context,
83286d7f5d3SJohn Marino        since we can't know the character in input->mbs[-1] here.  */
83386d7f5d3SJohn Marino     return input->tip_context;
83486d7f5d3SJohn Marino   if (BE (idx == input->len, 0))
83586d7f5d3SJohn Marino     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
83686d7f5d3SJohn Marino 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
83786d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
83886d7f5d3SJohn Marino   if (input->mb_cur_max > 1)
83986d7f5d3SJohn Marino     {
84086d7f5d3SJohn Marino       wint_t wc;
84186d7f5d3SJohn Marino       Idx wc_idx = idx;
84286d7f5d3SJohn Marino       while(input->wcs[wc_idx] == WEOF)
84386d7f5d3SJohn Marino 	{
84486d7f5d3SJohn Marino #ifdef DEBUG
84586d7f5d3SJohn Marino 	  /* It must not happen.  */
84686d7f5d3SJohn Marino 	  assert (REG_VALID_INDEX (wc_idx));
84786d7f5d3SJohn Marino #endif
84886d7f5d3SJohn Marino 	  --wc_idx;
84986d7f5d3SJohn Marino 	  if (! REG_VALID_INDEX (wc_idx))
85086d7f5d3SJohn Marino 	    return input->tip_context;
85186d7f5d3SJohn Marino 	}
85286d7f5d3SJohn Marino       wc = input->wcs[wc_idx];
85386d7f5d3SJohn Marino       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
85486d7f5d3SJohn Marino 	return CONTEXT_WORD;
85586d7f5d3SJohn Marino       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
85686d7f5d3SJohn Marino 	      ? CONTEXT_NEWLINE : 0);
85786d7f5d3SJohn Marino     }
85886d7f5d3SJohn Marino   else
85986d7f5d3SJohn Marino #endif
86086d7f5d3SJohn Marino     {
86186d7f5d3SJohn Marino       c = re_string_byte_at (input, idx);
86286d7f5d3SJohn Marino       if (bitset_contain (input->word_char, c))
86386d7f5d3SJohn Marino 	return CONTEXT_WORD;
86486d7f5d3SJohn Marino       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
86586d7f5d3SJohn Marino     }
86686d7f5d3SJohn Marino }
86786d7f5d3SJohn Marino 
86886d7f5d3SJohn Marino /* Functions for set operation.  */
86986d7f5d3SJohn Marino 
87086d7f5d3SJohn Marino static reg_errcode_t
87186d7f5d3SJohn Marino internal_function
re_node_set_alloc(re_node_set * set,Idx size)87286d7f5d3SJohn Marino re_node_set_alloc (re_node_set *set, Idx size)
87386d7f5d3SJohn Marino {
87486d7f5d3SJohn Marino   set->alloc = size;
87586d7f5d3SJohn Marino   set->nelem = 0;
87686d7f5d3SJohn Marino   set->elems = re_xmalloc (Idx, size);
87786d7f5d3SJohn Marino   if (BE (set->elems == NULL, 0))
87886d7f5d3SJohn Marino     return REG_ESPACE;
87986d7f5d3SJohn Marino   return REG_NOERROR;
88086d7f5d3SJohn Marino }
88186d7f5d3SJohn Marino 
88286d7f5d3SJohn Marino static reg_errcode_t
88386d7f5d3SJohn Marino internal_function
re_node_set_init_1(re_node_set * set,Idx elem)88486d7f5d3SJohn Marino re_node_set_init_1 (re_node_set *set, Idx elem)
88586d7f5d3SJohn Marino {
88686d7f5d3SJohn Marino   set->alloc = 1;
88786d7f5d3SJohn Marino   set->nelem = 1;
88886d7f5d3SJohn Marino   set->elems = re_malloc (Idx, 1);
88986d7f5d3SJohn Marino   if (BE (set->elems == NULL, 0))
89086d7f5d3SJohn Marino     {
89186d7f5d3SJohn Marino       set->alloc = set->nelem = 0;
89286d7f5d3SJohn Marino       return REG_ESPACE;
89386d7f5d3SJohn Marino     }
89486d7f5d3SJohn Marino   set->elems[0] = elem;
89586d7f5d3SJohn Marino   return REG_NOERROR;
89686d7f5d3SJohn Marino }
89786d7f5d3SJohn Marino 
89886d7f5d3SJohn Marino static reg_errcode_t
89986d7f5d3SJohn Marino internal_function
re_node_set_init_2(re_node_set * set,Idx elem1,Idx elem2)90086d7f5d3SJohn Marino re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
90186d7f5d3SJohn Marino {
90286d7f5d3SJohn Marino   set->alloc = 2;
90386d7f5d3SJohn Marino   set->elems = re_malloc (Idx, 2);
90486d7f5d3SJohn Marino   if (BE (set->elems == NULL, 0))
90586d7f5d3SJohn Marino     return REG_ESPACE;
90686d7f5d3SJohn Marino   if (elem1 == elem2)
90786d7f5d3SJohn Marino     {
90886d7f5d3SJohn Marino       set->nelem = 1;
90986d7f5d3SJohn Marino       set->elems[0] = elem1;
91086d7f5d3SJohn Marino     }
91186d7f5d3SJohn Marino   else
91286d7f5d3SJohn Marino     {
91386d7f5d3SJohn Marino       set->nelem = 2;
91486d7f5d3SJohn Marino       if (elem1 < elem2)
91586d7f5d3SJohn Marino 	{
91686d7f5d3SJohn Marino 	  set->elems[0] = elem1;
91786d7f5d3SJohn Marino 	  set->elems[1] = elem2;
91886d7f5d3SJohn Marino 	}
91986d7f5d3SJohn Marino       else
92086d7f5d3SJohn Marino 	{
92186d7f5d3SJohn Marino 	  set->elems[0] = elem2;
92286d7f5d3SJohn Marino 	  set->elems[1] = elem1;
92386d7f5d3SJohn Marino 	}
92486d7f5d3SJohn Marino     }
92586d7f5d3SJohn Marino   return REG_NOERROR;
92686d7f5d3SJohn Marino }
92786d7f5d3SJohn Marino 
92886d7f5d3SJohn Marino static reg_errcode_t
92986d7f5d3SJohn Marino internal_function
re_node_set_init_copy(re_node_set * dest,const re_node_set * src)93086d7f5d3SJohn Marino re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
93186d7f5d3SJohn Marino {
93286d7f5d3SJohn Marino   dest->nelem = src->nelem;
93386d7f5d3SJohn Marino   if (src->nelem > 0)
93486d7f5d3SJohn Marino     {
93586d7f5d3SJohn Marino       dest->alloc = dest->nelem;
93686d7f5d3SJohn Marino       dest->elems = re_malloc (Idx, dest->alloc);
93786d7f5d3SJohn Marino       if (BE (dest->elems == NULL, 0))
93886d7f5d3SJohn Marino 	{
93986d7f5d3SJohn Marino 	  dest->alloc = dest->nelem = 0;
94086d7f5d3SJohn Marino 	  return REG_ESPACE;
94186d7f5d3SJohn Marino 	}
94286d7f5d3SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
94386d7f5d3SJohn Marino     }
94486d7f5d3SJohn Marino   else
94586d7f5d3SJohn Marino     re_node_set_init_empty (dest);
94686d7f5d3SJohn Marino   return REG_NOERROR;
94786d7f5d3SJohn Marino }
94886d7f5d3SJohn Marino 
94986d7f5d3SJohn Marino /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
95086d7f5d3SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
95186d7f5d3SJohn Marino    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
95286d7f5d3SJohn Marino 
95386d7f5d3SJohn Marino static reg_errcode_t
95486d7f5d3SJohn Marino internal_function
re_node_set_add_intersect(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)95586d7f5d3SJohn Marino re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
95686d7f5d3SJohn Marino 			   const re_node_set *src2)
95786d7f5d3SJohn Marino {
95886d7f5d3SJohn Marino   Idx i1, i2, is, id, delta, sbase;
95986d7f5d3SJohn Marino   if (src1->nelem == 0 || src2->nelem == 0)
96086d7f5d3SJohn Marino     return REG_NOERROR;
96186d7f5d3SJohn Marino 
96286d7f5d3SJohn Marino   /* We need dest->nelem + 2 * elems_in_intersection; this is a
96386d7f5d3SJohn Marino      conservative estimate.  */
96486d7f5d3SJohn Marino   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
96586d7f5d3SJohn Marino     {
96686d7f5d3SJohn Marino       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
96786d7f5d3SJohn Marino       Idx *new_elems;
96886d7f5d3SJohn Marino       if (sizeof (Idx) < 3
96986d7f5d3SJohn Marino 	  && (new_alloc < dest->alloc
97086d7f5d3SJohn Marino 	      || ((Idx) (src1->nelem + src2->nelem) < src1->nelem)))
97186d7f5d3SJohn Marino 	return REG_ESPACE;
97286d7f5d3SJohn Marino       new_elems = re_xrealloc (dest->elems, Idx, new_alloc);
97386d7f5d3SJohn Marino       if (BE (new_elems == NULL, 0))
97486d7f5d3SJohn Marino         return REG_ESPACE;
97586d7f5d3SJohn Marino       dest->elems = new_elems;
97686d7f5d3SJohn Marino       dest->alloc = new_alloc;
97786d7f5d3SJohn Marino     }
97886d7f5d3SJohn Marino 
97986d7f5d3SJohn Marino   /* Find the items in the intersection of SRC1 and SRC2, and copy
98086d7f5d3SJohn Marino      into the top of DEST those that are not already in DEST itself.  */
98186d7f5d3SJohn Marino   sbase = dest->nelem + src1->nelem + src2->nelem;
98286d7f5d3SJohn Marino   i1 = src1->nelem - 1;
98386d7f5d3SJohn Marino   i2 = src2->nelem - 1;
98486d7f5d3SJohn Marino   id = dest->nelem - 1;
98586d7f5d3SJohn Marino   for (;;)
98686d7f5d3SJohn Marino     {
98786d7f5d3SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
98886d7f5d3SJohn Marino 	{
98986d7f5d3SJohn Marino 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
99086d7f5d3SJohn Marino 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
99186d7f5d3SJohn Marino 	    --id;
99286d7f5d3SJohn Marino 
99386d7f5d3SJohn Marino           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
99486d7f5d3SJohn Marino             dest->elems[--sbase] = src1->elems[i1];
99586d7f5d3SJohn Marino 
99686d7f5d3SJohn Marino 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
99786d7f5d3SJohn Marino 	    break;
99886d7f5d3SJohn Marino 	}
99986d7f5d3SJohn Marino 
100086d7f5d3SJohn Marino       /* Lower the highest of the two items.  */
100186d7f5d3SJohn Marino       else if (src1->elems[i1] < src2->elems[i2])
100286d7f5d3SJohn Marino 	{
100386d7f5d3SJohn Marino 	  if (! REG_VALID_INDEX (--i2))
100486d7f5d3SJohn Marino 	    break;
100586d7f5d3SJohn Marino 	}
100686d7f5d3SJohn Marino       else
100786d7f5d3SJohn Marino 	{
100886d7f5d3SJohn Marino 	  if (! REG_VALID_INDEX (--i1))
100986d7f5d3SJohn Marino 	    break;
101086d7f5d3SJohn Marino 	}
101186d7f5d3SJohn Marino     }
101286d7f5d3SJohn Marino 
101386d7f5d3SJohn Marino   id = dest->nelem - 1;
101486d7f5d3SJohn Marino   is = dest->nelem + src1->nelem + src2->nelem - 1;
101586d7f5d3SJohn Marino   delta = is - sbase + 1;
101686d7f5d3SJohn Marino 
101786d7f5d3SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
101886d7f5d3SJohn Marino      DEST elements are already in place; this is more or
101986d7f5d3SJohn Marino      less the same loop that is in re_node_set_merge.  */
102086d7f5d3SJohn Marino   dest->nelem += delta;
102186d7f5d3SJohn Marino   if (delta > 0 && REG_VALID_INDEX (id))
102286d7f5d3SJohn Marino     for (;;)
102386d7f5d3SJohn Marino       {
102486d7f5d3SJohn Marino         if (dest->elems[is] > dest->elems[id])
102586d7f5d3SJohn Marino           {
102686d7f5d3SJohn Marino             /* Copy from the top.  */
102786d7f5d3SJohn Marino             dest->elems[id + delta--] = dest->elems[is--];
102886d7f5d3SJohn Marino             if (delta == 0)
102986d7f5d3SJohn Marino               break;
103086d7f5d3SJohn Marino           }
103186d7f5d3SJohn Marino         else
103286d7f5d3SJohn Marino           {
103386d7f5d3SJohn Marino             /* Slide from the bottom.  */
103486d7f5d3SJohn Marino             dest->elems[id + delta] = dest->elems[id];
103586d7f5d3SJohn Marino             if (! REG_VALID_INDEX (--id))
103686d7f5d3SJohn Marino               break;
103786d7f5d3SJohn Marino           }
103886d7f5d3SJohn Marino       }
103986d7f5d3SJohn Marino 
104086d7f5d3SJohn Marino   /* Copy remaining SRC elements.  */
104186d7f5d3SJohn Marino   memcpy (dest->elems, dest->elems + sbase, delta * sizeof dest->elems[0]);
104286d7f5d3SJohn Marino 
104386d7f5d3SJohn Marino   return REG_NOERROR;
104486d7f5d3SJohn Marino }
104586d7f5d3SJohn Marino 
104686d7f5d3SJohn Marino /* Calculate the union set of the sets SRC1 and SRC2. And store it to
104786d7f5d3SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
104886d7f5d3SJohn Marino 
104986d7f5d3SJohn Marino static reg_errcode_t
105086d7f5d3SJohn Marino internal_function
re_node_set_init_union(re_node_set * dest,const re_node_set * src1,const re_node_set * src2)105186d7f5d3SJohn Marino re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
105286d7f5d3SJohn Marino 			const re_node_set *src2)
105386d7f5d3SJohn Marino {
105486d7f5d3SJohn Marino   Idx i1, i2, id;
105586d7f5d3SJohn Marino   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
105686d7f5d3SJohn Marino     {
105786d7f5d3SJohn Marino       dest->alloc = src1->nelem + src2->nelem;
105886d7f5d3SJohn Marino       if (sizeof (Idx) < 2 && dest->alloc < src1->nelem)
105986d7f5d3SJohn Marino 	return REG_ESPACE;
106086d7f5d3SJohn Marino       dest->elems = re_xmalloc (Idx, dest->alloc);
106186d7f5d3SJohn Marino       if (BE (dest->elems == NULL, 0))
106286d7f5d3SJohn Marino 	return REG_ESPACE;
106386d7f5d3SJohn Marino     }
106486d7f5d3SJohn Marino   else
106586d7f5d3SJohn Marino     {
106686d7f5d3SJohn Marino       if (src1 != NULL && src1->nelem > 0)
106786d7f5d3SJohn Marino 	return re_node_set_init_copy (dest, src1);
106886d7f5d3SJohn Marino       else if (src2 != NULL && src2->nelem > 0)
106986d7f5d3SJohn Marino 	return re_node_set_init_copy (dest, src2);
107086d7f5d3SJohn Marino       else
107186d7f5d3SJohn Marino 	re_node_set_init_empty (dest);
107286d7f5d3SJohn Marino       return REG_NOERROR;
107386d7f5d3SJohn Marino     }
107486d7f5d3SJohn Marino   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
107586d7f5d3SJohn Marino     {
107686d7f5d3SJohn Marino       if (src1->elems[i1] > src2->elems[i2])
107786d7f5d3SJohn Marino 	{
107886d7f5d3SJohn Marino 	  dest->elems[id++] = src2->elems[i2++];
107986d7f5d3SJohn Marino 	  continue;
108086d7f5d3SJohn Marino 	}
108186d7f5d3SJohn Marino       if (src1->elems[i1] == src2->elems[i2])
108286d7f5d3SJohn Marino 	++i2;
108386d7f5d3SJohn Marino       dest->elems[id++] = src1->elems[i1++];
108486d7f5d3SJohn Marino     }
108586d7f5d3SJohn Marino   if (i1 < src1->nelem)
108686d7f5d3SJohn Marino     {
108786d7f5d3SJohn Marino       memcpy (dest->elems + id, src1->elems + i1,
108886d7f5d3SJohn Marino 	     (src1->nelem - i1) * sizeof dest->elems[0]);
108986d7f5d3SJohn Marino       id += src1->nelem - i1;
109086d7f5d3SJohn Marino     }
109186d7f5d3SJohn Marino   else if (i2 < src2->nelem)
109286d7f5d3SJohn Marino     {
109386d7f5d3SJohn Marino       memcpy (dest->elems + id, src2->elems + i2,
109486d7f5d3SJohn Marino 	     (src2->nelem - i2) * sizeof dest->elems[0]);
109586d7f5d3SJohn Marino       id += src2->nelem - i2;
109686d7f5d3SJohn Marino     }
109786d7f5d3SJohn Marino   dest->nelem = id;
109886d7f5d3SJohn Marino   return REG_NOERROR;
109986d7f5d3SJohn Marino }
110086d7f5d3SJohn Marino 
110186d7f5d3SJohn Marino /* Calculate the union set of the sets DEST and SRC. And store it to
110286d7f5d3SJohn Marino    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
110386d7f5d3SJohn Marino 
110486d7f5d3SJohn Marino static reg_errcode_t
110586d7f5d3SJohn Marino internal_function
re_node_set_merge(re_node_set * dest,const re_node_set * src)110686d7f5d3SJohn Marino re_node_set_merge (re_node_set *dest, const re_node_set *src)
110786d7f5d3SJohn Marino {
110886d7f5d3SJohn Marino   Idx is, id, sbase, delta;
110986d7f5d3SJohn Marino   if (src == NULL || src->nelem == 0)
111086d7f5d3SJohn Marino     return REG_NOERROR;
111186d7f5d3SJohn Marino   if (sizeof (Idx) < 3
111286d7f5d3SJohn Marino       && ((Idx) (2 * src->nelem) < src->nelem
111386d7f5d3SJohn Marino 	  || (Idx) (2 * src->nelem + dest->nelem) < dest->nelem))
111486d7f5d3SJohn Marino     return REG_ESPACE;
111586d7f5d3SJohn Marino   if (dest->alloc < 2 * src->nelem + dest->nelem)
111686d7f5d3SJohn Marino     {
111786d7f5d3SJohn Marino       Idx new_alloc = src->nelem + dest->alloc;
111886d7f5d3SJohn Marino       Idx *new_buffer;
111986d7f5d3SJohn Marino       if (sizeof (Idx) < 4 && new_alloc < dest->alloc)
112086d7f5d3SJohn Marino 	return REG_ESPACE;
112186d7f5d3SJohn Marino       new_buffer = re_x2realloc (dest->elems, Idx, &new_alloc);
112286d7f5d3SJohn Marino       if (BE (new_buffer == NULL, 0))
112386d7f5d3SJohn Marino 	return REG_ESPACE;
112486d7f5d3SJohn Marino       dest->elems = new_buffer;
112586d7f5d3SJohn Marino       dest->alloc = new_alloc;
112686d7f5d3SJohn Marino     }
112786d7f5d3SJohn Marino 
112886d7f5d3SJohn Marino   if (BE (dest->nelem == 0, 0))
112986d7f5d3SJohn Marino     {
113086d7f5d3SJohn Marino       dest->nelem = src->nelem;
113186d7f5d3SJohn Marino       memcpy (dest->elems, src->elems, src->nelem * sizeof dest->elems[0]);
113286d7f5d3SJohn Marino       return REG_NOERROR;
113386d7f5d3SJohn Marino     }
113486d7f5d3SJohn Marino 
113586d7f5d3SJohn Marino   /* Copy into the top of DEST the items of SRC that are not
113686d7f5d3SJohn Marino      found in DEST.  Maybe we could binary search in DEST?  */
113786d7f5d3SJohn Marino   for (sbase = dest->nelem + 2 * src->nelem,
113886d7f5d3SJohn Marino        is = src->nelem - 1, id = dest->nelem - 1;
113986d7f5d3SJohn Marino        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
114086d7f5d3SJohn Marino     {
114186d7f5d3SJohn Marino       if (dest->elems[id] == src->elems[is])
114286d7f5d3SJohn Marino         is--, id--;
114386d7f5d3SJohn Marino       else if (dest->elems[id] < src->elems[is])
114486d7f5d3SJohn Marino         dest->elems[--sbase] = src->elems[is--];
114586d7f5d3SJohn Marino       else /* if (dest->elems[id] > src->elems[is]) */
114686d7f5d3SJohn Marino         --id;
114786d7f5d3SJohn Marino     }
114886d7f5d3SJohn Marino 
114986d7f5d3SJohn Marino   if (REG_VALID_INDEX (is))
115086d7f5d3SJohn Marino     {
115186d7f5d3SJohn Marino       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
115286d7f5d3SJohn Marino       sbase -= is + 1;
115386d7f5d3SJohn Marino       memcpy (dest->elems + sbase, src->elems,
115486d7f5d3SJohn Marino 	      (is + 1) * sizeof dest->elems[0]);
115586d7f5d3SJohn Marino     }
115686d7f5d3SJohn Marino 
115786d7f5d3SJohn Marino   id = dest->nelem - 1;
115886d7f5d3SJohn Marino   is = dest->nelem + 2 * src->nelem - 1;
115986d7f5d3SJohn Marino   delta = is - sbase + 1;
116086d7f5d3SJohn Marino   if (delta == 0)
116186d7f5d3SJohn Marino     return REG_NOERROR;
116286d7f5d3SJohn Marino 
116386d7f5d3SJohn Marino   /* Now copy.  When DELTA becomes zero, the remaining
116486d7f5d3SJohn Marino      DEST elements are already in place.  */
116586d7f5d3SJohn Marino   dest->nelem += delta;
116686d7f5d3SJohn Marino   for (;;)
116786d7f5d3SJohn Marino     {
116886d7f5d3SJohn Marino       if (dest->elems[is] > dest->elems[id])
116986d7f5d3SJohn Marino         {
117086d7f5d3SJohn Marino 	  /* Copy from the top.  */
117186d7f5d3SJohn Marino           dest->elems[id + delta--] = dest->elems[is--];
117286d7f5d3SJohn Marino 	  if (delta == 0)
117386d7f5d3SJohn Marino 	    break;
117486d7f5d3SJohn Marino 	}
117586d7f5d3SJohn Marino       else
117686d7f5d3SJohn Marino         {
117786d7f5d3SJohn Marino           /* Slide from the bottom.  */
117886d7f5d3SJohn Marino           dest->elems[id + delta] = dest->elems[id];
117986d7f5d3SJohn Marino 	  if (! REG_VALID_INDEX (--id))
118086d7f5d3SJohn Marino 	    {
118186d7f5d3SJohn Marino 	      /* Copy remaining SRC elements.  */
118286d7f5d3SJohn Marino 	      memcpy (dest->elems, dest->elems + sbase,
118386d7f5d3SJohn Marino 	              delta * sizeof dest->elems[0]);
118486d7f5d3SJohn Marino 	      break;
118586d7f5d3SJohn Marino 	    }
118686d7f5d3SJohn Marino 	}
118786d7f5d3SJohn Marino     }
118886d7f5d3SJohn Marino 
118986d7f5d3SJohn Marino   return REG_NOERROR;
119086d7f5d3SJohn Marino }
119186d7f5d3SJohn Marino 
119286d7f5d3SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
119386d7f5d3SJohn Marino    SET should not already have ELEM.
119486d7f5d3SJohn Marino    Return true if successful.  */
119586d7f5d3SJohn Marino 
119686d7f5d3SJohn Marino static bool
119786d7f5d3SJohn Marino internal_function
re_node_set_insert(re_node_set * set,Idx elem)119886d7f5d3SJohn Marino re_node_set_insert (re_node_set *set, Idx elem)
119986d7f5d3SJohn Marino {
120086d7f5d3SJohn Marino   Idx idx;
120186d7f5d3SJohn Marino   /* In case the set is empty.  */
120286d7f5d3SJohn Marino   if (set->alloc == 0)
120386d7f5d3SJohn Marino     return re_node_set_init_1 (set, elem) == REG_NOERROR;
120486d7f5d3SJohn Marino 
120586d7f5d3SJohn Marino   if (BE (set->nelem, 0) == 0)
120686d7f5d3SJohn Marino     {
120786d7f5d3SJohn Marino       /* We already guaranteed above that set->alloc != 0.  */
120886d7f5d3SJohn Marino       set->elems[0] = elem;
120986d7f5d3SJohn Marino       ++set->nelem;
121086d7f5d3SJohn Marino       return true;
121186d7f5d3SJohn Marino     }
121286d7f5d3SJohn Marino 
121386d7f5d3SJohn Marino   /* Realloc if we need.  */
121486d7f5d3SJohn Marino   if (set->alloc == set->nelem)
121586d7f5d3SJohn Marino     {
121686d7f5d3SJohn Marino       Idx *new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
121786d7f5d3SJohn Marino       if (BE (new_elems == NULL, 0))
121886d7f5d3SJohn Marino 	return false;
121986d7f5d3SJohn Marino       set->elems = new_elems;
122086d7f5d3SJohn Marino     }
122186d7f5d3SJohn Marino 
122286d7f5d3SJohn Marino   /* Move the elements which follows the new element.  Test the
122386d7f5d3SJohn Marino      first element separately to skip a check in the inner loop.  */
122486d7f5d3SJohn Marino   if (elem < set->elems[0])
122586d7f5d3SJohn Marino     {
122686d7f5d3SJohn Marino       idx = 0;
122786d7f5d3SJohn Marino       for (idx = set->nelem; idx > 0; idx--)
122886d7f5d3SJohn Marino         set->elems[idx] = set->elems[idx - 1];
122986d7f5d3SJohn Marino     }
123086d7f5d3SJohn Marino   else
123186d7f5d3SJohn Marino     {
123286d7f5d3SJohn Marino       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
123386d7f5d3SJohn Marino         set->elems[idx] = set->elems[idx - 1];
123486d7f5d3SJohn Marino     }
123586d7f5d3SJohn Marino 
123686d7f5d3SJohn Marino   /* Insert the new element.  */
123786d7f5d3SJohn Marino   set->elems[idx] = elem;
123886d7f5d3SJohn Marino   ++set->nelem;
123986d7f5d3SJohn Marino   return true;
124086d7f5d3SJohn Marino }
124186d7f5d3SJohn Marino 
124286d7f5d3SJohn Marino /* Insert the new element ELEM to the re_node_set* SET.
124386d7f5d3SJohn Marino    SET should not already have any element greater than or equal to ELEM.
124486d7f5d3SJohn Marino    Return true if successful.  */
124586d7f5d3SJohn Marino 
124686d7f5d3SJohn Marino static bool
124786d7f5d3SJohn Marino internal_function
re_node_set_insert_last(re_node_set * set,Idx elem)124886d7f5d3SJohn Marino re_node_set_insert_last (re_node_set *set, Idx elem)
124986d7f5d3SJohn Marino {
125086d7f5d3SJohn Marino   /* Realloc if we need.  */
125186d7f5d3SJohn Marino   if (set->alloc == set->nelem)
125286d7f5d3SJohn Marino     {
125386d7f5d3SJohn Marino       Idx *new_elems;
125486d7f5d3SJohn Marino       new_elems = re_x2realloc (set->elems, Idx, &set->alloc);
125586d7f5d3SJohn Marino       if (BE (new_elems == NULL, 0))
125686d7f5d3SJohn Marino 	return false;
125786d7f5d3SJohn Marino       set->elems = new_elems;
125886d7f5d3SJohn Marino     }
125986d7f5d3SJohn Marino 
126086d7f5d3SJohn Marino   /* Insert the new element.  */
126186d7f5d3SJohn Marino   set->elems[set->nelem++] = elem;
126286d7f5d3SJohn Marino   return true;
126386d7f5d3SJohn Marino }
126486d7f5d3SJohn Marino 
126586d7f5d3SJohn Marino /* Compare two node sets SET1 and SET2.
126686d7f5d3SJohn Marino    Return true if SET1 and SET2 are equivalent.  */
126786d7f5d3SJohn Marino 
126886d7f5d3SJohn Marino static bool
internal_function(pure)126986d7f5d3SJohn Marino internal_function __attribute ((pure))
127086d7f5d3SJohn Marino re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
127186d7f5d3SJohn Marino {
127286d7f5d3SJohn Marino   Idx i;
127386d7f5d3SJohn Marino   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
127486d7f5d3SJohn Marino     return false;
127586d7f5d3SJohn Marino   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
127686d7f5d3SJohn Marino     if (set1->elems[i] != set2->elems[i])
127786d7f5d3SJohn Marino       return false;
127886d7f5d3SJohn Marino   return true;
127986d7f5d3SJohn Marino }
128086d7f5d3SJohn Marino 
128186d7f5d3SJohn Marino /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
128286d7f5d3SJohn Marino 
128386d7f5d3SJohn Marino static Idx
internal_function(pure)128486d7f5d3SJohn Marino internal_function __attribute ((pure))
128586d7f5d3SJohn Marino re_node_set_contains (const re_node_set *set, Idx elem)
128686d7f5d3SJohn Marino {
128786d7f5d3SJohn Marino   __re_size_t idx, right, mid;
128886d7f5d3SJohn Marino   if (! REG_VALID_NONZERO_INDEX (set->nelem))
128986d7f5d3SJohn Marino     return 0;
129086d7f5d3SJohn Marino 
129186d7f5d3SJohn Marino   /* Binary search the element.  */
129286d7f5d3SJohn Marino   idx = 0;
129386d7f5d3SJohn Marino   right = set->nelem - 1;
129486d7f5d3SJohn Marino   while (idx < right)
129586d7f5d3SJohn Marino     {
129686d7f5d3SJohn Marino       mid = (idx + right) / 2;
129786d7f5d3SJohn Marino       if (set->elems[mid] < elem)
129886d7f5d3SJohn Marino 	idx = mid + 1;
129986d7f5d3SJohn Marino       else
130086d7f5d3SJohn Marino 	right = mid;
130186d7f5d3SJohn Marino     }
130286d7f5d3SJohn Marino   return set->elems[idx] == elem ? idx + 1 : 0;
130386d7f5d3SJohn Marino }
130486d7f5d3SJohn Marino 
130586d7f5d3SJohn Marino static void
130686d7f5d3SJohn Marino internal_function
re_node_set_remove_at(re_node_set * set,Idx idx)130786d7f5d3SJohn Marino re_node_set_remove_at (re_node_set *set, Idx idx)
130886d7f5d3SJohn Marino {
130986d7f5d3SJohn Marino   if (idx < 0 || idx >= set->nelem)
131086d7f5d3SJohn Marino     return;
131186d7f5d3SJohn Marino   --set->nelem;
131286d7f5d3SJohn Marino   for (; idx < set->nelem; idx++)
131386d7f5d3SJohn Marino     set->elems[idx] = set->elems[idx + 1];
131486d7f5d3SJohn Marino }
131586d7f5d3SJohn Marino 
131686d7f5d3SJohn Marino 
131786d7f5d3SJohn Marino /* Add the token TOKEN to dfa->nodes, and return the index of the token.
131886d7f5d3SJohn Marino    Or return REG_MISSING if an error occurred.  */
131986d7f5d3SJohn Marino 
132086d7f5d3SJohn Marino static Idx
132186d7f5d3SJohn Marino internal_function
re_dfa_add_node(re_dfa_t * dfa,re_token_t token)132286d7f5d3SJohn Marino re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
132386d7f5d3SJohn Marino {
132486d7f5d3SJohn Marino   int type = token.type;
132586d7f5d3SJohn Marino   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
132686d7f5d3SJohn Marino     {
132786d7f5d3SJohn Marino       Idx new_nodes_alloc = dfa->nodes_alloc;
132886d7f5d3SJohn Marino       Idx *new_nexts, *new_indices;
132986d7f5d3SJohn Marino       re_node_set *new_edests, *new_eclosures;
133086d7f5d3SJohn Marino 
133186d7f5d3SJohn Marino       re_token_t *new_nodes = re_x2realloc (dfa->nodes, re_token_t,
133286d7f5d3SJohn Marino 					    &new_nodes_alloc);
133386d7f5d3SJohn Marino       if (BE (new_nodes == NULL, 0))
133486d7f5d3SJohn Marino 	return REG_MISSING;
133586d7f5d3SJohn Marino       dfa->nodes = new_nodes;
133686d7f5d3SJohn Marino       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
133786d7f5d3SJohn Marino       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
133886d7f5d3SJohn Marino       new_edests = re_xrealloc (dfa->edests, re_node_set, new_nodes_alloc);
133986d7f5d3SJohn Marino       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
134086d7f5d3SJohn Marino       if (BE (new_nexts == NULL || new_indices == NULL
134186d7f5d3SJohn Marino 	      || new_edests == NULL || new_eclosures == NULL, 0))
134286d7f5d3SJohn Marino 	return REG_MISSING;
134386d7f5d3SJohn Marino       dfa->nexts = new_nexts;
134486d7f5d3SJohn Marino       dfa->org_indices = new_indices;
134586d7f5d3SJohn Marino       dfa->edests = new_edests;
134686d7f5d3SJohn Marino       dfa->eclosures = new_eclosures;
134786d7f5d3SJohn Marino       dfa->nodes_alloc = new_nodes_alloc;
134886d7f5d3SJohn Marino     }
134986d7f5d3SJohn Marino   dfa->nodes[dfa->nodes_len] = token;
135086d7f5d3SJohn Marino   dfa->nodes[dfa->nodes_len].constraint = 0;
135186d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
135286d7f5d3SJohn Marino   dfa->nodes[dfa->nodes_len].accept_mb =
135386d7f5d3SJohn Marino     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
135486d7f5d3SJohn Marino #endif
135586d7f5d3SJohn Marino   dfa->nexts[dfa->nodes_len] = REG_MISSING;
135686d7f5d3SJohn Marino   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
135786d7f5d3SJohn Marino   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
135886d7f5d3SJohn Marino   return dfa->nodes_len++;
135986d7f5d3SJohn Marino }
136086d7f5d3SJohn Marino 
136186d7f5d3SJohn Marino static inline re_hashval_t
136286d7f5d3SJohn Marino internal_function
calc_state_hash(const re_node_set * nodes,unsigned int context)136386d7f5d3SJohn Marino calc_state_hash (const re_node_set *nodes, unsigned int context)
136486d7f5d3SJohn Marino {
136586d7f5d3SJohn Marino   re_hashval_t hash = nodes->nelem + context;
136686d7f5d3SJohn Marino   Idx i;
136786d7f5d3SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
136886d7f5d3SJohn Marino     hash += nodes->elems[i];
136986d7f5d3SJohn Marino   return hash;
137086d7f5d3SJohn Marino }
137186d7f5d3SJohn Marino 
137286d7f5d3SJohn Marino /* Search for the state whose node_set is equivalent to NODES.
137386d7f5d3SJohn Marino    Return the pointer to the state, if we found it in the DFA.
137486d7f5d3SJohn Marino    Otherwise create the new one and return it.  In case of an error
137586d7f5d3SJohn Marino    return NULL and set the error code in ERR.
137686d7f5d3SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
137786d7f5d3SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
137886d7f5d3SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
137986d7f5d3SJohn Marino 	   optimization.  */
138086d7f5d3SJohn Marino 
138186d7f5d3SJohn Marino static re_dfastate_t*
138286d7f5d3SJohn Marino internal_function
re_acquire_state(reg_errcode_t * err,re_dfa_t * dfa,const re_node_set * nodes)138386d7f5d3SJohn Marino re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, const re_node_set *nodes)
138486d7f5d3SJohn Marino {
138586d7f5d3SJohn Marino   re_hashval_t hash;
138686d7f5d3SJohn Marino   re_dfastate_t *new_state;
138786d7f5d3SJohn Marino   struct re_state_table_entry *spot;
138886d7f5d3SJohn Marino   Idx i;
138986d7f5d3SJohn Marino #ifdef lint
139086d7f5d3SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
139186d7f5d3SJohn Marino   *err = REG_NOERROR;
139286d7f5d3SJohn Marino #endif
139386d7f5d3SJohn Marino   if (BE (nodes->nelem == 0, 0))
139486d7f5d3SJohn Marino     {
139586d7f5d3SJohn Marino       *err = REG_NOERROR;
139686d7f5d3SJohn Marino       return NULL;
139786d7f5d3SJohn Marino     }
139886d7f5d3SJohn Marino   hash = calc_state_hash (nodes, 0);
139986d7f5d3SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
140086d7f5d3SJohn Marino 
140186d7f5d3SJohn Marino   for (i = 0 ; i < spot->num ; i++)
140286d7f5d3SJohn Marino     {
140386d7f5d3SJohn Marino       re_dfastate_t *state = spot->array[i];
140486d7f5d3SJohn Marino       if (hash != state->hash)
140586d7f5d3SJohn Marino 	continue;
140686d7f5d3SJohn Marino       if (re_node_set_compare (&state->nodes, nodes))
140786d7f5d3SJohn Marino 	return state;
140886d7f5d3SJohn Marino     }
140986d7f5d3SJohn Marino 
141086d7f5d3SJohn Marino   /* There are no appropriate state in the dfa, create the new one.  */
141186d7f5d3SJohn Marino   new_state = create_ci_newstate (dfa, nodes, hash);
141286d7f5d3SJohn Marino   if (BE (new_state != NULL, 1))
141386d7f5d3SJohn Marino     return new_state;
141486d7f5d3SJohn Marino   else
141586d7f5d3SJohn Marino     {
141686d7f5d3SJohn Marino       *err = REG_ESPACE;
141786d7f5d3SJohn Marino       return NULL;
141886d7f5d3SJohn Marino     }
141986d7f5d3SJohn Marino }
142086d7f5d3SJohn Marino 
142186d7f5d3SJohn Marino /* Search for the state whose node_set is equivalent to NODES and
142286d7f5d3SJohn Marino    whose context is equivalent to CONTEXT.
142386d7f5d3SJohn Marino    Return the pointer to the state, if we found it in the DFA.
142486d7f5d3SJohn Marino    Otherwise create the new one and return it.  In case of an error
142586d7f5d3SJohn Marino    return NULL and set the error code in ERR.
142686d7f5d3SJohn Marino    Note: - We assume NULL as the invalid state, then it is possible that
142786d7f5d3SJohn Marino 	   return value is NULL and ERR is REG_NOERROR.
142886d7f5d3SJohn Marino 	 - We never return non-NULL value in case of any errors, it is for
142986d7f5d3SJohn Marino 	   optimization.  */
143086d7f5d3SJohn Marino 
143186d7f5d3SJohn Marino static re_dfastate_t*
143286d7f5d3SJohn Marino internal_function
re_acquire_state_context(reg_errcode_t * err,re_dfa_t * dfa,const re_node_set * nodes,unsigned int context)143386d7f5d3SJohn Marino re_acquire_state_context (reg_errcode_t *err, re_dfa_t *dfa,
143486d7f5d3SJohn Marino 			  const re_node_set *nodes, unsigned int context)
143586d7f5d3SJohn Marino {
143686d7f5d3SJohn Marino   re_hashval_t hash;
143786d7f5d3SJohn Marino   re_dfastate_t *new_state;
143886d7f5d3SJohn Marino   struct re_state_table_entry *spot;
143986d7f5d3SJohn Marino   Idx i;
144086d7f5d3SJohn Marino #ifdef lint
144186d7f5d3SJohn Marino   /* Suppress bogus uninitialized-variable warnings.  */
144286d7f5d3SJohn Marino   *err = REG_NOERROR;
144386d7f5d3SJohn Marino #endif
144486d7f5d3SJohn Marino   if (nodes->nelem == 0)
144586d7f5d3SJohn Marino     {
144686d7f5d3SJohn Marino       *err = REG_NOERROR;
144786d7f5d3SJohn Marino       return NULL;
144886d7f5d3SJohn Marino     }
144986d7f5d3SJohn Marino   hash = calc_state_hash (nodes, context);
145086d7f5d3SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
145186d7f5d3SJohn Marino 
145286d7f5d3SJohn Marino   for (i = 0 ; i < spot->num ; i++)
145386d7f5d3SJohn Marino     {
145486d7f5d3SJohn Marino       re_dfastate_t *state = spot->array[i];
145586d7f5d3SJohn Marino       if (state->hash == hash
145686d7f5d3SJohn Marino 	  && state->context == context
145786d7f5d3SJohn Marino 	  && re_node_set_compare (state->entrance_nodes, nodes))
145886d7f5d3SJohn Marino 	return state;
145986d7f5d3SJohn Marino     }
146086d7f5d3SJohn Marino   /* There are no appropriate state in `dfa', create the new one.  */
146186d7f5d3SJohn Marino   new_state = create_cd_newstate (dfa, nodes, context, hash);
146286d7f5d3SJohn Marino   if (BE (new_state != NULL, 1))
146386d7f5d3SJohn Marino     return new_state;
146486d7f5d3SJohn Marino   else
146586d7f5d3SJohn Marino     {
146686d7f5d3SJohn Marino       *err = REG_ESPACE;
146786d7f5d3SJohn Marino       return NULL;
146886d7f5d3SJohn Marino     }
146986d7f5d3SJohn Marino }
147086d7f5d3SJohn Marino 
147186d7f5d3SJohn Marino /* Finish initialization of the new state NEWSTATE, and using its hash value
147286d7f5d3SJohn Marino    HASH put in the appropriate bucket of DFA's state table.  Return value
147386d7f5d3SJohn Marino    indicates the error code if failed.  */
147486d7f5d3SJohn Marino 
147586d7f5d3SJohn Marino static reg_errcode_t
147686d7f5d3SJohn Marino internal_function
register_state(const re_dfa_t * dfa,re_dfastate_t * newstate,re_hashval_t hash)147786d7f5d3SJohn Marino register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, re_hashval_t hash)
147886d7f5d3SJohn Marino {
147986d7f5d3SJohn Marino   struct re_state_table_entry *spot;
148086d7f5d3SJohn Marino   reg_errcode_t err;
148186d7f5d3SJohn Marino   Idx i;
148286d7f5d3SJohn Marino 
148386d7f5d3SJohn Marino   newstate->hash = hash;
148486d7f5d3SJohn Marino   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
148586d7f5d3SJohn Marino   if (BE (err != REG_NOERROR, 0))
148686d7f5d3SJohn Marino     return REG_ESPACE;
148786d7f5d3SJohn Marino   for (i = 0; i < newstate->nodes.nelem; i++)
148886d7f5d3SJohn Marino     {
148986d7f5d3SJohn Marino       Idx elem = newstate->nodes.elems[i];
149086d7f5d3SJohn Marino       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
149186d7f5d3SJohn Marino 	{
149286d7f5d3SJohn Marino 	  bool ok = re_node_set_insert_last (&newstate->non_eps_nodes, elem);
149386d7f5d3SJohn Marino 	  if (BE (! ok, 0))
149486d7f5d3SJohn Marino 	    return REG_ESPACE;
149586d7f5d3SJohn Marino 	}
149686d7f5d3SJohn Marino     }
149786d7f5d3SJohn Marino 
149886d7f5d3SJohn Marino   spot = dfa->state_table + (hash & dfa->state_hash_mask);
149986d7f5d3SJohn Marino   if (BE (spot->alloc <= spot->num, 0))
150086d7f5d3SJohn Marino     {
150186d7f5d3SJohn Marino       Idx new_alloc = spot->num;
150286d7f5d3SJohn Marino       re_dfastate_t **new_array = re_x2realloc (spot->array, re_dfastate_t *,
150386d7f5d3SJohn Marino 						&new_alloc);
150486d7f5d3SJohn Marino       if (BE (new_array == NULL, 0))
150586d7f5d3SJohn Marino 	return REG_ESPACE;
150686d7f5d3SJohn Marino       spot->array = new_array;
150786d7f5d3SJohn Marino       spot->alloc = new_alloc;
150886d7f5d3SJohn Marino     }
150986d7f5d3SJohn Marino   spot->array[spot->num++] = newstate;
151086d7f5d3SJohn Marino   return REG_NOERROR;
151186d7f5d3SJohn Marino }
151286d7f5d3SJohn Marino 
151386d7f5d3SJohn Marino /* Create the new state which is independ of contexts.
151486d7f5d3SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
151586d7f5d3SJohn Marino 
151686d7f5d3SJohn Marino static re_dfastate_t *
151786d7f5d3SJohn Marino internal_function
create_ci_newstate(const re_dfa_t * dfa,const re_node_set * nodes,re_hashval_t hash)151886d7f5d3SJohn Marino create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
151986d7f5d3SJohn Marino 		    re_hashval_t hash)
152086d7f5d3SJohn Marino {
152186d7f5d3SJohn Marino   Idx i;
152286d7f5d3SJohn Marino   reg_errcode_t err;
152386d7f5d3SJohn Marino   re_dfastate_t *newstate;
152486d7f5d3SJohn Marino 
152586d7f5d3SJohn Marino   newstate = re_calloc (re_dfastate_t, 1);
152686d7f5d3SJohn Marino   if (BE (newstate == NULL, 0))
152786d7f5d3SJohn Marino     return NULL;
152886d7f5d3SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
152986d7f5d3SJohn Marino   if (BE (err != REG_NOERROR, 0))
153086d7f5d3SJohn Marino     {
153186d7f5d3SJohn Marino       re_free (newstate);
153286d7f5d3SJohn Marino       return NULL;
153386d7f5d3SJohn Marino     }
153486d7f5d3SJohn Marino 
153586d7f5d3SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
153686d7f5d3SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
153786d7f5d3SJohn Marino     {
153886d7f5d3SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
153986d7f5d3SJohn Marino       re_token_type_t type = node->type;
154086d7f5d3SJohn Marino       if (type == CHARACTER && !node->constraint)
154186d7f5d3SJohn Marino 	continue;
154286d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
154386d7f5d3SJohn Marino       newstate->accept_mb |= node->accept_mb;
154486d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
154586d7f5d3SJohn Marino 
154686d7f5d3SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
154786d7f5d3SJohn Marino       if (type == END_OF_RE)
154886d7f5d3SJohn Marino 	newstate->halt = 1;
154986d7f5d3SJohn Marino       else if (type == OP_BACK_REF)
155086d7f5d3SJohn Marino 	newstate->has_backref = 1;
155186d7f5d3SJohn Marino       else if (type == ANCHOR || node->constraint)
155286d7f5d3SJohn Marino 	newstate->has_constraint = 1;
155386d7f5d3SJohn Marino     }
155486d7f5d3SJohn Marino   err = register_state (dfa, newstate, hash);
155586d7f5d3SJohn Marino   if (BE (err != REG_NOERROR, 0))
155686d7f5d3SJohn Marino     {
155786d7f5d3SJohn Marino       free_state (newstate);
155886d7f5d3SJohn Marino       newstate = NULL;
155986d7f5d3SJohn Marino     }
156086d7f5d3SJohn Marino   return newstate;
156186d7f5d3SJohn Marino }
156286d7f5d3SJohn Marino 
156386d7f5d3SJohn Marino /* Create the new state which is depend on the context CONTEXT.
156486d7f5d3SJohn Marino    Return the new state if succeeded, otherwise return NULL.  */
156586d7f5d3SJohn Marino 
156686d7f5d3SJohn Marino static re_dfastate_t *
156786d7f5d3SJohn Marino internal_function
create_cd_newstate(const re_dfa_t * dfa,const re_node_set * nodes,unsigned int context,re_hashval_t hash)156886d7f5d3SJohn Marino create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
156986d7f5d3SJohn Marino 		    unsigned int context, re_hashval_t hash)
157086d7f5d3SJohn Marino {
157186d7f5d3SJohn Marino   Idx i, nctx_nodes = 0;
157286d7f5d3SJohn Marino   reg_errcode_t err;
157386d7f5d3SJohn Marino   re_dfastate_t *newstate;
157486d7f5d3SJohn Marino 
157586d7f5d3SJohn Marino   newstate = re_calloc (re_dfastate_t, 1);
157686d7f5d3SJohn Marino   if (BE (newstate == NULL, 0))
157786d7f5d3SJohn Marino     return NULL;
157886d7f5d3SJohn Marino   err = re_node_set_init_copy (&newstate->nodes, nodes);
157986d7f5d3SJohn Marino   if (BE (err != REG_NOERROR, 0))
158086d7f5d3SJohn Marino     {
158186d7f5d3SJohn Marino       re_free (newstate);
158286d7f5d3SJohn Marino       return NULL;
158386d7f5d3SJohn Marino     }
158486d7f5d3SJohn Marino 
158586d7f5d3SJohn Marino   newstate->context = context;
158686d7f5d3SJohn Marino   newstate->entrance_nodes = &newstate->nodes;
158786d7f5d3SJohn Marino 
158886d7f5d3SJohn Marino   for (i = 0 ; i < nodes->nelem ; i++)
158986d7f5d3SJohn Marino     {
159086d7f5d3SJohn Marino       unsigned int constraint = 0;
159186d7f5d3SJohn Marino       re_token_t *node = dfa->nodes + nodes->elems[i];
159286d7f5d3SJohn Marino       re_token_type_t type = node->type;
159386d7f5d3SJohn Marino       if (node->constraint)
159486d7f5d3SJohn Marino 	constraint = node->constraint;
159586d7f5d3SJohn Marino 
159686d7f5d3SJohn Marino       if (type == CHARACTER && !constraint)
159786d7f5d3SJohn Marino 	continue;
159886d7f5d3SJohn Marino #ifdef RE_ENABLE_I18N
159986d7f5d3SJohn Marino       newstate->accept_mb |= node->accept_mb;
160086d7f5d3SJohn Marino #endif /* RE_ENABLE_I18N */
160186d7f5d3SJohn Marino 
160286d7f5d3SJohn Marino       /* If the state has the halt node, the state is a halt state.  */
160386d7f5d3SJohn Marino       if (type == END_OF_RE)
160486d7f5d3SJohn Marino 	newstate->halt = 1;
160586d7f5d3SJohn Marino       else if (type == OP_BACK_REF)
160686d7f5d3SJohn Marino 	newstate->has_backref = 1;
160786d7f5d3SJohn Marino       else if (type == ANCHOR)
160886d7f5d3SJohn Marino 	constraint = node->opr.ctx_type;
160986d7f5d3SJohn Marino 
161086d7f5d3SJohn Marino       if (constraint)
161186d7f5d3SJohn Marino 	{
161286d7f5d3SJohn Marino 	  if (newstate->entrance_nodes == &newstate->nodes)
161386d7f5d3SJohn Marino 	    {
161486d7f5d3SJohn Marino 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
161586d7f5d3SJohn Marino 	      if (BE (newstate->entrance_nodes == NULL, 0))
161686d7f5d3SJohn Marino 		{
161786d7f5d3SJohn Marino 		  free_state (newstate);
161886d7f5d3SJohn Marino 		  return NULL;
161986d7f5d3SJohn Marino 		}
162086d7f5d3SJohn Marino 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
162186d7f5d3SJohn Marino 	      nctx_nodes = 0;
162286d7f5d3SJohn Marino 	      newstate->has_constraint = 1;
162386d7f5d3SJohn Marino 	    }
162486d7f5d3SJohn Marino 
162586d7f5d3SJohn Marino 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
162686d7f5d3SJohn Marino 	    {
162786d7f5d3SJohn Marino 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
162886d7f5d3SJohn Marino 	      ++nctx_nodes;
162986d7f5d3SJohn Marino 	    }
163086d7f5d3SJohn Marino 	}
163186d7f5d3SJohn Marino     }
163286d7f5d3SJohn Marino   err = register_state (dfa, newstate, hash);
163386d7f5d3SJohn Marino   if (BE (err != REG_NOERROR, 0))
163486d7f5d3SJohn Marino     {
163586d7f5d3SJohn Marino       free_state (newstate);
163686d7f5d3SJohn Marino       newstate = NULL;
163786d7f5d3SJohn Marino     }
163886d7f5d3SJohn Marino   return  newstate;
163986d7f5d3SJohn Marino }
164086d7f5d3SJohn Marino 
164186d7f5d3SJohn Marino static void
164286d7f5d3SJohn Marino internal_function
free_state(re_dfastate_t * state)164386d7f5d3SJohn Marino free_state (re_dfastate_t *state)
164486d7f5d3SJohn Marino {
164586d7f5d3SJohn Marino   re_node_set_free (&state->non_eps_nodes);
164686d7f5d3SJohn Marino   re_node_set_free (&state->inveclosure);
164786d7f5d3SJohn Marino   if (state->entrance_nodes != &state->nodes)
164886d7f5d3SJohn Marino     {
164986d7f5d3SJohn Marino       re_node_set_free (state->entrance_nodes);
165086d7f5d3SJohn Marino       re_free (state->entrance_nodes);
165186d7f5d3SJohn Marino     }
165286d7f5d3SJohn Marino   re_node_set_free (&state->nodes);
165386d7f5d3SJohn Marino   re_free (state->word_trtable);
165486d7f5d3SJohn Marino   re_free (state->trtable);
165586d7f5d3SJohn Marino   re_free (state);
165686d7f5d3SJohn Marino }
1657