xref: /netbsd-src/external/gpl2/xcvs/dist/lib/regexec.c (revision 7c604eea85b4f330dc75ffe65e947f4d73758aa0)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2, or (at your option)
9    any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License along
17    with this program; if not, write to the Free Software Foundation,
18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n) internal_function;
22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23 static void match_ctx_free (re_match_context_t *cache) internal_function;
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to)
26      internal_function;
27 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28      internal_function;
29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30 					   Idx str_idx) internal_function;
31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32 						    Idx node, Idx str_idx)
33      internal_function;
34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 			   re_dfastate_t **limited_sts, Idx last_node,
36 			   Idx last_str_idx)
37      internal_function;
38 static reg_errcode_t re_search_internal (const regex_t *preg,
39 					 const char *string, Idx length,
40 					 Idx start, Idx last_start, Idx stop,
41 					 size_t nmatch, regmatch_t pmatch[],
42 					 int eflags) internal_function;
43 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44 				  const char *string1, Idx length1,
45 				  const char *string2, Idx length2,
46 				  Idx start, regoff_t range,
47 				  struct re_registers *regs,
48 				  Idx stop, bool ret_len) internal_function;
49 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50 				const char *string, Idx length, Idx start,
51 				regoff_t range, Idx stop,
52 				struct re_registers *regs,
53 				bool ret_len) internal_function;
54 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55 			      Idx nregs, int regs_allocated) internal_function;
56 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57      internal_function;
58 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59 			   Idx *p_match_first)
60      internal_function;
61 static Idx check_halt_state_context (const re_match_context_t *mctx,
62 				     const re_dfastate_t *state, Idx idx)
63      internal_function;
64 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
65 			 regmatch_t *prev_idx_match, Idx cur_node,
66 			 Idx cur_idx, Idx nmatch) internal_function;
67 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
68 				      Idx str_idx, Idx dest_node, Idx nregs,
69 				      regmatch_t *regs,
70 				      re_node_set *eps_via_nodes) internal_function;
71 static reg_errcode_t set_regs (const regex_t *preg,
72 			       const re_match_context_t *mctx,
73 			       size_t nmatch, regmatch_t *pmatch,
74 			       bool fl_backtrack) internal_function;
75 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
76 
77 #ifdef RE_ENABLE_I18N
78 static int sift_states_iter_mb (const re_match_context_t *mctx,
79 				re_sift_context_t *sctx,
80 				Idx node_idx, Idx str_idx, Idx max_str_idx) internal_function;
81 #endif /* RE_ENABLE_I18N */
82 static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
83 					   re_sift_context_t *sctx) internal_function;
84 static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
85 					  re_sift_context_t *sctx, Idx str_idx,
86 					  re_node_set *cur_dest) internal_function;
87 static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
88 					      re_sift_context_t *sctx,
89 					      Idx str_idx,
90 					      re_node_set *dest_nodes) internal_function;
91 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
92 					    re_node_set *dest_nodes,
93 					    const re_node_set *candidates) internal_function;
94 static bool check_dst_limits (const re_match_context_t *mctx,
95 			      const re_node_set *limits,
96 			      Idx dst_node, Idx dst_idx, Idx src_node,
97 			      Idx src_idx) internal_function;
98 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
99 					int boundaries, Idx subexp_idx,
100 					Idx from_node, Idx bkref_idx) internal_function;
101 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
102 				      Idx limit, Idx subexp_idx,
103 				      Idx node, Idx str_idx,
104 				      Idx bkref_idx) internal_function;
105 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
106 					  re_node_set *dest_nodes,
107 					  const re_node_set *candidates,
108 					  re_node_set *limits,
109 					  struct re_backref_cache_entry *bkref_ents,
110 					  Idx str_idx) internal_function;
111 static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
112 					re_sift_context_t *sctx,
113 					Idx str_idx, const re_node_set *candidates) internal_function;
114 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
115 					re_dfastate_t **src, Idx num) internal_function;
116 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
117 					 re_match_context_t *mctx) internal_function;
118 static re_dfastate_t *transit_state (reg_errcode_t *err,
119 				     re_match_context_t *mctx,
120 				     re_dfastate_t *state) internal_function;
121 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
122 					    re_match_context_t *mctx,
123 					    re_dfastate_t *next_state) internal_function;
124 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
125 						re_node_set *cur_nodes,
126 						Idx str_idx) internal_function;
127 #if 0
128 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
129 					re_match_context_t *mctx,
130 					re_dfastate_t *pstate) internal_function;
131 #endif
132 #ifdef RE_ENABLE_I18N
133 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
134 				       re_dfastate_t *pstate) internal_function;
135 #endif /* RE_ENABLE_I18N */
136 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
137 					  const re_node_set *nodes) internal_function;
138 static reg_errcode_t get_subexp (re_match_context_t *mctx,
139 				 Idx bkref_node, Idx bkref_str_idx) internal_function;
140 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
141 				     const re_sub_match_top_t *sub_top,
142 				     re_sub_match_last_t *sub_last,
143 				     Idx bkref_node, Idx bkref_str) internal_function;
144 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
145 			     Idx subexp_idx, int type) internal_function;
146 static reg_errcode_t check_arrival (re_match_context_t *mctx,
147 				    state_array_t *path, Idx top_node,
148 				    Idx top_str, Idx last_node, Idx last_str,
149 				    int type) internal_function;
150 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
151 						   Idx str_idx,
152 						   re_node_set *cur_nodes,
153 						   re_node_set *next_nodes) internal_function;
154 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
155 					       re_node_set *cur_nodes,
156 					       Idx ex_subexp, int type) internal_function;
157 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
158 						   re_node_set *dst_nodes,
159 						   Idx target, Idx ex_subexp,
160 						   int type) internal_function;
161 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
162 					 re_node_set *cur_nodes, Idx cur_str,
163 					 Idx subexp_num, int type) internal_function;
164 static bool build_trtable (re_dfa_t *dfa,
165 			   re_dfastate_t *state) internal_function;
166 #ifdef RE_ENABLE_I18N
167 static int check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
168 				    const re_string_t *input, Idx idx) internal_function;
169 # ifdef _LIBC
170 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
171 						   size_t name_len) internal_function;
172 # endif /* _LIBC */
173 #endif /* RE_ENABLE_I18N */
174 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
175 				       const re_dfastate_t *state,
176 				       re_node_set *states_node,
177 				       bitset *states_ch) internal_function;
178 static bool check_node_accept (const re_match_context_t *mctx,
179 			       const re_token_t *node, Idx idx)
180      internal_function;
181 static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
182 
183 /* Entry point for POSIX code.  */
184 
185 /* regexec searches for a given pattern, specified by PREG, in the
186    string STRING.
187 
188    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
189    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
190    least NMATCH elements, and we set them to the offsets of the
191    corresponding matched substrings.
192 
193    EFLAGS specifies `execution flags' which affect matching: if
194    REG_NOTBOL is set, then ^ does not match at the beginning of the
195    string; if REG_NOTEOL is set, then $ does not match at the end.
196 
197    We return 0 if we find a match and REG_NOMATCH if not.  */
198 
199 int
200 regexec (const regex_t *__restrict preg, const char *__restrict string,
201 	 size_t nmatch, regmatch_t pmatch[], int eflags)
202 {
203   reg_errcode_t err;
204   Idx start, length;
205 #ifdef _LIBC
206   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
207 #endif
208 
209   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
210     return REG_BADPAT;
211 
212   if (eflags & REG_STARTEND)
213     {
214       start = pmatch[0].rm_so;
215       length = pmatch[0].rm_eo;
216     }
217   else
218     {
219       start = 0;
220       length = strlen (string);
221     }
222 
223   __libc_lock_lock (dfa->lock);
224   if (preg->re_no_sub)
225     err = re_search_internal (preg, string, length, start, length,
226 			      length, 0, NULL, eflags);
227   else
228     err = re_search_internal (preg, string, length, start, length,
229 			      length, nmatch, pmatch, eflags);
230   __libc_lock_unlock (dfa->lock);
231   return err != REG_NOERROR;
232 }
233 
234 #ifdef _LIBC
235 # include <shlib-compat.h>
236 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
237 
238 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
239 __typeof__ (__regexec) __compat_regexec;
240 
241 int
242 attribute_compat_text_section
243 __compat_regexec (const regex_t *__restrict preg,
244 		  const char *__restrict string, size_t nmatch,
245 		  regmatch_t pmatch[], int eflags)
246 {
247   return regexec (preg, string, nmatch, pmatch,
248 		  eflags & (REG_NOTBOL | REG_NOTEOL));
249 }
250 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
251 # endif
252 #endif
253 
254 /* Entry points for GNU code.  */
255 
256 /* re_match, re_search, re_match_2, re_search_2
257 
258    The former two functions operate on STRING with length LENGTH,
259    while the later two operate on concatenation of STRING1 and STRING2
260    with lengths LENGTH1 and LENGTH2, respectively.
261 
262    re_match() matches the compiled pattern in BUFP against the string,
263    starting at index START.
264 
265    re_search() first tries matching at index START, then it tries to match
266    starting from index START + 1, and so on.  The last start position tried
267    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
268    way as re_match().)
269 
270    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
271    the first STOP characters of the concatenation of the strings should be
272    concerned.
273 
274    If REGS is not NULL, and BUFP->re_no_sub is not set, the offsets of the match
275    and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
276    computed relative to the concatenation, not relative to the individual
277    strings.)
278 
279    On success, re_match* functions return the length of the match, re_search*
280    return the position of the start of the match.  Return value -1 means no
281    match was found and -2 indicates an internal error.  */
282 
283 regoff_t
284 re_match (struct re_pattern_buffer *bufp, const char *string,
285 	  Idx length, Idx start, struct re_registers *regs)
286 {
287   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
288 }
289 #ifdef _LIBC
290 weak_alias (__re_match, re_match)
291 #endif
292 
293 regoff_t
294 re_search (struct re_pattern_buffer *bufp, const char *string,
295 	   Idx length, Idx start, regoff_t range, struct re_registers *regs)
296 {
297   return re_search_stub (bufp, string, length, start, range, length, regs,
298 			 false);
299 }
300 #ifdef _LIBC
301 weak_alias (__re_search, re_search)
302 #endif
303 
304 regoff_t
305 re_match_2 (struct re_pattern_buffer *bufp,
306 	    const char *string1, Idx length1,
307 	    const char *string2, Idx length2,
308 	    Idx start, struct re_registers *regs, Idx stop)
309 {
310   return re_search_2_stub (bufp, string1, length1, string2, length2,
311 			   start, 0, regs, stop, true);
312 }
313 #ifdef _LIBC
314 weak_alias (__re_match_2, re_match_2)
315 #endif
316 
317 regoff_t
318 re_search_2 (struct re_pattern_buffer *bufp,
319 	     const char *string1, Idx length1,
320 	     const char *string2, Idx length2,
321 	     Idx start, regoff_t range, struct re_registers *regs, Idx stop)
322 {
323   return re_search_2_stub (bufp, string1, length1, string2, length2,
324 			   start, range, regs, stop, false);
325 }
326 #ifdef _LIBC
327 weak_alias (__re_search_2, re_search_2)
328 #endif
329 
330 static regoff_t
331 internal_function
332 re_search_2_stub (struct re_pattern_buffer *bufp,
333 		  const char *string1, Idx length1,
334 		  const char *string2, Idx length2,
335 		  Idx start, regoff_t range, struct re_registers *regs,
336 		  Idx stop, bool ret_len)
337 {
338   const char *str;
339   regoff_t rval;
340   Idx len = length1 + length2;
341   char *s = NULL;
342 
343   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
344     return -2;
345 
346   /* Concatenate the strings.  */
347   if (length2 > 0)
348     if (length1 > 0)
349       {
350 	s = re_malloc (char, len);
351 
352 	if (BE (s == NULL, 0))
353 	  return -2;
354 	memcpy (s, string1, length1);
355 	memcpy (s + length1, string2, length2);
356 	str = s;
357       }
358     else
359       str = string2;
360   else
361     str = string1;
362 
363   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
364 			 ret_len);
365   re_free (s);
366   return rval;
367 }
368 
369 /* The parameters have the same meaning as those of re_search.
370    Additional parameters:
371    If RET_LEN is true the length of the match is returned (re_match style);
372    otherwise the position of the match is returned.  */
373 
374 static regoff_t
375 internal_function
376 re_search_stub (struct re_pattern_buffer *bufp,
377 		const char *string, Idx length,
378 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
379 		bool ret_len)
380 {
381   reg_errcode_t result;
382   regmatch_t *pmatch;
383   Idx nregs;
384   regoff_t rval;
385   int eflags = 0;
386 #ifdef _LIBC
387   re_dfa_t *dfa = (re_dfa_t *) bufp->re_buffer;
388 #endif
389   Idx last_start = start + range;
390 
391   /* Check for out-of-range.  */
392   if (BE (start < 0 || start > length, 0))
393     return -1;
394   if (sizeof start < sizeof range)
395     {
396       regoff_t length_offset = length;
397       regoff_t start_offset = start;
398       if (BE (length_offset - start_offset < range, 0))
399 	last_start = length;
400       else if (BE (range < - start_offset, 0))
401 	last_start = 0;
402     }
403   else
404     {
405       if (BE ((last_start < start) != (range < 0), 0))
406 	{
407 	  /* Overflow occurred when computing last_start; substitute
408 	     the extreme value.  */
409 	  last_start = range < 0 ? 0 : length;
410 	}
411       else
412 	{
413 	  if (BE (length < last_start, 0))
414 	    last_start = length;
415 	  else if (BE (last_start < 0, 0))
416 	    last_start = 0;
417 	}
418     }
419 
420   __libc_lock_lock (dfa->lock);
421 
422   eflags |= (bufp->re_not_bol) ? REG_NOTBOL : 0;
423   eflags |= (bufp->re_not_eol) ? REG_NOTEOL : 0;
424 
425   /* Compile fastmap if we haven't yet.  */
426   if (start < last_start && bufp->re_fastmap != NULL
427       && !bufp->re_fastmap_accurate)
428     re_compile_fastmap (bufp);
429 
430   if (BE (bufp->re_no_sub, 0))
431     regs = NULL;
432 
433   /* We need at least 1 register.  */
434   if (regs == NULL)
435     nregs = 1;
436   else if (BE (bufp->re_regs_allocated == REG_FIXED
437 	       && regs->rm_num_regs <= bufp->re_nsub, 0))
438     {
439       nregs = regs->rm_num_regs;
440       if (BE (nregs < 1, 0))
441 	{
442 	  /* Nothing can be copied to regs.  */
443 	  regs = NULL;
444 	  nregs = 1;
445 	}
446     }
447   else
448     nregs = bufp->re_nsub + 1;
449   pmatch = re_xmalloc (regmatch_t, nregs);
450   if (BE (pmatch == NULL, 0))
451     {
452       rval = -2;
453       goto out;
454     }
455 
456   result = re_search_internal (bufp, string, length, start, last_start, stop,
457 			       nregs, pmatch, eflags);
458 
459   rval = 0;
460 
461   /* I hope we needn't fill ther regs with -1's when no match was found.  */
462   if (result != REG_NOERROR)
463     rval = -1;
464   else if (regs != NULL)
465     {
466       /* If caller wants register contents data back, copy them.  */
467       bufp->re_regs_allocated = re_copy_regs (regs, pmatch, nregs,
468 					      bufp->re_regs_allocated);
469       if (BE (bufp->re_regs_allocated == REG_UNALLOCATED, 0))
470 	rval = -2;
471     }
472 
473   if (BE (rval == 0, 1))
474     {
475       if (ret_len)
476 	{
477 	  assert (pmatch[0].rm_so == start);
478 	  rval = pmatch[0].rm_eo - start;
479 	}
480       else
481 	rval = pmatch[0].rm_so;
482     }
483   re_free (pmatch);
484  out:
485   __libc_lock_unlock (dfa->lock);
486   return rval;
487 }
488 
489 static unsigned
490 internal_function
491 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
492 	      int regs_allocated)
493 {
494   int rval = REG_REALLOCATE;
495   Idx i;
496   Idx need_regs = nregs + 1;
497   /* We need one extra element beyond `rm_num_regs' for the `-1' marker GNU code
498      uses.  */
499 
500   /* Have the register data arrays been allocated?  */
501   if (regs_allocated == REG_UNALLOCATED)
502     { /* No.  So allocate them with malloc.  */
503       regs->rm_start = re_xmalloc (regoff_t, need_regs);
504       regs->rm_end = re_malloc (regoff_t, need_regs);
505       if (BE (regs->rm_start == NULL, 0) || BE (regs->rm_end == NULL, 0))
506 	return REG_UNALLOCATED;
507       regs->rm_num_regs = need_regs;
508     }
509   else if (regs_allocated == REG_REALLOCATE)
510     { /* Yes.  If we need more elements than were already
511 	 allocated, reallocate them.  If we need fewer, just
512 	 leave it alone.  */
513       if (BE (need_regs > regs->rm_num_regs, 0))
514 	{
515 	  regoff_t *new_start =
516 	    re_xrealloc (regs->rm_start, regoff_t, need_regs);
517 	  regoff_t *new_end = re_realloc (regs->rm_end, regoff_t, need_regs);
518 	  if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
519 	    return REG_UNALLOCATED;
520 	  regs->rm_start = new_start;
521 	  regs->rm_end = new_end;
522 	  regs->rm_num_regs = need_regs;
523 	}
524     }
525   else
526     {
527       assert (regs_allocated == REG_FIXED);
528       /* This function may not be called with REG_FIXED and nregs too big.  */
529       assert (regs->rm_num_regs >= nregs);
530       rval = REG_FIXED;
531     }
532 
533   /* Copy the regs.  */
534   for (i = 0; i < nregs; ++i)
535     {
536       regs->rm_start[i] = pmatch[i].rm_so;
537       regs->rm_end[i] = pmatch[i].rm_eo;
538     }
539   for ( ; i < regs->rm_num_regs; ++i)
540     regs->rm_start[i] = regs->rm_end[i] = -1;
541 
542   return rval;
543 }
544 
545 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
546    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
547    this memory for recording register information.  STARTS and ENDS
548    must be allocated using the malloc library routine, and must each
549    be at least NUM_REGS * sizeof (regoff_t) bytes long.
550 
551    If NUM_REGS == 0, then subsequent matches should allocate their own
552    register data.
553 
554    Unless this function is called, the first search or match using
555    PATTERN_BUFFER will allocate its own register data, without
556    freeing the old data.  */
557 
558 void
559 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
560 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
561 {
562   if (num_regs)
563     {
564       bufp->re_regs_allocated = REG_REALLOCATE;
565       regs->rm_num_regs = num_regs;
566       regs->rm_start = starts;
567       regs->rm_end = ends;
568     }
569   else
570     {
571       bufp->re_regs_allocated = REG_UNALLOCATED;
572       regs->rm_num_regs = 0;
573       regs->rm_start = regs->rm_end = NULL;
574     }
575 }
576 #ifdef _LIBC
577 weak_alias (__re_set_registers, re_set_registers)
578 #endif
579 
580 /* Entry points compatible with 4.2 BSD regex library.  We don't define
581    them unless specifically requested.  */
582 
583 #if defined _REGEX_RE_COMP || defined _LIBC
584 int
585 # ifdef _LIBC
586 weak_function
587 # endif
588 re_exec (const char *s)
589 {
590   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
591 }
592 #endif /* _REGEX_RE_COMP */
593 
594 /* Internal entry point.  */
595 
596 /* Searches for a compiled pattern PREG in the string STRING, whose
597    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
598    meaning as with regexec.  LAST_START is START + RANGE, where
599    START and RANGE have the same meaning as with re_search.
600    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
601    otherwise return the error code.
602    Note: We assume front end functions already check ranges.
603    (0 <= LAST_START && LAST_START <= LENGTH)  */
604 
605 static reg_errcode_t
606 internal_function
607 re_search_internal (const regex_t *preg,
608 		    const char *string, Idx length,
609 		    Idx start, Idx last_start, Idx stop,
610 		    size_t nmatch, regmatch_t pmatch[],
611 		    int eflags)
612 {
613   reg_errcode_t err;
614   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
615   Idx left_lim, right_lim;
616   int incr;
617   bool fl_longest_match;
618   int match_kind;
619   Idx match_first, match_last = REG_MISSING;
620   Idx extra_nmatch;
621   bool sb;
622   int ch;
623 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
624   re_match_context_t mctx = { .dfa = dfa };
625 #else
626   re_match_context_t mctx;
627 #endif
628   char *fastmap = ((preg->re_fastmap != NULL && preg->re_fastmap_accurate
629 		    && start != last_start && !preg->re_can_be_null)
630 		   ? preg->re_fastmap : NULL);
631   unsigned REG_TRANSLATE_TYPE t =
632     (unsigned REG_TRANSLATE_TYPE) preg->re_translate;
633 
634 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
635   memset (&mctx, '\0', sizeof (re_match_context_t));
636   mctx.dfa = dfa;
637 #endif
638 
639   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
640   nmatch -= extra_nmatch;
641 
642   /* Check if the DFA haven't been compiled.  */
643   if (BE (preg->re_used == 0 || dfa->init_state == NULL
644 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
645 	  || dfa->init_state_begbuf == NULL, 0))
646     return REG_NOMATCH;
647 
648 #ifdef DEBUG
649   /* We assume front-end functions already check them.  */
650   assert (0 <= last_start && last_start <= length);
651 #endif
652 
653   /* If initial states with non-begbuf contexts have no elements,
654      the regex must be anchored.  If preg->re_newline_anchor is set,
655      we'll never use init_state_nl, so do not check it.  */
656   if (dfa->init_state->nodes.nelem == 0
657       && dfa->init_state_word->nodes.nelem == 0
658       && (dfa->init_state_nl->nodes.nelem == 0
659 	  || !preg->re_newline_anchor))
660     {
661       if (start != 0 && last_start != 0)
662         return REG_NOMATCH;
663       start = last_start = 0;
664     }
665 
666   /* We must check the longest matching, if nmatch > 0.  */
667   fl_longest_match = (nmatch != 0 || dfa->nbackref);
668 
669   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
670 			    preg->re_translate,
671 			    preg->re_syntax & REG_IGNORE_CASE, dfa);
672   if (BE (err != REG_NOERROR, 0))
673     goto free_return;
674   mctx.input.stop = stop;
675   mctx.input.raw_stop = stop;
676   mctx.input.newline_anchor = preg->re_newline_anchor;
677 
678   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
679   if (BE (err != REG_NOERROR, 0))
680     goto free_return;
681 
682   /* We will log all the DFA states through which the dfa pass,
683      if nmatch > 1, or this dfa has "multibyte node", which is a
684      back-reference or a node which can accept multibyte character or
685      multi character collating element.  */
686   if (nmatch > 1 || dfa->has_mb_node)
687     {
688       mctx.state_log = re_xmalloc (re_dfastate_t *, mctx.input.bufs_len + 1);
689       if (BE (mctx.state_log == NULL, 0))
690 	{
691 	  err = REG_ESPACE;
692 	  goto free_return;
693 	}
694     }
695   else
696     mctx.state_log = NULL;
697 
698   match_first = start;
699   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
700 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
701 
702   /* Check incrementally whether of not the input string match.  */
703   incr = (last_start < start) ? -1 : 1;
704   left_lim = (last_start < start) ? last_start : start;
705   right_lim = (last_start < start) ? start : last_start;
706   sb = dfa->mb_cur_max == 1;
707   match_kind =
708     (fastmap
709      ? ((sb || !(preg->re_syntax & REG_IGNORE_CASE || t) ? 4 : 0)
710 	| (start <= last_start ? 2 : 0)
711 	| (t != NULL ? 1 : 0))
712      : 8);
713 
714   for (;; match_first += incr)
715     {
716       err = REG_NOMATCH;
717       if (match_first < left_lim || right_lim < match_first)
718 	goto free_return;
719 
720       /* Advance as rapidly as possible through the string, until we
721 	 find a plausible place to start matching.  This may be done
722 	 with varying efficiency, so there are various possibilities:
723 	 only the most common of them are specialized, in order to
724 	 save on code size.  We use a switch statement for speed.  */
725       switch (match_kind)
726 	{
727 	case 8:
728 	  /* No fastmap.  */
729 	  break;
730 
731 	case 7:
732 	  /* Fastmap with single-byte translation, match forward.  */
733 	  while (BE (match_first < right_lim, 1)
734 		 && !fastmap[t[(unsigned char) string[match_first]]])
735 	    ++match_first;
736 	  goto forward_match_found_start_or_reached_end;
737 
738 	case 6:
739 	  /* Fastmap without translation, match forward.  */
740 	  while (BE (match_first < right_lim, 1)
741 		 && !fastmap[(unsigned char) string[match_first]])
742 	    ++match_first;
743 
744 	forward_match_found_start_or_reached_end:
745 	  if (BE (match_first == right_lim, 0))
746 	    {
747 	      ch = match_first >= length
748 		       ? 0 : (unsigned char) string[match_first];
749 	      if (!fastmap[t ? t[ch] : ch])
750 		goto free_return;
751 	    }
752 	  break;
753 
754 	case 4:
755 	case 5:
756 	  /* Fastmap without multi-byte translation, match backwards.  */
757 	  while (match_first >= left_lim)
758 	    {
759 	      ch = match_first >= length
760 		       ? 0 : (unsigned char) string[match_first];
761 	      if (fastmap[t ? t[ch] : ch])
762 		break;
763 	      --match_first;
764 	    }
765 	  if (match_first < left_lim)
766 	    goto free_return;
767 	  break;
768 
769 	default:
770 	  /* In this case, we can't determine easily the current byte,
771 	     since it might be a component byte of a multibyte
772 	     character.  Then we use the constructed buffer instead.  */
773 	  for (;;)
774 	    {
775 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
776 		 buffers.  */
777 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
778 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
779 		{
780 		  err = re_string_reconstruct (&mctx.input, match_first,
781 					       eflags);
782 		  if (BE (err != REG_NOERROR, 0))
783 		    goto free_return;
784 
785 		  offset = match_first - mctx.input.raw_mbs_idx;
786 		}
787 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
788 		 Note that MATCH_FIRST must not be smaller than 0.  */
789 	      ch = (match_first >= length
790 		    ? 0 : re_string_byte_at (&mctx.input, offset));
791 	      if (fastmap[ch])
792 		break;
793 	      match_first += incr;
794 	      if (match_first < left_lim || match_first > right_lim)
795 	        {
796 	          err = REG_NOMATCH;
797 	          goto free_return;
798 	        }
799 	    }
800 	  break;
801 	}
802 
803       /* Reconstruct the buffers so that the matcher can assume that
804 	 the matching starts from the beginning of the buffer.  */
805       err = re_string_reconstruct (&mctx.input, match_first, eflags);
806       if (BE (err != REG_NOERROR, 0))
807 	goto free_return;
808 
809 #ifdef RE_ENABLE_I18N
810      /* Don't consider this char as a possible match start if it part,
811 	yet isn't the head, of a multibyte character.  */
812       if (!sb && !re_string_first_byte (&mctx.input, 0))
813 	continue;
814 #endif
815 
816       /* It seems to be appropriate one, then use the matcher.  */
817       /* We assume that the matching starts from 0.  */
818       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
819       match_last = check_matching (&mctx, fl_longest_match,
820 				   start <= last_start ? &match_first : NULL);
821       if (match_last != REG_MISSING)
822 	{
823 	  if (BE (match_last == REG_ERROR, 0))
824 	    {
825 	      err = REG_ESPACE;
826 	      goto free_return;
827 	    }
828 	  else
829 	    {
830 	      mctx.match_last = match_last;
831 	      if ((!preg->re_no_sub && nmatch > 1) || dfa->nbackref)
832 		{
833 		  re_dfastate_t *pstate = mctx.state_log[match_last];
834 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
835 							     match_last);
836 		}
837 	      if ((!preg->re_no_sub && nmatch > 1 && dfa->has_plural_match)
838 		  || dfa->nbackref)
839 		{
840 		  err = prune_impossible_nodes (&mctx);
841 		  if (err == REG_NOERROR)
842 		    break;
843 		  if (BE (err != REG_NOMATCH, 0))
844 		    goto free_return;
845 		  match_last = REG_MISSING;
846 		}
847 	      else
848 		break; /* We found a match.  */
849 	    }
850 	}
851 
852       match_ctx_clean (&mctx);
853     }
854 
855 #ifdef DEBUG
856   assert (match_last != REG_MISSING);
857   assert (err == REG_NOERROR);
858 #endif
859 
860   /* Set pmatch[] if we need.  */
861   if (nmatch > 0)
862     {
863       Idx reg_idx;
864 
865       /* Initialize registers.  */
866       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
867 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
868 
869       /* Set the points where matching start/end.  */
870       pmatch[0].rm_so = 0;
871       pmatch[0].rm_eo = mctx.match_last;
872       /* FIXME: This function should fail if mctx.match_last exceeds
873 	 the maximum possible regoff_t value.  We need a new error
874 	 code REG_OVERFLOW.  */
875 
876       if (!preg->re_no_sub && nmatch > 1)
877 	{
878 	  err = set_regs (preg, &mctx, nmatch, pmatch,
879 			  dfa->has_plural_match && dfa->nbackref > 0);
880 	  if (BE (err != REG_NOERROR, 0))
881 	    goto free_return;
882 	}
883 
884       /* At last, add the offset to the each registers, since we slided
885 	 the buffers so that we could assume that the matching starts
886 	 from 0.  */
887       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
888 	if (pmatch[reg_idx].rm_so != -1)
889 	  {
890 #ifdef RE_ENABLE_I18N
891 	    if (BE (mctx.input.offsets_needed != 0, 0))
892 	      {
893 		pmatch[reg_idx].rm_so =
894 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
895 		   ? mctx.input.valid_raw_len
896 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
897 		pmatch[reg_idx].rm_eo =
898 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
899 		   ? mctx.input.valid_raw_len
900 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
901 	      }
902 #else
903 	    assert (mctx.input.offsets_needed == 0);
904 #endif
905 	    pmatch[reg_idx].rm_so += match_first;
906 	    pmatch[reg_idx].rm_eo += match_first;
907 	  }
908       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
909 	{
910 	  pmatch[nmatch + reg_idx].rm_so = -1;
911 	  pmatch[nmatch + reg_idx].rm_eo = -1;
912 	}
913 
914       if (dfa->subexp_map)
915         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
916           if (dfa->subexp_map[reg_idx] != reg_idx)
917             {
918               pmatch[reg_idx + 1].rm_so
919                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
920               pmatch[reg_idx + 1].rm_eo
921                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
922             }
923     }
924 
925  free_return:
926   re_free (mctx.state_log);
927   if (dfa->nbackref)
928     match_ctx_free (&mctx);
929   re_string_destruct (&mctx.input);
930   return err;
931 }
932 
933 static reg_errcode_t
934 internal_function
935 prune_impossible_nodes (re_match_context_t *mctx)
936 {
937   re_dfa_t *const dfa = mctx->dfa;
938   Idx halt_node, match_last;
939   reg_errcode_t ret;
940   re_dfastate_t **sifted_states;
941   re_dfastate_t **lim_states = NULL;
942   re_sift_context_t sctx;
943 #ifdef DEBUG
944   assert (mctx->state_log != NULL);
945 #endif
946   match_last = mctx->match_last;
947   halt_node = mctx->last_node;
948   sifted_states = re_xmalloc (re_dfastate_t *, match_last + 1);
949   if (BE (sifted_states == NULL, 0))
950     {
951       ret = REG_ESPACE;
952       goto free_return;
953     }
954   if (dfa->nbackref)
955     {
956       lim_states = re_xmalloc (re_dfastate_t *, match_last + 1);
957       if (BE (lim_states == NULL, 0))
958 	{
959 	  ret = REG_ESPACE;
960 	  goto free_return;
961 	}
962       while (1)
963 	{
964 	  memset (lim_states, '\0',
965 		  sizeof (re_dfastate_t *) * (match_last + 1));
966 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
967 			 match_last);
968 	  ret = sift_states_backward (mctx, &sctx);
969 	  re_node_set_free (&sctx.limits);
970 	  if (BE (ret != REG_NOERROR, 0))
971 	      goto free_return;
972 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
973 	    break;
974 	  do
975 	    {
976 	      --match_last;
977 	      if (! REG_VALID_INDEX (match_last))
978 		{
979 		  ret = REG_NOMATCH;
980 		  goto free_return;
981 		}
982 	    } while (mctx->state_log[match_last] == NULL
983 		     || !mctx->state_log[match_last]->halt);
984 	  halt_node = check_halt_state_context (mctx,
985 						mctx->state_log[match_last],
986 						match_last);
987 	}
988       ret = merge_state_array (dfa, sifted_states, lim_states,
989 			       match_last + 1);
990       re_free (lim_states);
991       lim_states = NULL;
992       if (BE (ret != REG_NOERROR, 0))
993 	goto free_return;
994     }
995   else
996     {
997       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
998       ret = sift_states_backward (mctx, &sctx);
999       re_node_set_free (&sctx.limits);
1000       if (BE (ret != REG_NOERROR, 0))
1001 	goto free_return;
1002     }
1003   re_free (mctx->state_log);
1004   mctx->state_log = sifted_states;
1005   sifted_states = NULL;
1006   mctx->last_node = halt_node;
1007   mctx->match_last = match_last;
1008   ret = REG_NOERROR;
1009  free_return:
1010   re_free (sifted_states);
1011   re_free (lim_states);
1012   return ret;
1013 }
1014 
1015 /* Acquire an initial state and return it.
1016    We must select appropriate initial state depending on the context,
1017    since initial states may have constraints like "\<", "^", etc..  */
1018 
1019 static inline re_dfastate_t *
1020 __attribute ((always_inline)) internal_function
1021 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1022 			    Idx idx)
1023 {
1024   re_dfa_t *const dfa = mctx->dfa;
1025   if (dfa->init_state->has_constraint)
1026     {
1027       unsigned int context;
1028       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1029       if (IS_WORD_CONTEXT (context))
1030 	return dfa->init_state_word;
1031       else if (IS_ORDINARY_CONTEXT (context))
1032 	return dfa->init_state;
1033       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1034 	return dfa->init_state_begbuf;
1035       else if (IS_NEWLINE_CONTEXT (context))
1036 	return dfa->init_state_nl;
1037       else if (IS_BEGBUF_CONTEXT (context))
1038 	{
1039 	  /* It is relatively rare case, then calculate on demand.  */
1040 	  return re_acquire_state_context (err, dfa,
1041 					   dfa->init_state->entrance_nodes,
1042 					   context);
1043 	}
1044       else
1045 	/* Must not happen?  */
1046 	return dfa->init_state;
1047     }
1048   else
1049     return dfa->init_state;
1050 }
1051 
1052 /* Check whether the regular expression match input string INPUT or not,
1053    and return the index where the matching end.  Return REG_MISSING if
1054    there is no match, and return REG_ERROR in case of an error.
1055    FL_LONGEST_MATCH means we want the POSIX longest matching.
1056    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1057    next place where we may want to try matching.
1058    Note that the matcher assume that the maching starts from the current
1059    index of the buffer.  */
1060 
1061 static Idx
1062 internal_function
1063 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1064 		Idx *p_match_first)
1065 {
1066   re_dfa_t *const dfa = mctx->dfa;
1067   reg_errcode_t err;
1068   Idx match = 0;
1069   Idx match_last = REG_MISSING;
1070   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1071   re_dfastate_t *cur_state;
1072   bool at_init_state = p_match_first != NULL;
1073   Idx next_start_idx = cur_str_idx;
1074 
1075   err = REG_NOERROR;
1076   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1077   /* An initial state must not be NULL (invalid).  */
1078   if (BE (cur_state == NULL, 0))
1079     {
1080       assert (err == REG_ESPACE);
1081       return REG_ERROR;
1082     }
1083 
1084   if (mctx->state_log != NULL)
1085     {
1086       mctx->state_log[cur_str_idx] = cur_state;
1087 
1088       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1089 	 later.  E.g. Processing back references.  */
1090       if (BE (dfa->nbackref, 0))
1091 	{
1092 	  at_init_state = false;
1093 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1094 	  if (BE (err != REG_NOERROR, 0))
1095 	    return err;
1096 
1097 	  if (cur_state->has_backref)
1098 	    {
1099 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1100 	      if (BE (err != REG_NOERROR, 0))
1101 	        return err;
1102 	    }
1103 	}
1104     }
1105 
1106   /* If the RE accepts NULL string.  */
1107   if (BE (cur_state->halt, 0))
1108     {
1109       if (!cur_state->has_constraint
1110 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1111 	{
1112 	  if (!fl_longest_match)
1113 	    return cur_str_idx;
1114 	  else
1115 	    {
1116 	      match_last = cur_str_idx;
1117 	      match = 1;
1118 	    }
1119 	}
1120     }
1121 
1122   while (!re_string_eoi (&mctx->input))
1123     {
1124       re_dfastate_t *old_state = cur_state;
1125       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1126 
1127       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
1128           || (BE (next_char_idx >= mctx->input.valid_len, 0)
1129               && mctx->input.valid_len < mctx->input.len))
1130         {
1131           err = extend_buffers (mctx);
1132           if (BE (err != REG_NOERROR, 0))
1133 	    {
1134 	      assert (err == REG_ESPACE);
1135 	      return REG_ERROR;
1136 	    }
1137         }
1138 
1139       cur_state = transit_state (&err, mctx, cur_state);
1140       if (mctx->state_log != NULL)
1141 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1142 
1143       if (cur_state == NULL)
1144 	{
1145 	  /* Reached the invalid state or an error.  Try to recover a valid
1146 	     state using the state log, if available and if we have not
1147 	     already found a valid (even if not the longest) match.  */
1148 	  if (BE (err != REG_NOERROR, 0))
1149 	    return REG_ERROR;
1150 
1151 	  if (mctx->state_log == NULL
1152 	      || (match && !fl_longest_match)
1153 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1154 	    break;
1155 	}
1156 
1157       if (BE (at_init_state, 0))
1158 	{
1159 	  if (old_state == cur_state)
1160 	    next_start_idx = next_char_idx;
1161 	  else
1162 	    at_init_state = false;
1163 	}
1164 
1165       if (cur_state->halt)
1166 	{
1167 	  /* Reached a halt state.
1168 	     Check the halt state can satisfy the current context.  */
1169 	  if (!cur_state->has_constraint
1170 	      || check_halt_state_context (mctx, cur_state,
1171 					   re_string_cur_idx (&mctx->input)))
1172 	    {
1173 	      /* We found an appropriate halt state.  */
1174 	      match_last = re_string_cur_idx (&mctx->input);
1175 	      match = 1;
1176 
1177 	      /* We found a match, do not modify match_first below.  */
1178 	      p_match_first = NULL;
1179 	      if (!fl_longest_match)
1180 		break;
1181 	    }
1182 	}
1183     }
1184 
1185   if (p_match_first)
1186     *p_match_first += next_start_idx;
1187 
1188   return match_last;
1189 }
1190 
1191 /* Check NODE match the current context.  */
1192 
1193 static bool
1194 internal_function
1195 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1196 {
1197   re_token_type_t type = dfa->nodes[node].type;
1198   unsigned int constraint = dfa->nodes[node].constraint;
1199   if (type != END_OF_RE)
1200     return false;
1201   if (!constraint)
1202     return true;
1203   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1204     return false;
1205   return true;
1206 }
1207 
1208 /* Check the halt state STATE match the current context.
1209    Return 0 if not match, if the node, STATE has, is a halt node and
1210    match the context, return the node.  */
1211 
1212 static Idx
1213 internal_function
1214 check_halt_state_context (const re_match_context_t *mctx,
1215 			  const re_dfastate_t *state, Idx idx)
1216 {
1217   Idx i;
1218   unsigned int context;
1219 #ifdef DEBUG
1220   assert (state->halt);
1221 #endif
1222   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1223   for (i = 0; i < state->nodes.nelem; ++i)
1224     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1225       return state->nodes.elems[i];
1226   return 0;
1227 }
1228 
1229 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1230    corresponding to the DFA).
1231    Return the destination node, and update EPS_VIA_NODES;
1232    return REG_MISSING in case of errors.  */
1233 
1234 static Idx
1235 internal_function
1236 proceed_next_node (const re_match_context_t *mctx,
1237 		   Idx nregs, regmatch_t *regs, Idx *pidx, Idx node,
1238 		   re_node_set *eps_via_nodes, struct re_fail_stack_t *fs)
1239 {
1240   re_dfa_t *const dfa = mctx->dfa;
1241   Idx i;
1242   bool ok;
1243   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1244     {
1245       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1246       re_node_set *edests = &dfa->edests[node];
1247       Idx dest_node;
1248       ok = re_node_set_insert (eps_via_nodes, node);
1249       if (BE (! ok, 0))
1250 	return REG_ERROR;
1251       /* Pick up a valid destination, or return REG_MISSING if none
1252 	 is found.  */
1253       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1254 	{
1255 	  Idx candidate = edests->elems[i];
1256 	  if (!re_node_set_contains (cur_nodes, candidate))
1257 	    continue;
1258           if (dest_node == REG_MISSING)
1259 	    dest_node = candidate;
1260 
1261           else
1262 	    {
1263 	      /* In order to avoid infinite loop like "(a*)*", return the second
1264 	         epsilon-transition if the first was already considered.  */
1265 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1266 	        return candidate;
1267 
1268 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1269 	      else if (fs != NULL
1270 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1271 				           eps_via_nodes))
1272 		return REG_ERROR;
1273 
1274 	      /* We know we are going to exit.  */
1275 	      break;
1276 	    }
1277 	}
1278       return dest_node;
1279     }
1280   else
1281     {
1282       Idx naccepted = 0;
1283       re_token_type_t type = dfa->nodes[node].type;
1284 
1285 #ifdef RE_ENABLE_I18N
1286       if (dfa->nodes[node].accept_mb)
1287 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1288       else
1289 #endif /* RE_ENABLE_I18N */
1290       if (type == OP_BACK_REF)
1291 	{
1292 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1293 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1294 	  if (fs != NULL)
1295 	    {
1296 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1297 		return REG_MISSING;
1298 	      else if (naccepted)
1299 		{
1300 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1301 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1302 			      naccepted) != 0)
1303 		    return REG_MISSING;
1304 		}
1305 	    }
1306 
1307 	  if (naccepted == 0)
1308 	    {
1309 	      Idx dest_node;
1310 	      ok = re_node_set_insert (eps_via_nodes, node);
1311 	      if (BE (! ok, 0))
1312 		return REG_ERROR;
1313 	      dest_node = dfa->edests[node].elems[0];
1314 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1315 					dest_node))
1316 		return dest_node;
1317 	    }
1318 	}
1319 
1320       if (naccepted != 0
1321 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1322 	{
1323 	  Idx dest_node = dfa->nexts[node];
1324 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1325 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1326 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1327 					       dest_node)))
1328 	    return REG_MISSING;
1329 	  re_node_set_empty (eps_via_nodes);
1330 	  return dest_node;
1331 	}
1332     }
1333   return REG_MISSING;
1334 }
1335 
1336 static reg_errcode_t
1337 internal_function
1338 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1339 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1340 {
1341   reg_errcode_t err;
1342   Idx num = fs->num++;
1343   if (fs->num == fs->alloc)
1344     {
1345       struct re_fail_stack_ent_t *new_array =
1346 	re_x2realloc (fs->stack, struct re_fail_stack_ent_t, &fs->alloc);
1347       if (new_array == NULL)
1348 	return REG_ESPACE;
1349       fs->stack = new_array;
1350     }
1351   fs->stack[num].idx = str_idx;
1352   fs->stack[num].node = dest_node;
1353   fs->stack[num].regs = re_xmalloc (regmatch_t, nregs);
1354   if (fs->stack[num].regs == NULL)
1355     return REG_ESPACE;
1356   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1357   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1358   return err;
1359 }
1360 
1361 static Idx
1362 internal_function
1363 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx,
1364 		Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1365 {
1366   Idx num = --fs->num;
1367   assert (REG_VALID_INDEX (num));
1368   *pidx = fs->stack[num].idx;
1369   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1370   re_node_set_free (eps_via_nodes);
1371   re_free (fs->stack[num].regs);
1372   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1373   return fs->stack[num].node;
1374 }
1375 
1376 /* Set the positions where the subexpressions are starts/ends to registers
1377    PMATCH.
1378    Note: We assume that pmatch[0] is already set, and
1379    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1380 
1381 static reg_errcode_t
1382 internal_function
1383 set_regs (const regex_t *preg, const re_match_context_t *mctx,
1384 	  size_t nmatch, regmatch_t *pmatch, bool fl_backtrack)
1385 {
1386   re_dfa_t *dfa = (re_dfa_t *) preg->re_buffer;
1387   Idx idx, cur_node;
1388   re_node_set eps_via_nodes;
1389   struct re_fail_stack_t *fs;
1390   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1391   regmatch_t *prev_idx_match;
1392   bool prev_idx_match_malloced = false;
1393 
1394 #ifdef DEBUG
1395   assert (nmatch > 1);
1396   assert (mctx->state_log != NULL);
1397 #endif
1398   if (fl_backtrack)
1399     {
1400       fs = &fs_body;
1401       fs->stack = re_xmalloc (struct re_fail_stack_ent_t, fs->alloc);
1402       if (fs->stack == NULL)
1403 	return REG_ESPACE;
1404     }
1405   else
1406     fs = NULL;
1407 
1408   cur_node = dfa->init_node;
1409   re_node_set_init_empty (&eps_via_nodes);
1410 
1411   if (re_alloc_oversized (nmatch, sizeof (regmatch_t)))
1412     {
1413       free_fail_stack_return (fs);
1414       return REG_ESPACE;
1415     }
1416 #ifndef __SSP__
1417   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1418     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1419   else
1420 #endif
1421     {
1422       prev_idx_match = re_malloc (regmatch_t, nmatch);
1423       if (prev_idx_match == NULL)
1424 	{
1425 	  free_fail_stack_return (fs);
1426 	  return REG_ESPACE;
1427 	}
1428       prev_idx_match_malloced = true;
1429     }
1430   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1431 
1432   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1433     {
1434       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1435 
1436       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1437 	{
1438 	  Idx reg_idx;
1439 	  if (fs)
1440 	    {
1441 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1442 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1443 		  break;
1444 	      if (reg_idx == nmatch)
1445 		{
1446 		  re_node_set_free (&eps_via_nodes);
1447 		  if (prev_idx_match_malloced)
1448 		    re_free (prev_idx_match);
1449 		  return free_fail_stack_return (fs);
1450 		}
1451 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1452 					 &eps_via_nodes);
1453 	    }
1454 	  else
1455 	    {
1456 	      re_node_set_free (&eps_via_nodes);
1457 	      if (prev_idx_match_malloced)
1458 		re_free (prev_idx_match);
1459 	      return REG_NOERROR;
1460 	    }
1461 	}
1462 
1463       /* Proceed to next node.  */
1464       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1465 				    &eps_via_nodes, fs);
1466 
1467       if (BE (! REG_VALID_INDEX (cur_node), 0))
1468 	{
1469 	  if (BE (cur_node == REG_ERROR, 0))
1470 	    {
1471 	      re_node_set_free (&eps_via_nodes);
1472 	      if (prev_idx_match_malloced)
1473 		re_free (prev_idx_match);
1474 	      free_fail_stack_return (fs);
1475 	      return REG_ESPACE;
1476 	    }
1477 	  if (fs)
1478 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1479 				       &eps_via_nodes);
1480 	  else
1481 	    {
1482 	      re_node_set_free (&eps_via_nodes);
1483 	      if (prev_idx_match_malloced)
1484 		re_free (prev_idx_match);
1485 	      return REG_NOMATCH;
1486 	    }
1487 	}
1488     }
1489   re_node_set_free (&eps_via_nodes);
1490   if (prev_idx_match_malloced)
1491     re_free (prev_idx_match);
1492   return free_fail_stack_return (fs);
1493 }
1494 
1495 static reg_errcode_t
1496 internal_function
1497 free_fail_stack_return (struct re_fail_stack_t *fs)
1498 {
1499   if (fs)
1500     {
1501       Idx fs_idx;
1502       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1503 	{
1504 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1505 	  re_free (fs->stack[fs_idx].regs);
1506 	}
1507       re_free (fs->stack);
1508     }
1509   return REG_NOERROR;
1510 }
1511 
1512 static void
1513 internal_function
1514 update_regs (re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match,
1515 	     Idx cur_node, Idx cur_idx, Idx nmatch)
1516 {
1517   int type = dfa->nodes[cur_node].type;
1518   if (type == OP_OPEN_SUBEXP)
1519     {
1520       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1521 
1522       /* We are at the first node of this sub expression.  */
1523       if (reg_num < nmatch)
1524 	{
1525 	  pmatch[reg_num].rm_so = cur_idx;
1526 	  pmatch[reg_num].rm_eo = -1;
1527 	}
1528     }
1529   else if (type == OP_CLOSE_SUBEXP)
1530     {
1531       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1532       if (reg_num < nmatch)
1533 	{
1534 	  /* We are at the last node of this sub expression.  */
1535 	  if (pmatch[reg_num].rm_so < cur_idx)
1536 	    {
1537 	      pmatch[reg_num].rm_eo = cur_idx;
1538 	      /* This is a non-empty match or we are not inside an optional
1539 		 subexpression.  Accept this right away.  */
1540 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1541 	    }
1542 	  else
1543 	    {
1544 	      if (dfa->nodes[cur_node].opt_subexp
1545 		  && prev_idx_match[reg_num].rm_so != -1)
1546 		/* We transited through an empty match for an optional
1547 		   subexpression, like (a?)*, and this is not the subexp's
1548 		   first match.  Copy back the old content of the registers
1549 		   so that matches of an inner subexpression are undone as
1550 		   well, like in ((a?))*.  */
1551 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1552 	      else
1553 		/* We completed a subexpression, but it may be part of
1554 		   an optional one, so do not update PREV_IDX_MATCH.  */
1555 		pmatch[reg_num].rm_eo = cur_idx;
1556 	    }
1557 	}
1558     }
1559 }
1560 
1561 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1562    and sift the nodes in each states according to the following rules.
1563    Updated state_log will be wrote to STATE_LOG.
1564 
1565    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
1566      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1567 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
1568 	the LAST_NODE, we throw away the node `a'.
1569      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
1570 	string `s' and transit to `b':
1571 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1572 	   away the node `a'.
1573 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1574 	    thrown away, we throw away the node `a'.
1575      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1576 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1577 	   node `a'.
1578 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1579 	    we throw away the node `a'.  */
1580 
1581 #define STATE_NODE_CONTAINS(state,node) \
1582   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1583 
1584 static reg_errcode_t
1585 internal_function
1586 sift_states_backward (re_match_context_t *mctx, re_sift_context_t *sctx)
1587 {
1588   reg_errcode_t err;
1589   int null_cnt = 0;
1590   Idx str_idx = sctx->last_str_idx;
1591   re_node_set cur_dest;
1592 
1593 #ifdef DEBUG
1594   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1595 #endif
1596 
1597   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1598      transit to the last_node and the last_node itself.  */
1599   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1600   if (BE (err != REG_NOERROR, 0))
1601     return err;
1602   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1603   if (BE (err != REG_NOERROR, 0))
1604     goto free_return;
1605 
1606   /* Then check each states in the state_log.  */
1607   while (str_idx > 0)
1608     {
1609       /* Update counters.  */
1610       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1611       if (null_cnt > mctx->max_mb_elem_len)
1612 	{
1613 	  memset (sctx->sifted_states, '\0',
1614 		  sizeof (re_dfastate_t *) * str_idx);
1615 	  re_node_set_free (&cur_dest);
1616 	  return REG_NOERROR;
1617 	}
1618       re_node_set_empty (&cur_dest);
1619       --str_idx;
1620 
1621       if (mctx->state_log[str_idx])
1622 	{
1623 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1624           if (BE (err != REG_NOERROR, 0))
1625 	    goto free_return;
1626 	}
1627 
1628       /* Add all the nodes which satisfy the following conditions:
1629 	 - It can epsilon transit to a node in CUR_DEST.
1630 	 - It is in CUR_SRC.
1631 	 And update state_log.  */
1632       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1633       if (BE (err != REG_NOERROR, 0))
1634 	goto free_return;
1635     }
1636   err = REG_NOERROR;
1637  free_return:
1638   re_node_set_free (&cur_dest);
1639   return err;
1640 }
1641 
1642 static reg_errcode_t
1643 internal_function
1644 build_sifted_states (re_match_context_t *mctx, re_sift_context_t *sctx,
1645 		     Idx str_idx, re_node_set *cur_dest)
1646 {
1647   re_dfa_t *const dfa = mctx->dfa;
1648   re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1649   Idx i;
1650 
1651   /* Then build the next sifted state.
1652      We build the next sifted state on `cur_dest', and update
1653      `sifted_states[str_idx]' with `cur_dest'.
1654      Note:
1655      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
1656      `cur_src' points the node_set of the old `state_log[str_idx]'
1657      (with the epsilon nodes pre-filtered out).  */
1658   for (i = 0; i < cur_src->nelem; i++)
1659     {
1660       Idx prev_node = cur_src->elems[i];
1661       int naccepted = 0;
1662       bool ok;
1663 
1664 #ifdef DEBUG
1665       re_token_type_t type = dfa->nodes[prev_node].type;
1666       assert (!IS_EPSILON_NODE (type));
1667 #endif
1668 #ifdef RE_ENABLE_I18N
1669       /* If the node may accept `multi byte'.  */
1670       if (dfa->nodes[prev_node].accept_mb)
1671 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1672 					 str_idx, sctx->last_str_idx);
1673 #endif /* RE_ENABLE_I18N */
1674 
1675       /* We don't check backreferences here.
1676 	 See update_cur_sifted_state().  */
1677       if (!naccepted
1678 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1679 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1680 				  dfa->nexts[prev_node]))
1681 	naccepted = 1;
1682 
1683       if (naccepted == 0)
1684 	continue;
1685 
1686       if (sctx->limits.nelem)
1687 	{
1688 	  Idx to_idx = str_idx + naccepted;
1689 	  if (check_dst_limits (mctx, &sctx->limits,
1690 				dfa->nexts[prev_node], to_idx,
1691 				prev_node, str_idx))
1692 	    continue;
1693 	}
1694       ok = re_node_set_insert (cur_dest, prev_node);
1695       if (BE (! ok, 0))
1696 	return REG_ESPACE;
1697     }
1698 
1699   return REG_NOERROR;
1700 }
1701 
1702 /* Helper functions.  */
1703 
1704 static reg_errcode_t
1705 internal_function
1706 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1707 {
1708   Idx top = mctx->state_log_top;
1709 
1710   if (next_state_log_idx >= mctx->input.bufs_len
1711       || (next_state_log_idx >= mctx->input.valid_len
1712 	  && mctx->input.valid_len < mctx->input.len))
1713     {
1714       reg_errcode_t err;
1715       err = extend_buffers (mctx);
1716       if (BE (err != REG_NOERROR, 0))
1717 	return err;
1718     }
1719 
1720   if (top < next_state_log_idx)
1721     {
1722       memset (mctx->state_log + top + 1, '\0',
1723 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1724       mctx->state_log_top = next_state_log_idx;
1725     }
1726   return REG_NOERROR;
1727 }
1728 
1729 static reg_errcode_t
1730 internal_function
1731 merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, re_dfastate_t **src,
1732 		   Idx num)
1733 {
1734   Idx st_idx;
1735   reg_errcode_t err;
1736   for (st_idx = 0; st_idx < num; ++st_idx)
1737     {
1738       if (dst[st_idx] == NULL)
1739 	dst[st_idx] = src[st_idx];
1740       else if (src[st_idx] != NULL)
1741 	{
1742 	  re_node_set merged_set;
1743 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1744 					&src[st_idx]->nodes);
1745 	  if (BE (err != REG_NOERROR, 0))
1746 	    return err;
1747 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1748 	  re_node_set_free (&merged_set);
1749 	  if (BE (err != REG_NOERROR, 0))
1750 	    return err;
1751 	}
1752     }
1753   return REG_NOERROR;
1754 }
1755 
1756 static reg_errcode_t
1757 internal_function
1758 update_cur_sifted_state (re_match_context_t *mctx, re_sift_context_t *sctx,
1759 			 Idx str_idx, re_node_set *dest_nodes)
1760 {
1761   re_dfa_t *const dfa = mctx->dfa;
1762   reg_errcode_t err;
1763   const re_node_set *candidates;
1764   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1765 		: &mctx->state_log[str_idx]->nodes);
1766 
1767   if (dest_nodes->nelem == 0)
1768     sctx->sifted_states[str_idx] = NULL;
1769   else
1770     {
1771       if (candidates)
1772 	{
1773 	  /* At first, add the nodes which can epsilon transit to a node in
1774 	     DEST_NODE.  */
1775 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1776 	  if (BE (err != REG_NOERROR, 0))
1777 	    return err;
1778 
1779 	  /* Then, check the limitations in the current sift_context.  */
1780 	  if (sctx->limits.nelem)
1781 	    {
1782 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1783 					 mctx->bkref_ents, str_idx);
1784 	      if (BE (err != REG_NOERROR, 0))
1785 		return err;
1786 	    }
1787 	}
1788 
1789       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1790       if (BE (err != REG_NOERROR, 0))
1791 	return err;
1792     }
1793 
1794   if (candidates && mctx->state_log[str_idx]->has_backref)
1795     {
1796       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1797       if (BE (err != REG_NOERROR, 0))
1798 	return err;
1799     }
1800   return REG_NOERROR;
1801 }
1802 
1803 static reg_errcode_t
1804 internal_function
1805 add_epsilon_src_nodes (re_dfa_t *dfa, re_node_set *dest_nodes,
1806 		       const re_node_set *candidates)
1807 {
1808   reg_errcode_t err = REG_NOERROR;
1809   Idx i;
1810 
1811   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1812   if (BE (err != REG_NOERROR, 0))
1813     return err;
1814 
1815   if (!state->inveclosure.alloc)
1816     {
1817       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1818       if (BE (err != REG_NOERROR, 0))
1819         return REG_ESPACE;
1820       for (i = 0; i < dest_nodes->nelem; i++)
1821         re_node_set_merge (&state->inveclosure,
1822 			   dfa->inveclosures + dest_nodes->elems[i]);
1823     }
1824   return re_node_set_add_intersect (dest_nodes, candidates,
1825 				    &state->inveclosure);
1826 }
1827 
1828 static reg_errcode_t
1829 internal_function
1830 sub_epsilon_src_nodes (re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1831 		       const re_node_set *candidates)
1832 {
1833     Idx ecl_idx;
1834     reg_errcode_t err;
1835     re_node_set *inv_eclosure = dfa->inveclosures + node;
1836     re_node_set except_nodes;
1837     re_node_set_init_empty (&except_nodes);
1838     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1839       {
1840 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1841 	if (cur_node == node)
1842 	  continue;
1843 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1844 	  {
1845 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1846 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1847 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1848 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1849 		 && re_node_set_contains (dest_nodes, edst1))
1850 		|| (REG_VALID_NONZERO_INDEX (edst2)
1851 		    && !re_node_set_contains (inv_eclosure, edst2)
1852 		    && re_node_set_contains (dest_nodes, edst2)))
1853 	      {
1854 		err = re_node_set_add_intersect (&except_nodes, candidates,
1855 						 dfa->inveclosures + cur_node);
1856 		if (BE (err != REG_NOERROR, 0))
1857 		  {
1858 		    re_node_set_free (&except_nodes);
1859 		    return err;
1860 		  }
1861 	      }
1862 	  }
1863       }
1864     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1865       {
1866 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1867 	if (!re_node_set_contains (&except_nodes, cur_node))
1868 	  {
1869 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1870 	    re_node_set_remove_at (dest_nodes, idx);
1871 	  }
1872       }
1873     re_node_set_free (&except_nodes);
1874     return REG_NOERROR;
1875 }
1876 
1877 static bool
1878 internal_function
1879 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1880 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1881 {
1882   re_dfa_t *const dfa = mctx->dfa;
1883   Idx lim_idx, src_pos, dst_pos;
1884 
1885   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1886   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1887   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1888     {
1889       Idx subexp_idx;
1890       struct re_backref_cache_entry *ent;
1891       ent = mctx->bkref_ents + limits->elems[lim_idx];
1892       subexp_idx = dfa->nodes[ent->node].opr.idx;
1893 
1894       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1895 					   subexp_idx, dst_node, dst_idx,
1896 					   dst_bkref_idx);
1897       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1898 					   subexp_idx, src_node, src_idx,
1899 					   src_bkref_idx);
1900 
1901       /* In case of:
1902 	 <src> <dst> ( <subexp> )
1903 	 ( <subexp> ) <src> <dst>
1904 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1905       if (src_pos == dst_pos)
1906 	continue; /* This is unrelated limitation.  */
1907       else
1908 	return true;
1909     }
1910   return false;
1911 }
1912 
1913 static int
1914 internal_function
1915 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1916 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1917 {
1918   re_dfa_t *const dfa = mctx->dfa;
1919   re_node_set *eclosures = dfa->eclosures + from_node;
1920   Idx node_idx;
1921 
1922   /* Else, we are on the boundary: examine the nodes on the epsilon
1923      closure.  */
1924   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1925     {
1926       Idx node = eclosures->elems[node_idx];
1927       switch (dfa->nodes[node].type)
1928 	{
1929 	case OP_BACK_REF:
1930 	  if (bkref_idx != REG_MISSING)
1931 	    {
1932 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1933 	      do
1934 	        {
1935 		  Idx dst;
1936 		  int cpos;
1937 
1938 		  if (ent->node != node)
1939 		    continue;
1940 
1941 		  if (subexp_idx < BITSET_WORD_BITS
1942 		      && !(ent->eps_reachable_subexps_map
1943 			   & ((bitset_word) 1 << subexp_idx)))
1944 		    continue;
1945 
1946 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1947 		     OP_CLOSE_SUBEXP cases below.  But, if the
1948 		     destination node is the same node as the source
1949 		     node, don't recurse because it would cause an
1950 		     infinite loop: a regex that exhibits this behavior
1951 		     is ()\1*\1*  */
1952 		  dst = dfa->edests[node].elems[0];
1953 		  if (dst == from_node)
1954 		    {
1955 		      if (boundaries & 1)
1956 		        return -1;
1957 		      else /* if (boundaries & 2) */
1958 		        return 0;
1959 		    }
1960 
1961 		  cpos =
1962 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1963 						 dst, bkref_idx);
1964 		  if (cpos == -1 /* && (boundaries & 1) */)
1965 		    return -1;
1966 		  if (cpos == 0 && (boundaries & 2))
1967 		    return 0;
1968 
1969 		  if (subexp_idx < BITSET_WORD_BITS)
1970 		    ent->eps_reachable_subexps_map &=
1971 		      ~ ((bitset_word) 1 << subexp_idx);
1972 	        }
1973 	      while (ent++->more);
1974 	    }
1975 	  break;
1976 
1977 	case OP_OPEN_SUBEXP:
1978 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1979 	    return -1;
1980 	  break;
1981 
1982 	case OP_CLOSE_SUBEXP:
1983 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1984 	    return 0;
1985 	  break;
1986 
1987 	default:
1988 	    break;
1989 	}
1990     }
1991 
1992   return (boundaries & 2) ? 1 : 0;
1993 }
1994 
1995 static int
1996 internal_function
1997 check_dst_limits_calc_pos (const re_match_context_t *mctx,
1998 			   Idx limit, Idx subexp_idx,
1999 			   Idx from_node, Idx str_idx, Idx bkref_idx)
2000 {
2001   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2002   int boundaries;
2003 
2004   /* If we are outside the range of the subexpression, return -1 or 1.  */
2005   if (str_idx < lim->subexp_from)
2006     return -1;
2007 
2008   if (lim->subexp_to < str_idx)
2009     return 1;
2010 
2011   /* If we are within the subexpression, return 0.  */
2012   boundaries = (str_idx == lim->subexp_from);
2013   boundaries |= (str_idx == lim->subexp_to) << 1;
2014   if (boundaries == 0)
2015     return 0;
2016 
2017   /* Else, examine epsilon closure.  */
2018   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2019 				      from_node, bkref_idx);
2020 }
2021 
2022 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2023    which are against limitations from DEST_NODES. */
2024 
2025 static reg_errcode_t
2026 internal_function
2027 check_subexp_limits (re_dfa_t *dfa, re_node_set *dest_nodes,
2028 		     const re_node_set *candidates, re_node_set *limits,
2029 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2030 {
2031   reg_errcode_t err;
2032   Idx node_idx, lim_idx;
2033 
2034   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2035     {
2036       Idx subexp_idx;
2037       struct re_backref_cache_entry *ent;
2038       ent = bkref_ents + limits->elems[lim_idx];
2039 
2040       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2041 	continue; /* This is unrelated limitation.  */
2042 
2043       subexp_idx = dfa->nodes[ent->node].opr.idx;
2044       if (ent->subexp_to == str_idx)
2045 	{
2046 	  Idx ops_node = REG_MISSING;
2047 	  Idx cls_node = REG_MISSING;
2048 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2049 	    {
2050 	      Idx node = dest_nodes->elems[node_idx];
2051 	      re_token_type_t type = dfa->nodes[node].type;
2052 	      if (type == OP_OPEN_SUBEXP
2053 		  && subexp_idx == dfa->nodes[node].opr.idx)
2054 		ops_node = node;
2055 	      else if (type == OP_CLOSE_SUBEXP
2056 		       && subexp_idx == dfa->nodes[node].opr.idx)
2057 		cls_node = node;
2058 	    }
2059 
2060 	  /* Check the limitation of the open subexpression.  */
2061 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2062 	  if (REG_VALID_INDEX (ops_node))
2063 	    {
2064 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2065 					   candidates);
2066 	      if (BE (err != REG_NOERROR, 0))
2067 		return err;
2068 	    }
2069 
2070 	  /* Check the limitation of the close subexpression.  */
2071 	  if (REG_VALID_INDEX (cls_node))
2072 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2073 	      {
2074 		Idx node = dest_nodes->elems[node_idx];
2075 		if (!re_node_set_contains (dfa->inveclosures + node,
2076 					   cls_node)
2077 		    && !re_node_set_contains (dfa->eclosures + node,
2078 					      cls_node))
2079 		  {
2080 		    /* It is against this limitation.
2081 		       Remove it form the current sifted state.  */
2082 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2083 						 candidates);
2084 		    if (BE (err != REG_NOERROR, 0))
2085 		      return err;
2086 		    --node_idx;
2087 		  }
2088 	      }
2089 	}
2090       else /* (ent->subexp_to != str_idx)  */
2091 	{
2092 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2093 	    {
2094 	      Idx node = dest_nodes->elems[node_idx];
2095 	      re_token_type_t type = dfa->nodes[node].type;
2096 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2097 		{
2098 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2099 		    continue;
2100 		  /* It is against this limitation.
2101 		     Remove it form the current sifted state.  */
2102 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2103 					       candidates);
2104 		  if (BE (err != REG_NOERROR, 0))
2105 		    return err;
2106 		}
2107 	    }
2108 	}
2109     }
2110   return REG_NOERROR;
2111 }
2112 
2113 static reg_errcode_t
2114 internal_function
2115 sift_states_bkref (re_match_context_t *mctx, re_sift_context_t *sctx,
2116 		   Idx str_idx, const re_node_set *candidates)
2117 {
2118   re_dfa_t *const dfa = mctx->dfa;
2119   reg_errcode_t err;
2120   Idx node_idx, node;
2121   re_sift_context_t local_sctx;
2122   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2123 
2124   if (first_idx == REG_MISSING)
2125     return REG_NOERROR;
2126 
2127   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2128 
2129   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2130     {
2131       Idx enabled_idx;
2132       re_token_type_t type;
2133       struct re_backref_cache_entry *entry;
2134       node = candidates->elems[node_idx];
2135       type = dfa->nodes[node].type;
2136       /* Avoid infinite loop for the REs like "()\1+".  */
2137       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2138 	continue;
2139       if (type != OP_BACK_REF)
2140 	continue;
2141 
2142       entry = mctx->bkref_ents + first_idx;
2143       enabled_idx = first_idx;
2144       do
2145 	{
2146 	  bool ok;
2147 	  Idx subexp_len, to_idx, dst_node;
2148 	  re_dfastate_t *cur_state;
2149 
2150 	  if (entry->node != node)
2151 	    continue;
2152 	  subexp_len = entry->subexp_to - entry->subexp_from;
2153 	  to_idx = str_idx + subexp_len;
2154 	  dst_node = (subexp_len ? dfa->nexts[node]
2155 		      : dfa->edests[node].elems[0]);
2156 
2157 	  if (to_idx > sctx->last_str_idx
2158 	      || sctx->sifted_states[to_idx] == NULL
2159 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2160 	      || check_dst_limits (mctx, &sctx->limits, node,
2161 				   str_idx, dst_node, to_idx))
2162 	    continue;
2163 
2164 	  if (local_sctx.sifted_states == NULL)
2165 	    {
2166 	      local_sctx = *sctx;
2167 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2168 	      if (BE (err != REG_NOERROR, 0))
2169 		goto free_return;
2170 	    }
2171 	  local_sctx.last_node = node;
2172 	  local_sctx.last_str_idx = str_idx;
2173 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2174 	  if (BE (! ok, 0))
2175 	    {
2176 	      err = REG_ESPACE;
2177 	      goto free_return;
2178 	    }
2179 	  cur_state = local_sctx.sifted_states[str_idx];
2180 	  err = sift_states_backward (mctx, &local_sctx);
2181 	  if (BE (err != REG_NOERROR, 0))
2182 	    goto free_return;
2183 	  if (sctx->limited_states != NULL)
2184 	    {
2185 	      err = merge_state_array (dfa, sctx->limited_states,
2186 				       local_sctx.sifted_states,
2187 				       str_idx + 1);
2188 	      if (BE (err != REG_NOERROR, 0))
2189 		goto free_return;
2190 	    }
2191 	  local_sctx.sifted_states[str_idx] = cur_state;
2192 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2193 
2194 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2195           entry = mctx->bkref_ents + enabled_idx;
2196 	}
2197       while (enabled_idx++, entry++->more);
2198     }
2199   err = REG_NOERROR;
2200  free_return:
2201   if (local_sctx.sifted_states != NULL)
2202     {
2203       re_node_set_free (&local_sctx.limits);
2204     }
2205 
2206   return err;
2207 }
2208 
2209 
2210 #ifdef RE_ENABLE_I18N
2211 static int
2212 internal_function
2213 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2214 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2215 {
2216   re_dfa_t *const dfa = mctx->dfa;
2217   int naccepted;
2218   /* Check the node can accept `multi byte'.  */
2219   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2220   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2221       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2222 			    dfa->nexts[node_idx]))
2223     /* The node can't accept the `multi byte', or the
2224        destination was already thrown away, then the node
2225        could't accept the current input `multi byte'.   */
2226     naccepted = 0;
2227   /* Otherwise, it is sure that the node could accept
2228      `naccepted' bytes input.  */
2229   return naccepted;
2230 }
2231 #endif /* RE_ENABLE_I18N */
2232 
2233 
2234 /* Functions for state transition.  */
2235 
2236 /* Return the next state to which the current state STATE will transit by
2237    accepting the current input byte, and update STATE_LOG if necessary.
2238    If STATE can accept a multibyte char/collating element/back reference
2239    update the destination of STATE_LOG.  */
2240 
2241 static re_dfastate_t *
2242 internal_function
2243 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2244 	       re_dfastate_t *state)
2245 {
2246   re_dfastate_t **trtable;
2247   unsigned char ch;
2248 
2249 #ifdef RE_ENABLE_I18N
2250   /* If the current state can accept multibyte.  */
2251   if (BE (state->accept_mb, 0))
2252     {
2253       *err = transit_state_mb (mctx, state);
2254       if (BE (*err != REG_NOERROR, 0))
2255 	return NULL;
2256     }
2257 #endif /* RE_ENABLE_I18N */
2258 
2259   /* Then decide the next state with the single byte.  */
2260 #if 0
2261   if (0)
2262     /* don't use transition table  */
2263     return transit_state_sb (err, mctx, state);
2264 #endif
2265 
2266   /* Use transition table  */
2267   ch = re_string_fetch_byte (&mctx->input);
2268   for (;;)
2269     {
2270       trtable = state->trtable;
2271       if (BE (trtable != NULL, 1))
2272 	return trtable[ch];
2273 
2274       trtable = state->word_trtable;
2275       if (BE (trtable != NULL, 1))
2276         {
2277 	  unsigned int context;
2278 	  context
2279 	    = re_string_context_at (&mctx->input,
2280 				    re_string_cur_idx (&mctx->input) - 1,
2281 				    mctx->eflags);
2282 	  if (IS_WORD_CONTEXT (context))
2283 	    return trtable[ch + SBC_MAX];
2284 	  else
2285 	    return trtable[ch];
2286 	}
2287 
2288       if (!build_trtable (mctx->dfa, state))
2289 	{
2290 	  *err = REG_ESPACE;
2291 	  return NULL;
2292 	}
2293 
2294       /* Retry, we now have a transition table.  */
2295     }
2296 }
2297 
2298 /* Update the state_log if we need */
2299 re_dfastate_t *
2300 internal_function
2301 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2302 		      re_dfastate_t *next_state)
2303 {
2304   re_dfa_t *const dfa = mctx->dfa;
2305   Idx cur_idx = re_string_cur_idx (&mctx->input);
2306 
2307   if (cur_idx > mctx->state_log_top)
2308     {
2309       mctx->state_log[cur_idx] = next_state;
2310       mctx->state_log_top = cur_idx;
2311     }
2312   else if (mctx->state_log[cur_idx] == 0)
2313     {
2314       mctx->state_log[cur_idx] = next_state;
2315     }
2316   else
2317     {
2318       re_dfastate_t *pstate;
2319       unsigned int context;
2320       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2321       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2322          the destination of a multibyte char/collating element/
2323          back reference.  Then the next state is the union set of
2324          these destinations and the results of the transition table.  */
2325       pstate = mctx->state_log[cur_idx];
2326       log_nodes = pstate->entrance_nodes;
2327       if (next_state != NULL)
2328         {
2329           table_nodes = next_state->entrance_nodes;
2330           *err = re_node_set_init_union (&next_nodes, table_nodes,
2331 					     log_nodes);
2332           if (BE (*err != REG_NOERROR, 0))
2333 	    return NULL;
2334         }
2335       else
2336         next_nodes = *log_nodes;
2337       /* Note: We already add the nodes of the initial state,
2338 	 then we don't need to add them here.  */
2339 
2340       context = re_string_context_at (&mctx->input,
2341 				      re_string_cur_idx (&mctx->input) - 1,
2342 				      mctx->eflags);
2343       next_state = mctx->state_log[cur_idx]
2344         = re_acquire_state_context (err, dfa, &next_nodes, context);
2345       /* We don't need to check errors here, since the return value of
2346          this function is next_state and ERR is already set.  */
2347 
2348       if (table_nodes != NULL)
2349         re_node_set_free (&next_nodes);
2350     }
2351 
2352   if (BE (dfa->nbackref, 0) && next_state != NULL)
2353     {
2354       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2355 	 later.  We must check them here, since the back references in the
2356 	 next state might use them.  */
2357       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2358 					cur_idx);
2359       if (BE (*err != REG_NOERROR, 0))
2360 	return NULL;
2361 
2362       /* If the next state has back references.  */
2363       if (next_state->has_backref)
2364 	{
2365 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2366 	  if (BE (*err != REG_NOERROR, 0))
2367 	    return NULL;
2368 	  next_state = mctx->state_log[cur_idx];
2369 	}
2370     }
2371 
2372   return next_state;
2373 }
2374 
2375 /* Skip bytes in the input that correspond to part of a
2376    multi-byte match, then look in the log for a state
2377    from which to restart matching.  */
2378 static re_dfastate_t *
2379 internal_function
2380 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2381 {
2382   re_dfastate_t *cur_state = NULL;
2383   do
2384     {
2385       Idx max = mctx->state_log_top;
2386       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2387 
2388       do
2389 	{
2390           if (++cur_str_idx > max)
2391             return NULL;
2392           re_string_skip_bytes (&mctx->input, 1);
2393 	}
2394       while (mctx->state_log[cur_str_idx] == NULL);
2395 
2396       cur_state = merge_state_with_log (err, mctx, NULL);
2397     }
2398   while (*err == REG_NOERROR && cur_state == NULL);
2399   return cur_state;
2400 }
2401 
2402 /* Helper functions for transit_state.  */
2403 
2404 /* From the node set CUR_NODES, pick up the nodes whose types are
2405    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2406    expression. And register them to use them later for evaluating the
2407    correspoding back references.  */
2408 
2409 static reg_errcode_t
2410 internal_function
2411 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2412 			   Idx str_idx)
2413 {
2414   re_dfa_t *const dfa = mctx->dfa;
2415   Idx node_idx;
2416   reg_errcode_t err;
2417 
2418   /* TODO: This isn't efficient.
2419 	   Because there might be more than one nodes whose types are
2420 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2421 	   nodes.
2422 	   E.g. RE: (a){2}  */
2423   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2424     {
2425       Idx node = cur_nodes->elems[node_idx];
2426       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2427 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2428 	  && (dfa->used_bkref_map
2429 	      & ((bitset_word) 1 << dfa->nodes[node].opr.idx)))
2430 	{
2431 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2432 	  if (BE (err != REG_NOERROR, 0))
2433 	    return err;
2434 	}
2435     }
2436   return REG_NOERROR;
2437 }
2438 
2439 #if 0
2440 /* Return the next state to which the current state STATE will transit by
2441    accepting the current input byte.  */
2442 
2443 static re_dfastate_t *
2444 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2445 		  re_dfastate_t *state)
2446 {
2447   re_dfa_t *const dfa = mctx->dfa;
2448   re_node_set next_nodes;
2449   re_dfastate_t *next_state;
2450   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2451   unsigned int context;
2452 
2453   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2454   if (BE (*err != REG_NOERROR, 0))
2455     return NULL;
2456   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2457     {
2458       Idx cur_node = state->nodes.elems[node_cnt];
2459       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2460 	{
2461 	  *err = re_node_set_merge (&next_nodes,
2462 				    dfa->eclosures + dfa->nexts[cur_node]);
2463 	  if (BE (*err != REG_NOERROR, 0))
2464 	    {
2465 	      re_node_set_free (&next_nodes);
2466 	      return NULL;
2467 	    }
2468 	}
2469     }
2470   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2471   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2472   /* We don't need to check errors here, since the return value of
2473      this function is next_state and ERR is already set.  */
2474 
2475   re_node_set_free (&next_nodes);
2476   re_string_skip_bytes (&mctx->input, 1);
2477   return next_state;
2478 }
2479 #endif
2480 
2481 #ifdef RE_ENABLE_I18N
2482 static reg_errcode_t
2483 internal_function
2484 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2485 {
2486   re_dfa_t *const dfa = mctx->dfa;
2487   reg_errcode_t err;
2488   Idx i;
2489 
2490   for (i = 0; i < pstate->nodes.nelem; ++i)
2491     {
2492       re_node_set dest_nodes, *new_nodes;
2493       Idx cur_node_idx = pstate->nodes.elems[i];
2494       int naccepted;
2495       Idx dest_idx;
2496       unsigned int context;
2497       re_dfastate_t *dest_state;
2498 
2499       if (!dfa->nodes[cur_node_idx].accept_mb)
2500         continue;
2501 
2502       if (dfa->nodes[cur_node_idx].constraint)
2503 	{
2504 	  context = re_string_context_at (&mctx->input,
2505 					  re_string_cur_idx (&mctx->input),
2506 					  mctx->eflags);
2507 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2508 					   context))
2509 	    continue;
2510 	}
2511 
2512       /* How many bytes the node can accept?  */
2513       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2514 					   re_string_cur_idx (&mctx->input));
2515       if (naccepted == 0)
2516 	continue;
2517 
2518       /* The node can accepts `naccepted' bytes.  */
2519       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2520       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2521 			       : mctx->max_mb_elem_len);
2522       err = clean_state_log_if_needed (mctx, dest_idx);
2523       if (BE (err != REG_NOERROR, 0))
2524 	return err;
2525 #ifdef DEBUG
2526       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2527 #endif
2528       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2529 
2530       dest_state = mctx->state_log[dest_idx];
2531       if (dest_state == NULL)
2532 	dest_nodes = *new_nodes;
2533       else
2534 	{
2535 	  err = re_node_set_init_union (&dest_nodes,
2536 					dest_state->entrance_nodes, new_nodes);
2537 	  if (BE (err != REG_NOERROR, 0))
2538 	    return err;
2539 	}
2540       context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags);
2541       mctx->state_log[dest_idx]
2542 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2543       if (dest_state != NULL)
2544 	re_node_set_free (&dest_nodes);
2545       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2546 	return err;
2547     }
2548   return REG_NOERROR;
2549 }
2550 #endif /* RE_ENABLE_I18N */
2551 
2552 static reg_errcode_t
2553 internal_function
2554 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2555 {
2556   re_dfa_t *const dfa = mctx->dfa;
2557   reg_errcode_t err;
2558   Idx i;
2559   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2560 
2561   for (i = 0; i < nodes->nelem; ++i)
2562     {
2563       Idx dest_str_idx, prev_nelem, bkc_idx;
2564       Idx node_idx = nodes->elems[i];
2565       unsigned int context;
2566       const re_token_t *node = dfa->nodes + node_idx;
2567       re_node_set *new_dest_nodes;
2568 
2569       /* Check whether `node' is a backreference or not.  */
2570       if (node->type != OP_BACK_REF)
2571 	continue;
2572 
2573       if (node->constraint)
2574 	{
2575 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2576 					  mctx->eflags);
2577 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2578 	    continue;
2579 	}
2580 
2581       /* `node' is a backreference.
2582 	 Check the substring which the substring matched.  */
2583       bkc_idx = mctx->nbkref_ents;
2584       err = get_subexp (mctx, node_idx, cur_str_idx);
2585       if (BE (err != REG_NOERROR, 0))
2586 	goto free_return;
2587 
2588       /* And add the epsilon closures (which is `new_dest_nodes') of
2589 	 the backreference to appropriate state_log.  */
2590 #ifdef DEBUG
2591       assert (dfa->nexts[node_idx] != REG_MISSING);
2592 #endif
2593       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2594 	{
2595 	  Idx subexp_len;
2596 	  re_dfastate_t *dest_state;
2597 	  struct re_backref_cache_entry *bkref_ent;
2598 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2599 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2600 	    continue;
2601 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2602 	  new_dest_nodes = (subexp_len == 0
2603 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2604 			    : dfa->eclosures + dfa->nexts[node_idx]);
2605 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2606 			  - bkref_ent->subexp_from);
2607 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2608 					  mctx->eflags);
2609 	  dest_state = mctx->state_log[dest_str_idx];
2610 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2611 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2612 	  /* Add `new_dest_node' to state_log.  */
2613 	  if (dest_state == NULL)
2614 	    {
2615 	      mctx->state_log[dest_str_idx]
2616 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2617 					    context);
2618 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2619 		      && err != REG_NOERROR, 0))
2620 		goto free_return;
2621 	    }
2622 	  else
2623 	    {
2624 	      re_node_set dest_nodes;
2625 	      err = re_node_set_init_union (&dest_nodes,
2626 					    dest_state->entrance_nodes,
2627 					    new_dest_nodes);
2628 	      if (BE (err != REG_NOERROR, 0))
2629 		{
2630 		  re_node_set_free (&dest_nodes);
2631 		  goto free_return;
2632 		}
2633 	      mctx->state_log[dest_str_idx]
2634 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2635 	      re_node_set_free (&dest_nodes);
2636 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2637 		      && err != REG_NOERROR, 0))
2638 		goto free_return;
2639 	    }
2640 	  /* We need to check recursively if the backreference can epsilon
2641 	     transit.  */
2642 	  if (subexp_len == 0
2643 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2644 	    {
2645 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2646 					       cur_str_idx);
2647 	      if (BE (err != REG_NOERROR, 0))
2648 		goto free_return;
2649 	      err = transit_state_bkref (mctx, new_dest_nodes);
2650 	      if (BE (err != REG_NOERROR, 0))
2651 		goto free_return;
2652 	    }
2653 	}
2654     }
2655   err = REG_NOERROR;
2656  free_return:
2657   return err;
2658 }
2659 
2660 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2661    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2662    Note that we might collect inappropriate candidates here.
2663    However, the cost of checking them strictly here is too high, then we
2664    delay these checking for prune_impossible_nodes().  */
2665 
2666 static reg_errcode_t
2667 internal_function
2668 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2669 {
2670   re_dfa_t *const dfa = mctx->dfa;
2671   Idx subexp_num, sub_top_idx;
2672   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2673   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2674   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2675   if (cache_idx != REG_MISSING)
2676     {
2677       const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
2678       do
2679         if (entry->node == bkref_node)
2680 	  return REG_NOERROR; /* We already checked it.  */
2681       while (entry++->more);
2682     }
2683 
2684   subexp_num = dfa->nodes[bkref_node].opr.idx;
2685 
2686   /* For each sub expression  */
2687   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2688     {
2689       reg_errcode_t err;
2690       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2691       re_sub_match_last_t *sub_last;
2692       Idx sub_last_idx, sl_str, bkref_str_off;
2693 
2694       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2695 	continue; /* It isn't related.  */
2696 
2697       sl_str = sub_top->str_idx;
2698       bkref_str_off = bkref_str_idx;
2699       /* At first, check the last node of sub expressions we already
2700 	 evaluated.  */
2701       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2702 	{
2703 	  regoff_t sl_str_diff;
2704 	  sub_last = sub_top->lasts[sub_last_idx];
2705 	  sl_str_diff = sub_last->str_idx - sl_str;
2706 	  /* The matched string by the sub expression match with the substring
2707 	     at the back reference?  */
2708 	  if (sl_str_diff > 0)
2709 	    {
2710 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2711 		{
2712 		  /* Not enough chars for a successful match.  */
2713 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2714 		    break;
2715 
2716 		  err = clean_state_log_if_needed (mctx,
2717 						   bkref_str_off
2718 						   + sl_str_diff);
2719 		  if (BE (err != REG_NOERROR, 0))
2720 		    return err;
2721 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2722 		}
2723 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2724 		break; /* We don't need to search this sub expression any more.  */
2725 	    }
2726 	  bkref_str_off += sl_str_diff;
2727 	  sl_str += sl_str_diff;
2728 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2729 				bkref_str_idx);
2730 
2731 	  /* Reload buf, since the preceding call might have reallocated
2732 	     the buffer.  */
2733 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2734 
2735 	  if (err == REG_NOMATCH)
2736 	    continue;
2737 	  if (BE (err != REG_NOERROR, 0))
2738 	    return err;
2739 	}
2740 
2741       if (sub_last_idx < sub_top->nlasts)
2742 	continue;
2743       if (sub_last_idx > 0)
2744 	++sl_str;
2745       /* Then, search for the other last nodes of the sub expression.  */
2746       for (; sl_str <= bkref_str_idx; ++sl_str)
2747 	{
2748 	  Idx cls_node;
2749 	  regoff_t sl_str_off;
2750 	  const re_node_set *nodes;
2751 	  sl_str_off = sl_str - sub_top->str_idx;
2752 	  /* The matched string by the sub expression match with the substring
2753 	     at the back reference?  */
2754 	  if (sl_str_off > 0)
2755 	    {
2756 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2757 		{
2758 		  /* If we are at the end of the input, we cannot match.  */
2759 		  if (bkref_str_off >= mctx->input.len)
2760 		    break;
2761 
2762 		  err = extend_buffers (mctx);
2763 		  if (BE (err != REG_NOERROR, 0))
2764 		    return err;
2765 
2766 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2767 		}
2768 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2769 		break; /* We don't need to search this sub expression
2770 			  any more.  */
2771 	    }
2772 	  if (mctx->state_log[sl_str] == NULL)
2773 	    continue;
2774 	  /* Does this state have a ')' of the sub expression?  */
2775 	  nodes = &mctx->state_log[sl_str]->nodes;
2776 	  cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP);
2777 	  if (cls_node == REG_MISSING)
2778 	    continue; /* No.  */
2779 	  if (sub_top->path == NULL)
2780 	    {
2781 	      sub_top->path = re_calloc (state_array_t,
2782 					 sl_str - sub_top->str_idx + 1);
2783 	      if (sub_top->path == NULL)
2784 		return REG_ESPACE;
2785 	    }
2786 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2787 	     in the current context?  */
2788 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2789 			       sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP);
2790 	  if (err == REG_NOMATCH)
2791 	      continue;
2792 	  if (BE (err != REG_NOERROR, 0))
2793 	      return err;
2794 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2795 	  if (BE (sub_last == NULL, 0))
2796 	    return REG_ESPACE;
2797 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2798 				bkref_str_idx);
2799 	  if (err == REG_NOMATCH)
2800 	    continue;
2801 	}
2802     }
2803   return REG_NOERROR;
2804 }
2805 
2806 /* Helper functions for get_subexp().  */
2807 
2808 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2809    If it can arrive, register the sub expression expressed with SUB_TOP
2810    and SUB_LAST.  */
2811 
2812 static reg_errcode_t
2813 internal_function
2814 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2815 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2816 {
2817   reg_errcode_t err;
2818   Idx to_idx;
2819   /* Can the subexpression arrive the back reference?  */
2820   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2821 		       sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP);
2822   if (err != REG_NOERROR)
2823     return err;
2824   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2825 			     sub_last->str_idx);
2826   if (BE (err != REG_NOERROR, 0))
2827     return err;
2828   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2829   return clean_state_log_if_needed (mctx, to_idx);
2830 }
2831 
2832 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2833    Search '(' if FL_OPEN, or search ')' otherwise.
2834    TODO: This function isn't efficient...
2835 	 Because there might be more than one nodes whose types are
2836 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2837 	 nodes.
2838 	 E.g. RE: (a){2}  */
2839 
2840 static Idx
2841 internal_function
2842 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2843 		  Idx subexp_idx, int type)
2844 {
2845   Idx cls_idx;
2846   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2847     {
2848       Idx cls_node = nodes->elems[cls_idx];
2849       const re_token_t *node = dfa->nodes + cls_node;
2850       if (node->type == type
2851 	  && node->opr.idx == subexp_idx)
2852 	return cls_node;
2853     }
2854   return REG_MISSING;
2855 }
2856 
2857 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2858    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2859    heavily reused.
2860    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2861 
2862 static reg_errcode_t
2863 internal_function
2864 check_arrival (re_match_context_t *mctx, state_array_t *path,
2865 	       Idx top_node, Idx top_str, Idx last_node, Idx last_str,
2866 	       int type)
2867 {
2868   re_dfa_t *const dfa = mctx->dfa;
2869   reg_errcode_t err;
2870   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2871   re_dfastate_t *cur_state = NULL;
2872   re_node_set *cur_nodes, next_nodes;
2873   re_dfastate_t **backup_state_log;
2874   unsigned int context;
2875 
2876   subexp_num = dfa->nodes[top_node].opr.idx;
2877   /* Extend the buffer if we need.  */
2878   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2879     {
2880       re_dfastate_t **new_array;
2881       Idx old_alloc = path->alloc;
2882       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
2883       if (BE (new_alloc < old_alloc, 0))
2884 	return REG_ESPACE;
2885       new_array = re_xrealloc (path->array, re_dfastate_t *, new_alloc);
2886       if (BE (new_array == NULL, 0))
2887 	return REG_ESPACE;
2888       path->array = new_array;
2889       path->alloc = new_alloc;
2890       memset (new_array + old_alloc, '\0',
2891 	      sizeof (re_dfastate_t *) * (new_alloc - old_alloc));
2892     }
2893 
2894   str_idx = path->next_idx == 0 ? top_str : path->next_idx;
2895 
2896   /* Temporary modify MCTX.  */
2897   backup_state_log = mctx->state_log;
2898   backup_cur_idx = mctx->input.cur_idx;
2899   mctx->state_log = path->array;
2900   mctx->input.cur_idx = str_idx;
2901 
2902   /* Setup initial node set.  */
2903   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2904   if (str_idx == top_str)
2905     {
2906       err = re_node_set_init_1 (&next_nodes, top_node);
2907       if (BE (err != REG_NOERROR, 0))
2908 	return err;
2909       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2910       if (BE (err != REG_NOERROR, 0))
2911 	{
2912 	  re_node_set_free (&next_nodes);
2913 	  return err;
2914 	}
2915     }
2916   else
2917     {
2918       cur_state = mctx->state_log[str_idx];
2919       if (cur_state && cur_state->has_backref)
2920 	{
2921 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2922 	  if (BE ( err != REG_NOERROR, 0))
2923 	    return err;
2924 	}
2925       else
2926 	re_node_set_init_empty (&next_nodes);
2927     }
2928   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2929     {
2930       if (next_nodes.nelem)
2931 	{
2932 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2933 				    subexp_num, type);
2934 	  if (BE ( err != REG_NOERROR, 0))
2935 	    {
2936 	      re_node_set_free (&next_nodes);
2937 	      return err;
2938 	    }
2939 	}
2940       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2941       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2942 	{
2943 	  re_node_set_free (&next_nodes);
2944 	  return err;
2945 	}
2946       mctx->state_log[str_idx] = cur_state;
2947     }
2948 
2949   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2950     {
2951       re_node_set_empty (&next_nodes);
2952       if (mctx->state_log[str_idx + 1])
2953 	{
2954 	  err = re_node_set_merge (&next_nodes,
2955 				   &mctx->state_log[str_idx + 1]->nodes);
2956 	  if (BE (err != REG_NOERROR, 0))
2957 	    {
2958 	      re_node_set_free (&next_nodes);
2959 	      return err;
2960 	    }
2961 	}
2962       if (cur_state)
2963 	{
2964 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2965 					      &cur_state->non_eps_nodes, &next_nodes);
2966 	  if (BE (err != REG_NOERROR, 0))
2967 	    {
2968 	      re_node_set_free (&next_nodes);
2969 	      return err;
2970 	    }
2971 	}
2972       ++str_idx;
2973       if (next_nodes.nelem)
2974 	{
2975 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2976 	  if (BE (err != REG_NOERROR, 0))
2977 	    {
2978 	      re_node_set_free (&next_nodes);
2979 	      return err;
2980 	    }
2981 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2982 				    subexp_num, type);
2983 	  if (BE ( err != REG_NOERROR, 0))
2984 	    {
2985 	      re_node_set_free (&next_nodes);
2986 	      return err;
2987 	    }
2988 	}
2989       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2990       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2991       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2992 	{
2993 	  re_node_set_free (&next_nodes);
2994 	  return err;
2995 	}
2996       mctx->state_log[str_idx] = cur_state;
2997       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2998     }
2999   re_node_set_free (&next_nodes);
3000   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3001 	       : &mctx->state_log[last_str]->nodes);
3002   path->next_idx = str_idx;
3003 
3004   /* Fix MCTX.  */
3005   mctx->state_log = backup_state_log;
3006   mctx->input.cur_idx = backup_cur_idx;
3007 
3008   /* Then check the current node set has the node LAST_NODE.  */
3009   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3010     return REG_NOERROR;
3011 
3012   return REG_NOMATCH;
3013 }
3014 
3015 /* Helper functions for check_arrival.  */
3016 
3017 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3018    to NEXT_NODES.
3019    TODO: This function is similar to the functions transit_state*(),
3020 	 however this function has many additional works.
3021 	 Can't we unify them?  */
3022 
3023 static reg_errcode_t
3024 internal_function
3025 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3026 			      re_node_set *cur_nodes,
3027 			      re_node_set *next_nodes)
3028 {
3029   re_dfa_t *const dfa = mctx->dfa;
3030   bool ok;
3031   Idx cur_idx;
3032   reg_errcode_t err;
3033   re_node_set union_set;
3034   re_node_set_init_empty (&union_set);
3035   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3036     {
3037       int naccepted = 0;
3038       Idx cur_node = cur_nodes->elems[cur_idx];
3039 #ifdef DEBUG
3040       re_token_type_t type = dfa->nodes[cur_node].type;
3041       assert (!IS_EPSILON_NODE (type));
3042 #endif
3043 #ifdef RE_ENABLE_I18N
3044       /* If the node may accept `multi byte'.  */
3045       if (dfa->nodes[cur_node].accept_mb)
3046 	{
3047 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3048 					       str_idx);
3049 	  if (naccepted > 1)
3050 	    {
3051 	      re_dfastate_t *dest_state;
3052 	      Idx next_node = dfa->nexts[cur_node];
3053 	      Idx next_idx = str_idx + naccepted;
3054 	      dest_state = mctx->state_log[next_idx];
3055 	      re_node_set_empty (&union_set);
3056 	      if (dest_state)
3057 		{
3058 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3059 		  if (BE (err != REG_NOERROR, 0))
3060 		    {
3061 		      re_node_set_free (&union_set);
3062 		      return err;
3063 		    }
3064 		}
3065 	      ok = re_node_set_insert (&union_set, next_node);
3066 	      if (BE (! ok, 0))
3067 		{
3068 		  re_node_set_free (&union_set);
3069 		  return REG_ESPACE;
3070 		}
3071 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3072 							    &union_set);
3073 	      if (BE (mctx->state_log[next_idx] == NULL
3074 		      && err != REG_NOERROR, 0))
3075 		{
3076 		  re_node_set_free (&union_set);
3077 		  return err;
3078 		}
3079 	    }
3080 	}
3081 #endif /* RE_ENABLE_I18N */
3082       if (naccepted
3083 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3084 	{
3085 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3086 	  if (BE (! ok, 0))
3087 	    {
3088 	      re_node_set_free (&union_set);
3089 	      return REG_ESPACE;
3090 	    }
3091 	}
3092     }
3093   re_node_set_free (&union_set);
3094   return REG_NOERROR;
3095 }
3096 
3097 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3098    CUR_NODES, however exclude the nodes which are:
3099     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3100     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3101 */
3102 
3103 static reg_errcode_t
3104 internal_function
3105 check_arrival_expand_ecl (re_dfa_t *dfa, re_node_set *cur_nodes,
3106 			  Idx ex_subexp, int type)
3107 {
3108   reg_errcode_t err;
3109   Idx idx, outside_node;
3110   re_node_set new_nodes;
3111 #ifdef DEBUG
3112   assert (cur_nodes->nelem);
3113 #endif
3114   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3115   if (BE (err != REG_NOERROR, 0))
3116     return err;
3117   /* Create a new node set NEW_NODES with the nodes which are epsilon
3118      closures of the node in CUR_NODES.  */
3119 
3120   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3121     {
3122       Idx cur_node = cur_nodes->elems[idx];
3123       re_node_set *eclosure = dfa->eclosures + cur_node;
3124       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3125       if (outside_node == REG_MISSING)
3126 	{
3127 	  /* There are no problematic nodes, just merge them.  */
3128 	  err = re_node_set_merge (&new_nodes, eclosure);
3129 	  if (BE (err != REG_NOERROR, 0))
3130 	    {
3131 	      re_node_set_free (&new_nodes);
3132 	      return err;
3133 	    }
3134 	}
3135       else
3136 	{
3137 	  /* There are problematic nodes, re-calculate incrementally.  */
3138 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3139 					      ex_subexp, type);
3140 	  if (BE (err != REG_NOERROR, 0))
3141 	    {
3142 	      re_node_set_free (&new_nodes);
3143 	      return err;
3144 	    }
3145 	}
3146     }
3147   re_node_set_free (cur_nodes);
3148   *cur_nodes = new_nodes;
3149   return REG_NOERROR;
3150 }
3151 
3152 /* Helper function for check_arrival_expand_ecl.
3153    Check incrementally the epsilon closure of TARGET, and if it isn't
3154    problematic append it to DST_NODES.  */
3155 
3156 static reg_errcode_t
3157 internal_function
3158 check_arrival_expand_ecl_sub (re_dfa_t *dfa, re_node_set *dst_nodes,
3159 			      Idx target, Idx ex_subexp, int type)
3160 {
3161   Idx cur_node;
3162   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3163     {
3164       bool ok;
3165 
3166       if (dfa->nodes[cur_node].type == type
3167 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3168 	{
3169 	  if (type == OP_CLOSE_SUBEXP)
3170 	    {
3171 	      ok = re_node_set_insert (dst_nodes, cur_node);
3172 	      if (BE (! ok, 0))
3173 		return REG_ESPACE;
3174 	    }
3175 	  break;
3176 	}
3177       ok = re_node_set_insert (dst_nodes, cur_node);
3178       if (BE (! ok, 0))
3179 	return REG_ESPACE;
3180       if (dfa->edests[cur_node].nelem == 0)
3181 	break;
3182       if (dfa->edests[cur_node].nelem == 2)
3183 	{
3184 	  reg_errcode_t ret =
3185 	    check_arrival_expand_ecl_sub (dfa, dst_nodes,
3186 					  dfa->edests[cur_node].elems[1],
3187 					  ex_subexp, type);
3188 	  if (BE (ret != REG_NOERROR, 0))
3189 	    return ret;
3190 	}
3191       cur_node = dfa->edests[cur_node].elems[0];
3192     }
3193   return REG_NOERROR;
3194 }
3195 
3196 
3197 /* For all the back references in the current state, calculate the
3198    destination of the back references by the appropriate entry
3199    in MCTX->BKREF_ENTS.  */
3200 
3201 static reg_errcode_t
3202 internal_function
3203 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3204 		    Idx cur_str, Idx subexp_num, int type)
3205 {
3206   re_dfa_t *const dfa = mctx->dfa;
3207   reg_errcode_t err;
3208   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3209   struct re_backref_cache_entry *ent;
3210 
3211   if (cache_idx_start == REG_MISSING)
3212     return REG_NOERROR;
3213 
3214  restart:
3215   ent = mctx->bkref_ents + cache_idx_start;
3216   do
3217     {
3218       Idx to_idx, next_node;
3219 
3220       /* Is this entry ENT is appropriate?  */
3221       if (!re_node_set_contains (cur_nodes, ent->node))
3222 	continue; /* No.  */
3223 
3224       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3225       /* Calculate the destination of the back reference, and append it
3226 	 to MCTX->STATE_LOG.  */
3227       if (to_idx == cur_str)
3228 	{
3229 	  /* The backreference did epsilon transit, we must re-check all the
3230 	     node in the current state.  */
3231 	  re_node_set new_dests;
3232 	  reg_errcode_t err2, err3;
3233 	  next_node = dfa->edests[ent->node].elems[0];
3234 	  if (re_node_set_contains (cur_nodes, next_node))
3235 	    continue;
3236 	  err = re_node_set_init_1 (&new_dests, next_node);
3237 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3238 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3239 	  re_node_set_free (&new_dests);
3240 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3241 		  || err3 != REG_NOERROR, 0))
3242 	    {
3243 	      err = (err != REG_NOERROR ? err
3244 		     : (err2 != REG_NOERROR ? err2 : err3));
3245 	      return err;
3246 	    }
3247 	  /* TODO: It is still inefficient...  */
3248 	  goto restart;
3249 	}
3250       else
3251 	{
3252 	  re_node_set union_set;
3253 	  next_node = dfa->nexts[ent->node];
3254 	  if (mctx->state_log[to_idx])
3255 	    {
3256 	      bool ok;
3257 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3258 					next_node))
3259 		continue;
3260 	      err = re_node_set_init_copy (&union_set,
3261 					   &mctx->state_log[to_idx]->nodes);
3262 	      ok = re_node_set_insert (&union_set, next_node);
3263 	      if (BE (err != REG_NOERROR || ! ok, 0))
3264 		{
3265 		  re_node_set_free (&union_set);
3266 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3267 		  return err;
3268 		}
3269 	    }
3270 	  else
3271 	    {
3272 	      err = re_node_set_init_1 (&union_set, next_node);
3273 	      if (BE (err != REG_NOERROR, 0))
3274 		return err;
3275 	    }
3276 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3277 	  re_node_set_free (&union_set);
3278 	  if (BE (mctx->state_log[to_idx] == NULL
3279 		  && err != REG_NOERROR, 0))
3280 	    return err;
3281 	}
3282     }
3283   while (ent++->more);
3284   return REG_NOERROR;
3285 }
3286 
3287 /* Build transition table for the state.
3288    Return true if successful.  */
3289 
3290 static bool
3291 internal_function
3292 build_trtable (re_dfa_t *dfa, re_dfastate_t *state)
3293 {
3294   reg_errcode_t err;
3295   Idx i, j;
3296   int ch;
3297   bool need_word_trtable = false;
3298   bitset_word elem, mask;
3299   bool dests_node_malloced = false, dest_states_malloced = false;
3300   Idx ndests; /* Number of the destination states from `state'.  */
3301   re_dfastate_t **trtable;
3302   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3303   re_node_set follows, *dests_node;
3304   bitset *dests_ch;
3305   bitset acceptable;
3306 
3307   struct dests_alloc
3308   {
3309     re_node_set dests_node[SBC_MAX];
3310     bitset dests_ch[SBC_MAX];
3311   } *dests_alloc;
3312 
3313   /* We build DFA states which corresponds to the destination nodes
3314      from `state'.  `dests_node[i]' represents the nodes which i-th
3315      destination state contains, and `dests_ch[i]' represents the
3316      characters which i-th destination state accepts.  */
3317 #ifndef __SSP__
3318   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3319     dests_alloc = (struct dests_alloc *) alloca (sizeof dests_alloc[0]);
3320   else
3321 #endif
3322     {
3323       dests_alloc = re_malloc (struct dests_alloc, 1);
3324       if (BE (dests_alloc == NULL, 0))
3325 	return false;
3326       dests_node_malloced = true;
3327     }
3328   dests_node = dests_alloc->dests_node;
3329   dests_ch = dests_alloc->dests_ch;
3330 
3331   /* Initialize transiton table.  */
3332   state->word_trtable = state->trtable = NULL;
3333 
3334   /* At first, group all nodes belonging to `state' into several
3335      destinations.  */
3336   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3337   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3338     {
3339       if (dests_node_malloced)
3340 	free (dests_alloc);
3341       if (ndests == 0)
3342 	{
3343 	  state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3344 	  return true;
3345 	}
3346       return false;
3347     }
3348 
3349   err = re_node_set_alloc (&follows, ndests + 1);
3350   if (BE (err != REG_NOERROR, 0))
3351     goto out_free;
3352 
3353   /* Avoid arithmetic overflow in size calculation.  */
3354   if (BE (((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)
3355 	   / (3 * sizeof (re_dfastate_t *)))
3356 	  < ndests, 0))
3357     goto out_free;
3358 
3359 #ifndef __SSP__
3360   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
3361 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3362     dest_states = (re_dfastate_t **)
3363       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3364   else
3365 #endif
3366     {
3367       dest_states = (re_dfastate_t **)
3368 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3369       if (BE (dest_states == NULL, 0))
3370 	{
3371 out_free:
3372 	  if (dest_states_malloced)
3373 	    free (dest_states);
3374 	  re_node_set_free (&follows);
3375 	  for (i = 0; i < ndests; ++i)
3376 	    re_node_set_free (dests_node + i);
3377 	  if (dests_node_malloced)
3378 	    free (dests_alloc);
3379 	  return false;
3380 	}
3381       dest_states_malloced = true;
3382     }
3383   dest_states_word = dest_states + ndests;
3384   dest_states_nl = dest_states_word + ndests;
3385   bitset_empty (acceptable);
3386 
3387   /* Then build the states for all destinations.  */
3388   for (i = 0; i < ndests; ++i)
3389     {
3390       Idx next_node;
3391       re_node_set_empty (&follows);
3392       /* Merge the follows of this destination states.  */
3393       for (j = 0; j < dests_node[i].nelem; ++j)
3394 	{
3395 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3396 	  if (next_node != REG_MISSING)
3397 	    {
3398 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3399 	      if (BE (err != REG_NOERROR, 0))
3400 		goto out_free;
3401 	    }
3402 	}
3403       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3404       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3405 	goto out_free;
3406       /* If the new state has context constraint,
3407 	 build appropriate states for these contexts.  */
3408       if (dest_states[i]->has_constraint)
3409 	{
3410 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3411 							  CONTEXT_WORD);
3412 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3413 	    goto out_free;
3414 
3415 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3416 	    need_word_trtable = true;
3417 
3418 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3419 							CONTEXT_NEWLINE);
3420 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3421 	    goto out_free;
3422  	}
3423       else
3424 	{
3425 	  dest_states_word[i] = dest_states[i];
3426 	  dest_states_nl[i] = dest_states[i];
3427 	}
3428       bitset_merge (acceptable, dests_ch[i]);
3429     }
3430 
3431   if (!BE (need_word_trtable, 0))
3432     {
3433       /* We don't care about whether the following character is a word
3434 	 character, or we are in a single-byte character set so we can
3435 	 discern by looking at the character code: allocate a
3436 	 256-entry transition table.  */
3437       trtable = state->trtable = re_calloc (re_dfastate_t *, SBC_MAX);
3438       if (BE (trtable == NULL, 0))
3439 	goto out_free;
3440 
3441       /* For all characters ch...:  */
3442       for (i = 0; i < BITSET_WORDS; ++i)
3443 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3444 	     elem;
3445 	     mask <<= 1, elem >>= 1, ++ch)
3446 	  if (BE (elem & 1, 0))
3447 	    {
3448 	      /* There must be exactly one destination which accepts
3449 		 character ch.  See group_nodes_into_DFAstates.  */
3450 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3451 		;
3452 
3453 	      /* j-th destination accepts the word character ch.  */
3454 	      if (dfa->word_char[i] & mask)
3455 		trtable[ch] = dest_states_word[j];
3456 	      else
3457 		trtable[ch] = dest_states[j];
3458 	    }
3459     }
3460   else
3461     {
3462       /* We care about whether the following character is a word
3463 	 character, and we are in a multi-byte character set: discern
3464 	 by looking at the character code: build two 256-entry
3465 	 transition tables, one starting at trtable[0] and one
3466 	 starting at trtable[SBC_MAX].  */
3467       trtable = state->word_trtable = re_calloc (re_dfastate_t *, 2 * SBC_MAX);
3468       if (BE (trtable == NULL, 0))
3469 	goto out_free;
3470 
3471       /* For all characters ch...:  */
3472       for (i = 0; i < BITSET_WORDS; ++i)
3473 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3474 	     elem;
3475 	     mask <<= 1, elem >>= 1, ++ch)
3476 	  if (BE (elem & 1, 0))
3477 	    {
3478 	      /* There must be exactly one destination which accepts
3479 		 character ch.  See group_nodes_into_DFAstates.  */
3480 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3481 		;
3482 
3483 	      /* j-th destination accepts the word character ch.  */
3484 	      trtable[ch] = dest_states[j];
3485 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3486 	    }
3487     }
3488 
3489   /* new line */
3490   if (bitset_contain (acceptable, NEWLINE_CHAR))
3491     {
3492       /* The current state accepts newline character.  */
3493       for (j = 0; j < ndests; ++j)
3494 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3495 	  {
3496 	    /* k-th destination accepts newline character.  */
3497 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3498 	    if (need_word_trtable)
3499 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3500 	    /* There must be only one destination which accepts
3501 	       newline.  See group_nodes_into_DFAstates.  */
3502 	    break;
3503 	  }
3504     }
3505 
3506   if (dest_states_malloced)
3507     free (dest_states);
3508 
3509   re_node_set_free (&follows);
3510   for (i = 0; i < ndests; ++i)
3511     re_node_set_free (dests_node + i);
3512 
3513   if (dests_node_malloced)
3514     free (dests_alloc);
3515 
3516   return true;
3517 }
3518 
3519 /* Group all nodes belonging to STATE into several destinations.
3520    Then for all destinations, set the nodes belonging to the destination
3521    to DESTS_NODE[i] and set the characters accepted by the destination
3522    to DEST_CH[i].  This function return the number of destinations.  */
3523 
3524 static Idx
3525 internal_function
3526 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3527 			    re_node_set *dests_node, bitset *dests_ch)
3528 {
3529   reg_errcode_t err;
3530   bool ok;
3531   Idx i, j, k;
3532   Idx ndests; /* Number of the destinations from `state'.  */
3533   bitset accepts; /* Characters a node can accept.  */
3534   const re_node_set *cur_nodes = &state->nodes;
3535   bitset_empty (accepts);
3536   ndests = 0;
3537 
3538   /* For all the nodes belonging to `state',  */
3539   for (i = 0; i < cur_nodes->nelem; ++i)
3540     {
3541       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3542       re_token_type_t type = node->type;
3543       unsigned int constraint = node->constraint;
3544 
3545       /* Enumerate all single byte character this node can accept.  */
3546       if (type == CHARACTER)
3547 	bitset_set (accepts, node->opr.c);
3548       else if (type == SIMPLE_BRACKET)
3549 	{
3550 	  bitset_merge (accepts, node->opr.sbcset);
3551 	}
3552       else if (type == OP_PERIOD)
3553 	{
3554 #ifdef RE_ENABLE_I18N
3555 	  if (dfa->mb_cur_max > 1)
3556 	    bitset_merge (accepts, dfa->sb_char);
3557 	  else
3558 #endif
3559 	    bitset_set_all (accepts);
3560 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
3561 	    bitset_clear (accepts, '\n');
3562 	  if (dfa->syntax & REG_DOT_NOT_NULL)
3563 	    bitset_clear (accepts, '\0');
3564 	}
3565 #ifdef RE_ENABLE_I18N
3566       else if (type == OP_UTF8_PERIOD)
3567         {
3568 	  if (SBC_MAX / 2 % BITSET_WORD_BITS == 0)
3569 	    memset (accepts, -1, sizeof accepts / 2);
3570 	  else
3571 	    bitset_merge (accepts, utf8_sb_map);
3572 	  if (!(dfa->syntax & REG_DOT_NEWLINE))
3573 	    bitset_clear (accepts, '\n');
3574 	  if (dfa->syntax & REG_DOT_NOT_NULL)
3575 	    bitset_clear (accepts, '\0');
3576         }
3577 #endif
3578       else
3579 	continue;
3580 
3581       /* Check the `accepts' and sift the characters which are not
3582 	 match it the context.  */
3583       if (constraint)
3584 	{
3585 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3586 	    {
3587 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3588 	      bitset_empty (accepts);
3589 	      if (accepts_newline)
3590 		bitset_set (accepts, NEWLINE_CHAR);
3591 	      else
3592 		continue;
3593 	    }
3594 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3595 	    {
3596 	      bitset_empty (accepts);
3597 	      continue;
3598 	    }
3599 
3600 	  if (constraint & NEXT_WORD_CONSTRAINT)
3601 	    {
3602 	      bitset_word any_set = 0;
3603 	      if (type == CHARACTER && !node->word_char)
3604 		{
3605 		  bitset_empty (accepts);
3606 		  continue;
3607 		}
3608 #ifdef RE_ENABLE_I18N
3609 	      if (dfa->mb_cur_max > 1)
3610 		for (j = 0; j < BITSET_WORDS; ++j)
3611 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3612 	      else
3613 #endif
3614 		for (j = 0; j < BITSET_WORDS; ++j)
3615 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3616 	      if (!any_set)
3617 		continue;
3618 	    }
3619 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3620 	    {
3621 	      bitset_word any_set = 0;
3622 	      if (type == CHARACTER && node->word_char)
3623 		{
3624 		  bitset_empty (accepts);
3625 		  continue;
3626 		}
3627 #ifdef RE_ENABLE_I18N
3628 	      if (dfa->mb_cur_max > 1)
3629 		for (j = 0; j < BITSET_WORDS; ++j)
3630 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3631 	      else
3632 #endif
3633 		for (j = 0; j < BITSET_WORDS; ++j)
3634 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3635 	      if (!any_set)
3636 		continue;
3637 	    }
3638 	}
3639 
3640       /* Then divide `accepts' into DFA states, or create a new
3641 	 state.  Above, we make sure that accepts is not empty.  */
3642       for (j = 0; j < ndests; ++j)
3643 	{
3644 	  bitset intersec; /* Intersection sets, see below.  */
3645 	  bitset remains;
3646 	  /* Flags, see below.  */
3647 	  bitset_word has_intersec, not_subset, not_consumed;
3648 
3649 	  /* Optimization, skip if this state doesn't accept the character.  */
3650 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3651 	    continue;
3652 
3653 	  /* Enumerate the intersection set of this state and `accepts'.  */
3654 	  has_intersec = 0;
3655 	  for (k = 0; k < BITSET_WORDS; ++k)
3656 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3657 	  /* And skip if the intersection set is empty.  */
3658 	  if (!has_intersec)
3659 	    continue;
3660 
3661 	  /* Then check if this state is a subset of `accepts'.  */
3662 	  not_subset = not_consumed = 0;
3663 	  for (k = 0; k < BITSET_WORDS; ++k)
3664 	    {
3665 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3666 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3667 	    }
3668 
3669 	  /* If this state isn't a subset of `accepts', create a
3670 	     new group state, which has the `remains'. */
3671 	  if (not_subset)
3672 	    {
3673 	      bitset_copy (dests_ch[ndests], remains);
3674 	      bitset_copy (dests_ch[j], intersec);
3675 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3676 	      if (BE (err != REG_NOERROR, 0))
3677 		goto error_return;
3678 	      ++ndests;
3679 	    }
3680 
3681 	  /* Put the position in the current group. */
3682 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3683 	  if (BE (! ok, 0))
3684 	    goto error_return;
3685 
3686 	  /* If all characters are consumed, go to next node. */
3687 	  if (!not_consumed)
3688 	    break;
3689 	}
3690       /* Some characters remain, create a new group. */
3691       if (j == ndests)
3692 	{
3693 	  bitset_copy (dests_ch[ndests], accepts);
3694 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3695 	  if (BE (err != REG_NOERROR, 0))
3696 	    goto error_return;
3697 	  ++ndests;
3698 	  bitset_empty (accepts);
3699 	}
3700     }
3701   return ndests;
3702  error_return:
3703   for (j = 0; j < ndests; ++j)
3704     re_node_set_free (dests_node + j);
3705   return REG_MISSING;
3706 }
3707 
3708 #ifdef RE_ENABLE_I18N
3709 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
3710    Return the number of the bytes the node accepts.
3711    STR_IDX is the current index of the input string.
3712 
3713    This function handles the nodes which can accept one character, or
3714    one collating element like '.', '[a-z]', opposite to the other nodes
3715    can only accept one byte.  */
3716 
3717 static int
3718 internal_function
3719 check_node_accept_bytes (re_dfa_t *dfa, Idx node_idx,
3720 			 const re_string_t *input, Idx str_idx)
3721 {
3722   const re_token_t *node = dfa->nodes + node_idx;
3723   int char_len, elem_len;
3724   Idx i;
3725 
3726   if (BE (node->type == OP_UTF8_PERIOD, 0))
3727     {
3728       unsigned char c = re_string_byte_at (input, str_idx), d;
3729       if (BE (c < 0xc2, 1))
3730 	return 0;
3731 
3732       if (str_idx + 2 > input->len)
3733 	return 0;
3734 
3735       d = re_string_byte_at (input, str_idx + 1);
3736       if (c < 0xe0)
3737 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3738       else if (c < 0xf0)
3739 	{
3740 	  char_len = 3;
3741 	  if (c == 0xe0 && d < 0xa0)
3742 	    return 0;
3743 	}
3744       else if (c < 0xf8)
3745 	{
3746 	  char_len = 4;
3747 	  if (c == 0xf0 && d < 0x90)
3748 	    return 0;
3749 	}
3750       else if (c < 0xfc)
3751 	{
3752 	  char_len = 5;
3753 	  if (c == 0xf8 && d < 0x88)
3754 	    return 0;
3755 	}
3756       else if (c < 0xfe)
3757 	{
3758 	  char_len = 6;
3759 	  if (c == 0xfc && d < 0x84)
3760 	    return 0;
3761 	}
3762       else
3763 	return 0;
3764 
3765       if (str_idx + char_len > input->len)
3766 	return 0;
3767 
3768       for (i = 1; i < char_len; ++i)
3769 	{
3770 	  d = re_string_byte_at (input, str_idx + i);
3771 	  if (d < 0x80 || d > 0xbf)
3772 	    return 0;
3773 	}
3774       return char_len;
3775     }
3776 
3777   char_len = re_string_char_size_at (input, str_idx);
3778   if (node->type == OP_PERIOD)
3779     {
3780       if (char_len <= 1)
3781         return 0;
3782       /* FIXME: I don't think this if is needed, as both '\n'
3783 	 and '\0' are char_len == 1.  */
3784       /* '.' accepts any one character except the following two cases.  */
3785       if ((!(dfa->syntax & REG_DOT_NEWLINE) &&
3786 	   re_string_byte_at (input, str_idx) == '\n') ||
3787 	  ((dfa->syntax & REG_DOT_NOT_NULL) &&
3788 	   re_string_byte_at (input, str_idx) == '\0'))
3789 	return 0;
3790       return char_len;
3791     }
3792 
3793   elem_len = re_string_elem_size_at (input, str_idx);
3794   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3795     return 0;
3796 
3797   if (node->type == COMPLEX_BRACKET)
3798     {
3799       const re_charset_t *cset = node->opr.mbcset;
3800 # ifdef _LIBC
3801       const unsigned char *pin
3802 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3803       Idx j;
3804       uint32_t nrules;
3805 # endif /* _LIBC */
3806       int match_len = 0;
3807       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3808 		    ? re_string_wchar_at (input, str_idx) : 0);
3809 
3810       /* match with multibyte character?  */
3811       for (i = 0; i < cset->nmbchars; ++i)
3812 	if (wc == cset->mbchars[i])
3813 	  {
3814 	    match_len = char_len;
3815 	    goto check_node_accept_bytes_match;
3816 	  }
3817       /* match with character_class?  */
3818       for (i = 0; i < cset->nchar_classes; ++i)
3819 	{
3820 	  wctype_t wt = cset->char_classes[i];
3821 	  if (__iswctype (wc, wt))
3822 	    {
3823 	      match_len = char_len;
3824 	      goto check_node_accept_bytes_match;
3825 	    }
3826 	}
3827 
3828 # ifdef _LIBC
3829       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3830       if (nrules != 0)
3831 	{
3832 	  unsigned int in_collseq = 0;
3833 	  const int32_t *table, *indirect;
3834 	  const unsigned char *weights, *extra;
3835 	  const char *collseqwc;
3836 	  int32_t idx;
3837 	  /* This #include defines a local function!  */
3838 #  include <locale/weight.h>
3839 
3840 	  /* match with collating_symbol?  */
3841 	  if (cset->ncoll_syms)
3842 	    extra = (const unsigned char *)
3843 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3844 	  for (i = 0; i < cset->ncoll_syms; ++i)
3845 	    {
3846 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3847 	      /* Compare the length of input collating element and
3848 		 the length of current collating element.  */
3849 	      if (*coll_sym != elem_len)
3850 		continue;
3851 	      /* Compare each bytes.  */
3852 	      for (j = 0; j < *coll_sym; j++)
3853 		if (pin[j] != coll_sym[1 + j])
3854 		  break;
3855 	      if (j == *coll_sym)
3856 		{
3857 		  /* Match if every bytes is equal.  */
3858 		  match_len = j;
3859 		  goto check_node_accept_bytes_match;
3860 		}
3861 	    }
3862 
3863 	  if (cset->nranges)
3864 	    {
3865 	      if (elem_len <= char_len)
3866 		{
3867 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3868 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3869 		}
3870 	      else
3871 		in_collseq = find_collation_sequence_value (pin, elem_len);
3872 	    }
3873 	  /* match with range expression?  */
3874 	  for (i = 0; i < cset->nranges; ++i)
3875 	    if (cset->range_starts[i] <= in_collseq
3876 		&& in_collseq <= cset->range_ends[i])
3877 	      {
3878 		match_len = elem_len;
3879 		goto check_node_accept_bytes_match;
3880 	      }
3881 
3882 	  /* match with equivalence_class?  */
3883 	  if (cset->nequiv_classes)
3884 	    {
3885 	      const unsigned char *cp = pin;
3886 	      table = (const int32_t *)
3887 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3888 	      weights = (const unsigned char *)
3889 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3890 	      extra = (const unsigned char *)
3891 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3892 	      indirect = (const int32_t *)
3893 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3894 	      idx = findidx (&cp);
3895 	      if (idx > 0)
3896 		for (i = 0; i < cset->nequiv_classes; ++i)
3897 		  {
3898 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3899 		    size_t weight_len = weights[idx];
3900 		    if (weight_len == weights[equiv_class_idx])
3901 		      {
3902 			Idx cnt = 0;
3903 			while (cnt <= weight_len
3904 			       && (weights[equiv_class_idx + 1 + cnt]
3905 				   == weights[idx + 1 + cnt]))
3906 			  ++cnt;
3907 			if (cnt > weight_len)
3908 			  {
3909 			    match_len = elem_len;
3910 			    goto check_node_accept_bytes_match;
3911 			  }
3912 		      }
3913 		  }
3914 	    }
3915 	}
3916       else
3917 # endif /* _LIBC */
3918 	{
3919 	  /* match with range expression?  */
3920 #if __GNUC__ >= 2
3921 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
3922 #else
3923 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
3924 	  cmp_buf[2] = wc;
3925 #endif
3926 	  for (i = 0; i < cset->nranges; ++i)
3927 	    {
3928 	      cmp_buf[0] = cset->range_starts[i];
3929 	      cmp_buf[4] = cset->range_ends[i];
3930 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
3931 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
3932 		{
3933 		  match_len = char_len;
3934 		  goto check_node_accept_bytes_match;
3935 		}
3936 	    }
3937 	}
3938     check_node_accept_bytes_match:
3939       if (!cset->non_match)
3940 	return match_len;
3941       else
3942 	{
3943 	  if (match_len > 0)
3944 	    return 0;
3945 	  else
3946 	    return (elem_len > char_len) ? elem_len : char_len;
3947 	}
3948     }
3949   return 0;
3950 }
3951 
3952 # ifdef _LIBC
3953 static unsigned int
3954 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3955 {
3956   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3957   if (nrules == 0)
3958     {
3959       if (mbs_len == 1)
3960 	{
3961 	  /* No valid character.  Match it as a single byte character.  */
3962 	  const unsigned char *collseq = (const unsigned char *)
3963 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3964 	  return collseq[mbs[0]];
3965 	}
3966       return UINT_MAX;
3967     }
3968   else
3969     {
3970       int32_t idx;
3971       const unsigned char *extra = (const unsigned char *)
3972 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3973       int32_t extrasize = (const unsigned char *)
3974 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3975 
3976       for (idx = 0; idx < extrasize;)
3977 	{
3978 	  int mbs_cnt;
3979 	  bool found = false;
3980 	  int32_t elem_mbs_len;
3981 	  /* Skip the name of collating element name.  */
3982 	  idx = idx + extra[idx] + 1;
3983 	  elem_mbs_len = extra[idx++];
3984 	  if (mbs_len == elem_mbs_len)
3985 	    {
3986 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3987 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3988 		  break;
3989 	      if (mbs_cnt == elem_mbs_len)
3990 		/* Found the entry.  */
3991 		found = true;
3992 	    }
3993 	  /* Skip the byte sequence of the collating element.  */
3994 	  idx += elem_mbs_len;
3995 	  /* Adjust for the alignment.  */
3996 	  idx = (idx + 3) & ~3;
3997 	  /* Skip the collation sequence value.  */
3998 	  idx += sizeof (uint32_t);
3999 	  /* Skip the wide char sequence of the collating element.  */
4000 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
4001 	  /* If we found the entry, return the sequence value.  */
4002 	  if (found)
4003 	    return *(uint32_t *) (extra + idx);
4004 	  /* Skip the collation sequence value.  */
4005 	  idx += sizeof (uint32_t);
4006 	}
4007       return UINT_MAX;
4008     }
4009 }
4010 # endif /* _LIBC */
4011 #endif /* RE_ENABLE_I18N */
4012 
4013 /* Check whether the node accepts the byte which is IDX-th
4014    byte of the INPUT.  */
4015 
4016 static bool
4017 internal_function
4018 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4019 		   Idx idx)
4020 {
4021   unsigned char ch;
4022   ch = re_string_byte_at (&mctx->input, idx);
4023   switch (node->type)
4024     {
4025     case CHARACTER:
4026       if (node->opr.c != ch)
4027         return false;
4028       break;
4029 
4030     case SIMPLE_BRACKET:
4031       if (!bitset_contain (node->opr.sbcset, ch))
4032         return false;
4033       break;
4034 
4035 #ifdef RE_ENABLE_I18N
4036     case OP_UTF8_PERIOD:
4037       if (ch >= 0x80)
4038         return false;
4039       /* FALLTHROUGH */
4040 #endif
4041     case OP_PERIOD:
4042       if ((ch == '\n' && !(mctx->dfa->syntax & REG_DOT_NEWLINE))
4043 	  || (ch == '\0' && (mctx->dfa->syntax & REG_DOT_NOT_NULL)))
4044 	return false;
4045       break;
4046 
4047     default:
4048       return false;
4049     }
4050 
4051   if (node->constraint)
4052     {
4053       /* The node has constraints.  Check whether the current context
4054 	 satisfies the constraints.  */
4055       unsigned int context = re_string_context_at (&mctx->input, idx,
4056 						   mctx->eflags);
4057       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4058 	return false;
4059     }
4060 
4061   return true;
4062 }
4063 
4064 /* Extend the buffers, if the buffers have run out.  */
4065 
4066 static reg_errcode_t
4067 internal_function
4068 extend_buffers (re_match_context_t *mctx)
4069 {
4070   reg_errcode_t ret;
4071   re_string_t *pstr = &mctx->input;
4072 
4073   /* Double the lengthes of the buffers.  */
4074   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
4075   if (BE (ret != REG_NOERROR, 0))
4076     return ret;
4077 
4078   if (mctx->state_log != NULL)
4079     {
4080       /* And double the length of state_log.  */
4081       /* XXX We have no indication of the size of this buffer.  If this
4082 	 allocation fail we have no indication that the state_log array
4083 	 does not have the right size.  */
4084       re_dfastate_t **new_array = re_xrealloc (mctx->state_log, re_dfastate_t *,
4085 					       pstr->bufs_len + 1);
4086       if (BE (new_array == NULL, 0))
4087 	return REG_ESPACE;
4088       mctx->state_log = new_array;
4089     }
4090 
4091   /* Then reconstruct the buffers.  */
4092   if (pstr->icase)
4093     {
4094 #ifdef RE_ENABLE_I18N
4095       if (pstr->mb_cur_max > 1)
4096 	{
4097 	  ret = build_wcs_upper_buffer (pstr);
4098 	  if (BE (ret != REG_NOERROR, 0))
4099 	    return ret;
4100 	}
4101       else
4102 #endif /* RE_ENABLE_I18N  */
4103 	build_upper_buffer (pstr);
4104     }
4105   else
4106     {
4107 #ifdef RE_ENABLE_I18N
4108       if (pstr->mb_cur_max > 1)
4109 	build_wcs_buffer (pstr);
4110       else
4111 #endif /* RE_ENABLE_I18N  */
4112 	{
4113 	  if (pstr->trans != NULL)
4114 	    re_string_translate_buffer (pstr);
4115 	}
4116     }
4117   return REG_NOERROR;
4118 }
4119 
4120 
4121 /* Functions for matching context.  */
4122 
4123 /* Initialize MCTX.  */
4124 
4125 static reg_errcode_t
4126 internal_function
4127 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4128 {
4129   mctx->eflags = eflags;
4130   mctx->match_last = REG_MISSING;
4131   if (n > 0)
4132     {
4133       mctx->bkref_ents = re_xmalloc (struct re_backref_cache_entry, n);
4134       mctx->sub_tops = re_xmalloc (re_sub_match_top_t *, n);
4135       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4136 	return REG_ESPACE;
4137     }
4138   /* Already zero-ed by the caller.
4139      else
4140        mctx->bkref_ents = NULL;
4141      mctx->nbkref_ents = 0;
4142      mctx->nsub_tops = 0;  */
4143   mctx->abkref_ents = n;
4144   mctx->max_mb_elem_len = 1;
4145   mctx->asub_tops = n;
4146   return REG_NOERROR;
4147 }
4148 
4149 /* Clean the entries which depend on the current input in MCTX.
4150    This function must be invoked when the matcher changes the start index
4151    of the input, or changes the input string.  */
4152 
4153 static void
4154 internal_function
4155 match_ctx_clean (re_match_context_t *mctx)
4156 {
4157   Idx st_idx;
4158   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4159     {
4160       Idx sl_idx;
4161       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4162       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4163 	{
4164 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4165 	  re_free (last->path.array);
4166 	  re_free (last);
4167 	}
4168       re_free (top->lasts);
4169       if (top->path)
4170 	{
4171 	  re_free (top->path->array);
4172 	  re_free (top->path);
4173 	}
4174       free (top);
4175     }
4176 
4177   mctx->nsub_tops = 0;
4178   mctx->nbkref_ents = 0;
4179 }
4180 
4181 /* Free all the memory associated with MCTX.  */
4182 
4183 static void
4184 internal_function
4185 match_ctx_free (re_match_context_t *mctx)
4186 {
4187   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4188   match_ctx_clean (mctx);
4189   re_free (mctx->sub_tops);
4190   re_free (mctx->bkref_ents);
4191 }
4192 
4193 /* Add a new backreference entry to MCTX.
4194    Note that we assume that caller never call this function with duplicate
4195    entry, and call with STR_IDX which isn't smaller than any existing entry.
4196 */
4197 
4198 static reg_errcode_t
4199 internal_function
4200 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx,
4201 		     Idx from, Idx to)
4202 {
4203   if (mctx->nbkref_ents >= mctx->abkref_ents)
4204     {
4205       struct re_backref_cache_entry* new_entry;
4206       new_entry = re_x2realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4207 				&mctx->abkref_ents);
4208       if (BE (new_entry == NULL, 0))
4209 	{
4210 	  re_free (mctx->bkref_ents);
4211 	  return REG_ESPACE;
4212 	}
4213       mctx->bkref_ents = new_entry;
4214       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4215 	      (sizeof (struct re_backref_cache_entry)
4216 	       * (mctx->abkref_ents - mctx->nbkref_ents)));
4217     }
4218   if (mctx->nbkref_ents > 0
4219       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4220     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4221 
4222   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4223   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4224   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4225   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4226 
4227   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4228      If bit N is clear, means that this entry won't epsilon-transition to
4229      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4230      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4231      such node.
4232 
4233      A backreference does not epsilon-transition unless it is empty, so set
4234      to all zeros if FROM != TO.  */
4235   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4236     = (from == to ? -1 : 0);
4237 
4238   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4239   if (mctx->max_mb_elem_len < to - from)
4240     mctx->max_mb_elem_len = to - from;
4241   return REG_NOERROR;
4242 }
4243 
4244 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4245    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4246 
4247 static Idx
4248 internal_function
4249 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4250 {
4251   Idx left, right, mid, last;
4252   last = right = mctx->nbkref_ents;
4253   for (left = 0; left < right;)
4254     {
4255       mid = (left + right) / 2;
4256       if (mctx->bkref_ents[mid].str_idx < str_idx)
4257 	left = mid + 1;
4258       else
4259 	right = mid;
4260     }
4261   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4262     return left;
4263   else
4264     return REG_MISSING;
4265 }
4266 
4267 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4268    at STR_IDX.  */
4269 
4270 static reg_errcode_t
4271 internal_function
4272 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4273 {
4274 #ifdef DEBUG
4275   assert (mctx->sub_tops != NULL);
4276   assert (mctx->asub_tops > 0);
4277 #endif
4278   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4279     {
4280       Idx new_asub_tops = mctx->asub_tops;
4281       re_sub_match_top_t **new_array = re_x2realloc (mctx->sub_tops,
4282 						     re_sub_match_top_t *,
4283 						     &new_asub_tops);
4284       if (BE (new_array == NULL, 0))
4285 	return REG_ESPACE;
4286       mctx->sub_tops = new_array;
4287       mctx->asub_tops = new_asub_tops;
4288     }
4289   mctx->sub_tops[mctx->nsub_tops] = re_calloc (re_sub_match_top_t, 1);
4290   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4291     return REG_ESPACE;
4292   mctx->sub_tops[mctx->nsub_tops]->node = node;
4293   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4294   return REG_NOERROR;
4295 }
4296 
4297 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4298    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4299 
4300 static re_sub_match_last_t *
4301 internal_function
4302 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4303 {
4304   re_sub_match_last_t *new_entry;
4305   if (BE (subtop->nlasts == subtop->alasts, 0))
4306     {
4307       Idx new_alasts = subtop->alasts;
4308       re_sub_match_last_t **new_array = re_x2realloc (subtop->lasts,
4309 						      re_sub_match_last_t *,
4310 						      &new_alasts);
4311       if (BE (new_array == NULL, 0))
4312 	return NULL;
4313       subtop->lasts = new_array;
4314       subtop->alasts = new_alasts;
4315     }
4316   new_entry = re_calloc (re_sub_match_last_t, 1);
4317   if (BE (new_entry != NULL, 1))
4318     {
4319       subtop->lasts[subtop->nlasts] = new_entry;
4320       new_entry->node = node;
4321       new_entry->str_idx = str_idx;
4322       ++subtop->nlasts;
4323     }
4324   return new_entry;
4325 }
4326 
4327 static void
4328 internal_function
4329 sift_ctx_init (re_sift_context_t *sctx,
4330 	       re_dfastate_t **sifted_sts,
4331 	       re_dfastate_t **limited_sts,
4332 	       Idx last_node, Idx last_str_idx)
4333 {
4334   sctx->sifted_states = sifted_sts;
4335   sctx->limited_states = limited_sts;
4336   sctx->last_node = last_node;
4337   sctx->last_str_idx = last_str_idx;
4338   re_node_set_init_empty (&sctx->limits);
4339 }
4340