xref: /dflybsd-src/contrib/grep/lib/regexec.c (revision 8035ceffafed38b82bfdfa0cf5306e678483d2d8)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU General Public
8    License as published by the Free Software Foundation; either
9    version 3 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    General Public License for more details.
15 
16    You should have received a copy of the GNU General Public
17    License along with the GNU C Library; if not, see
18    <http://www.gnu.org/licenses/>.  */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n) internal_function;
22 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
23 static void match_ctx_free (re_match_context_t *cache) internal_function;
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to)
26      internal_function;
27 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
28      internal_function;
29 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
30 					   Idx str_idx) internal_function;
31 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
32 						    Idx node, Idx str_idx)
33      internal_function;
34 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
35 			   re_dfastate_t **limited_sts, Idx last_node,
36 			   Idx last_str_idx)
37      internal_function;
38 static reg_errcode_t re_search_internal (const regex_t *preg,
39 					 const char *string, Idx length,
40 					 Idx start, Idx last_start, Idx stop,
41 					 size_t nmatch, regmatch_t pmatch[],
42 					 int eflags) internal_function;
43 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
44 				  const char *string1, Idx length1,
45 				  const char *string2, Idx length2,
46 				  Idx start, regoff_t range,
47 				  struct re_registers *regs,
48 				  Idx stop, bool ret_len) internal_function;
49 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
50 				const char *string, Idx length, Idx start,
51 				regoff_t range, Idx stop,
52 				struct re_registers *regs,
53 				bool ret_len) internal_function;
54 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
55                               Idx nregs, int regs_allocated) internal_function;
56 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
57      internal_function;
58 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
59 			   Idx *p_match_first) internal_function;
60 static Idx check_halt_state_context (const re_match_context_t *mctx,
61 				     const re_dfastate_t *state, Idx idx)
62      internal_function;
63 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
64 			 regmatch_t *prev_idx_match, Idx cur_node,
65 			 Idx cur_idx, Idx nmatch) internal_function;
66 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
67 				      Idx str_idx, Idx dest_node, Idx nregs,
68 				      regmatch_t *regs,
69 				      re_node_set *eps_via_nodes)
70      internal_function;
71 static reg_errcode_t set_regs (const regex_t *preg,
72 			       const re_match_context_t *mctx,
73 			       size_t nmatch, regmatch_t *pmatch,
74 			       bool fl_backtrack) internal_function;
75 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
76      internal_function;
77 
78 #ifdef RE_ENABLE_I18N
79 static int sift_states_iter_mb (const re_match_context_t *mctx,
80 				re_sift_context_t *sctx,
81 				Idx node_idx, Idx str_idx, Idx max_str_idx)
82      internal_function;
83 #endif /* RE_ENABLE_I18N */
84 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
85 					   re_sift_context_t *sctx)
86      internal_function;
87 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
88 					  re_sift_context_t *sctx, Idx str_idx,
89 					  re_node_set *cur_dest)
90      internal_function;
91 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
92 					      re_sift_context_t *sctx,
93 					      Idx str_idx,
94 					      re_node_set *dest_nodes)
95      internal_function;
96 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
97 					    re_node_set *dest_nodes,
98 					    const re_node_set *candidates)
99      internal_function;
100 static bool check_dst_limits (const re_match_context_t *mctx,
101 			      const re_node_set *limits,
102 			      Idx dst_node, Idx dst_idx, Idx src_node,
103 			      Idx src_idx) internal_function;
104 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
105 					int boundaries, Idx subexp_idx,
106 					Idx from_node, Idx bkref_idx)
107      internal_function;
108 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
109 				      Idx limit, Idx subexp_idx,
110 				      Idx node, Idx str_idx,
111 				      Idx bkref_idx) internal_function;
112 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
113 					  re_node_set *dest_nodes,
114 					  const re_node_set *candidates,
115 					  re_node_set *limits,
116 					  struct re_backref_cache_entry *bkref_ents,
117 					  Idx str_idx) internal_function;
118 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
119 					re_sift_context_t *sctx,
120 					Idx str_idx, const re_node_set *candidates)
121      internal_function;
122 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
123 					re_dfastate_t **dst,
124 					re_dfastate_t **src, Idx num)
125      internal_function;
126 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
127 					 re_match_context_t *mctx) internal_function;
128 static re_dfastate_t *transit_state (reg_errcode_t *err,
129 				     re_match_context_t *mctx,
130 				     re_dfastate_t *state) internal_function;
131 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
132 					    re_match_context_t *mctx,
133 					    re_dfastate_t *next_state)
134      internal_function;
135 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
136 						re_node_set *cur_nodes,
137 						Idx str_idx) internal_function;
138 #if 0
139 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
140 					re_match_context_t *mctx,
141 					re_dfastate_t *pstate)
142      internal_function;
143 #endif
144 #ifdef RE_ENABLE_I18N
145 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
146 				       re_dfastate_t *pstate)
147      internal_function;
148 #endif /* RE_ENABLE_I18N */
149 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
150 					  const re_node_set *nodes)
151      internal_function;
152 static reg_errcode_t get_subexp (re_match_context_t *mctx,
153 				 Idx bkref_node, Idx bkref_str_idx)
154      internal_function;
155 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
156 				     const re_sub_match_top_t *sub_top,
157 				     re_sub_match_last_t *sub_last,
158 				     Idx bkref_node, Idx bkref_str)
159      internal_function;
160 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
161 			     Idx subexp_idx, int type) internal_function;
162 static reg_errcode_t check_arrival (re_match_context_t *mctx,
163 				    state_array_t *path, Idx top_node,
164 				    Idx top_str, Idx last_node, Idx last_str,
165 				    int type) internal_function;
166 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
167 						   Idx str_idx,
168 						   re_node_set *cur_nodes,
169 						   re_node_set *next_nodes)
170      internal_function;
171 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
172 					       re_node_set *cur_nodes,
173 					       Idx ex_subexp, int type)
174      internal_function;
175 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
176 						   re_node_set *dst_nodes,
177 						   Idx target, Idx ex_subexp,
178 						   int type) internal_function;
179 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
180 					 re_node_set *cur_nodes, Idx cur_str,
181 					 Idx subexp_num, int type)
182      internal_function;
183 static bool build_trtable (const re_dfa_t *dfa,
184 			   re_dfastate_t *state) internal_function;
185 #ifdef RE_ENABLE_I18N
186 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
187 				    const re_string_t *input, Idx idx)
188      internal_function;
189 # ifdef _LIBC
190 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
191 						   size_t name_len)
192      internal_function;
193 # endif /* _LIBC */
194 #endif /* RE_ENABLE_I18N */
195 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
196 				       const re_dfastate_t *state,
197 				       re_node_set *states_node,
198 				       bitset_t *states_ch) internal_function;
199 static bool check_node_accept (const re_match_context_t *mctx,
200 			       const re_token_t *node, Idx idx)
201      internal_function;
202 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
203      internal_function;
204 
205 /* Entry point for POSIX code.  */
206 
207 /* regexec searches for a given pattern, specified by PREG, in the
208    string STRING.
209 
210    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
212    least NMATCH elements, and we set them to the offsets of the
213    corresponding matched substrings.
214 
215    EFLAGS specifies "execution flags" which affect matching: if
216    REG_NOTBOL is set, then ^ does not match at the beginning of the
217    string; if REG_NOTEOL is set, then $ does not match at the end.
218 
219    We return 0 if we find a match and REG_NOMATCH if not.  */
220 
221 int
222 regexec (preg, string, nmatch, pmatch, eflags)
223     const regex_t *_Restrict_ preg;
224     const char *_Restrict_ string;
225     size_t nmatch;
226     regmatch_t pmatch[_Restrict_arr_];
227     int eflags;
228 {
229   reg_errcode_t err;
230   Idx start, length;
231   re_dfa_t *dfa = preg->buffer;
232 
233   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
234     return REG_BADPAT;
235 
236   if (eflags & REG_STARTEND)
237     {
238       start = pmatch[0].rm_so;
239       length = pmatch[0].rm_eo;
240     }
241   else
242     {
243       start = 0;
244       length = strlen (string);
245     }
246 
247   lock_lock (dfa->lock);
248   if (preg->no_sub)
249     err = re_search_internal (preg, string, length, start, length,
250 			      length, 0, NULL, eflags);
251   else
252     err = re_search_internal (preg, string, length, start, length,
253 			      length, nmatch, pmatch, eflags);
254   lock_unlock (dfa->lock);
255   return err != REG_NOERROR;
256 }
257 
258 #ifdef _LIBC
259 # include <shlib-compat.h>
260 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
261 
262 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
263 __typeof__ (__regexec) __compat_regexec;
264 
265 int
266 attribute_compat_text_section
267 __compat_regexec (const regex_t *_Restrict_ preg,
268 		  const char *_Restrict_ string, size_t nmatch,
269 		  regmatch_t pmatch[], int eflags)
270 {
271   return regexec (preg, string, nmatch, pmatch,
272 		  eflags & (REG_NOTBOL | REG_NOTEOL));
273 }
274 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
275 # endif
276 #endif
277 
278 /* Entry points for GNU code.  */
279 
280 /* re_match, re_search, re_match_2, re_search_2
281 
282    The former two functions operate on STRING with length LENGTH,
283    while the later two operate on concatenation of STRING1 and STRING2
284    with lengths LENGTH1 and LENGTH2, respectively.
285 
286    re_match() matches the compiled pattern in BUFP against the string,
287    starting at index START.
288 
289    re_search() first tries matching at index START, then it tries to match
290    starting from index START + 1, and so on.  The last start position tried
291    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
292    way as re_match().)
293 
294    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
295    the first STOP characters of the concatenation of the strings should be
296    concerned.
297 
298    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
299    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
300    computed relative to the concatenation, not relative to the individual
301    strings.)
302 
303    On success, re_match* functions return the length of the match, re_search*
304    return the position of the start of the match.  Return value -1 means no
305    match was found and -2 indicates an internal error.  */
306 
307 regoff_t
308 re_match (bufp, string, length, start, regs)
309     struct re_pattern_buffer *bufp;
310     const char *string;
311     Idx length, start;
312     struct re_registers *regs;
313 {
314   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
315 }
316 #ifdef _LIBC
317 weak_alias (__re_match, re_match)
318 #endif
319 
320 regoff_t
321 re_search (bufp, string, length, start, range, regs)
322     struct re_pattern_buffer *bufp;
323     const char *string;
324     Idx length, start;
325     regoff_t range;
326     struct re_registers *regs;
327 {
328   return re_search_stub (bufp, string, length, start, range, length, regs,
329 			 false);
330 }
331 #ifdef _LIBC
332 weak_alias (__re_search, re_search)
333 #endif
334 
335 regoff_t
336 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
337     struct re_pattern_buffer *bufp;
338     const char *string1, *string2;
339     Idx length1, length2, start, stop;
340     struct re_registers *regs;
341 {
342   return re_search_2_stub (bufp, string1, length1, string2, length2,
343 			   start, 0, regs, stop, true);
344 }
345 #ifdef _LIBC
346 weak_alias (__re_match_2, re_match_2)
347 #endif
348 
349 regoff_t
350 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
351     struct re_pattern_buffer *bufp;
352     const char *string1, *string2;
353     Idx length1, length2, start, stop;
354     regoff_t range;
355     struct re_registers *regs;
356 {
357   return re_search_2_stub (bufp, string1, length1, string2, length2,
358 			   start, range, regs, stop, false);
359 }
360 #ifdef _LIBC
361 weak_alias (__re_search_2, re_search_2)
362 #endif
363 
364 static regoff_t
365 re_search_2_stub (struct re_pattern_buffer *bufp,
366 		  const char *string1, Idx length1,
367 		  const char *string2, Idx length2,
368 		  Idx start, regoff_t range, struct re_registers *regs,
369 		  Idx stop, bool ret_len)
370 {
371   const char *str;
372   regoff_t rval;
373   Idx len = length1 + length2;
374   char *s = NULL;
375 
376   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
377     return -2;
378 
379   /* Concatenate the strings.  */
380   if (length2 > 0)
381     if (length1 > 0)
382       {
383 	s = re_malloc (char, len);
384 
385 	if (BE (s == NULL, 0))
386 	  return -2;
387 #ifdef _LIBC
388 	memcpy (__mempcpy (s, string1, length1), string2, length2);
389 #else
390 	memcpy (s, string1, length1);
391 	memcpy (s + length1, string2, length2);
392 #endif
393 	str = s;
394       }
395     else
396       str = string2;
397   else
398     str = string1;
399 
400   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
401 			 ret_len);
402   re_free (s);
403   return rval;
404 }
405 
406 /* The parameters have the same meaning as those of re_search.
407    Additional parameters:
408    If RET_LEN is true the length of the match is returned (re_match style);
409    otherwise the position of the match is returned.  */
410 
411 static regoff_t
412 re_search_stub (struct re_pattern_buffer *bufp,
413 		const char *string, Idx length,
414 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
415 		bool ret_len)
416 {
417   reg_errcode_t result;
418   regmatch_t *pmatch;
419   Idx nregs;
420   regoff_t rval;
421   int eflags = 0;
422   re_dfa_t *dfa = bufp->buffer;
423   Idx last_start = start + range;
424 
425   /* Check for out-of-range.  */
426   if (BE (start < 0 || start > length, 0))
427     return -1;
428   if (BE (length < last_start || (0 <= range && last_start < start), 0))
429     last_start = length;
430   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
431     last_start = 0;
432 
433   lock_lock (dfa->lock);
434 
435   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
436   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
437 
438   /* Compile fastmap if we haven't yet.  */
439   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
440     re_compile_fastmap (bufp);
441 
442   if (BE (bufp->no_sub, 0))
443     regs = NULL;
444 
445   /* We need at least 1 register.  */
446   if (regs == NULL)
447     nregs = 1;
448   else if (BE (bufp->regs_allocated == REGS_FIXED
449 	       && regs->num_regs <= bufp->re_nsub, 0))
450     {
451       nregs = regs->num_regs;
452       if (BE (nregs < 1, 0))
453 	{
454 	  /* Nothing can be copied to regs.  */
455 	  regs = NULL;
456 	  nregs = 1;
457 	}
458     }
459   else
460     nregs = bufp->re_nsub + 1;
461   pmatch = re_malloc (regmatch_t, nregs);
462   if (BE (pmatch == NULL, 0))
463     {
464       rval = -2;
465       goto out;
466     }
467 
468   result = re_search_internal (bufp, string, length, start, last_start, stop,
469 			       nregs, pmatch, eflags);
470 
471   rval = 0;
472 
473   /* I hope we needn't fill their regs with -1's when no match was found.  */
474   if (result != REG_NOERROR)
475     rval = result == REG_NOMATCH ? -1 : -2;
476   else if (regs != NULL)
477     {
478       /* If caller wants register contents data back, copy them.  */
479       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
480 					   bufp->regs_allocated);
481       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
482 	rval = -2;
483     }
484 
485   if (BE (rval == 0, 1))
486     {
487       if (ret_len)
488 	{
489 	  assert (pmatch[0].rm_so == start);
490 	  rval = pmatch[0].rm_eo - start;
491 	}
492       else
493 	rval = pmatch[0].rm_so;
494     }
495   re_free (pmatch);
496  out:
497   lock_unlock (dfa->lock);
498   return rval;
499 }
500 
501 static unsigned
502 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
503 	      int regs_allocated)
504 {
505   int rval = REGS_REALLOCATE;
506   Idx i;
507   Idx need_regs = nregs + 1;
508   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
509      uses.  */
510 
511   /* Have the register data arrays been allocated?  */
512   if (regs_allocated == REGS_UNALLOCATED)
513     { /* No.  So allocate them with malloc.  */
514       regs->start = re_malloc (regoff_t, need_regs);
515       if (BE (regs->start == NULL, 0))
516 	return REGS_UNALLOCATED;
517       regs->end = re_malloc (regoff_t, need_regs);
518       if (BE (regs->end == NULL, 0))
519 	{
520 	  re_free (regs->start);
521 	  return REGS_UNALLOCATED;
522 	}
523       regs->num_regs = need_regs;
524     }
525   else if (regs_allocated == REGS_REALLOCATE)
526     { /* Yes.  If we need more elements than were already
527 	 allocated, reallocate them.  If we need fewer, just
528 	 leave it alone.  */
529       if (BE (need_regs > regs->num_regs, 0))
530 	{
531 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
532 	  regoff_t *new_end;
533 	  if (BE (new_start == NULL, 0))
534 	    return REGS_UNALLOCATED;
535 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
536 	  if (BE (new_end == NULL, 0))
537 	    {
538 	      re_free (new_start);
539 	      return REGS_UNALLOCATED;
540 	    }
541 	  regs->start = new_start;
542 	  regs->end = new_end;
543 	  regs->num_regs = need_regs;
544 	}
545     }
546   else
547     {
548       assert (regs_allocated == REGS_FIXED);
549       /* This function may not be called with REGS_FIXED and nregs too big.  */
550       assert (regs->num_regs >= nregs);
551       rval = REGS_FIXED;
552     }
553 
554   /* Copy the regs.  */
555   for (i = 0; i < nregs; ++i)
556     {
557       regs->start[i] = pmatch[i].rm_so;
558       regs->end[i] = pmatch[i].rm_eo;
559     }
560   for ( ; i < regs->num_regs; ++i)
561     regs->start[i] = regs->end[i] = -1;
562 
563   return rval;
564 }
565 
566 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
567    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
568    this memory for recording register information.  STARTS and ENDS
569    must be allocated using the malloc library routine, and must each
570    be at least NUM_REGS * sizeof (regoff_t) bytes long.
571 
572    If NUM_REGS == 0, then subsequent matches should allocate their own
573    register data.
574 
575    Unless this function is called, the first search or match using
576    PATTERN_BUFFER will allocate its own register data, without
577    freeing the old data.  */
578 
579 void
580 re_set_registers (bufp, regs, num_regs, starts, ends)
581     struct re_pattern_buffer *bufp;
582     struct re_registers *regs;
583     __re_size_t num_regs;
584     regoff_t *starts, *ends;
585 {
586   if (num_regs)
587     {
588       bufp->regs_allocated = REGS_REALLOCATE;
589       regs->num_regs = num_regs;
590       regs->start = starts;
591       regs->end = ends;
592     }
593   else
594     {
595       bufp->regs_allocated = REGS_UNALLOCATED;
596       regs->num_regs = 0;
597       regs->start = regs->end = NULL;
598     }
599 }
600 #ifdef _LIBC
601 weak_alias (__re_set_registers, re_set_registers)
602 #endif
603 
604 /* Entry points compatible with 4.2 BSD regex library.  We don't define
605    them unless specifically requested.  */
606 
607 #if defined _REGEX_RE_COMP || defined _LIBC
608 int
609 # ifdef _LIBC
610 weak_function
611 # endif
612 re_exec (s)
613      const char *s;
614 {
615   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
616 }
617 #endif /* _REGEX_RE_COMP */
618 
619 /* Internal entry point.  */
620 
621 /* Searches for a compiled pattern PREG in the string STRING, whose
622    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
623    meaning as with regexec.  LAST_START is START + RANGE, where
624    START and RANGE have the same meaning as with re_search.
625    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
626    otherwise return the error code.
627    Note: We assume front end functions already check ranges.
628    (0 <= LAST_START && LAST_START <= LENGTH)  */
629 
630 static reg_errcode_t
631 __attribute_warn_unused_result__
632 re_search_internal (const regex_t *preg,
633 		    const char *string, Idx length,
634 		    Idx start, Idx last_start, Idx stop,
635 		    size_t nmatch, regmatch_t pmatch[],
636 		    int eflags)
637 {
638   reg_errcode_t err;
639   const re_dfa_t *dfa = preg->buffer;
640   Idx left_lim, right_lim;
641   int incr;
642   bool fl_longest_match;
643   int match_kind;
644   Idx match_first;
645   Idx match_last = REG_MISSING;
646   Idx extra_nmatch;
647   bool sb;
648   int ch;
649 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
650   re_match_context_t mctx = { .dfa = dfa };
651 #else
652   re_match_context_t mctx;
653 #endif
654   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
655 		    && start != last_start && !preg->can_be_null)
656 		   ? preg->fastmap : NULL);
657   RE_TRANSLATE_TYPE t = preg->translate;
658 
659 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
660   memset (&mctx, '\0', sizeof (re_match_context_t));
661   mctx.dfa = dfa;
662 #endif
663 
664   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
665   nmatch -= extra_nmatch;
666 
667   /* Check if the DFA haven't been compiled.  */
668   if (BE (preg->used == 0 || dfa->init_state == NULL
669 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
670 	  || dfa->init_state_begbuf == NULL, 0))
671     return REG_NOMATCH;
672 
673 #ifdef DEBUG
674   /* We assume front-end functions already check them.  */
675   assert (0 <= last_start && last_start <= length);
676 #endif
677 
678   /* If initial states with non-begbuf contexts have no elements,
679      the regex must be anchored.  If preg->newline_anchor is set,
680      we'll never use init_state_nl, so do not check it.  */
681   if (dfa->init_state->nodes.nelem == 0
682       && dfa->init_state_word->nodes.nelem == 0
683       && (dfa->init_state_nl->nodes.nelem == 0
684 	  || !preg->newline_anchor))
685     {
686       if (start != 0 && last_start != 0)
687         return REG_NOMATCH;
688       start = last_start = 0;
689     }
690 
691   /* We must check the longest matching, if nmatch > 0.  */
692   fl_longest_match = (nmatch != 0 || dfa->nbackref);
693 
694   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
695 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
696 			    dfa);
697   if (BE (err != REG_NOERROR, 0))
698     goto free_return;
699   mctx.input.stop = stop;
700   mctx.input.raw_stop = stop;
701   mctx.input.newline_anchor = preg->newline_anchor;
702 
703   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
704   if (BE (err != REG_NOERROR, 0))
705     goto free_return;
706 
707   /* We will log all the DFA states through which the dfa pass,
708      if nmatch > 1, or this dfa has "multibyte node", which is a
709      back-reference or a node which can accept multibyte character or
710      multi character collating element.  */
711   if (nmatch > 1 || dfa->has_mb_node)
712     {
713       /* Avoid overflow.  */
714       if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
715                <= mctx.input.bufs_len), 0))
716 	{
717 	  err = REG_ESPACE;
718 	  goto free_return;
719 	}
720 
721       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
722       if (BE (mctx.state_log == NULL, 0))
723 	{
724 	  err = REG_ESPACE;
725 	  goto free_return;
726 	}
727     }
728   else
729     mctx.state_log = NULL;
730 
731   match_first = start;
732   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
733 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
734 
735   /* Check incrementally whether the input string matches.  */
736   incr = (last_start < start) ? -1 : 1;
737   left_lim = (last_start < start) ? last_start : start;
738   right_lim = (last_start < start) ? start : last_start;
739   sb = dfa->mb_cur_max == 1;
740   match_kind =
741     (fastmap
742      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
743 	| (start <= last_start ? 2 : 0)
744 	| (t != NULL ? 1 : 0))
745      : 8);
746 
747   for (;; match_first += incr)
748     {
749       err = REG_NOMATCH;
750       if (match_first < left_lim || right_lim < match_first)
751 	goto free_return;
752 
753       /* Advance as rapidly as possible through the string, until we
754 	 find a plausible place to start matching.  This may be done
755 	 with varying efficiency, so there are various possibilities:
756 	 only the most common of them are specialized, in order to
757 	 save on code size.  We use a switch statement for speed.  */
758       switch (match_kind)
759 	{
760 	case 8:
761 	  /* No fastmap.  */
762 	  break;
763 
764 	case 7:
765 	  /* Fastmap with single-byte translation, match forward.  */
766 	  while (BE (match_first < right_lim, 1)
767 		 && !fastmap[t[(unsigned char) string[match_first]]])
768 	    ++match_first;
769 	  goto forward_match_found_start_or_reached_end;
770 
771 	case 6:
772 	  /* Fastmap without translation, match forward.  */
773 	  while (BE (match_first < right_lim, 1)
774 		 && !fastmap[(unsigned char) string[match_first]])
775 	    ++match_first;
776 
777 	forward_match_found_start_or_reached_end:
778 	  if (BE (match_first == right_lim, 0))
779 	    {
780 	      ch = match_first >= length
781 		       ? 0 : (unsigned char) string[match_first];
782 	      if (!fastmap[t ? t[ch] : ch])
783 		goto free_return;
784 	    }
785 	  break;
786 
787 	case 4:
788 	case 5:
789 	  /* Fastmap without multi-byte translation, match backwards.  */
790 	  while (match_first >= left_lim)
791 	    {
792 	      ch = match_first >= length
793 		       ? 0 : (unsigned char) string[match_first];
794 	      if (fastmap[t ? t[ch] : ch])
795 		break;
796 	      --match_first;
797 	    }
798 	  if (match_first < left_lim)
799 	    goto free_return;
800 	  break;
801 
802 	default:
803 	  /* In this case, we can't determine easily the current byte,
804 	     since it might be a component byte of a multibyte
805 	     character.  Then we use the constructed buffer instead.  */
806 	  for (;;)
807 	    {
808 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
809 		 buffers.  */
810 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
811 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
812 		{
813 		  err = re_string_reconstruct (&mctx.input, match_first,
814 					       eflags);
815 		  if (BE (err != REG_NOERROR, 0))
816 		    goto free_return;
817 
818 		  offset = match_first - mctx.input.raw_mbs_idx;
819 		}
820 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
821 		 Note that MATCH_FIRST must not be smaller than 0.  */
822 	      ch = (match_first >= length
823 		    ? 0 : re_string_byte_at (&mctx.input, offset));
824 	      if (fastmap[ch])
825 		break;
826 	      match_first += incr;
827 	      if (match_first < left_lim || match_first > right_lim)
828 		{
829 		  err = REG_NOMATCH;
830 		  goto free_return;
831 		}
832 	    }
833 	  break;
834 	}
835 
836       /* Reconstruct the buffers so that the matcher can assume that
837 	 the matching starts from the beginning of the buffer.  */
838       err = re_string_reconstruct (&mctx.input, match_first, eflags);
839       if (BE (err != REG_NOERROR, 0))
840 	goto free_return;
841 
842 #ifdef RE_ENABLE_I18N
843      /* Don't consider this char as a possible match start if it part,
844 	yet isn't the head, of a multibyte character.  */
845       if (!sb && !re_string_first_byte (&mctx.input, 0))
846 	continue;
847 #endif
848 
849       /* It seems to be appropriate one, then use the matcher.  */
850       /* We assume that the matching starts from 0.  */
851       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
852       match_last = check_matching (&mctx, fl_longest_match,
853 				   start <= last_start ? &match_first : NULL);
854       if (match_last != REG_MISSING)
855 	{
856 	  if (BE (match_last == REG_ERROR, 0))
857 	    {
858 	      err = REG_ESPACE;
859 	      goto free_return;
860 	    }
861 	  else
862 	    {
863 	      mctx.match_last = match_last;
864 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
865 		{
866 		  re_dfastate_t *pstate = mctx.state_log[match_last];
867 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
868 							     match_last);
869 		}
870 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
871 		  || dfa->nbackref)
872 		{
873 		  err = prune_impossible_nodes (&mctx);
874 		  if (err == REG_NOERROR)
875 		    break;
876 		  if (BE (err != REG_NOMATCH, 0))
877 		    goto free_return;
878 		  match_last = REG_MISSING;
879 		}
880 	      else
881 		break; /* We found a match.  */
882 	    }
883 	}
884 
885       match_ctx_clean (&mctx);
886     }
887 
888 #ifdef DEBUG
889   assert (match_last != REG_MISSING);
890   assert (err == REG_NOERROR);
891 #endif
892 
893   /* Set pmatch[] if we need.  */
894   if (nmatch > 0)
895     {
896       Idx reg_idx;
897 
898       /* Initialize registers.  */
899       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
900 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
901 
902       /* Set the points where matching start/end.  */
903       pmatch[0].rm_so = 0;
904       pmatch[0].rm_eo = mctx.match_last;
905       /* FIXME: This function should fail if mctx.match_last exceeds
906 	 the maximum possible regoff_t value.  We need a new error
907 	 code REG_OVERFLOW.  */
908 
909       if (!preg->no_sub && nmatch > 1)
910 	{
911 	  err = set_regs (preg, &mctx, nmatch, pmatch,
912 			  dfa->has_plural_match && dfa->nbackref > 0);
913 	  if (BE (err != REG_NOERROR, 0))
914 	    goto free_return;
915 	}
916 
917       /* At last, add the offset to each register, since we slid
918 	 the buffers so that we could assume that the matching starts
919 	 from 0.  */
920       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
921 	if (pmatch[reg_idx].rm_so != -1)
922 	  {
923 #ifdef RE_ENABLE_I18N
924 	    if (BE (mctx.input.offsets_needed != 0, 0))
925 	      {
926 		pmatch[reg_idx].rm_so =
927 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
928 		   ? mctx.input.valid_raw_len
929 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
930 		pmatch[reg_idx].rm_eo =
931 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
932 		   ? mctx.input.valid_raw_len
933 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
934 	      }
935 #else
936 	    assert (mctx.input.offsets_needed == 0);
937 #endif
938 	    pmatch[reg_idx].rm_so += match_first;
939 	    pmatch[reg_idx].rm_eo += match_first;
940 	  }
941       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
942 	{
943 	  pmatch[nmatch + reg_idx].rm_so = -1;
944 	  pmatch[nmatch + reg_idx].rm_eo = -1;
945 	}
946 
947       if (dfa->subexp_map)
948 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
949 	  if (dfa->subexp_map[reg_idx] != reg_idx)
950 	    {
951 	      pmatch[reg_idx + 1].rm_so
952 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
953 	      pmatch[reg_idx + 1].rm_eo
954 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
955 	    }
956     }
957 
958  free_return:
959   re_free (mctx.state_log);
960   if (dfa->nbackref)
961     match_ctx_free (&mctx);
962   re_string_destruct (&mctx.input);
963   return err;
964 }
965 
966 static reg_errcode_t
967 __attribute_warn_unused_result__
968 prune_impossible_nodes (re_match_context_t *mctx)
969 {
970   const re_dfa_t *const dfa = mctx->dfa;
971   Idx halt_node, match_last;
972   reg_errcode_t ret;
973   re_dfastate_t **sifted_states;
974   re_dfastate_t **lim_states = NULL;
975   re_sift_context_t sctx;
976 #ifdef DEBUG
977   assert (mctx->state_log != NULL);
978 #endif
979   match_last = mctx->match_last;
980   halt_node = mctx->last_node;
981 
982   /* Avoid overflow.  */
983   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
984     return REG_ESPACE;
985 
986   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
987   if (BE (sifted_states == NULL, 0))
988     {
989       ret = REG_ESPACE;
990       goto free_return;
991     }
992   if (dfa->nbackref)
993     {
994       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
995       if (BE (lim_states == NULL, 0))
996 	{
997 	  ret = REG_ESPACE;
998 	  goto free_return;
999 	}
1000       while (1)
1001 	{
1002 	  memset (lim_states, '\0',
1003 		  sizeof (re_dfastate_t *) * (match_last + 1));
1004 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
1005 			 match_last);
1006 	  ret = sift_states_backward (mctx, &sctx);
1007 	  re_node_set_free (&sctx.limits);
1008 	  if (BE (ret != REG_NOERROR, 0))
1009 	      goto free_return;
1010 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
1011 	    break;
1012 	  do
1013 	    {
1014 	      --match_last;
1015 	      if (! REG_VALID_INDEX (match_last))
1016 		{
1017 		  ret = REG_NOMATCH;
1018 		  goto free_return;
1019 		}
1020 	    } while (mctx->state_log[match_last] == NULL
1021 		     || !mctx->state_log[match_last]->halt);
1022 	  halt_node = check_halt_state_context (mctx,
1023 						mctx->state_log[match_last],
1024 						match_last);
1025 	}
1026       ret = merge_state_array (dfa, sifted_states, lim_states,
1027 			       match_last + 1);
1028       re_free (lim_states);
1029       lim_states = NULL;
1030       if (BE (ret != REG_NOERROR, 0))
1031 	goto free_return;
1032     }
1033   else
1034     {
1035       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
1036       ret = sift_states_backward (mctx, &sctx);
1037       re_node_set_free (&sctx.limits);
1038       if (BE (ret != REG_NOERROR, 0))
1039 	goto free_return;
1040       if (sifted_states[0] == NULL)
1041 	{
1042 	  ret = REG_NOMATCH;
1043 	  goto free_return;
1044 	}
1045     }
1046   re_free (mctx->state_log);
1047   mctx->state_log = sifted_states;
1048   sifted_states = NULL;
1049   mctx->last_node = halt_node;
1050   mctx->match_last = match_last;
1051   ret = REG_NOERROR;
1052  free_return:
1053   re_free (sifted_states);
1054   re_free (lim_states);
1055   return ret;
1056 }
1057 
1058 /* Acquire an initial state and return it.
1059    We must select appropriate initial state depending on the context,
1060    since initial states may have constraints like "\<", "^", etc..  */
1061 
1062 static inline re_dfastate_t *
1063 __attribute__ ((always_inline)) internal_function
1064 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1065 			    Idx idx)
1066 {
1067   const re_dfa_t *const dfa = mctx->dfa;
1068   if (dfa->init_state->has_constraint)
1069     {
1070       unsigned int context;
1071       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1072       if (IS_WORD_CONTEXT (context))
1073 	return dfa->init_state_word;
1074       else if (IS_ORDINARY_CONTEXT (context))
1075 	return dfa->init_state;
1076       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1077 	return dfa->init_state_begbuf;
1078       else if (IS_NEWLINE_CONTEXT (context))
1079 	return dfa->init_state_nl;
1080       else if (IS_BEGBUF_CONTEXT (context))
1081 	{
1082 	  /* It is relatively rare case, then calculate on demand.  */
1083 	  return re_acquire_state_context (err, dfa,
1084 					   dfa->init_state->entrance_nodes,
1085 					   context);
1086 	}
1087       else
1088 	/* Must not happen?  */
1089 	return dfa->init_state;
1090     }
1091   else
1092     return dfa->init_state;
1093 }
1094 
1095 /* Check whether the regular expression match input string INPUT or not,
1096    and return the index where the matching end.  Return REG_MISSING if
1097    there is no match, and return REG_ERROR in case of an error.
1098    FL_LONGEST_MATCH means we want the POSIX longest matching.
1099    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1100    next place where we may want to try matching.
1101    Note that the matcher assumes that the matching starts from the current
1102    index of the buffer.  */
1103 
1104 static Idx
1105 internal_function __attribute_warn_unused_result__
1106 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1107 		Idx *p_match_first)
1108 {
1109   const re_dfa_t *const dfa = mctx->dfa;
1110   reg_errcode_t err;
1111   Idx match = 0;
1112   Idx match_last = REG_MISSING;
1113   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1114   re_dfastate_t *cur_state;
1115   bool at_init_state = p_match_first != NULL;
1116   Idx next_start_idx = cur_str_idx;
1117 
1118   err = REG_NOERROR;
1119   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1120   /* An initial state must not be NULL (invalid).  */
1121   if (BE (cur_state == NULL, 0))
1122     {
1123       assert (err == REG_ESPACE);
1124       return REG_ERROR;
1125     }
1126 
1127   if (mctx->state_log != NULL)
1128     {
1129       mctx->state_log[cur_str_idx] = cur_state;
1130 
1131       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1132 	 later.  E.g. Processing back references.  */
1133       if (BE (dfa->nbackref, 0))
1134 	{
1135 	  at_init_state = false;
1136 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1137 	  if (BE (err != REG_NOERROR, 0))
1138 	    return err;
1139 
1140 	  if (cur_state->has_backref)
1141 	    {
1142 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1143 	      if (BE (err != REG_NOERROR, 0))
1144 		return err;
1145 	    }
1146 	}
1147     }
1148 
1149   /* If the RE accepts NULL string.  */
1150   if (BE (cur_state->halt, 0))
1151     {
1152       if (!cur_state->has_constraint
1153 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1154 	{
1155 	  if (!fl_longest_match)
1156 	    return cur_str_idx;
1157 	  else
1158 	    {
1159 	      match_last = cur_str_idx;
1160 	      match = 1;
1161 	    }
1162 	}
1163     }
1164 
1165   while (!re_string_eoi (&mctx->input))
1166     {
1167       re_dfastate_t *old_state = cur_state;
1168       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1169 
1170       if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1171 	   && mctx->input.bufs_len < mctx->input.len)
1172 	  || (BE (next_char_idx >= mctx->input.valid_len, 0)
1173 	      && mctx->input.valid_len < mctx->input.len))
1174 	{
1175 	  err = extend_buffers (mctx, next_char_idx + 1);
1176 	  if (BE (err != REG_NOERROR, 0))
1177 	    {
1178 	      assert (err == REG_ESPACE);
1179 	      return REG_ERROR;
1180 	    }
1181 	}
1182 
1183       cur_state = transit_state (&err, mctx, cur_state);
1184       if (mctx->state_log != NULL)
1185 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1186 
1187       if (cur_state == NULL)
1188 	{
1189 	  /* Reached the invalid state or an error.  Try to recover a valid
1190 	     state using the state log, if available and if we have not
1191 	     already found a valid (even if not the longest) match.  */
1192 	  if (BE (err != REG_NOERROR, 0))
1193 	    return REG_ERROR;
1194 
1195 	  if (mctx->state_log == NULL
1196 	      || (match && !fl_longest_match)
1197 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1198 	    break;
1199 	}
1200 
1201       if (BE (at_init_state, 0))
1202 	{
1203 	  if (old_state == cur_state)
1204 	    next_start_idx = next_char_idx;
1205 	  else
1206 	    at_init_state = false;
1207 	}
1208 
1209       if (cur_state->halt)
1210 	{
1211 	  /* Reached a halt state.
1212 	     Check the halt state can satisfy the current context.  */
1213 	  if (!cur_state->has_constraint
1214 	      || check_halt_state_context (mctx, cur_state,
1215 					   re_string_cur_idx (&mctx->input)))
1216 	    {
1217 	      /* We found an appropriate halt state.  */
1218 	      match_last = re_string_cur_idx (&mctx->input);
1219 	      match = 1;
1220 
1221 	      /* We found a match, do not modify match_first below.  */
1222 	      p_match_first = NULL;
1223 	      if (!fl_longest_match)
1224 		break;
1225 	    }
1226 	}
1227     }
1228 
1229   if (p_match_first)
1230     *p_match_first += next_start_idx;
1231 
1232   return match_last;
1233 }
1234 
1235 /* Check NODE match the current context.  */
1236 
1237 static bool
1238 internal_function
1239 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1240 {
1241   re_token_type_t type = dfa->nodes[node].type;
1242   unsigned int constraint = dfa->nodes[node].constraint;
1243   if (type != END_OF_RE)
1244     return false;
1245   if (!constraint)
1246     return true;
1247   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1248     return false;
1249   return true;
1250 }
1251 
1252 /* Check the halt state STATE match the current context.
1253    Return 0 if not match, if the node, STATE has, is a halt node and
1254    match the context, return the node.  */
1255 
1256 static Idx
1257 internal_function
1258 check_halt_state_context (const re_match_context_t *mctx,
1259 			  const re_dfastate_t *state, Idx idx)
1260 {
1261   Idx i;
1262   unsigned int context;
1263 #ifdef DEBUG
1264   assert (state->halt);
1265 #endif
1266   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1267   for (i = 0; i < state->nodes.nelem; ++i)
1268     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1269       return state->nodes.elems[i];
1270   return 0;
1271 }
1272 
1273 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1274    corresponding to the DFA).
1275    Return the destination node, and update EPS_VIA_NODES;
1276    return REG_MISSING in case of errors.  */
1277 
1278 static Idx
1279 internal_function
1280 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1281 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1282 		   struct re_fail_stack_t *fs)
1283 {
1284   const re_dfa_t *const dfa = mctx->dfa;
1285   Idx i;
1286   bool ok;
1287   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1288     {
1289       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1290       re_node_set *edests = &dfa->edests[node];
1291       Idx dest_node;
1292       ok = re_node_set_insert (eps_via_nodes, node);
1293       if (BE (! ok, 0))
1294 	return REG_ERROR;
1295       /* Pick up a valid destination, or return REG_MISSING if none
1296 	 is found.  */
1297       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1298 	{
1299 	  Idx candidate = edests->elems[i];
1300 	  if (!re_node_set_contains (cur_nodes, candidate))
1301 	    continue;
1302           if (dest_node == REG_MISSING)
1303 	    dest_node = candidate;
1304 
1305 	  else
1306 	    {
1307 	      /* In order to avoid infinite loop like "(a*)*", return the second
1308 		 epsilon-transition if the first was already considered.  */
1309 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1310 		return candidate;
1311 
1312 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1313 	      else if (fs != NULL
1314 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1315 					   eps_via_nodes))
1316 		return REG_ERROR;
1317 
1318 	      /* We know we are going to exit.  */
1319 	      break;
1320 	    }
1321 	}
1322       return dest_node;
1323     }
1324   else
1325     {
1326       Idx naccepted = 0;
1327       re_token_type_t type = dfa->nodes[node].type;
1328 
1329 #ifdef RE_ENABLE_I18N
1330       if (dfa->nodes[node].accept_mb)
1331 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1332       else
1333 #endif /* RE_ENABLE_I18N */
1334       if (type == OP_BACK_REF)
1335 	{
1336 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1337 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1338 	  if (fs != NULL)
1339 	    {
1340 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1341 		return REG_MISSING;
1342 	      else if (naccepted)
1343 		{
1344 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1345 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1346 			      naccepted) != 0)
1347 		    return REG_MISSING;
1348 		}
1349 	    }
1350 
1351 	  if (naccepted == 0)
1352 	    {
1353 	      Idx dest_node;
1354 	      ok = re_node_set_insert (eps_via_nodes, node);
1355 	      if (BE (! ok, 0))
1356 		return REG_ERROR;
1357 	      dest_node = dfa->edests[node].elems[0];
1358 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1359 					dest_node))
1360 		return dest_node;
1361 	    }
1362 	}
1363 
1364       if (naccepted != 0
1365 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1366 	{
1367 	  Idx dest_node = dfa->nexts[node];
1368 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1369 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1370 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1371 					       dest_node)))
1372 	    return REG_MISSING;
1373 	  re_node_set_empty (eps_via_nodes);
1374 	  return dest_node;
1375 	}
1376     }
1377   return REG_MISSING;
1378 }
1379 
1380 static reg_errcode_t
1381 internal_function __attribute_warn_unused_result__
1382 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1383 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1384 {
1385   reg_errcode_t err;
1386   Idx num = fs->num++;
1387   if (fs->num == fs->alloc)
1388     {
1389       struct re_fail_stack_ent_t *new_array;
1390       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
1391 				       * fs->alloc * 2));
1392       if (new_array == NULL)
1393 	return REG_ESPACE;
1394       fs->alloc *= 2;
1395       fs->stack = new_array;
1396     }
1397   fs->stack[num].idx = str_idx;
1398   fs->stack[num].node = dest_node;
1399   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1400   if (fs->stack[num].regs == NULL)
1401     return REG_ESPACE;
1402   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1403   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1404   return err;
1405 }
1406 
1407 static Idx
1408 internal_function
1409 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1410 		regmatch_t *regs, re_node_set *eps_via_nodes)
1411 {
1412   Idx num = --fs->num;
1413   assert (REG_VALID_INDEX (num));
1414   *pidx = fs->stack[num].idx;
1415   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1416   re_node_set_free (eps_via_nodes);
1417   re_free (fs->stack[num].regs);
1418   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1419   return fs->stack[num].node;
1420 }
1421 
1422 /* Set the positions where the subexpressions are starts/ends to registers
1423    PMATCH.
1424    Note: We assume that pmatch[0] is already set, and
1425    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1426 
1427 static reg_errcode_t
1428 internal_function __attribute_warn_unused_result__
1429 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1430 	  regmatch_t *pmatch, bool fl_backtrack)
1431 {
1432   const re_dfa_t *dfa = preg->buffer;
1433   Idx idx, cur_node;
1434   re_node_set eps_via_nodes;
1435   struct re_fail_stack_t *fs;
1436   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1437   regmatch_t *prev_idx_match;
1438   bool prev_idx_match_malloced = false;
1439 
1440 #ifdef DEBUG
1441   assert (nmatch > 1);
1442   assert (mctx->state_log != NULL);
1443 #endif
1444   if (fl_backtrack)
1445     {
1446       fs = &fs_body;
1447       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1448       if (fs->stack == NULL)
1449 	return REG_ESPACE;
1450     }
1451   else
1452     fs = NULL;
1453 
1454   cur_node = dfa->init_node;
1455   re_node_set_init_empty (&eps_via_nodes);
1456 
1457   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1458     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1459   else
1460     {
1461       prev_idx_match = re_malloc (regmatch_t, nmatch);
1462       if (prev_idx_match == NULL)
1463 	{
1464 	  free_fail_stack_return (fs);
1465 	  return REG_ESPACE;
1466 	}
1467       prev_idx_match_malloced = true;
1468     }
1469   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1470 
1471   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1472     {
1473       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1474 
1475       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1476 	{
1477 	  Idx reg_idx;
1478 	  if (fs)
1479 	    {
1480 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1481 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1482 		  break;
1483 	      if (reg_idx == nmatch)
1484 		{
1485 		  re_node_set_free (&eps_via_nodes);
1486 		  if (prev_idx_match_malloced)
1487 		    re_free (prev_idx_match);
1488 		  return free_fail_stack_return (fs);
1489 		}
1490 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1491 					 &eps_via_nodes);
1492 	    }
1493 	  else
1494 	    {
1495 	      re_node_set_free (&eps_via_nodes);
1496 	      if (prev_idx_match_malloced)
1497 		re_free (prev_idx_match);
1498 	      return REG_NOERROR;
1499 	    }
1500 	}
1501 
1502       /* Proceed to next node.  */
1503       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1504 				    &eps_via_nodes, fs);
1505 
1506       if (BE (! REG_VALID_INDEX (cur_node), 0))
1507 	{
1508 	  if (BE (cur_node == REG_ERROR, 0))
1509 	    {
1510 	      re_node_set_free (&eps_via_nodes);
1511 	      if (prev_idx_match_malloced)
1512 		re_free (prev_idx_match);
1513 	      free_fail_stack_return (fs);
1514 	      return REG_ESPACE;
1515 	    }
1516 	  if (fs)
1517 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1518 				       &eps_via_nodes);
1519 	  else
1520 	    {
1521 	      re_node_set_free (&eps_via_nodes);
1522 	      if (prev_idx_match_malloced)
1523 		re_free (prev_idx_match);
1524 	      return REG_NOMATCH;
1525 	    }
1526 	}
1527     }
1528   re_node_set_free (&eps_via_nodes);
1529   if (prev_idx_match_malloced)
1530     re_free (prev_idx_match);
1531   return free_fail_stack_return (fs);
1532 }
1533 
1534 static reg_errcode_t
1535 internal_function
1536 free_fail_stack_return (struct re_fail_stack_t *fs)
1537 {
1538   if (fs)
1539     {
1540       Idx fs_idx;
1541       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1542 	{
1543 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1544 	  re_free (fs->stack[fs_idx].regs);
1545 	}
1546       re_free (fs->stack);
1547     }
1548   return REG_NOERROR;
1549 }
1550 
1551 static void
1552 internal_function
1553 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1554 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1555 {
1556   int type = dfa->nodes[cur_node].type;
1557   if (type == OP_OPEN_SUBEXP)
1558     {
1559       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1560 
1561       /* We are at the first node of this sub expression.  */
1562       if (reg_num < nmatch)
1563 	{
1564 	  pmatch[reg_num].rm_so = cur_idx;
1565 	  pmatch[reg_num].rm_eo = -1;
1566 	}
1567     }
1568   else if (type == OP_CLOSE_SUBEXP)
1569     {
1570       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1571       if (reg_num < nmatch)
1572 	{
1573 	  /* We are at the last node of this sub expression.  */
1574 	  if (pmatch[reg_num].rm_so < cur_idx)
1575 	    {
1576 	      pmatch[reg_num].rm_eo = cur_idx;
1577 	      /* This is a non-empty match or we are not inside an optional
1578 		 subexpression.  Accept this right away.  */
1579 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1580 	    }
1581 	  else
1582 	    {
1583 	      if (dfa->nodes[cur_node].opt_subexp
1584 		  && prev_idx_match[reg_num].rm_so != -1)
1585 		/* We transited through an empty match for an optional
1586 		   subexpression, like (a?)*, and this is not the subexp's
1587 		   first match.  Copy back the old content of the registers
1588 		   so that matches of an inner subexpression are undone as
1589 		   well, like in ((a?))*.  */
1590 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1591 	      else
1592 		/* We completed a subexpression, but it may be part of
1593 		   an optional one, so do not update PREV_IDX_MATCH.  */
1594 		pmatch[reg_num].rm_eo = cur_idx;
1595 	    }
1596 	}
1597     }
1598 }
1599 
1600 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1601    and sift the nodes in each states according to the following rules.
1602    Updated state_log will be wrote to STATE_LOG.
1603 
1604    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1605      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1606 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1607 	the LAST_NODE, we throw away the node 'a'.
1608      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1609 	string 's' and transit to 'b':
1610 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1611 	   away the node 'a'.
1612 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1613 	    thrown away, we throw away the node 'a'.
1614      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1615 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1616 	   node 'a'.
1617 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1618 	    we throw away the node 'a'.  */
1619 
1620 #define STATE_NODE_CONTAINS(state,node) \
1621   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1622 
1623 static reg_errcode_t
1624 internal_function
1625 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1626 {
1627   reg_errcode_t err;
1628   int null_cnt = 0;
1629   Idx str_idx = sctx->last_str_idx;
1630   re_node_set cur_dest;
1631 
1632 #ifdef DEBUG
1633   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1634 #endif
1635 
1636   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1637      transit to the last_node and the last_node itself.  */
1638   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1639   if (BE (err != REG_NOERROR, 0))
1640     return err;
1641   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1642   if (BE (err != REG_NOERROR, 0))
1643     goto free_return;
1644 
1645   /* Then check each states in the state_log.  */
1646   while (str_idx > 0)
1647     {
1648       /* Update counters.  */
1649       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1650       if (null_cnt > mctx->max_mb_elem_len)
1651 	{
1652 	  memset (sctx->sifted_states, '\0',
1653 		  sizeof (re_dfastate_t *) * str_idx);
1654 	  re_node_set_free (&cur_dest);
1655 	  return REG_NOERROR;
1656 	}
1657       re_node_set_empty (&cur_dest);
1658       --str_idx;
1659 
1660       if (mctx->state_log[str_idx])
1661 	{
1662 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1663 	  if (BE (err != REG_NOERROR, 0))
1664 	    goto free_return;
1665 	}
1666 
1667       /* Add all the nodes which satisfy the following conditions:
1668 	 - It can epsilon transit to a node in CUR_DEST.
1669 	 - It is in CUR_SRC.
1670 	 And update state_log.  */
1671       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1672       if (BE (err != REG_NOERROR, 0))
1673 	goto free_return;
1674     }
1675   err = REG_NOERROR;
1676  free_return:
1677   re_node_set_free (&cur_dest);
1678   return err;
1679 }
1680 
1681 static reg_errcode_t
1682 internal_function __attribute_warn_unused_result__
1683 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1684 		     Idx str_idx, re_node_set *cur_dest)
1685 {
1686   const re_dfa_t *const dfa = mctx->dfa;
1687   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1688   Idx i;
1689 
1690   /* Then build the next sifted state.
1691      We build the next sifted state on 'cur_dest', and update
1692      'sifted_states[str_idx]' with 'cur_dest'.
1693      Note:
1694      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1695      'cur_src' points the node_set of the old 'state_log[str_idx]'
1696      (with the epsilon nodes pre-filtered out).  */
1697   for (i = 0; i < cur_src->nelem; i++)
1698     {
1699       Idx prev_node = cur_src->elems[i];
1700       int naccepted = 0;
1701       bool ok;
1702 
1703 #ifdef DEBUG
1704       re_token_type_t type = dfa->nodes[prev_node].type;
1705       assert (!IS_EPSILON_NODE (type));
1706 #endif
1707 #ifdef RE_ENABLE_I18N
1708       /* If the node may accept "multi byte".  */
1709       if (dfa->nodes[prev_node].accept_mb)
1710 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1711 					 str_idx, sctx->last_str_idx);
1712 #endif /* RE_ENABLE_I18N */
1713 
1714       /* We don't check backreferences here.
1715 	 See update_cur_sifted_state().  */
1716       if (!naccepted
1717 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1718 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1719 				  dfa->nexts[prev_node]))
1720 	naccepted = 1;
1721 
1722       if (naccepted == 0)
1723 	continue;
1724 
1725       if (sctx->limits.nelem)
1726 	{
1727 	  Idx to_idx = str_idx + naccepted;
1728 	  if (check_dst_limits (mctx, &sctx->limits,
1729 				dfa->nexts[prev_node], to_idx,
1730 				prev_node, str_idx))
1731 	    continue;
1732 	}
1733       ok = re_node_set_insert (cur_dest, prev_node);
1734       if (BE (! ok, 0))
1735 	return REG_ESPACE;
1736     }
1737 
1738   return REG_NOERROR;
1739 }
1740 
1741 /* Helper functions.  */
1742 
1743 static reg_errcode_t
1744 internal_function
1745 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1746 {
1747   Idx top = mctx->state_log_top;
1748 
1749   if ((next_state_log_idx >= mctx->input.bufs_len
1750        && mctx->input.bufs_len < mctx->input.len)
1751       || (next_state_log_idx >= mctx->input.valid_len
1752 	  && mctx->input.valid_len < mctx->input.len))
1753     {
1754       reg_errcode_t err;
1755       err = extend_buffers (mctx, next_state_log_idx + 1);
1756       if (BE (err != REG_NOERROR, 0))
1757 	return err;
1758     }
1759 
1760   if (top < next_state_log_idx)
1761     {
1762       memset (mctx->state_log + top + 1, '\0',
1763 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1764       mctx->state_log_top = next_state_log_idx;
1765     }
1766   return REG_NOERROR;
1767 }
1768 
1769 static reg_errcode_t
1770 internal_function
1771 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1772 		   re_dfastate_t **src, Idx num)
1773 {
1774   Idx st_idx;
1775   reg_errcode_t err;
1776   for (st_idx = 0; st_idx < num; ++st_idx)
1777     {
1778       if (dst[st_idx] == NULL)
1779 	dst[st_idx] = src[st_idx];
1780       else if (src[st_idx] != NULL)
1781 	{
1782 	  re_node_set merged_set;
1783 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1784 					&src[st_idx]->nodes);
1785 	  if (BE (err != REG_NOERROR, 0))
1786 	    return err;
1787 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1788 	  re_node_set_free (&merged_set);
1789 	  if (BE (err != REG_NOERROR, 0))
1790 	    return err;
1791 	}
1792     }
1793   return REG_NOERROR;
1794 }
1795 
1796 static reg_errcode_t
1797 internal_function
1798 update_cur_sifted_state (const re_match_context_t *mctx,
1799 			 re_sift_context_t *sctx, Idx str_idx,
1800 			 re_node_set *dest_nodes)
1801 {
1802   const re_dfa_t *const dfa = mctx->dfa;
1803   reg_errcode_t err = REG_NOERROR;
1804   const re_node_set *candidates;
1805   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1806 		: &mctx->state_log[str_idx]->nodes);
1807 
1808   if (dest_nodes->nelem == 0)
1809     sctx->sifted_states[str_idx] = NULL;
1810   else
1811     {
1812       if (candidates)
1813 	{
1814 	  /* At first, add the nodes which can epsilon transit to a node in
1815 	     DEST_NODE.  */
1816 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1817 	  if (BE (err != REG_NOERROR, 0))
1818 	    return err;
1819 
1820 	  /* Then, check the limitations in the current sift_context.  */
1821 	  if (sctx->limits.nelem)
1822 	    {
1823 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1824 					 mctx->bkref_ents, str_idx);
1825 	      if (BE (err != REG_NOERROR, 0))
1826 		return err;
1827 	    }
1828 	}
1829 
1830       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1831       if (BE (err != REG_NOERROR, 0))
1832 	return err;
1833     }
1834 
1835   if (candidates && mctx->state_log[str_idx]->has_backref)
1836     {
1837       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1838       if (BE (err != REG_NOERROR, 0))
1839 	return err;
1840     }
1841   return REG_NOERROR;
1842 }
1843 
1844 static reg_errcode_t
1845 internal_function __attribute_warn_unused_result__
1846 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1847 		       const re_node_set *candidates)
1848 {
1849   reg_errcode_t err = REG_NOERROR;
1850   Idx i;
1851 
1852   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1853   if (BE (err != REG_NOERROR, 0))
1854     return err;
1855 
1856   if (!state->inveclosure.alloc)
1857     {
1858       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1859       if (BE (err != REG_NOERROR, 0))
1860 	return REG_ESPACE;
1861       for (i = 0; i < dest_nodes->nelem; i++)
1862 	{
1863 	  err = re_node_set_merge (&state->inveclosure,
1864 				   dfa->inveclosures + dest_nodes->elems[i]);
1865 	  if (BE (err != REG_NOERROR, 0))
1866 	    return REG_ESPACE;
1867 	}
1868     }
1869   return re_node_set_add_intersect (dest_nodes, candidates,
1870 				    &state->inveclosure);
1871 }
1872 
1873 static reg_errcode_t
1874 internal_function
1875 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1876 		       const re_node_set *candidates)
1877 {
1878     Idx ecl_idx;
1879     reg_errcode_t err;
1880     re_node_set *inv_eclosure = dfa->inveclosures + node;
1881     re_node_set except_nodes;
1882     re_node_set_init_empty (&except_nodes);
1883     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1884       {
1885 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1886 	if (cur_node == node)
1887 	  continue;
1888 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1889 	  {
1890 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1891 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1892 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
1893 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1894 		 && re_node_set_contains (dest_nodes, edst1))
1895 		|| (REG_VALID_NONZERO_INDEX (edst2)
1896 		    && !re_node_set_contains (inv_eclosure, edst2)
1897 		    && re_node_set_contains (dest_nodes, edst2)))
1898 	      {
1899 		err = re_node_set_add_intersect (&except_nodes, candidates,
1900 						 dfa->inveclosures + cur_node);
1901 		if (BE (err != REG_NOERROR, 0))
1902 		  {
1903 		    re_node_set_free (&except_nodes);
1904 		    return err;
1905 		  }
1906 	      }
1907 	  }
1908       }
1909     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1910       {
1911 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1912 	if (!re_node_set_contains (&except_nodes, cur_node))
1913 	  {
1914 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1915 	    re_node_set_remove_at (dest_nodes, idx);
1916 	  }
1917       }
1918     re_node_set_free (&except_nodes);
1919     return REG_NOERROR;
1920 }
1921 
1922 static bool
1923 internal_function
1924 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1925 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1926 {
1927   const re_dfa_t *const dfa = mctx->dfa;
1928   Idx lim_idx, src_pos, dst_pos;
1929 
1930   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1931   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1932   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1933     {
1934       Idx subexp_idx;
1935       struct re_backref_cache_entry *ent;
1936       ent = mctx->bkref_ents + limits->elems[lim_idx];
1937       subexp_idx = dfa->nodes[ent->node].opr.idx;
1938 
1939       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1940 					   subexp_idx, dst_node, dst_idx,
1941 					   dst_bkref_idx);
1942       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1943 					   subexp_idx, src_node, src_idx,
1944 					   src_bkref_idx);
1945 
1946       /* In case of:
1947 	 <src> <dst> ( <subexp> )
1948 	 ( <subexp> ) <src> <dst>
1949 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1950       if (src_pos == dst_pos)
1951 	continue; /* This is unrelated limitation.  */
1952       else
1953 	return true;
1954     }
1955   return false;
1956 }
1957 
1958 static int
1959 internal_function
1960 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1961 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1962 {
1963   const re_dfa_t *const dfa = mctx->dfa;
1964   const re_node_set *eclosures = dfa->eclosures + from_node;
1965   Idx node_idx;
1966 
1967   /* Else, we are on the boundary: examine the nodes on the epsilon
1968      closure.  */
1969   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1970     {
1971       Idx node = eclosures->elems[node_idx];
1972       switch (dfa->nodes[node].type)
1973 	{
1974 	case OP_BACK_REF:
1975 	  if (bkref_idx != REG_MISSING)
1976 	    {
1977 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1978 	      do
1979 		{
1980 		  Idx dst;
1981 		  int cpos;
1982 
1983 		  if (ent->node != node)
1984 		    continue;
1985 
1986 		  if (subexp_idx < BITSET_WORD_BITS
1987 		      && !(ent->eps_reachable_subexps_map
1988 			   & ((bitset_word_t) 1 << subexp_idx)))
1989 		    continue;
1990 
1991 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1992 		     OP_CLOSE_SUBEXP cases below.  But, if the
1993 		     destination node is the same node as the source
1994 		     node, don't recurse because it would cause an
1995 		     infinite loop: a regex that exhibits this behavior
1996 		     is ()\1*\1*  */
1997 		  dst = dfa->edests[node].elems[0];
1998 		  if (dst == from_node)
1999 		    {
2000 		      if (boundaries & 1)
2001 			return -1;
2002 		      else /* if (boundaries & 2) */
2003 			return 0;
2004 		    }
2005 
2006 		  cpos =
2007 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2008 						 dst, bkref_idx);
2009 		  if (cpos == -1 /* && (boundaries & 1) */)
2010 		    return -1;
2011 		  if (cpos == 0 && (boundaries & 2))
2012 		    return 0;
2013 
2014 		  if (subexp_idx < BITSET_WORD_BITS)
2015 		    ent->eps_reachable_subexps_map
2016 		      &= ~((bitset_word_t) 1 << subexp_idx);
2017 		}
2018 	      while (ent++->more);
2019 	    }
2020 	  break;
2021 
2022 	case OP_OPEN_SUBEXP:
2023 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
2024 	    return -1;
2025 	  break;
2026 
2027 	case OP_CLOSE_SUBEXP:
2028 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
2029 	    return 0;
2030 	  break;
2031 
2032 	default:
2033 	    break;
2034 	}
2035     }
2036 
2037   return (boundaries & 2) ? 1 : 0;
2038 }
2039 
2040 static int
2041 internal_function
2042 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
2043 			   Idx subexp_idx, Idx from_node, Idx str_idx,
2044 			   Idx bkref_idx)
2045 {
2046   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
2047   int boundaries;
2048 
2049   /* If we are outside the range of the subexpression, return -1 or 1.  */
2050   if (str_idx < lim->subexp_from)
2051     return -1;
2052 
2053   if (lim->subexp_to < str_idx)
2054     return 1;
2055 
2056   /* If we are within the subexpression, return 0.  */
2057   boundaries = (str_idx == lim->subexp_from);
2058   boundaries |= (str_idx == lim->subexp_to) << 1;
2059   if (boundaries == 0)
2060     return 0;
2061 
2062   /* Else, examine epsilon closure.  */
2063   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2064 				      from_node, bkref_idx);
2065 }
2066 
2067 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2068    which are against limitations from DEST_NODES. */
2069 
2070 static reg_errcode_t
2071 internal_function
2072 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2073 		     const re_node_set *candidates, re_node_set *limits,
2074 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2075 {
2076   reg_errcode_t err;
2077   Idx node_idx, lim_idx;
2078 
2079   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2080     {
2081       Idx subexp_idx;
2082       struct re_backref_cache_entry *ent;
2083       ent = bkref_ents + limits->elems[lim_idx];
2084 
2085       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2086 	continue; /* This is unrelated limitation.  */
2087 
2088       subexp_idx = dfa->nodes[ent->node].opr.idx;
2089       if (ent->subexp_to == str_idx)
2090 	{
2091 	  Idx ops_node = REG_MISSING;
2092 	  Idx cls_node = REG_MISSING;
2093 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2094 	    {
2095 	      Idx node = dest_nodes->elems[node_idx];
2096 	      re_token_type_t type = dfa->nodes[node].type;
2097 	      if (type == OP_OPEN_SUBEXP
2098 		  && subexp_idx == dfa->nodes[node].opr.idx)
2099 		ops_node = node;
2100 	      else if (type == OP_CLOSE_SUBEXP
2101 		       && subexp_idx == dfa->nodes[node].opr.idx)
2102 		cls_node = node;
2103 	    }
2104 
2105 	  /* Check the limitation of the open subexpression.  */
2106 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2107 	  if (REG_VALID_INDEX (ops_node))
2108 	    {
2109 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2110 					   candidates);
2111 	      if (BE (err != REG_NOERROR, 0))
2112 		return err;
2113 	    }
2114 
2115 	  /* Check the limitation of the close subexpression.  */
2116 	  if (REG_VALID_INDEX (cls_node))
2117 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2118 	      {
2119 		Idx node = dest_nodes->elems[node_idx];
2120 		if (!re_node_set_contains (dfa->inveclosures + node,
2121 					   cls_node)
2122 		    && !re_node_set_contains (dfa->eclosures + node,
2123 					      cls_node))
2124 		  {
2125 		    /* It is against this limitation.
2126 		       Remove it form the current sifted state.  */
2127 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2128 						 candidates);
2129 		    if (BE (err != REG_NOERROR, 0))
2130 		      return err;
2131 		    --node_idx;
2132 		  }
2133 	      }
2134 	}
2135       else /* (ent->subexp_to != str_idx)  */
2136 	{
2137 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2138 	    {
2139 	      Idx node = dest_nodes->elems[node_idx];
2140 	      re_token_type_t type = dfa->nodes[node].type;
2141 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2142 		{
2143 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2144 		    continue;
2145 		  /* It is against this limitation.
2146 		     Remove it form the current sifted state.  */
2147 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2148 					       candidates);
2149 		  if (BE (err != REG_NOERROR, 0))
2150 		    return err;
2151 		}
2152 	    }
2153 	}
2154     }
2155   return REG_NOERROR;
2156 }
2157 
2158 static reg_errcode_t
2159 internal_function __attribute_warn_unused_result__
2160 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2161 		   Idx str_idx, const re_node_set *candidates)
2162 {
2163   const re_dfa_t *const dfa = mctx->dfa;
2164   reg_errcode_t err;
2165   Idx node_idx, node;
2166   re_sift_context_t local_sctx;
2167   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2168 
2169   if (first_idx == REG_MISSING)
2170     return REG_NOERROR;
2171 
2172   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2173 
2174   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2175     {
2176       Idx enabled_idx;
2177       re_token_type_t type;
2178       struct re_backref_cache_entry *entry;
2179       node = candidates->elems[node_idx];
2180       type = dfa->nodes[node].type;
2181       /* Avoid infinite loop for the REs like "()\1+".  */
2182       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2183 	continue;
2184       if (type != OP_BACK_REF)
2185 	continue;
2186 
2187       entry = mctx->bkref_ents + first_idx;
2188       enabled_idx = first_idx;
2189       do
2190 	{
2191 	  Idx subexp_len;
2192 	  Idx to_idx;
2193 	  Idx dst_node;
2194 	  bool ok;
2195 	  re_dfastate_t *cur_state;
2196 
2197 	  if (entry->node != node)
2198 	    continue;
2199 	  subexp_len = entry->subexp_to - entry->subexp_from;
2200 	  to_idx = str_idx + subexp_len;
2201 	  dst_node = (subexp_len ? dfa->nexts[node]
2202 		      : dfa->edests[node].elems[0]);
2203 
2204 	  if (to_idx > sctx->last_str_idx
2205 	      || sctx->sifted_states[to_idx] == NULL
2206 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2207 	      || check_dst_limits (mctx, &sctx->limits, node,
2208 				   str_idx, dst_node, to_idx))
2209 	    continue;
2210 
2211 	  if (local_sctx.sifted_states == NULL)
2212 	    {
2213 	      local_sctx = *sctx;
2214 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2215 	      if (BE (err != REG_NOERROR, 0))
2216 		goto free_return;
2217 	    }
2218 	  local_sctx.last_node = node;
2219 	  local_sctx.last_str_idx = str_idx;
2220 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2221 	  if (BE (! ok, 0))
2222 	    {
2223 	      err = REG_ESPACE;
2224 	      goto free_return;
2225 	    }
2226 	  cur_state = local_sctx.sifted_states[str_idx];
2227 	  err = sift_states_backward (mctx, &local_sctx);
2228 	  if (BE (err != REG_NOERROR, 0))
2229 	    goto free_return;
2230 	  if (sctx->limited_states != NULL)
2231 	    {
2232 	      err = merge_state_array (dfa, sctx->limited_states,
2233 				       local_sctx.sifted_states,
2234 				       str_idx + 1);
2235 	      if (BE (err != REG_NOERROR, 0))
2236 		goto free_return;
2237 	    }
2238 	  local_sctx.sifted_states[str_idx] = cur_state;
2239 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2240 
2241 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2242 	  entry = mctx->bkref_ents + enabled_idx;
2243 	}
2244       while (enabled_idx++, entry++->more);
2245     }
2246   err = REG_NOERROR;
2247  free_return:
2248   if (local_sctx.sifted_states != NULL)
2249     {
2250       re_node_set_free (&local_sctx.limits);
2251     }
2252 
2253   return err;
2254 }
2255 
2256 
2257 #ifdef RE_ENABLE_I18N
2258 static int
2259 internal_function
2260 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2261 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2262 {
2263   const re_dfa_t *const dfa = mctx->dfa;
2264   int naccepted;
2265   /* Check the node can accept "multi byte".  */
2266   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2267   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2268       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2269 			    dfa->nexts[node_idx]))
2270     /* The node can't accept the "multi byte", or the
2271        destination was already thrown away, then the node
2272        could't accept the current input "multi byte".   */
2273     naccepted = 0;
2274   /* Otherwise, it is sure that the node could accept
2275      'naccepted' bytes input.  */
2276   return naccepted;
2277 }
2278 #endif /* RE_ENABLE_I18N */
2279 
2280 
2281 /* Functions for state transition.  */
2282 
2283 /* Return the next state to which the current state STATE will transit by
2284    accepting the current input byte, and update STATE_LOG if necessary.
2285    If STATE can accept a multibyte char/collating element/back reference
2286    update the destination of STATE_LOG.  */
2287 
2288 static re_dfastate_t *
2289 internal_function __attribute_warn_unused_result__
2290 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2291 	       re_dfastate_t *state)
2292 {
2293   re_dfastate_t **trtable;
2294   unsigned char ch;
2295 
2296 #ifdef RE_ENABLE_I18N
2297   /* If the current state can accept multibyte.  */
2298   if (BE (state->accept_mb, 0))
2299     {
2300       *err = transit_state_mb (mctx, state);
2301       if (BE (*err != REG_NOERROR, 0))
2302 	return NULL;
2303     }
2304 #endif /* RE_ENABLE_I18N */
2305 
2306   /* Then decide the next state with the single byte.  */
2307 #if 0
2308   if (0)
2309     /* don't use transition table  */
2310     return transit_state_sb (err, mctx, state);
2311 #endif
2312 
2313   /* Use transition table  */
2314   ch = re_string_fetch_byte (&mctx->input);
2315   for (;;)
2316     {
2317       trtable = state->trtable;
2318       if (BE (trtable != NULL, 1))
2319 	return trtable[ch];
2320 
2321       trtable = state->word_trtable;
2322       if (BE (trtable != NULL, 1))
2323 	{
2324 	  unsigned int context;
2325 	  context
2326 	    = re_string_context_at (&mctx->input,
2327 				    re_string_cur_idx (&mctx->input) - 1,
2328 				    mctx->eflags);
2329 	  if (IS_WORD_CONTEXT (context))
2330 	    return trtable[ch + SBC_MAX];
2331 	  else
2332 	    return trtable[ch];
2333 	}
2334 
2335       if (!build_trtable (mctx->dfa, state))
2336 	{
2337 	  *err = REG_ESPACE;
2338 	  return NULL;
2339 	}
2340 
2341       /* Retry, we now have a transition table.  */
2342     }
2343 }
2344 
2345 /* Update the state_log if we need */
2346 static re_dfastate_t *
2347 internal_function
2348 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2349 		      re_dfastate_t *next_state)
2350 {
2351   const re_dfa_t *const dfa = mctx->dfa;
2352   Idx cur_idx = re_string_cur_idx (&mctx->input);
2353 
2354   if (cur_idx > mctx->state_log_top)
2355     {
2356       mctx->state_log[cur_idx] = next_state;
2357       mctx->state_log_top = cur_idx;
2358     }
2359   else if (mctx->state_log[cur_idx] == 0)
2360     {
2361       mctx->state_log[cur_idx] = next_state;
2362     }
2363   else
2364     {
2365       re_dfastate_t *pstate;
2366       unsigned int context;
2367       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2368       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2369 	 the destination of a multibyte char/collating element/
2370 	 back reference.  Then the next state is the union set of
2371 	 these destinations and the results of the transition table.  */
2372       pstate = mctx->state_log[cur_idx];
2373       log_nodes = pstate->entrance_nodes;
2374       if (next_state != NULL)
2375 	{
2376 	  table_nodes = next_state->entrance_nodes;
2377 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2378 					     log_nodes);
2379 	  if (BE (*err != REG_NOERROR, 0))
2380 	    return NULL;
2381 	}
2382       else
2383 	next_nodes = *log_nodes;
2384       /* Note: We already add the nodes of the initial state,
2385 	 then we don't need to add them here.  */
2386 
2387       context = re_string_context_at (&mctx->input,
2388 				      re_string_cur_idx (&mctx->input) - 1,
2389 				      mctx->eflags);
2390       next_state = mctx->state_log[cur_idx]
2391 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2392       /* We don't need to check errors here, since the return value of
2393 	 this function is next_state and ERR is already set.  */
2394 
2395       if (table_nodes != NULL)
2396 	re_node_set_free (&next_nodes);
2397     }
2398 
2399   if (BE (dfa->nbackref, 0) && next_state != NULL)
2400     {
2401       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2402 	 later.  We must check them here, since the back references in the
2403 	 next state might use them.  */
2404       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2405 					cur_idx);
2406       if (BE (*err != REG_NOERROR, 0))
2407 	return NULL;
2408 
2409       /* If the next state has back references.  */
2410       if (next_state->has_backref)
2411 	{
2412 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2413 	  if (BE (*err != REG_NOERROR, 0))
2414 	    return NULL;
2415 	  next_state = mctx->state_log[cur_idx];
2416 	}
2417     }
2418 
2419   return next_state;
2420 }
2421 
2422 /* Skip bytes in the input that correspond to part of a
2423    multi-byte match, then look in the log for a state
2424    from which to restart matching.  */
2425 static re_dfastate_t *
2426 internal_function
2427 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2428 {
2429   re_dfastate_t *cur_state;
2430   do
2431     {
2432       Idx max = mctx->state_log_top;
2433       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2434 
2435       do
2436 	{
2437 	  if (++cur_str_idx > max)
2438 	    return NULL;
2439 	  re_string_skip_bytes (&mctx->input, 1);
2440 	}
2441       while (mctx->state_log[cur_str_idx] == NULL);
2442 
2443       cur_state = merge_state_with_log (err, mctx, NULL);
2444     }
2445   while (*err == REG_NOERROR && cur_state == NULL);
2446   return cur_state;
2447 }
2448 
2449 /* Helper functions for transit_state.  */
2450 
2451 /* From the node set CUR_NODES, pick up the nodes whose types are
2452    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2453    expression. And register them to use them later for evaluating the
2454    corresponding back references.  */
2455 
2456 static reg_errcode_t
2457 internal_function
2458 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2459 			   Idx str_idx)
2460 {
2461   const re_dfa_t *const dfa = mctx->dfa;
2462   Idx node_idx;
2463   reg_errcode_t err;
2464 
2465   /* TODO: This isn't efficient.
2466 	   Because there might be more than one nodes whose types are
2467 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2468 	   nodes.
2469 	   E.g. RE: (a){2}  */
2470   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2471     {
2472       Idx node = cur_nodes->elems[node_idx];
2473       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2474 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2475 	  && (dfa->used_bkref_map
2476 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2477 	{
2478 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2479 	  if (BE (err != REG_NOERROR, 0))
2480 	    return err;
2481 	}
2482     }
2483   return REG_NOERROR;
2484 }
2485 
2486 #if 0
2487 /* Return the next state to which the current state STATE will transit by
2488    accepting the current input byte.  */
2489 
2490 static re_dfastate_t *
2491 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2492 		  re_dfastate_t *state)
2493 {
2494   const re_dfa_t *const dfa = mctx->dfa;
2495   re_node_set next_nodes;
2496   re_dfastate_t *next_state;
2497   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2498   unsigned int context;
2499 
2500   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2501   if (BE (*err != REG_NOERROR, 0))
2502     return NULL;
2503   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2504     {
2505       Idx cur_node = state->nodes.elems[node_cnt];
2506       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2507 	{
2508 	  *err = re_node_set_merge (&next_nodes,
2509 				    dfa->eclosures + dfa->nexts[cur_node]);
2510 	  if (BE (*err != REG_NOERROR, 0))
2511 	    {
2512 	      re_node_set_free (&next_nodes);
2513 	      return NULL;
2514 	    }
2515 	}
2516     }
2517   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2518   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2519   /* We don't need to check errors here, since the return value of
2520      this function is next_state and ERR is already set.  */
2521 
2522   re_node_set_free (&next_nodes);
2523   re_string_skip_bytes (&mctx->input, 1);
2524   return next_state;
2525 }
2526 #endif
2527 
2528 #ifdef RE_ENABLE_I18N
2529 static reg_errcode_t
2530 internal_function
2531 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2532 {
2533   const re_dfa_t *const dfa = mctx->dfa;
2534   reg_errcode_t err;
2535   Idx i;
2536 
2537   for (i = 0; i < pstate->nodes.nelem; ++i)
2538     {
2539       re_node_set dest_nodes, *new_nodes;
2540       Idx cur_node_idx = pstate->nodes.elems[i];
2541       int naccepted;
2542       Idx dest_idx;
2543       unsigned int context;
2544       re_dfastate_t *dest_state;
2545 
2546       if (!dfa->nodes[cur_node_idx].accept_mb)
2547 	continue;
2548 
2549       if (dfa->nodes[cur_node_idx].constraint)
2550 	{
2551 	  context = re_string_context_at (&mctx->input,
2552 					  re_string_cur_idx (&mctx->input),
2553 					  mctx->eflags);
2554 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2555 					   context))
2556 	    continue;
2557 	}
2558 
2559       /* How many bytes the node can accept?  */
2560       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2561 					   re_string_cur_idx (&mctx->input));
2562       if (naccepted == 0)
2563 	continue;
2564 
2565       /* The node can accepts 'naccepted' bytes.  */
2566       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2567       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2568 			       : mctx->max_mb_elem_len);
2569       err = clean_state_log_if_needed (mctx, dest_idx);
2570       if (BE (err != REG_NOERROR, 0))
2571 	return err;
2572 #ifdef DEBUG
2573       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2574 #endif
2575       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2576 
2577       dest_state = mctx->state_log[dest_idx];
2578       if (dest_state == NULL)
2579 	dest_nodes = *new_nodes;
2580       else
2581 	{
2582 	  err = re_node_set_init_union (&dest_nodes,
2583 					dest_state->entrance_nodes, new_nodes);
2584 	  if (BE (err != REG_NOERROR, 0))
2585 	    return err;
2586 	}
2587       context = re_string_context_at (&mctx->input, dest_idx - 1,
2588 				      mctx->eflags);
2589       mctx->state_log[dest_idx]
2590 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2591       if (dest_state != NULL)
2592 	re_node_set_free (&dest_nodes);
2593       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
2594 	return err;
2595     }
2596   return REG_NOERROR;
2597 }
2598 #endif /* RE_ENABLE_I18N */
2599 
2600 static reg_errcode_t
2601 internal_function
2602 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2603 {
2604   const re_dfa_t *const dfa = mctx->dfa;
2605   reg_errcode_t err;
2606   Idx i;
2607   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2608 
2609   for (i = 0; i < nodes->nelem; ++i)
2610     {
2611       Idx dest_str_idx, prev_nelem, bkc_idx;
2612       Idx node_idx = nodes->elems[i];
2613       unsigned int context;
2614       const re_token_t *node = dfa->nodes + node_idx;
2615       re_node_set *new_dest_nodes;
2616 
2617       /* Check whether 'node' is a backreference or not.  */
2618       if (node->type != OP_BACK_REF)
2619 	continue;
2620 
2621       if (node->constraint)
2622 	{
2623 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2624 					  mctx->eflags);
2625 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2626 	    continue;
2627 	}
2628 
2629       /* 'node' is a backreference.
2630 	 Check the substring which the substring matched.  */
2631       bkc_idx = mctx->nbkref_ents;
2632       err = get_subexp (mctx, node_idx, cur_str_idx);
2633       if (BE (err != REG_NOERROR, 0))
2634 	goto free_return;
2635 
2636       /* And add the epsilon closures (which is 'new_dest_nodes') of
2637 	 the backreference to appropriate state_log.  */
2638 #ifdef DEBUG
2639       assert (dfa->nexts[node_idx] != REG_MISSING);
2640 #endif
2641       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2642 	{
2643 	  Idx subexp_len;
2644 	  re_dfastate_t *dest_state;
2645 	  struct re_backref_cache_entry *bkref_ent;
2646 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2647 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2648 	    continue;
2649 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2650 	  new_dest_nodes = (subexp_len == 0
2651 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2652 			    : dfa->eclosures + dfa->nexts[node_idx]);
2653 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2654 			  - bkref_ent->subexp_from);
2655 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2656 					  mctx->eflags);
2657 	  dest_state = mctx->state_log[dest_str_idx];
2658 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2659 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2660 	  /* Add 'new_dest_node' to state_log.  */
2661 	  if (dest_state == NULL)
2662 	    {
2663 	      mctx->state_log[dest_str_idx]
2664 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2665 					    context);
2666 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2667 		      && err != REG_NOERROR, 0))
2668 		goto free_return;
2669 	    }
2670 	  else
2671 	    {
2672 	      re_node_set dest_nodes;
2673 	      err = re_node_set_init_union (&dest_nodes,
2674 					    dest_state->entrance_nodes,
2675 					    new_dest_nodes);
2676 	      if (BE (err != REG_NOERROR, 0))
2677 		{
2678 		  re_node_set_free (&dest_nodes);
2679 		  goto free_return;
2680 		}
2681 	      mctx->state_log[dest_str_idx]
2682 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2683 	      re_node_set_free (&dest_nodes);
2684 	      if (BE (mctx->state_log[dest_str_idx] == NULL
2685 		      && err != REG_NOERROR, 0))
2686 		goto free_return;
2687 	    }
2688 	  /* We need to check recursively if the backreference can epsilon
2689 	     transit.  */
2690 	  if (subexp_len == 0
2691 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2692 	    {
2693 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2694 					       cur_str_idx);
2695 	      if (BE (err != REG_NOERROR, 0))
2696 		goto free_return;
2697 	      err = transit_state_bkref (mctx, new_dest_nodes);
2698 	      if (BE (err != REG_NOERROR, 0))
2699 		goto free_return;
2700 	    }
2701 	}
2702     }
2703   err = REG_NOERROR;
2704  free_return:
2705   return err;
2706 }
2707 
2708 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2709    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2710    Note that we might collect inappropriate candidates here.
2711    However, the cost of checking them strictly here is too high, then we
2712    delay these checking for prune_impossible_nodes().  */
2713 
2714 static reg_errcode_t
2715 internal_function __attribute_warn_unused_result__
2716 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2717 {
2718   const re_dfa_t *const dfa = mctx->dfa;
2719   Idx subexp_num, sub_top_idx;
2720   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2721   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2722   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2723   if (cache_idx != REG_MISSING)
2724     {
2725       const struct re_backref_cache_entry *entry
2726 	= mctx->bkref_ents + cache_idx;
2727       do
2728 	if (entry->node == bkref_node)
2729 	  return REG_NOERROR; /* We already checked it.  */
2730       while (entry++->more);
2731     }
2732 
2733   subexp_num = dfa->nodes[bkref_node].opr.idx;
2734 
2735   /* For each sub expression  */
2736   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2737     {
2738       reg_errcode_t err;
2739       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2740       re_sub_match_last_t *sub_last;
2741       Idx sub_last_idx, sl_str, bkref_str_off;
2742 
2743       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2744 	continue; /* It isn't related.  */
2745 
2746       sl_str = sub_top->str_idx;
2747       bkref_str_off = bkref_str_idx;
2748       /* At first, check the last node of sub expressions we already
2749 	 evaluated.  */
2750       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2751 	{
2752 	  regoff_t sl_str_diff;
2753 	  sub_last = sub_top->lasts[sub_last_idx];
2754 	  sl_str_diff = sub_last->str_idx - sl_str;
2755 	  /* The matched string by the sub expression match with the substring
2756 	     at the back reference?  */
2757 	  if (sl_str_diff > 0)
2758 	    {
2759 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2760 		{
2761 		  /* Not enough chars for a successful match.  */
2762 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2763 		    break;
2764 
2765 		  err = clean_state_log_if_needed (mctx,
2766 						   bkref_str_off
2767 						   + sl_str_diff);
2768 		  if (BE (err != REG_NOERROR, 0))
2769 		    return err;
2770 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2771 		}
2772 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2773 		/* We don't need to search this sub expression any more.  */
2774 		break;
2775 	    }
2776 	  bkref_str_off += sl_str_diff;
2777 	  sl_str += sl_str_diff;
2778 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2779 				bkref_str_idx);
2780 
2781 	  /* Reload buf, since the preceding call might have reallocated
2782 	     the buffer.  */
2783 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2784 
2785 	  if (err == REG_NOMATCH)
2786 	    continue;
2787 	  if (BE (err != REG_NOERROR, 0))
2788 	    return err;
2789 	}
2790 
2791       if (sub_last_idx < sub_top->nlasts)
2792 	continue;
2793       if (sub_last_idx > 0)
2794 	++sl_str;
2795       /* Then, search for the other last nodes of the sub expression.  */
2796       for (; sl_str <= bkref_str_idx; ++sl_str)
2797 	{
2798 	  Idx cls_node;
2799 	  regoff_t sl_str_off;
2800 	  const re_node_set *nodes;
2801 	  sl_str_off = sl_str - sub_top->str_idx;
2802 	  /* The matched string by the sub expression match with the substring
2803 	     at the back reference?  */
2804 	  if (sl_str_off > 0)
2805 	    {
2806 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2807 		{
2808 		  /* If we are at the end of the input, we cannot match.  */
2809 		  if (bkref_str_off >= mctx->input.len)
2810 		    break;
2811 
2812 		  err = extend_buffers (mctx, bkref_str_off + 1);
2813 		  if (BE (err != REG_NOERROR, 0))
2814 		    return err;
2815 
2816 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2817 		}
2818 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2819 		break; /* We don't need to search this sub expression
2820 			  any more.  */
2821 	    }
2822 	  if (mctx->state_log[sl_str] == NULL)
2823 	    continue;
2824 	  /* Does this state have a ')' of the sub expression?  */
2825 	  nodes = &mctx->state_log[sl_str]->nodes;
2826 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2827 				       OP_CLOSE_SUBEXP);
2828 	  if (cls_node == REG_MISSING)
2829 	    continue; /* No.  */
2830 	  if (sub_top->path == NULL)
2831 	    {
2832 	      sub_top->path = calloc (sizeof (state_array_t),
2833 				      sl_str - sub_top->str_idx + 1);
2834 	      if (sub_top->path == NULL)
2835 		return REG_ESPACE;
2836 	    }
2837 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2838 	     in the current context?  */
2839 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2840 			       sub_top->str_idx, cls_node, sl_str,
2841 			       OP_CLOSE_SUBEXP);
2842 	  if (err == REG_NOMATCH)
2843 	      continue;
2844 	  if (BE (err != REG_NOERROR, 0))
2845 	      return err;
2846 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2847 	  if (BE (sub_last == NULL, 0))
2848 	    return REG_ESPACE;
2849 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2850 				bkref_str_idx);
2851 	  if (err == REG_NOMATCH)
2852 	    continue;
2853 	}
2854     }
2855   return REG_NOERROR;
2856 }
2857 
2858 /* Helper functions for get_subexp().  */
2859 
2860 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2861    If it can arrive, register the sub expression expressed with SUB_TOP
2862    and SUB_LAST.  */
2863 
2864 static reg_errcode_t
2865 internal_function
2866 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2867 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2868 {
2869   reg_errcode_t err;
2870   Idx to_idx;
2871   /* Can the subexpression arrive the back reference?  */
2872   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2873 		       sub_last->str_idx, bkref_node, bkref_str,
2874 		       OP_OPEN_SUBEXP);
2875   if (err != REG_NOERROR)
2876     return err;
2877   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2878 			     sub_last->str_idx);
2879   if (BE (err != REG_NOERROR, 0))
2880     return err;
2881   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2882   return clean_state_log_if_needed (mctx, to_idx);
2883 }
2884 
2885 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2886    Search '(' if FL_OPEN, or search ')' otherwise.
2887    TODO: This function isn't efficient...
2888 	 Because there might be more than one nodes whose types are
2889 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2890 	 nodes.
2891 	 E.g. RE: (a){2}  */
2892 
2893 static Idx
2894 internal_function
2895 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2896 		  Idx subexp_idx, int type)
2897 {
2898   Idx cls_idx;
2899   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2900     {
2901       Idx cls_node = nodes->elems[cls_idx];
2902       const re_token_t *node = dfa->nodes + cls_node;
2903       if (node->type == type
2904 	  && node->opr.idx == subexp_idx)
2905 	return cls_node;
2906     }
2907   return REG_MISSING;
2908 }
2909 
2910 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2911    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2912    heavily reused.
2913    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2914 
2915 static reg_errcode_t
2916 internal_function __attribute_warn_unused_result__
2917 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2918 	       Idx top_str, Idx last_node, Idx last_str, int type)
2919 {
2920   const re_dfa_t *const dfa = mctx->dfa;
2921   reg_errcode_t err = REG_NOERROR;
2922   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2923   re_dfastate_t *cur_state = NULL;
2924   re_node_set *cur_nodes, next_nodes;
2925   re_dfastate_t **backup_state_log;
2926   unsigned int context;
2927 
2928   subexp_num = dfa->nodes[top_node].opr.idx;
2929   /* Extend the buffer if we need.  */
2930   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2931     {
2932       re_dfastate_t **new_array;
2933       Idx old_alloc = path->alloc;
2934       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2935       Idx new_alloc;
2936       if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2937 	return REG_ESPACE;
2938       new_alloc = old_alloc + incr_alloc;
2939       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2940 	return REG_ESPACE;
2941       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2942       if (BE (new_array == NULL, 0))
2943 	return REG_ESPACE;
2944       path->array = new_array;
2945       path->alloc = new_alloc;
2946       memset (new_array + old_alloc, '\0',
2947 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2948     }
2949 
2950   str_idx = path->next_idx ? path->next_idx : top_str;
2951 
2952   /* Temporary modify MCTX.  */
2953   backup_state_log = mctx->state_log;
2954   backup_cur_idx = mctx->input.cur_idx;
2955   mctx->state_log = path->array;
2956   mctx->input.cur_idx = str_idx;
2957 
2958   /* Setup initial node set.  */
2959   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2960   if (str_idx == top_str)
2961     {
2962       err = re_node_set_init_1 (&next_nodes, top_node);
2963       if (BE (err != REG_NOERROR, 0))
2964 	return err;
2965       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2966       if (BE (err != REG_NOERROR, 0))
2967 	{
2968 	  re_node_set_free (&next_nodes);
2969 	  return err;
2970 	}
2971     }
2972   else
2973     {
2974       cur_state = mctx->state_log[str_idx];
2975       if (cur_state && cur_state->has_backref)
2976 	{
2977 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2978 	  if (BE (err != REG_NOERROR, 0))
2979 	    return err;
2980 	}
2981       else
2982 	re_node_set_init_empty (&next_nodes);
2983     }
2984   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2985     {
2986       if (next_nodes.nelem)
2987 	{
2988 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2989 				    subexp_num, type);
2990 	  if (BE (err != REG_NOERROR, 0))
2991 	    {
2992 	      re_node_set_free (&next_nodes);
2993 	      return err;
2994 	    }
2995 	}
2996       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2997       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
2998 	{
2999 	  re_node_set_free (&next_nodes);
3000 	  return err;
3001 	}
3002       mctx->state_log[str_idx] = cur_state;
3003     }
3004 
3005   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
3006     {
3007       re_node_set_empty (&next_nodes);
3008       if (mctx->state_log[str_idx + 1])
3009 	{
3010 	  err = re_node_set_merge (&next_nodes,
3011 				   &mctx->state_log[str_idx + 1]->nodes);
3012 	  if (BE (err != REG_NOERROR, 0))
3013 	    {
3014 	      re_node_set_free (&next_nodes);
3015 	      return err;
3016 	    }
3017 	}
3018       if (cur_state)
3019 	{
3020 	  err = check_arrival_add_next_nodes (mctx, str_idx,
3021 					      &cur_state->non_eps_nodes,
3022 					      &next_nodes);
3023 	  if (BE (err != REG_NOERROR, 0))
3024 	    {
3025 	      re_node_set_free (&next_nodes);
3026 	      return err;
3027 	    }
3028 	}
3029       ++str_idx;
3030       if (next_nodes.nelem)
3031 	{
3032 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
3033 	  if (BE (err != REG_NOERROR, 0))
3034 	    {
3035 	      re_node_set_free (&next_nodes);
3036 	      return err;
3037 	    }
3038 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
3039 				    subexp_num, type);
3040 	  if (BE (err != REG_NOERROR, 0))
3041 	    {
3042 	      re_node_set_free (&next_nodes);
3043 	      return err;
3044 	    }
3045 	}
3046       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
3047       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
3048       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
3049 	{
3050 	  re_node_set_free (&next_nodes);
3051 	  return err;
3052 	}
3053       mctx->state_log[str_idx] = cur_state;
3054       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
3055     }
3056   re_node_set_free (&next_nodes);
3057   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
3058 	       : &mctx->state_log[last_str]->nodes);
3059   path->next_idx = str_idx;
3060 
3061   /* Fix MCTX.  */
3062   mctx->state_log = backup_state_log;
3063   mctx->input.cur_idx = backup_cur_idx;
3064 
3065   /* Then check the current node set has the node LAST_NODE.  */
3066   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3067     return REG_NOERROR;
3068 
3069   return REG_NOMATCH;
3070 }
3071 
3072 /* Helper functions for check_arrival.  */
3073 
3074 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3075    to NEXT_NODES.
3076    TODO: This function is similar to the functions transit_state*(),
3077 	 however this function has many additional works.
3078 	 Can't we unify them?  */
3079 
3080 static reg_errcode_t
3081 internal_function __attribute_warn_unused_result__
3082 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3083 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3084 {
3085   const re_dfa_t *const dfa = mctx->dfa;
3086   bool ok;
3087   Idx cur_idx;
3088 #ifdef RE_ENABLE_I18N
3089   reg_errcode_t err = REG_NOERROR;
3090 #endif
3091   re_node_set union_set;
3092   re_node_set_init_empty (&union_set);
3093   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3094     {
3095       int naccepted = 0;
3096       Idx cur_node = cur_nodes->elems[cur_idx];
3097 #ifdef DEBUG
3098       re_token_type_t type = dfa->nodes[cur_node].type;
3099       assert (!IS_EPSILON_NODE (type));
3100 #endif
3101 #ifdef RE_ENABLE_I18N
3102       /* If the node may accept "multi byte".  */
3103       if (dfa->nodes[cur_node].accept_mb)
3104 	{
3105 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3106 					       str_idx);
3107 	  if (naccepted > 1)
3108 	    {
3109 	      re_dfastate_t *dest_state;
3110 	      Idx next_node = dfa->nexts[cur_node];
3111 	      Idx next_idx = str_idx + naccepted;
3112 	      dest_state = mctx->state_log[next_idx];
3113 	      re_node_set_empty (&union_set);
3114 	      if (dest_state)
3115 		{
3116 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3117 		  if (BE (err != REG_NOERROR, 0))
3118 		    {
3119 		      re_node_set_free (&union_set);
3120 		      return err;
3121 		    }
3122 		}
3123 	      ok = re_node_set_insert (&union_set, next_node);
3124 	      if (BE (! ok, 0))
3125 		{
3126 		  re_node_set_free (&union_set);
3127 		  return REG_ESPACE;
3128 		}
3129 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3130 							    &union_set);
3131 	      if (BE (mctx->state_log[next_idx] == NULL
3132 		      && err != REG_NOERROR, 0))
3133 		{
3134 		  re_node_set_free (&union_set);
3135 		  return err;
3136 		}
3137 	    }
3138 	}
3139 #endif /* RE_ENABLE_I18N */
3140       if (naccepted
3141 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3142 	{
3143 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3144 	  if (BE (! ok, 0))
3145 	    {
3146 	      re_node_set_free (&union_set);
3147 	      return REG_ESPACE;
3148 	    }
3149 	}
3150     }
3151   re_node_set_free (&union_set);
3152   return REG_NOERROR;
3153 }
3154 
3155 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3156    CUR_NODES, however exclude the nodes which are:
3157     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3158     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3159 */
3160 
3161 static reg_errcode_t
3162 internal_function
3163 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3164 			  Idx ex_subexp, int type)
3165 {
3166   reg_errcode_t err;
3167   Idx idx, outside_node;
3168   re_node_set new_nodes;
3169 #ifdef DEBUG
3170   assert (cur_nodes->nelem);
3171 #endif
3172   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3173   if (BE (err != REG_NOERROR, 0))
3174     return err;
3175   /* Create a new node set NEW_NODES with the nodes which are epsilon
3176      closures of the node in CUR_NODES.  */
3177 
3178   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3179     {
3180       Idx cur_node = cur_nodes->elems[idx];
3181       const re_node_set *eclosure = dfa->eclosures + cur_node;
3182       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3183       if (outside_node == REG_MISSING)
3184 	{
3185 	  /* There are no problematic nodes, just merge them.  */
3186 	  err = re_node_set_merge (&new_nodes, eclosure);
3187 	  if (BE (err != REG_NOERROR, 0))
3188 	    {
3189 	      re_node_set_free (&new_nodes);
3190 	      return err;
3191 	    }
3192 	}
3193       else
3194 	{
3195 	  /* There are problematic nodes, re-calculate incrementally.  */
3196 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3197 					      ex_subexp, type);
3198 	  if (BE (err != REG_NOERROR, 0))
3199 	    {
3200 	      re_node_set_free (&new_nodes);
3201 	      return err;
3202 	    }
3203 	}
3204     }
3205   re_node_set_free (cur_nodes);
3206   *cur_nodes = new_nodes;
3207   return REG_NOERROR;
3208 }
3209 
3210 /* Helper function for check_arrival_expand_ecl.
3211    Check incrementally the epsilon closure of TARGET, and if it isn't
3212    problematic append it to DST_NODES.  */
3213 
3214 static reg_errcode_t
3215 internal_function __attribute_warn_unused_result__
3216 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3217 			      Idx target, Idx ex_subexp, int type)
3218 {
3219   Idx cur_node;
3220   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3221     {
3222       bool ok;
3223 
3224       if (dfa->nodes[cur_node].type == type
3225 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3226 	{
3227 	  if (type == OP_CLOSE_SUBEXP)
3228 	    {
3229 	      ok = re_node_set_insert (dst_nodes, cur_node);
3230 	      if (BE (! ok, 0))
3231 		return REG_ESPACE;
3232 	    }
3233 	  break;
3234 	}
3235       ok = re_node_set_insert (dst_nodes, cur_node);
3236       if (BE (! ok, 0))
3237 	return REG_ESPACE;
3238       if (dfa->edests[cur_node].nelem == 0)
3239 	break;
3240       if (dfa->edests[cur_node].nelem == 2)
3241 	{
3242 	  reg_errcode_t err;
3243 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3244 					      dfa->edests[cur_node].elems[1],
3245 					      ex_subexp, type);
3246 	  if (BE (err != REG_NOERROR, 0))
3247 	    return err;
3248 	}
3249       cur_node = dfa->edests[cur_node].elems[0];
3250     }
3251   return REG_NOERROR;
3252 }
3253 
3254 
3255 /* For all the back references in the current state, calculate the
3256    destination of the back references by the appropriate entry
3257    in MCTX->BKREF_ENTS.  */
3258 
3259 static reg_errcode_t
3260 internal_function __attribute_warn_unused_result__
3261 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3262 		    Idx cur_str, Idx subexp_num, int type)
3263 {
3264   const re_dfa_t *const dfa = mctx->dfa;
3265   reg_errcode_t err;
3266   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3267   struct re_backref_cache_entry *ent;
3268 
3269   if (cache_idx_start == REG_MISSING)
3270     return REG_NOERROR;
3271 
3272  restart:
3273   ent = mctx->bkref_ents + cache_idx_start;
3274   do
3275     {
3276       Idx to_idx, next_node;
3277 
3278       /* Is this entry ENT is appropriate?  */
3279       if (!re_node_set_contains (cur_nodes, ent->node))
3280 	continue; /* No.  */
3281 
3282       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3283       /* Calculate the destination of the back reference, and append it
3284 	 to MCTX->STATE_LOG.  */
3285       if (to_idx == cur_str)
3286 	{
3287 	  /* The backreference did epsilon transit, we must re-check all the
3288 	     node in the current state.  */
3289 	  re_node_set new_dests;
3290 	  reg_errcode_t err2, err3;
3291 	  next_node = dfa->edests[ent->node].elems[0];
3292 	  if (re_node_set_contains (cur_nodes, next_node))
3293 	    continue;
3294 	  err = re_node_set_init_1 (&new_dests, next_node);
3295 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3296 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3297 	  re_node_set_free (&new_dests);
3298 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
3299 		  || err3 != REG_NOERROR, 0))
3300 	    {
3301 	      err = (err != REG_NOERROR ? err
3302 		     : (err2 != REG_NOERROR ? err2 : err3));
3303 	      return err;
3304 	    }
3305 	  /* TODO: It is still inefficient...  */
3306 	  goto restart;
3307 	}
3308       else
3309 	{
3310 	  re_node_set union_set;
3311 	  next_node = dfa->nexts[ent->node];
3312 	  if (mctx->state_log[to_idx])
3313 	    {
3314 	      bool ok;
3315 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3316 					next_node))
3317 		continue;
3318 	      err = re_node_set_init_copy (&union_set,
3319 					   &mctx->state_log[to_idx]->nodes);
3320 	      ok = re_node_set_insert (&union_set, next_node);
3321 	      if (BE (err != REG_NOERROR || ! ok, 0))
3322 		{
3323 		  re_node_set_free (&union_set);
3324 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3325 		  return err;
3326 		}
3327 	    }
3328 	  else
3329 	    {
3330 	      err = re_node_set_init_1 (&union_set, next_node);
3331 	      if (BE (err != REG_NOERROR, 0))
3332 		return err;
3333 	    }
3334 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3335 	  re_node_set_free (&union_set);
3336 	  if (BE (mctx->state_log[to_idx] == NULL
3337 		  && err != REG_NOERROR, 0))
3338 	    return err;
3339 	}
3340     }
3341   while (ent++->more);
3342   return REG_NOERROR;
3343 }
3344 
3345 /* Build transition table for the state.
3346    Return true if successful.  */
3347 
3348 static bool
3349 internal_function
3350 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3351 {
3352   reg_errcode_t err;
3353   Idx i, j;
3354   int ch;
3355   bool need_word_trtable = false;
3356   bitset_word_t elem, mask;
3357   bool dests_node_malloced = false;
3358   bool dest_states_malloced = false;
3359   Idx ndests; /* Number of the destination states from 'state'.  */
3360   re_dfastate_t **trtable;
3361   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3362   re_node_set follows, *dests_node;
3363   bitset_t *dests_ch;
3364   bitset_t acceptable;
3365 
3366   struct dests_alloc
3367   {
3368     re_node_set dests_node[SBC_MAX];
3369     bitset_t dests_ch[SBC_MAX];
3370   } *dests_alloc;
3371 
3372   /* We build DFA states which corresponds to the destination nodes
3373      from 'state'.  'dests_node[i]' represents the nodes which i-th
3374      destination state contains, and 'dests_ch[i]' represents the
3375      characters which i-th destination state accepts.  */
3376   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3377     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3378   else
3379     {
3380       dests_alloc = re_malloc (struct dests_alloc, 1);
3381       if (BE (dests_alloc == NULL, 0))
3382 	return false;
3383       dests_node_malloced = true;
3384     }
3385   dests_node = dests_alloc->dests_node;
3386   dests_ch = dests_alloc->dests_ch;
3387 
3388   /* Initialize transition table.  */
3389   state->word_trtable = state->trtable = NULL;
3390 
3391   /* At first, group all nodes belonging to 'state' into several
3392      destinations.  */
3393   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3394   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3395     {
3396       if (dests_node_malloced)
3397 	free (dests_alloc);
3398       /* Return false in case of an error, true otherwise.  */
3399       if (ndests == 0)
3400 	{
3401 	  state->trtable = (re_dfastate_t **)
3402 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3403           if (BE (state->trtable == NULL, 0))
3404             return false;
3405 	  return true;
3406 	}
3407       return false;
3408     }
3409 
3410   err = re_node_set_alloc (&follows, ndests + 1);
3411   if (BE (err != REG_NOERROR, 0))
3412     goto out_free;
3413 
3414   /* Avoid arithmetic overflow in size calculation.  */
3415   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3416 	    / (3 * sizeof (re_dfastate_t *)))
3417 	   < ndests),
3418 	  0))
3419     goto out_free;
3420 
3421   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3422 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3423     dest_states = (re_dfastate_t **)
3424       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3425   else
3426     {
3427       dest_states = (re_dfastate_t **)
3428 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
3429       if (BE (dest_states == NULL, 0))
3430 	{
3431 out_free:
3432 	  if (dest_states_malloced)
3433 	    free (dest_states);
3434 	  re_node_set_free (&follows);
3435 	  for (i = 0; i < ndests; ++i)
3436 	    re_node_set_free (dests_node + i);
3437 	  if (dests_node_malloced)
3438 	    free (dests_alloc);
3439 	  return false;
3440 	}
3441       dest_states_malloced = true;
3442     }
3443   dest_states_word = dest_states + ndests;
3444   dest_states_nl = dest_states_word + ndests;
3445   bitset_empty (acceptable);
3446 
3447   /* Then build the states for all destinations.  */
3448   for (i = 0; i < ndests; ++i)
3449     {
3450       Idx next_node;
3451       re_node_set_empty (&follows);
3452       /* Merge the follows of this destination states.  */
3453       for (j = 0; j < dests_node[i].nelem; ++j)
3454 	{
3455 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3456 	  if (next_node != REG_MISSING)
3457 	    {
3458 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3459 	      if (BE (err != REG_NOERROR, 0))
3460 		goto out_free;
3461 	    }
3462 	}
3463       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3464       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
3465 	goto out_free;
3466       /* If the new state has context constraint,
3467 	 build appropriate states for these contexts.  */
3468       if (dest_states[i]->has_constraint)
3469 	{
3470 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3471 							  CONTEXT_WORD);
3472 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
3473 	    goto out_free;
3474 
3475 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3476 	    need_word_trtable = true;
3477 
3478 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3479 							CONTEXT_NEWLINE);
3480 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
3481 	    goto out_free;
3482 	}
3483       else
3484 	{
3485 	  dest_states_word[i] = dest_states[i];
3486 	  dest_states_nl[i] = dest_states[i];
3487 	}
3488       bitset_merge (acceptable, dests_ch[i]);
3489     }
3490 
3491   if (!BE (need_word_trtable, 0))
3492     {
3493       /* We don't care about whether the following character is a word
3494 	 character, or we are in a single-byte character set so we can
3495 	 discern by looking at the character code: allocate a
3496 	 256-entry transition table.  */
3497       trtable = state->trtable =
3498 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3499       if (BE (trtable == NULL, 0))
3500 	goto out_free;
3501 
3502       /* For all characters ch...:  */
3503       for (i = 0; i < BITSET_WORDS; ++i)
3504 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3505 	     elem;
3506 	     mask <<= 1, elem >>= 1, ++ch)
3507 	  if (BE (elem & 1, 0))
3508 	    {
3509 	      /* There must be exactly one destination which accepts
3510 		 character ch.  See group_nodes_into_DFAstates.  */
3511 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3512 		;
3513 
3514 	      /* j-th destination accepts the word character ch.  */
3515 	      if (dfa->word_char[i] & mask)
3516 		trtable[ch] = dest_states_word[j];
3517 	      else
3518 		trtable[ch] = dest_states[j];
3519 	    }
3520     }
3521   else
3522     {
3523       /* We care about whether the following character is a word
3524 	 character, and we are in a multi-byte character set: discern
3525 	 by looking at the character code: build two 256-entry
3526 	 transition tables, one starting at trtable[0] and one
3527 	 starting at trtable[SBC_MAX].  */
3528       trtable = state->word_trtable =
3529 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3530       if (BE (trtable == NULL, 0))
3531 	goto out_free;
3532 
3533       /* For all characters ch...:  */
3534       for (i = 0; i < BITSET_WORDS; ++i)
3535 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3536 	     elem;
3537 	     mask <<= 1, elem >>= 1, ++ch)
3538 	  if (BE (elem & 1, 0))
3539 	    {
3540 	      /* There must be exactly one destination which accepts
3541 		 character ch.  See group_nodes_into_DFAstates.  */
3542 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3543 		;
3544 
3545 	      /* j-th destination accepts the word character ch.  */
3546 	      trtable[ch] = dest_states[j];
3547 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3548 	    }
3549     }
3550 
3551   /* new line */
3552   if (bitset_contain (acceptable, NEWLINE_CHAR))
3553     {
3554       /* The current state accepts newline character.  */
3555       for (j = 0; j < ndests; ++j)
3556 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3557 	  {
3558 	    /* k-th destination accepts newline character.  */
3559 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3560 	    if (need_word_trtable)
3561 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3562 	    /* There must be only one destination which accepts
3563 	       newline.  See group_nodes_into_DFAstates.  */
3564 	    break;
3565 	  }
3566     }
3567 
3568   if (dest_states_malloced)
3569     free (dest_states);
3570 
3571   re_node_set_free (&follows);
3572   for (i = 0; i < ndests; ++i)
3573     re_node_set_free (dests_node + i);
3574 
3575   if (dests_node_malloced)
3576     free (dests_alloc);
3577 
3578   return true;
3579 }
3580 
3581 /* Group all nodes belonging to STATE into several destinations.
3582    Then for all destinations, set the nodes belonging to the destination
3583    to DESTS_NODE[i] and set the characters accepted by the destination
3584    to DEST_CH[i].  This function return the number of destinations.  */
3585 
3586 static Idx
3587 internal_function
3588 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3589 			    re_node_set *dests_node, bitset_t *dests_ch)
3590 {
3591   reg_errcode_t err;
3592   bool ok;
3593   Idx i, j, k;
3594   Idx ndests; /* Number of the destinations from 'state'.  */
3595   bitset_t accepts; /* Characters a node can accept.  */
3596   const re_node_set *cur_nodes = &state->nodes;
3597   bitset_empty (accepts);
3598   ndests = 0;
3599 
3600   /* For all the nodes belonging to 'state',  */
3601   for (i = 0; i < cur_nodes->nelem; ++i)
3602     {
3603       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3604       re_token_type_t type = node->type;
3605       unsigned int constraint = node->constraint;
3606 
3607       /* Enumerate all single byte character this node can accept.  */
3608       if (type == CHARACTER)
3609 	bitset_set (accepts, node->opr.c);
3610       else if (type == SIMPLE_BRACKET)
3611 	{
3612 	  bitset_merge (accepts, node->opr.sbcset);
3613 	}
3614       else if (type == OP_PERIOD)
3615 	{
3616 #ifdef RE_ENABLE_I18N
3617 	  if (dfa->mb_cur_max > 1)
3618 	    bitset_merge (accepts, dfa->sb_char);
3619 	  else
3620 #endif
3621 	    bitset_set_all (accepts);
3622 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3623 	    bitset_clear (accepts, '\n');
3624 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3625 	    bitset_clear (accepts, '\0');
3626 	}
3627 #ifdef RE_ENABLE_I18N
3628       else if (type == OP_UTF8_PERIOD)
3629 	{
3630 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3631 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3632 	  else
3633 	    bitset_merge (accepts, utf8_sb_map);
3634 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3635 	    bitset_clear (accepts, '\n');
3636 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3637 	    bitset_clear (accepts, '\0');
3638 	}
3639 #endif
3640       else
3641 	continue;
3642 
3643       /* Check the 'accepts' and sift the characters which are not
3644 	 match it the context.  */
3645       if (constraint)
3646 	{
3647 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3648 	    {
3649 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3650 	      bitset_empty (accepts);
3651 	      if (accepts_newline)
3652 		bitset_set (accepts, NEWLINE_CHAR);
3653 	      else
3654 		continue;
3655 	    }
3656 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3657 	    {
3658 	      bitset_empty (accepts);
3659 	      continue;
3660 	    }
3661 
3662 	  if (constraint & NEXT_WORD_CONSTRAINT)
3663 	    {
3664 	      bitset_word_t any_set = 0;
3665 	      if (type == CHARACTER && !node->word_char)
3666 		{
3667 		  bitset_empty (accepts);
3668 		  continue;
3669 		}
3670 #ifdef RE_ENABLE_I18N
3671 	      if (dfa->mb_cur_max > 1)
3672 		for (j = 0; j < BITSET_WORDS; ++j)
3673 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3674 	      else
3675 #endif
3676 		for (j = 0; j < BITSET_WORDS; ++j)
3677 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3678 	      if (!any_set)
3679 		continue;
3680 	    }
3681 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3682 	    {
3683 	      bitset_word_t any_set = 0;
3684 	      if (type == CHARACTER && node->word_char)
3685 		{
3686 		  bitset_empty (accepts);
3687 		  continue;
3688 		}
3689 #ifdef RE_ENABLE_I18N
3690 	      if (dfa->mb_cur_max > 1)
3691 		for (j = 0; j < BITSET_WORDS; ++j)
3692 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3693 	      else
3694 #endif
3695 		for (j = 0; j < BITSET_WORDS; ++j)
3696 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3697 	      if (!any_set)
3698 		continue;
3699 	    }
3700 	}
3701 
3702       /* Then divide 'accepts' into DFA states, or create a new
3703 	 state.  Above, we make sure that accepts is not empty.  */
3704       for (j = 0; j < ndests; ++j)
3705 	{
3706 	  bitset_t intersec; /* Intersection sets, see below.  */
3707 	  bitset_t remains;
3708 	  /* Flags, see below.  */
3709 	  bitset_word_t has_intersec, not_subset, not_consumed;
3710 
3711 	  /* Optimization, skip if this state doesn't accept the character.  */
3712 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3713 	    continue;
3714 
3715 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3716 	  has_intersec = 0;
3717 	  for (k = 0; k < BITSET_WORDS; ++k)
3718 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3719 	  /* And skip if the intersection set is empty.  */
3720 	  if (!has_intersec)
3721 	    continue;
3722 
3723 	  /* Then check if this state is a subset of 'accepts'.  */
3724 	  not_subset = not_consumed = 0;
3725 	  for (k = 0; k < BITSET_WORDS; ++k)
3726 	    {
3727 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3728 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3729 	    }
3730 
3731 	  /* If this state isn't a subset of 'accepts', create a
3732 	     new group state, which has the 'remains'. */
3733 	  if (not_subset)
3734 	    {
3735 	      bitset_copy (dests_ch[ndests], remains);
3736 	      bitset_copy (dests_ch[j], intersec);
3737 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3738 	      if (BE (err != REG_NOERROR, 0))
3739 		goto error_return;
3740 	      ++ndests;
3741 	    }
3742 
3743 	  /* Put the position in the current group. */
3744 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3745 	  if (BE (! ok, 0))
3746 	    goto error_return;
3747 
3748 	  /* If all characters are consumed, go to next node. */
3749 	  if (!not_consumed)
3750 	    break;
3751 	}
3752       /* Some characters remain, create a new group. */
3753       if (j == ndests)
3754 	{
3755 	  bitset_copy (dests_ch[ndests], accepts);
3756 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3757 	  if (BE (err != REG_NOERROR, 0))
3758 	    goto error_return;
3759 	  ++ndests;
3760 	  bitset_empty (accepts);
3761 	}
3762     }
3763   return ndests;
3764  error_return:
3765   for (j = 0; j < ndests; ++j)
3766     re_node_set_free (dests_node + j);
3767   return REG_MISSING;
3768 }
3769 
3770 #ifdef RE_ENABLE_I18N
3771 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3772    Return the number of the bytes the node accepts.
3773    STR_IDX is the current index of the input string.
3774 
3775    This function handles the nodes which can accept one character, or
3776    one collating element like '.', '[a-z]', opposite to the other nodes
3777    can only accept one byte.  */
3778 
3779 # ifdef _LIBC
3780 #  include <locale/weight.h>
3781 # endif
3782 
3783 static int
3784 internal_function
3785 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3786 			 const re_string_t *input, Idx str_idx)
3787 {
3788   const re_token_t *node = dfa->nodes + node_idx;
3789   int char_len, elem_len;
3790   Idx i;
3791 
3792   if (BE (node->type == OP_UTF8_PERIOD, 0))
3793     {
3794       unsigned char c = re_string_byte_at (input, str_idx), d;
3795       if (BE (c < 0xc2, 1))
3796 	return 0;
3797 
3798       if (str_idx + 2 > input->len)
3799 	return 0;
3800 
3801       d = re_string_byte_at (input, str_idx + 1);
3802       if (c < 0xe0)
3803 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3804       else if (c < 0xf0)
3805 	{
3806 	  char_len = 3;
3807 	  if (c == 0xe0 && d < 0xa0)
3808 	    return 0;
3809 	}
3810       else if (c < 0xf8)
3811 	{
3812 	  char_len = 4;
3813 	  if (c == 0xf0 && d < 0x90)
3814 	    return 0;
3815 	}
3816       else if (c < 0xfc)
3817 	{
3818 	  char_len = 5;
3819 	  if (c == 0xf8 && d < 0x88)
3820 	    return 0;
3821 	}
3822       else if (c < 0xfe)
3823 	{
3824 	  char_len = 6;
3825 	  if (c == 0xfc && d < 0x84)
3826 	    return 0;
3827 	}
3828       else
3829 	return 0;
3830 
3831       if (str_idx + char_len > input->len)
3832 	return 0;
3833 
3834       for (i = 1; i < char_len; ++i)
3835 	{
3836 	  d = re_string_byte_at (input, str_idx + i);
3837 	  if (d < 0x80 || d > 0xbf)
3838 	    return 0;
3839 	}
3840       return char_len;
3841     }
3842 
3843   char_len = re_string_char_size_at (input, str_idx);
3844   if (node->type == OP_PERIOD)
3845     {
3846       if (char_len <= 1)
3847 	return 0;
3848       /* FIXME: I don't think this if is needed, as both '\n'
3849 	 and '\0' are char_len == 1.  */
3850       /* '.' accepts any one character except the following two cases.  */
3851       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3852 	   re_string_byte_at (input, str_idx) == '\n') ||
3853 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3854 	   re_string_byte_at (input, str_idx) == '\0'))
3855 	return 0;
3856       return char_len;
3857     }
3858 
3859   elem_len = re_string_elem_size_at (input, str_idx);
3860   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3861     return 0;
3862 
3863   if (node->type == COMPLEX_BRACKET)
3864     {
3865       const re_charset_t *cset = node->opr.mbcset;
3866 # ifdef _LIBC
3867       const unsigned char *pin
3868 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3869       Idx j;
3870       uint32_t nrules;
3871 # endif /* _LIBC */
3872       int match_len = 0;
3873       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3874 		    ? re_string_wchar_at (input, str_idx) : 0);
3875 
3876       /* match with multibyte character?  */
3877       for (i = 0; i < cset->nmbchars; ++i)
3878 	if (wc == cset->mbchars[i])
3879 	  {
3880 	    match_len = char_len;
3881 	    goto check_node_accept_bytes_match;
3882 	  }
3883       /* match with character_class?  */
3884       for (i = 0; i < cset->nchar_classes; ++i)
3885 	{
3886 	  wctype_t wt = cset->char_classes[i];
3887 	  if (__iswctype (wc, wt))
3888 	    {
3889 	      match_len = char_len;
3890 	      goto check_node_accept_bytes_match;
3891 	    }
3892 	}
3893 
3894 # ifdef _LIBC
3895       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3896       if (nrules != 0)
3897 	{
3898 	  unsigned int in_collseq = 0;
3899 	  const int32_t *table, *indirect;
3900 	  const unsigned char *weights, *extra;
3901 	  const char *collseqwc;
3902 
3903 	  /* match with collating_symbol?  */
3904 	  if (cset->ncoll_syms)
3905 	    extra = (const unsigned char *)
3906 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3907 	  for (i = 0; i < cset->ncoll_syms; ++i)
3908 	    {
3909 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3910 	      /* Compare the length of input collating element and
3911 		 the length of current collating element.  */
3912 	      if (*coll_sym != elem_len)
3913 		continue;
3914 	      /* Compare each bytes.  */
3915 	      for (j = 0; j < *coll_sym; j++)
3916 		if (pin[j] != coll_sym[1 + j])
3917 		  break;
3918 	      if (j == *coll_sym)
3919 		{
3920 		  /* Match if every bytes is equal.  */
3921 		  match_len = j;
3922 		  goto check_node_accept_bytes_match;
3923 		}
3924 	    }
3925 
3926 	  if (cset->nranges)
3927 	    {
3928 	      if (elem_len <= char_len)
3929 		{
3930 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3931 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3932 		}
3933 	      else
3934 		in_collseq = find_collation_sequence_value (pin, elem_len);
3935 	    }
3936 	  /* match with range expression?  */
3937 	  /* FIXME: Implement rational ranges here, too.  */
3938 	  for (i = 0; i < cset->nranges; ++i)
3939 	    if (cset->range_starts[i] <= in_collseq
3940 		&& in_collseq <= cset->range_ends[i])
3941 	      {
3942 		match_len = elem_len;
3943 		goto check_node_accept_bytes_match;
3944 	      }
3945 
3946 	  /* match with equivalence_class?  */
3947 	  if (cset->nequiv_classes)
3948 	    {
3949 	      const unsigned char *cp = pin;
3950 	      table = (const int32_t *)
3951 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3952 	      weights = (const unsigned char *)
3953 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3954 	      extra = (const unsigned char *)
3955 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3956 	      indirect = (const int32_t *)
3957 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3958 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3959 	      if (idx > 0)
3960 		for (i = 0; i < cset->nequiv_classes; ++i)
3961 		  {
3962 		    int32_t equiv_class_idx = cset->equiv_classes[i];
3963 		    size_t weight_len = weights[idx & 0xffffff];
3964 		    if (weight_len == weights[equiv_class_idx & 0xffffff]
3965 			&& (idx >> 24) == (equiv_class_idx >> 24))
3966 		      {
3967 			Idx cnt = 0;
3968 
3969 			idx &= 0xffffff;
3970 			equiv_class_idx &= 0xffffff;
3971 
3972 			while (cnt <= weight_len
3973 			       && (weights[equiv_class_idx + 1 + cnt]
3974 				   == weights[idx + 1 + cnt]))
3975 			  ++cnt;
3976 			if (cnt > weight_len)
3977 			  {
3978 			    match_len = elem_len;
3979 			    goto check_node_accept_bytes_match;
3980 			  }
3981 		      }
3982 		  }
3983 	    }
3984 	}
3985       else
3986 # endif /* _LIBC */
3987 	{
3988 	  /* match with range expression?  */
3989 	  for (i = 0; i < cset->nranges; ++i)
3990 	    {
3991 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3992 		{
3993 		  match_len = char_len;
3994 		  goto check_node_accept_bytes_match;
3995 		}
3996 	    }
3997 	}
3998     check_node_accept_bytes_match:
3999       if (!cset->non_match)
4000 	return match_len;
4001       else
4002 	{
4003 	  if (match_len > 0)
4004 	    return 0;
4005 	  else
4006 	    return (elem_len > char_len) ? elem_len : char_len;
4007 	}
4008     }
4009   return 0;
4010 }
4011 
4012 # ifdef _LIBC
4013 static unsigned int
4014 internal_function
4015 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
4016 {
4017   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4018   if (nrules == 0)
4019     {
4020       if (mbs_len == 1)
4021 	{
4022 	  /* No valid character.  Match it as a single byte character.  */
4023 	  const unsigned char *collseq = (const unsigned char *)
4024 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
4025 	  return collseq[mbs[0]];
4026 	}
4027       return UINT_MAX;
4028     }
4029   else
4030     {
4031       int32_t idx;
4032       const unsigned char *extra = (const unsigned char *)
4033 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4034       int32_t extrasize = (const unsigned char *)
4035 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
4036 
4037       for (idx = 0; idx < extrasize;)
4038 	{
4039 	  int mbs_cnt;
4040 	  bool found = false;
4041 	  int32_t elem_mbs_len;
4042 	  /* Skip the name of collating element name.  */
4043 	  idx = idx + extra[idx] + 1;
4044 	  elem_mbs_len = extra[idx++];
4045 	  if (mbs_len == elem_mbs_len)
4046 	    {
4047 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
4048 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
4049 		  break;
4050 	      if (mbs_cnt == elem_mbs_len)
4051 		/* Found the entry.  */
4052 		found = true;
4053 	    }
4054 	  /* Skip the byte sequence of the collating element.  */
4055 	  idx += elem_mbs_len;
4056 	  /* Adjust for the alignment.  */
4057 	  idx = (idx + 3) & ~3;
4058 	  /* Skip the collation sequence value.  */
4059 	  idx += sizeof (uint32_t);
4060 	  /* Skip the wide char sequence of the collating element.  */
4061 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
4062 	  /* If we found the entry, return the sequence value.  */
4063 	  if (found)
4064 	    return *(uint32_t *) (extra + idx);
4065 	  /* Skip the collation sequence value.  */
4066 	  idx += sizeof (uint32_t);
4067 	}
4068       return UINT_MAX;
4069     }
4070 }
4071 # endif /* _LIBC */
4072 #endif /* RE_ENABLE_I18N */
4073 
4074 /* Check whether the node accepts the byte which is IDX-th
4075    byte of the INPUT.  */
4076 
4077 static bool
4078 internal_function
4079 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4080 		   Idx idx)
4081 {
4082   unsigned char ch;
4083   ch = re_string_byte_at (&mctx->input, idx);
4084   switch (node->type)
4085     {
4086     case CHARACTER:
4087       if (node->opr.c != ch)
4088         return false;
4089       break;
4090 
4091     case SIMPLE_BRACKET:
4092       if (!bitset_contain (node->opr.sbcset, ch))
4093         return false;
4094       break;
4095 
4096 #ifdef RE_ENABLE_I18N
4097     case OP_UTF8_PERIOD:
4098       if (ch >= ASCII_CHARS)
4099         return false;
4100       /* FALLTHROUGH */
4101 #endif
4102     case OP_PERIOD:
4103       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4104 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4105 	return false;
4106       break;
4107 
4108     default:
4109       return false;
4110     }
4111 
4112   if (node->constraint)
4113     {
4114       /* The node has constraints.  Check whether the current context
4115 	 satisfies the constraints.  */
4116       unsigned int context = re_string_context_at (&mctx->input, idx,
4117 						   mctx->eflags);
4118       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4119 	return false;
4120     }
4121 
4122   return true;
4123 }
4124 
4125 /* Extend the buffers, if the buffers have run out.  */
4126 
4127 static reg_errcode_t
4128 internal_function __attribute_warn_unused_result__
4129 extend_buffers (re_match_context_t *mctx, int min_len)
4130 {
4131   reg_errcode_t ret;
4132   re_string_t *pstr = &mctx->input;
4133 
4134   /* Avoid overflow.  */
4135   if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4136           <= pstr->bufs_len, 0))
4137     return REG_ESPACE;
4138 
4139   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4140   ret = re_string_realloc_buffers (pstr,
4141 				   MAX (min_len,
4142 					MIN (pstr->len, pstr->bufs_len * 2)));
4143   if (BE (ret != REG_NOERROR, 0))
4144     return ret;
4145 
4146   if (mctx->state_log != NULL)
4147     {
4148       /* And double the length of state_log.  */
4149       /* XXX We have no indication of the size of this buffer.  If this
4150 	 allocation fail we have no indication that the state_log array
4151 	 does not have the right size.  */
4152       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4153 					      pstr->bufs_len + 1);
4154       if (BE (new_array == NULL, 0))
4155 	return REG_ESPACE;
4156       mctx->state_log = new_array;
4157     }
4158 
4159   /* Then reconstruct the buffers.  */
4160   if (pstr->icase)
4161     {
4162 #ifdef RE_ENABLE_I18N
4163       if (pstr->mb_cur_max > 1)
4164 	{
4165 	  ret = build_wcs_upper_buffer (pstr);
4166 	  if (BE (ret != REG_NOERROR, 0))
4167 	    return ret;
4168 	}
4169       else
4170 #endif /* RE_ENABLE_I18N  */
4171 	build_upper_buffer (pstr);
4172     }
4173   else
4174     {
4175 #ifdef RE_ENABLE_I18N
4176       if (pstr->mb_cur_max > 1)
4177 	build_wcs_buffer (pstr);
4178       else
4179 #endif /* RE_ENABLE_I18N  */
4180 	{
4181 	  if (pstr->trans != NULL)
4182 	    re_string_translate_buffer (pstr);
4183 	}
4184     }
4185   return REG_NOERROR;
4186 }
4187 
4188 
4189 /* Functions for matching context.  */
4190 
4191 /* Initialize MCTX.  */
4192 
4193 static reg_errcode_t
4194 internal_function __attribute_warn_unused_result__
4195 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4196 {
4197   mctx->eflags = eflags;
4198   mctx->match_last = REG_MISSING;
4199   if (n > 0)
4200     {
4201       /* Avoid overflow.  */
4202       size_t max_object_size =
4203 	MAX (sizeof (struct re_backref_cache_entry),
4204 	     sizeof (re_sub_match_top_t *));
4205       if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4206 	return REG_ESPACE;
4207 
4208       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4209       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4210       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
4211 	return REG_ESPACE;
4212     }
4213   /* Already zero-ed by the caller.
4214      else
4215        mctx->bkref_ents = NULL;
4216      mctx->nbkref_ents = 0;
4217      mctx->nsub_tops = 0;  */
4218   mctx->abkref_ents = n;
4219   mctx->max_mb_elem_len = 1;
4220   mctx->asub_tops = n;
4221   return REG_NOERROR;
4222 }
4223 
4224 /* Clean the entries which depend on the current input in MCTX.
4225    This function must be invoked when the matcher changes the start index
4226    of the input, or changes the input string.  */
4227 
4228 static void
4229 internal_function
4230 match_ctx_clean (re_match_context_t *mctx)
4231 {
4232   Idx st_idx;
4233   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4234     {
4235       Idx sl_idx;
4236       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4237       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4238 	{
4239 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4240 	  re_free (last->path.array);
4241 	  re_free (last);
4242 	}
4243       re_free (top->lasts);
4244       if (top->path)
4245 	{
4246 	  re_free (top->path->array);
4247 	  re_free (top->path);
4248 	}
4249       free (top);
4250     }
4251 
4252   mctx->nsub_tops = 0;
4253   mctx->nbkref_ents = 0;
4254 }
4255 
4256 /* Free all the memory associated with MCTX.  */
4257 
4258 static void
4259 internal_function
4260 match_ctx_free (re_match_context_t *mctx)
4261 {
4262   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4263   match_ctx_clean (mctx);
4264   re_free (mctx->sub_tops);
4265   re_free (mctx->bkref_ents);
4266 }
4267 
4268 /* Add a new backreference entry to MCTX.
4269    Note that we assume that caller never call this function with duplicate
4270    entry, and call with STR_IDX which isn't smaller than any existing entry.
4271 */
4272 
4273 static reg_errcode_t
4274 internal_function __attribute_warn_unused_result__
4275 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4276 		     Idx to)
4277 {
4278   if (mctx->nbkref_ents >= mctx->abkref_ents)
4279     {
4280       struct re_backref_cache_entry* new_entry;
4281       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4282 			      mctx->abkref_ents * 2);
4283       if (BE (new_entry == NULL, 0))
4284 	{
4285 	  re_free (mctx->bkref_ents);
4286 	  return REG_ESPACE;
4287 	}
4288       mctx->bkref_ents = new_entry;
4289       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4290 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4291       mctx->abkref_ents *= 2;
4292     }
4293   if (mctx->nbkref_ents > 0
4294       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4295     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4296 
4297   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4298   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4299   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4300   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4301 
4302   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4303      If bit N is clear, means that this entry won't epsilon-transition to
4304      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4305      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4306      such node.
4307 
4308      A backreference does not epsilon-transition unless it is empty, so set
4309      to all zeros if FROM != TO.  */
4310   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4311     = (from == to ? -1 : 0);
4312 
4313   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4314   if (mctx->max_mb_elem_len < to - from)
4315     mctx->max_mb_elem_len = to - from;
4316   return REG_NOERROR;
4317 }
4318 
4319 /* Return the first entry with the same str_idx, or REG_MISSING if none is
4320    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4321 
4322 static Idx
4323 internal_function
4324 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4325 {
4326   Idx left, right, mid, last;
4327   last = right = mctx->nbkref_ents;
4328   for (left = 0; left < right;)
4329     {
4330       mid = (left + right) / 2;
4331       if (mctx->bkref_ents[mid].str_idx < str_idx)
4332 	left = mid + 1;
4333       else
4334 	right = mid;
4335     }
4336   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4337     return left;
4338   else
4339     return REG_MISSING;
4340 }
4341 
4342 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4343    at STR_IDX.  */
4344 
4345 static reg_errcode_t
4346 internal_function __attribute_warn_unused_result__
4347 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4348 {
4349 #ifdef DEBUG
4350   assert (mctx->sub_tops != NULL);
4351   assert (mctx->asub_tops > 0);
4352 #endif
4353   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4354     {
4355       Idx new_asub_tops = mctx->asub_tops * 2;
4356       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4357 						   re_sub_match_top_t *,
4358 						   new_asub_tops);
4359       if (BE (new_array == NULL, 0))
4360 	return REG_ESPACE;
4361       mctx->sub_tops = new_array;
4362       mctx->asub_tops = new_asub_tops;
4363     }
4364   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4365   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
4366     return REG_ESPACE;
4367   mctx->sub_tops[mctx->nsub_tops]->node = node;
4368   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4369   return REG_NOERROR;
4370 }
4371 
4372 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4373    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4374 
4375 static re_sub_match_last_t *
4376 internal_function
4377 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4378 {
4379   re_sub_match_last_t *new_entry;
4380   if (BE (subtop->nlasts == subtop->alasts, 0))
4381     {
4382       Idx new_alasts = 2 * subtop->alasts + 1;
4383       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4384 						    re_sub_match_last_t *,
4385 						    new_alasts);
4386       if (BE (new_array == NULL, 0))
4387 	return NULL;
4388       subtop->lasts = new_array;
4389       subtop->alasts = new_alasts;
4390     }
4391   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4392   if (BE (new_entry != NULL, 1))
4393     {
4394       subtop->lasts[subtop->nlasts] = new_entry;
4395       new_entry->node = node;
4396       new_entry->str_idx = str_idx;
4397       ++subtop->nlasts;
4398     }
4399   return new_entry;
4400 }
4401 
4402 static void
4403 internal_function
4404 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4405 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4406 {
4407   sctx->sifted_states = sifted_sts;
4408   sctx->limited_states = limited_sts;
4409   sctx->last_node = last_node;
4410   sctx->last_str_idx = last_str_idx;
4411   re_node_set_init_empty (&sctx->limits);
4412 }
4413