xref: /dflybsd-src/contrib/diffutils/lib/regexec.c (revision f3c0e43315ed177c30bea14922610325ba413b22)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 2002-2018 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU General Public
8    License as published by the Free Software Foundation; either
9    version 3 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    General Public License for more details.
15 
16    You should have received a copy of the GNU General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
19 
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 				     Idx n);
22 static void match_ctx_clean (re_match_context_t *mctx);
23 static void match_ctx_free (re_match_context_t *cache);
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 					  Idx str_idx, Idx from, Idx to);
26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28 					   Idx str_idx);
29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30 						    Idx node, Idx str_idx);
31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32 			   re_dfastate_t **limited_sts, Idx last_node,
33 			   Idx last_str_idx);
34 static reg_errcode_t re_search_internal (const regex_t *preg,
35 					 const char *string, Idx length,
36 					 Idx start, Idx last_start, Idx stop,
37 					 size_t nmatch, regmatch_t pmatch[],
38 					 int eflags);
39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40 				  const char *string1, Idx length1,
41 				  const char *string2, Idx length2,
42 				  Idx start, regoff_t range,
43 				  struct re_registers *regs,
44 				  Idx stop, bool ret_len);
45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46 				const char *string, Idx length, Idx start,
47 				regoff_t range, Idx stop,
48 				struct re_registers *regs,
49 				bool ret_len);
50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51                               Idx nregs, int regs_allocated);
52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54 			   Idx *p_match_first);
55 static Idx check_halt_state_context (const re_match_context_t *mctx,
56 				     const re_dfastate_t *state, Idx idx);
57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58 			 regmatch_t *prev_idx_match, Idx cur_node,
59 			 Idx cur_idx, Idx nmatch);
60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 				      Idx str_idx, Idx dest_node, Idx nregs,
62 				      regmatch_t *regs,
63 				      re_node_set *eps_via_nodes);
64 static reg_errcode_t set_regs (const regex_t *preg,
65 			       const re_match_context_t *mctx,
66 			       size_t nmatch, regmatch_t *pmatch,
67 			       bool fl_backtrack);
68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69 
70 #ifdef RE_ENABLE_I18N
71 static int sift_states_iter_mb (const re_match_context_t *mctx,
72 				re_sift_context_t *sctx,
73 				Idx node_idx, Idx str_idx, Idx max_str_idx);
74 #endif /* RE_ENABLE_I18N */
75 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76 					   re_sift_context_t *sctx);
77 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78 					  re_sift_context_t *sctx, Idx str_idx,
79 					  re_node_set *cur_dest);
80 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81 					      re_sift_context_t *sctx,
82 					      Idx str_idx,
83 					      re_node_set *dest_nodes);
84 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85 					    re_node_set *dest_nodes,
86 					    const re_node_set *candidates);
87 static bool check_dst_limits (const re_match_context_t *mctx,
88 			      const re_node_set *limits,
89 			      Idx dst_node, Idx dst_idx, Idx src_node,
90 			      Idx src_idx);
91 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92 					int boundaries, Idx subexp_idx,
93 					Idx from_node, Idx bkref_idx);
94 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95 				      Idx limit, Idx subexp_idx,
96 				      Idx node, Idx str_idx,
97 				      Idx bkref_idx);
98 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99 					  re_node_set *dest_nodes,
100 					  const re_node_set *candidates,
101 					  re_node_set *limits,
102 					  struct re_backref_cache_entry *bkref_ents,
103 					  Idx str_idx);
104 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105 					re_sift_context_t *sctx,
106 					Idx str_idx, const re_node_set *candidates);
107 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108 					re_dfastate_t **dst,
109 					re_dfastate_t **src, Idx num);
110 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111 					 re_match_context_t *mctx);
112 static re_dfastate_t *transit_state (reg_errcode_t *err,
113 				     re_match_context_t *mctx,
114 				     re_dfastate_t *state);
115 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
116 					    re_match_context_t *mctx,
117 					    re_dfastate_t *next_state);
118 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119 						re_node_set *cur_nodes,
120 						Idx str_idx);
121 #if 0
122 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
123 					re_match_context_t *mctx,
124 					re_dfastate_t *pstate);
125 #endif
126 #ifdef RE_ENABLE_I18N
127 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128 				       re_dfastate_t *pstate);
129 #endif /* RE_ENABLE_I18N */
130 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131 					  const re_node_set *nodes);
132 static reg_errcode_t get_subexp (re_match_context_t *mctx,
133 				 Idx bkref_node, Idx bkref_str_idx);
134 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135 				     const re_sub_match_top_t *sub_top,
136 				     re_sub_match_last_t *sub_last,
137 				     Idx bkref_node, Idx bkref_str);
138 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139 			     Idx subexp_idx, int type);
140 static reg_errcode_t check_arrival (re_match_context_t *mctx,
141 				    state_array_t *path, Idx top_node,
142 				    Idx top_str, Idx last_node, Idx last_str,
143 				    int type);
144 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145 						   Idx str_idx,
146 						   re_node_set *cur_nodes,
147 						   re_node_set *next_nodes);
148 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149 					       re_node_set *cur_nodes,
150 					       Idx ex_subexp, int type);
151 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152 						   re_node_set *dst_nodes,
153 						   Idx target, Idx ex_subexp,
154 						   int type);
155 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156 					 re_node_set *cur_nodes, Idx cur_str,
157 					 Idx subexp_num, int type);
158 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
159 #ifdef RE_ENABLE_I18N
160 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161 				    const re_string_t *input, Idx idx);
162 # ifdef _LIBC
163 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164 						   size_t name_len);
165 # endif /* _LIBC */
166 #endif /* RE_ENABLE_I18N */
167 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168 				       const re_dfastate_t *state,
169 				       re_node_set *states_node,
170 				       bitset_t *states_ch);
171 static bool check_node_accept (const re_match_context_t *mctx,
172 			       const re_token_t *node, Idx idx);
173 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
174 
175 /* Entry point for POSIX code.  */
176 
177 /* regexec searches for a given pattern, specified by PREG, in the
178    string STRING.
179 
180    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
182    least NMATCH elements, and we set them to the offsets of the
183    corresponding matched substrings.
184 
185    EFLAGS specifies "execution flags" which affect matching: if
186    REG_NOTBOL is set, then ^ does not match at the beginning of the
187    string; if REG_NOTEOL is set, then $ does not match at the end.
188 
189    We return 0 if we find a match and REG_NOMATCH if not.  */
190 
191 int
192 regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
193 	 size_t nmatch, regmatch_t pmatch[], int eflags)
194 {
195   reg_errcode_t err;
196   Idx start, length;
197   re_dfa_t *dfa = preg->buffer;
198 
199   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
200     return REG_BADPAT;
201 
202   if (eflags & REG_STARTEND)
203     {
204       start = pmatch[0].rm_so;
205       length = pmatch[0].rm_eo;
206     }
207   else
208     {
209       start = 0;
210       length = strlen (string);
211     }
212 
213   lock_lock (dfa->lock);
214   if (preg->no_sub)
215     err = re_search_internal (preg, string, length, start, length,
216 			      length, 0, NULL, eflags);
217   else
218     err = re_search_internal (preg, string, length, start, length,
219 			      length, nmatch, pmatch, eflags);
220   lock_unlock (dfa->lock);
221   return err != REG_NOERROR;
222 }
223 
224 #ifdef _LIBC
225 libc_hidden_def (__regexec)
226 
227 # include <shlib-compat.h>
228 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229 
230 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231 __typeof__ (__regexec) __compat_regexec;
232 
233 int
234 attribute_compat_text_section
235 __compat_regexec (const regex_t *_Restrict_ preg,
236 		  const char *_Restrict_ string, size_t nmatch,
237 		  regmatch_t pmatch[], int eflags)
238 {
239   return regexec (preg, string, nmatch, pmatch,
240 		  eflags & (REG_NOTBOL | REG_NOTEOL));
241 }
242 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243 # endif
244 #endif
245 
246 /* Entry points for GNU code.  */
247 
248 /* re_match, re_search, re_match_2, re_search_2
249 
250    The former two functions operate on STRING with length LENGTH,
251    while the later two operate on concatenation of STRING1 and STRING2
252    with lengths LENGTH1 and LENGTH2, respectively.
253 
254    re_match() matches the compiled pattern in BUFP against the string,
255    starting at index START.
256 
257    re_search() first tries matching at index START, then it tries to match
258    starting from index START + 1, and so on.  The last start position tried
259    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
260    way as re_match().)
261 
262    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263    the first STOP characters of the concatenation of the strings should be
264    concerned.
265 
266    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
268    computed relative to the concatenation, not relative to the individual
269    strings.)
270 
271    On success, re_match* functions return the length of the match, re_search*
272    return the position of the start of the match.  Return value -1 means no
273    match was found and -2 indicates an internal error.  */
274 
275 regoff_t
276 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277 	  Idx start, struct re_registers *regs)
278 {
279   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
280 }
281 #ifdef _LIBC
282 weak_alias (__re_match, re_match)
283 #endif
284 
285 regoff_t
286 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287 	   Idx start, regoff_t range, struct re_registers *regs)
288 {
289   return re_search_stub (bufp, string, length, start, range, length, regs,
290 			 false);
291 }
292 #ifdef _LIBC
293 weak_alias (__re_search, re_search)
294 #endif
295 
296 regoff_t
297 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298 	    const char *string2, Idx length2, Idx start,
299 	    struct re_registers *regs, Idx stop)
300 {
301   return re_search_2_stub (bufp, string1, length1, string2, length2,
302 			   start, 0, regs, stop, true);
303 }
304 #ifdef _LIBC
305 weak_alias (__re_match_2, re_match_2)
306 #endif
307 
308 regoff_t
309 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310 	     const char *string2, Idx length2, Idx start, regoff_t range,
311 	     struct re_registers *regs, Idx stop)
312 {
313   return re_search_2_stub (bufp, string1, length1, string2, length2,
314 			   start, range, regs, stop, false);
315 }
316 #ifdef _LIBC
317 weak_alias (__re_search_2, re_search_2)
318 #endif
319 
320 static regoff_t
321 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322 		  Idx length1, const char *string2, Idx length2, Idx start,
323 		  regoff_t range, struct re_registers *regs,
324 		  Idx stop, bool ret_len)
325 {
326   const char *str;
327   regoff_t rval;
328   Idx len;
329   char *s = NULL;
330 
331   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
332 			 || INT_ADD_WRAPV (length1, length2, &len))))
333     return -2;
334 
335   /* Concatenate the strings.  */
336   if (length2 > 0)
337     if (length1 > 0)
338       {
339 	s = re_malloc (char, len);
340 
341 	if (__glibc_unlikely (s == NULL))
342 	  return -2;
343 #ifdef _LIBC
344 	memcpy (__mempcpy (s, string1, length1), string2, length2);
345 #else
346 	memcpy (s, string1, length1);
347 	memcpy (s + length1, string2, length2);
348 #endif
349 	str = s;
350       }
351     else
352       str = string2;
353   else
354     str = string1;
355 
356   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
357 			 ret_len);
358   re_free (s);
359   return rval;
360 }
361 
362 /* The parameters have the same meaning as those of re_search.
363    Additional parameters:
364    If RET_LEN is true the length of the match is returned (re_match style);
365    otherwise the position of the match is returned.  */
366 
367 static regoff_t
368 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
369 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
370 		bool ret_len)
371 {
372   reg_errcode_t result;
373   regmatch_t *pmatch;
374   Idx nregs;
375   regoff_t rval;
376   int eflags = 0;
377   re_dfa_t *dfa = bufp->buffer;
378   Idx last_start = start + range;
379 
380   /* Check for out-of-range.  */
381   if (__glibc_unlikely (start < 0 || start > length))
382     return -1;
383   if (__glibc_unlikely (length < last_start
384 			|| (0 <= range && last_start < start)))
385     last_start = length;
386   else if (__glibc_unlikely (last_start < 0
387 			     || (range < 0 && start <= last_start)))
388     last_start = 0;
389 
390   lock_lock (dfa->lock);
391 
392   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
393   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
394 
395   /* Compile fastmap if we haven't yet.  */
396   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
397     re_compile_fastmap (bufp);
398 
399   if (__glibc_unlikely (bufp->no_sub))
400     regs = NULL;
401 
402   /* We need at least 1 register.  */
403   if (regs == NULL)
404     nregs = 1;
405   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406 			     && regs->num_regs <= bufp->re_nsub))
407     {
408       nregs = regs->num_regs;
409       if (__glibc_unlikely (nregs < 1))
410 	{
411 	  /* Nothing can be copied to regs.  */
412 	  regs = NULL;
413 	  nregs = 1;
414 	}
415     }
416   else
417     nregs = bufp->re_nsub + 1;
418   pmatch = re_malloc (regmatch_t, nregs);
419   if (__glibc_unlikely (pmatch == NULL))
420     {
421       rval = -2;
422       goto out;
423     }
424 
425   result = re_search_internal (bufp, string, length, start, last_start, stop,
426 			       nregs, pmatch, eflags);
427 
428   rval = 0;
429 
430   /* I hope we needn't fill their regs with -1's when no match was found.  */
431   if (result != REG_NOERROR)
432     rval = result == REG_NOMATCH ? -1 : -2;
433   else if (regs != NULL)
434     {
435       /* If caller wants register contents data back, copy them.  */
436       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
437 					   bufp->regs_allocated);
438       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
439 	rval = -2;
440     }
441 
442   if (__glibc_likely (rval == 0))
443     {
444       if (ret_len)
445 	{
446 	  assert (pmatch[0].rm_so == start);
447 	  rval = pmatch[0].rm_eo - start;
448 	}
449       else
450 	rval = pmatch[0].rm_so;
451     }
452   re_free (pmatch);
453  out:
454   lock_unlock (dfa->lock);
455   return rval;
456 }
457 
458 static unsigned
459 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
460 	      int regs_allocated)
461 {
462   int rval = REGS_REALLOCATE;
463   Idx i;
464   Idx need_regs = nregs + 1;
465   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
466      uses.  */
467 
468   /* Have the register data arrays been allocated?  */
469   if (regs_allocated == REGS_UNALLOCATED)
470     { /* No.  So allocate them with malloc.  */
471       regs->start = re_malloc (regoff_t, need_regs);
472       if (__glibc_unlikely (regs->start == NULL))
473 	return REGS_UNALLOCATED;
474       regs->end = re_malloc (regoff_t, need_regs);
475       if (__glibc_unlikely (regs->end == NULL))
476 	{
477 	  re_free (regs->start);
478 	  return REGS_UNALLOCATED;
479 	}
480       regs->num_regs = need_regs;
481     }
482   else if (regs_allocated == REGS_REALLOCATE)
483     { /* Yes.  If we need more elements than were already
484 	 allocated, reallocate them.  If we need fewer, just
485 	 leave it alone.  */
486       if (__glibc_unlikely (need_regs > regs->num_regs))
487 	{
488 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
489 	  regoff_t *new_end;
490 	  if (__glibc_unlikely (new_start == NULL))
491 	    return REGS_UNALLOCATED;
492 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
493 	  if (__glibc_unlikely (new_end == NULL))
494 	    {
495 	      re_free (new_start);
496 	      return REGS_UNALLOCATED;
497 	    }
498 	  regs->start = new_start;
499 	  regs->end = new_end;
500 	  regs->num_regs = need_regs;
501 	}
502     }
503   else
504     {
505       assert (regs_allocated == REGS_FIXED);
506       /* This function may not be called with REGS_FIXED and nregs too big.  */
507       assert (regs->num_regs >= nregs);
508       rval = REGS_FIXED;
509     }
510 
511   /* Copy the regs.  */
512   for (i = 0; i < nregs; ++i)
513     {
514       regs->start[i] = pmatch[i].rm_so;
515       regs->end[i] = pmatch[i].rm_eo;
516     }
517   for ( ; i < regs->num_regs; ++i)
518     regs->start[i] = regs->end[i] = -1;
519 
520   return rval;
521 }
522 
523 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
524    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
525    this memory for recording register information.  STARTS and ENDS
526    must be allocated using the malloc library routine, and must each
527    be at least NUM_REGS * sizeof (regoff_t) bytes long.
528 
529    If NUM_REGS == 0, then subsequent matches should allocate their own
530    register data.
531 
532    Unless this function is called, the first search or match using
533    PATTERN_BUFFER will allocate its own register data, without
534    freeing the old data.  */
535 
536 void
537 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
538 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
539 {
540   if (num_regs)
541     {
542       bufp->regs_allocated = REGS_REALLOCATE;
543       regs->num_regs = num_regs;
544       regs->start = starts;
545       regs->end = ends;
546     }
547   else
548     {
549       bufp->regs_allocated = REGS_UNALLOCATED;
550       regs->num_regs = 0;
551       regs->start = regs->end = NULL;
552     }
553 }
554 #ifdef _LIBC
555 weak_alias (__re_set_registers, re_set_registers)
556 #endif
557 
558 /* Entry points compatible with 4.2 BSD regex library.  We don't define
559    them unless specifically requested.  */
560 
561 #if defined _REGEX_RE_COMP || defined _LIBC
562 int
563 # ifdef _LIBC
564 weak_function
565 # endif
566 re_exec (const char *s)
567 {
568   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
569 }
570 #endif /* _REGEX_RE_COMP */
571 
572 /* Internal entry point.  */
573 
574 /* Searches for a compiled pattern PREG in the string STRING, whose
575    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
576    meaning as with regexec.  LAST_START is START + RANGE, where
577    START and RANGE have the same meaning as with re_search.
578    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
579    otherwise return the error code.
580    Note: We assume front end functions already check ranges.
581    (0 <= LAST_START && LAST_START <= LENGTH)  */
582 
583 static reg_errcode_t
584 __attribute_warn_unused_result__
585 re_search_internal (const regex_t *preg, const char *string, Idx length,
586 		    Idx start, Idx last_start, Idx stop, size_t nmatch,
587 		    regmatch_t pmatch[], int eflags)
588 {
589   reg_errcode_t err;
590   const re_dfa_t *dfa = preg->buffer;
591   Idx left_lim, right_lim;
592   int incr;
593   bool fl_longest_match;
594   int match_kind;
595   Idx match_first;
596   Idx match_last = -1;
597   Idx extra_nmatch;
598   bool sb;
599   int ch;
600 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
601   re_match_context_t mctx = { .dfa = dfa };
602 #else
603   re_match_context_t mctx;
604 #endif
605   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
606 		    && start != last_start && !preg->can_be_null)
607 		   ? preg->fastmap : NULL);
608   RE_TRANSLATE_TYPE t = preg->translate;
609 
610 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
611   memset (&mctx, '\0', sizeof (re_match_context_t));
612   mctx.dfa = dfa;
613 #endif
614 
615   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
616   nmatch -= extra_nmatch;
617 
618   /* Check if the DFA haven't been compiled.  */
619   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
620 			|| dfa->init_state_word == NULL
621 			|| dfa->init_state_nl == NULL
622 			|| dfa->init_state_begbuf == NULL))
623     return REG_NOMATCH;
624 
625 #ifdef DEBUG
626   /* We assume front-end functions already check them.  */
627   assert (0 <= last_start && last_start <= length);
628 #endif
629 
630   /* If initial states with non-begbuf contexts have no elements,
631      the regex must be anchored.  If preg->newline_anchor is set,
632      we'll never use init_state_nl, so do not check it.  */
633   if (dfa->init_state->nodes.nelem == 0
634       && dfa->init_state_word->nodes.nelem == 0
635       && (dfa->init_state_nl->nodes.nelem == 0
636 	  || !preg->newline_anchor))
637     {
638       if (start != 0 && last_start != 0)
639         return REG_NOMATCH;
640       start = last_start = 0;
641     }
642 
643   /* We must check the longest matching, if nmatch > 0.  */
644   fl_longest_match = (nmatch != 0 || dfa->nbackref);
645 
646   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
647 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
648 			    dfa);
649   if (__glibc_unlikely (err != REG_NOERROR))
650     goto free_return;
651   mctx.input.stop = stop;
652   mctx.input.raw_stop = stop;
653   mctx.input.newline_anchor = preg->newline_anchor;
654 
655   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
656   if (__glibc_unlikely (err != REG_NOERROR))
657     goto free_return;
658 
659   /* We will log all the DFA states through which the dfa pass,
660      if nmatch > 1, or this dfa has "multibyte node", which is a
661      back-reference or a node which can accept multibyte character or
662      multi character collating element.  */
663   if (nmatch > 1 || dfa->has_mb_node)
664     {
665       /* Avoid overflow.  */
666       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
667 			     <= mctx.input.bufs_len)))
668 	{
669 	  err = REG_ESPACE;
670 	  goto free_return;
671 	}
672 
673       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
674       if (__glibc_unlikely (mctx.state_log == NULL))
675 	{
676 	  err = REG_ESPACE;
677 	  goto free_return;
678 	}
679     }
680   else
681     mctx.state_log = NULL;
682 
683   match_first = start;
684   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
685 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
686 
687   /* Check incrementally whether the input string matches.  */
688   incr = (last_start < start) ? -1 : 1;
689   left_lim = (last_start < start) ? last_start : start;
690   right_lim = (last_start < start) ? start : last_start;
691   sb = dfa->mb_cur_max == 1;
692   match_kind =
693     (fastmap
694      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
695 	| (start <= last_start ? 2 : 0)
696 	| (t != NULL ? 1 : 0))
697      : 8);
698 
699   for (;; match_first += incr)
700     {
701       err = REG_NOMATCH;
702       if (match_first < left_lim || right_lim < match_first)
703 	goto free_return;
704 
705       /* Advance as rapidly as possible through the string, until we
706 	 find a plausible place to start matching.  This may be done
707 	 with varying efficiency, so there are various possibilities:
708 	 only the most common of them are specialized, in order to
709 	 save on code size.  We use a switch statement for speed.  */
710       switch (match_kind)
711 	{
712 	case 8:
713 	  /* No fastmap.  */
714 	  break;
715 
716 	case 7:
717 	  /* Fastmap with single-byte translation, match forward.  */
718 	  while (__glibc_likely (match_first < right_lim)
719 		 && !fastmap[t[(unsigned char) string[match_first]]])
720 	    ++match_first;
721 	  goto forward_match_found_start_or_reached_end;
722 
723 	case 6:
724 	  /* Fastmap without translation, match forward.  */
725 	  while (__glibc_likely (match_first < right_lim)
726 		 && !fastmap[(unsigned char) string[match_first]])
727 	    ++match_first;
728 
729 	forward_match_found_start_or_reached_end:
730 	  if (__glibc_unlikely (match_first == right_lim))
731 	    {
732 	      ch = match_first >= length
733 		       ? 0 : (unsigned char) string[match_first];
734 	      if (!fastmap[t ? t[ch] : ch])
735 		goto free_return;
736 	    }
737 	  break;
738 
739 	case 4:
740 	case 5:
741 	  /* Fastmap without multi-byte translation, match backwards.  */
742 	  while (match_first >= left_lim)
743 	    {
744 	      ch = match_first >= length
745 		       ? 0 : (unsigned char) string[match_first];
746 	      if (fastmap[t ? t[ch] : ch])
747 		break;
748 	      --match_first;
749 	    }
750 	  if (match_first < left_lim)
751 	    goto free_return;
752 	  break;
753 
754 	default:
755 	  /* In this case, we can't determine easily the current byte,
756 	     since it might be a component byte of a multibyte
757 	     character.  Then we use the constructed buffer instead.  */
758 	  for (;;)
759 	    {
760 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
761 		 buffers.  */
762 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
763 	      if (__glibc_unlikely (offset
764 				    >= (__re_size_t) mctx.input.valid_raw_len))
765 		{
766 		  err = re_string_reconstruct (&mctx.input, match_first,
767 					       eflags);
768 		  if (__glibc_unlikely (err != REG_NOERROR))
769 		    goto free_return;
770 
771 		  offset = match_first - mctx.input.raw_mbs_idx;
772 		}
773 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
774 		 Note that MATCH_FIRST must not be smaller than 0.  */
775 	      ch = (match_first >= length
776 		    ? 0 : re_string_byte_at (&mctx.input, offset));
777 	      if (fastmap[ch])
778 		break;
779 	      match_first += incr;
780 	      if (match_first < left_lim || match_first > right_lim)
781 		{
782 		  err = REG_NOMATCH;
783 		  goto free_return;
784 		}
785 	    }
786 	  break;
787 	}
788 
789       /* Reconstruct the buffers so that the matcher can assume that
790 	 the matching starts from the beginning of the buffer.  */
791       err = re_string_reconstruct (&mctx.input, match_first, eflags);
792       if (__glibc_unlikely (err != REG_NOERROR))
793 	goto free_return;
794 
795 #ifdef RE_ENABLE_I18N
796      /* Don't consider this char as a possible match start if it part,
797 	yet isn't the head, of a multibyte character.  */
798       if (!sb && !re_string_first_byte (&mctx.input, 0))
799 	continue;
800 #endif
801 
802       /* It seems to be appropriate one, then use the matcher.  */
803       /* We assume that the matching starts from 0.  */
804       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
805       match_last = check_matching (&mctx, fl_longest_match,
806 				   start <= last_start ? &match_first : NULL);
807       if (match_last != -1)
808 	{
809 	  if (__glibc_unlikely (match_last == -2))
810 	    {
811 	      err = REG_ESPACE;
812 	      goto free_return;
813 	    }
814 	  else
815 	    {
816 	      mctx.match_last = match_last;
817 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
818 		{
819 		  re_dfastate_t *pstate = mctx.state_log[match_last];
820 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
821 							     match_last);
822 		}
823 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
824 		  || dfa->nbackref)
825 		{
826 		  err = prune_impossible_nodes (&mctx);
827 		  if (err == REG_NOERROR)
828 		    break;
829 		  if (__glibc_unlikely (err != REG_NOMATCH))
830 		    goto free_return;
831 		  match_last = -1;
832 		}
833 	      else
834 		break; /* We found a match.  */
835 	    }
836 	}
837 
838       match_ctx_clean (&mctx);
839     }
840 
841 #ifdef DEBUG
842   assert (match_last != -1);
843   assert (err == REG_NOERROR);
844 #endif
845 
846   /* Set pmatch[] if we need.  */
847   if (nmatch > 0)
848     {
849       Idx reg_idx;
850 
851       /* Initialize registers.  */
852       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
853 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
854 
855       /* Set the points where matching start/end.  */
856       pmatch[0].rm_so = 0;
857       pmatch[0].rm_eo = mctx.match_last;
858       /* FIXME: This function should fail if mctx.match_last exceeds
859 	 the maximum possible regoff_t value.  We need a new error
860 	 code REG_OVERFLOW.  */
861 
862       if (!preg->no_sub && nmatch > 1)
863 	{
864 	  err = set_regs (preg, &mctx, nmatch, pmatch,
865 			  dfa->has_plural_match && dfa->nbackref > 0);
866 	  if (__glibc_unlikely (err != REG_NOERROR))
867 	    goto free_return;
868 	}
869 
870       /* At last, add the offset to each register, since we slid
871 	 the buffers so that we could assume that the matching starts
872 	 from 0.  */
873       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
874 	if (pmatch[reg_idx].rm_so != -1)
875 	  {
876 #ifdef RE_ENABLE_I18N
877 	    if (__glibc_unlikely (mctx.input.offsets_needed != 0))
878 	      {
879 		pmatch[reg_idx].rm_so =
880 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
881 		   ? mctx.input.valid_raw_len
882 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
883 		pmatch[reg_idx].rm_eo =
884 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
885 		   ? mctx.input.valid_raw_len
886 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
887 	      }
888 #else
889 	    assert (mctx.input.offsets_needed == 0);
890 #endif
891 	    pmatch[reg_idx].rm_so += match_first;
892 	    pmatch[reg_idx].rm_eo += match_first;
893 	  }
894       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
895 	{
896 	  pmatch[nmatch + reg_idx].rm_so = -1;
897 	  pmatch[nmatch + reg_idx].rm_eo = -1;
898 	}
899 
900       if (dfa->subexp_map)
901 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
902 	  if (dfa->subexp_map[reg_idx] != reg_idx)
903 	    {
904 	      pmatch[reg_idx + 1].rm_so
905 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
906 	      pmatch[reg_idx + 1].rm_eo
907 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
908 	    }
909     }
910 
911  free_return:
912   re_free (mctx.state_log);
913   if (dfa->nbackref)
914     match_ctx_free (&mctx);
915   re_string_destruct (&mctx.input);
916   return err;
917 }
918 
919 static reg_errcode_t
920 __attribute_warn_unused_result__
921 prune_impossible_nodes (re_match_context_t *mctx)
922 {
923   const re_dfa_t *const dfa = mctx->dfa;
924   Idx halt_node, match_last;
925   reg_errcode_t ret;
926   re_dfastate_t **sifted_states;
927   re_dfastate_t **lim_states = NULL;
928   re_sift_context_t sctx;
929 #ifdef DEBUG
930   assert (mctx->state_log != NULL);
931 #endif
932   match_last = mctx->match_last;
933   halt_node = mctx->last_node;
934 
935   /* Avoid overflow.  */
936   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
937 			<= match_last))
938     return REG_ESPACE;
939 
940   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
941   if (__glibc_unlikely (sifted_states == NULL))
942     {
943       ret = REG_ESPACE;
944       goto free_return;
945     }
946   if (dfa->nbackref)
947     {
948       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
949       if (__glibc_unlikely (lim_states == NULL))
950 	{
951 	  ret = REG_ESPACE;
952 	  goto free_return;
953 	}
954       while (1)
955 	{
956 	  memset (lim_states, '\0',
957 		  sizeof (re_dfastate_t *) * (match_last + 1));
958 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
959 			 match_last);
960 	  ret = sift_states_backward (mctx, &sctx);
961 	  re_node_set_free (&sctx.limits);
962 	  if (__glibc_unlikely (ret != REG_NOERROR))
963 	      goto free_return;
964 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
965 	    break;
966 	  do
967 	    {
968 	      --match_last;
969 	      if (match_last < 0)
970 		{
971 		  ret = REG_NOMATCH;
972 		  goto free_return;
973 		}
974 	    } while (mctx->state_log[match_last] == NULL
975 		     || !mctx->state_log[match_last]->halt);
976 	  halt_node = check_halt_state_context (mctx,
977 						mctx->state_log[match_last],
978 						match_last);
979 	}
980       ret = merge_state_array (dfa, sifted_states, lim_states,
981 			       match_last + 1);
982       re_free (lim_states);
983       lim_states = NULL;
984       if (__glibc_unlikely (ret != REG_NOERROR))
985 	goto free_return;
986     }
987   else
988     {
989       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
990       ret = sift_states_backward (mctx, &sctx);
991       re_node_set_free (&sctx.limits);
992       if (__glibc_unlikely (ret != REG_NOERROR))
993 	goto free_return;
994       if (sifted_states[0] == NULL)
995 	{
996 	  ret = REG_NOMATCH;
997 	  goto free_return;
998 	}
999     }
1000   re_free (mctx->state_log);
1001   mctx->state_log = sifted_states;
1002   sifted_states = NULL;
1003   mctx->last_node = halt_node;
1004   mctx->match_last = match_last;
1005   ret = REG_NOERROR;
1006  free_return:
1007   re_free (sifted_states);
1008   re_free (lim_states);
1009   return ret;
1010 }
1011 
1012 /* Acquire an initial state and return it.
1013    We must select appropriate initial state depending on the context,
1014    since initial states may have constraints like "\<", "^", etc..  */
1015 
1016 static inline re_dfastate_t *
1017 __attribute__ ((always_inline))
1018 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
1019 			    Idx idx)
1020 {
1021   const re_dfa_t *const dfa = mctx->dfa;
1022   if (dfa->init_state->has_constraint)
1023     {
1024       unsigned int context;
1025       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
1026       if (IS_WORD_CONTEXT (context))
1027 	return dfa->init_state_word;
1028       else if (IS_ORDINARY_CONTEXT (context))
1029 	return dfa->init_state;
1030       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1031 	return dfa->init_state_begbuf;
1032       else if (IS_NEWLINE_CONTEXT (context))
1033 	return dfa->init_state_nl;
1034       else if (IS_BEGBUF_CONTEXT (context))
1035 	{
1036 	  /* It is relatively rare case, then calculate on demand.  */
1037 	  return re_acquire_state_context (err, dfa,
1038 					   dfa->init_state->entrance_nodes,
1039 					   context);
1040 	}
1041       else
1042 	/* Must not happen?  */
1043 	return dfa->init_state;
1044     }
1045   else
1046     return dfa->init_state;
1047 }
1048 
1049 /* Check whether the regular expression match input string INPUT or not,
1050    and return the index where the matching end.  Return -1 if
1051    there is no match, and return -2 in case of an error.
1052    FL_LONGEST_MATCH means we want the POSIX longest matching.
1053    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1054    next place where we may want to try matching.
1055    Note that the matcher assumes that the matching starts from the current
1056    index of the buffer.  */
1057 
1058 static Idx
1059 __attribute_warn_unused_result__
1060 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1061 		Idx *p_match_first)
1062 {
1063   const re_dfa_t *const dfa = mctx->dfa;
1064   reg_errcode_t err;
1065   Idx match = 0;
1066   Idx match_last = -1;
1067   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1068   re_dfastate_t *cur_state;
1069   bool at_init_state = p_match_first != NULL;
1070   Idx next_start_idx = cur_str_idx;
1071 
1072   err = REG_NOERROR;
1073   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1074   /* An initial state must not be NULL (invalid).  */
1075   if (__glibc_unlikely (cur_state == NULL))
1076     {
1077       assert (err == REG_ESPACE);
1078       return -2;
1079     }
1080 
1081   if (mctx->state_log != NULL)
1082     {
1083       mctx->state_log[cur_str_idx] = cur_state;
1084 
1085       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1086 	 later.  E.g. Processing back references.  */
1087       if (__glibc_unlikely (dfa->nbackref))
1088 	{
1089 	  at_init_state = false;
1090 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1091 	  if (__glibc_unlikely (err != REG_NOERROR))
1092 	    return err;
1093 
1094 	  if (cur_state->has_backref)
1095 	    {
1096 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1097 	      if (__glibc_unlikely (err != REG_NOERROR))
1098 		return err;
1099 	    }
1100 	}
1101     }
1102 
1103   /* If the RE accepts NULL string.  */
1104   if (__glibc_unlikely (cur_state->halt))
1105     {
1106       if (!cur_state->has_constraint
1107 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
1108 	{
1109 	  if (!fl_longest_match)
1110 	    return cur_str_idx;
1111 	  else
1112 	    {
1113 	      match_last = cur_str_idx;
1114 	      match = 1;
1115 	    }
1116 	}
1117     }
1118 
1119   while (!re_string_eoi (&mctx->input))
1120     {
1121       re_dfastate_t *old_state = cur_state;
1122       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1123 
1124       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1125 	   && mctx->input.bufs_len < mctx->input.len)
1126 	  || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1127 	      && mctx->input.valid_len < mctx->input.len))
1128 	{
1129 	  err = extend_buffers (mctx, next_char_idx + 1);
1130 	  if (__glibc_unlikely (err != REG_NOERROR))
1131 	    {
1132 	      assert (err == REG_ESPACE);
1133 	      return -2;
1134 	    }
1135 	}
1136 
1137       cur_state = transit_state (&err, mctx, cur_state);
1138       if (mctx->state_log != NULL)
1139 	cur_state = merge_state_with_log (&err, mctx, cur_state);
1140 
1141       if (cur_state == NULL)
1142 	{
1143 	  /* Reached the invalid state or an error.  Try to recover a valid
1144 	     state using the state log, if available and if we have not
1145 	     already found a valid (even if not the longest) match.  */
1146 	  if (__glibc_unlikely (err != REG_NOERROR))
1147 	    return -2;
1148 
1149 	  if (mctx->state_log == NULL
1150 	      || (match && !fl_longest_match)
1151 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
1152 	    break;
1153 	}
1154 
1155       if (__glibc_unlikely (at_init_state))
1156 	{
1157 	  if (old_state == cur_state)
1158 	    next_start_idx = next_char_idx;
1159 	  else
1160 	    at_init_state = false;
1161 	}
1162 
1163       if (cur_state->halt)
1164 	{
1165 	  /* Reached a halt state.
1166 	     Check the halt state can satisfy the current context.  */
1167 	  if (!cur_state->has_constraint
1168 	      || check_halt_state_context (mctx, cur_state,
1169 					   re_string_cur_idx (&mctx->input)))
1170 	    {
1171 	      /* We found an appropriate halt state.  */
1172 	      match_last = re_string_cur_idx (&mctx->input);
1173 	      match = 1;
1174 
1175 	      /* We found a match, do not modify match_first below.  */
1176 	      p_match_first = NULL;
1177 	      if (!fl_longest_match)
1178 		break;
1179 	    }
1180 	}
1181     }
1182 
1183   if (p_match_first)
1184     *p_match_first += next_start_idx;
1185 
1186   return match_last;
1187 }
1188 
1189 /* Check NODE match the current context.  */
1190 
1191 static bool
1192 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1193 {
1194   re_token_type_t type = dfa->nodes[node].type;
1195   unsigned int constraint = dfa->nodes[node].constraint;
1196   if (type != END_OF_RE)
1197     return false;
1198   if (!constraint)
1199     return true;
1200   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1201     return false;
1202   return true;
1203 }
1204 
1205 /* Check the halt state STATE match the current context.
1206    Return 0 if not match, if the node, STATE has, is a halt node and
1207    match the context, return the node.  */
1208 
1209 static Idx
1210 check_halt_state_context (const re_match_context_t *mctx,
1211 			  const re_dfastate_t *state, Idx idx)
1212 {
1213   Idx i;
1214   unsigned int context;
1215 #ifdef DEBUG
1216   assert (state->halt);
1217 #endif
1218   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1219   for (i = 0; i < state->nodes.nelem; ++i)
1220     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1221       return state->nodes.elems[i];
1222   return 0;
1223 }
1224 
1225 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1226    corresponding to the DFA).
1227    Return the destination node, and update EPS_VIA_NODES;
1228    return -1 in case of errors.  */
1229 
1230 static Idx
1231 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1232 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1233 		   struct re_fail_stack_t *fs)
1234 {
1235   const re_dfa_t *const dfa = mctx->dfa;
1236   Idx i;
1237   bool ok;
1238   if (IS_EPSILON_NODE (dfa->nodes[node].type))
1239     {
1240       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1241       re_node_set *edests = &dfa->edests[node];
1242       Idx dest_node;
1243       ok = re_node_set_insert (eps_via_nodes, node);
1244       if (__glibc_unlikely (! ok))
1245 	return -2;
1246       /* Pick up a valid destination, or return -1 if none
1247 	 is found.  */
1248       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
1249 	{
1250 	  Idx candidate = edests->elems[i];
1251 	  if (!re_node_set_contains (cur_nodes, candidate))
1252 	    continue;
1253           if (dest_node == -1)
1254 	    dest_node = candidate;
1255 
1256 	  else
1257 	    {
1258 	      /* In order to avoid infinite loop like "(a*)*", return the second
1259 		 epsilon-transition if the first was already considered.  */
1260 	      if (re_node_set_contains (eps_via_nodes, dest_node))
1261 		return candidate;
1262 
1263 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
1264 	      else if (fs != NULL
1265 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1266 					   eps_via_nodes))
1267 		return -2;
1268 
1269 	      /* We know we are going to exit.  */
1270 	      break;
1271 	    }
1272 	}
1273       return dest_node;
1274     }
1275   else
1276     {
1277       Idx naccepted = 0;
1278       re_token_type_t type = dfa->nodes[node].type;
1279 
1280 #ifdef RE_ENABLE_I18N
1281       if (dfa->nodes[node].accept_mb)
1282 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1283       else
1284 #endif /* RE_ENABLE_I18N */
1285       if (type == OP_BACK_REF)
1286 	{
1287 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1288 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1289 	  if (fs != NULL)
1290 	    {
1291 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
1292 		return -1;
1293 	      else if (naccepted)
1294 		{
1295 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1296 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1297 			      naccepted) != 0)
1298 		    return -1;
1299 		}
1300 	    }
1301 
1302 	  if (naccepted == 0)
1303 	    {
1304 	      Idx dest_node;
1305 	      ok = re_node_set_insert (eps_via_nodes, node);
1306 	      if (__glibc_unlikely (! ok))
1307 		return -2;
1308 	      dest_node = dfa->edests[node].elems[0];
1309 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1310 					dest_node))
1311 		return dest_node;
1312 	    }
1313 	}
1314 
1315       if (naccepted != 0
1316 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
1317 	{
1318 	  Idx dest_node = dfa->nexts[node];
1319 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1320 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1321 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1322 					       dest_node)))
1323 	    return -1;
1324 	  re_node_set_empty (eps_via_nodes);
1325 	  return dest_node;
1326 	}
1327     }
1328   return -1;
1329 }
1330 
1331 static reg_errcode_t
1332 __attribute_warn_unused_result__
1333 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1334 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
1335 {
1336   reg_errcode_t err;
1337   Idx num = fs->num++;
1338   if (fs->num == fs->alloc)
1339     {
1340       struct re_fail_stack_ent_t *new_array;
1341       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1342                               fs->alloc * 2);
1343       if (new_array == NULL)
1344 	return REG_ESPACE;
1345       fs->alloc *= 2;
1346       fs->stack = new_array;
1347     }
1348   fs->stack[num].idx = str_idx;
1349   fs->stack[num].node = dest_node;
1350   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1351   if (fs->stack[num].regs == NULL)
1352     return REG_ESPACE;
1353   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1354   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1355   return err;
1356 }
1357 
1358 static Idx
1359 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1360 		regmatch_t *regs, re_node_set *eps_via_nodes)
1361 {
1362   Idx num = --fs->num;
1363   assert (num >= 0);
1364   *pidx = fs->stack[num].idx;
1365   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1366   re_node_set_free (eps_via_nodes);
1367   re_free (fs->stack[num].regs);
1368   *eps_via_nodes = fs->stack[num].eps_via_nodes;
1369   return fs->stack[num].node;
1370 }
1371 
1372 /* Set the positions where the subexpressions are starts/ends to registers
1373    PMATCH.
1374    Note: We assume that pmatch[0] is already set, and
1375    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
1376 
1377 static reg_errcode_t
1378 __attribute_warn_unused_result__
1379 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1380 	  regmatch_t *pmatch, bool fl_backtrack)
1381 {
1382   const re_dfa_t *dfa = preg->buffer;
1383   Idx idx, cur_node;
1384   re_node_set eps_via_nodes;
1385   struct re_fail_stack_t *fs;
1386   struct re_fail_stack_t fs_body = { 0, 2, NULL };
1387   regmatch_t *prev_idx_match;
1388   bool prev_idx_match_malloced = false;
1389 
1390 #ifdef DEBUG
1391   assert (nmatch > 1);
1392   assert (mctx->state_log != NULL);
1393 #endif
1394   if (fl_backtrack)
1395     {
1396       fs = &fs_body;
1397       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1398       if (fs->stack == NULL)
1399 	return REG_ESPACE;
1400     }
1401   else
1402     fs = NULL;
1403 
1404   cur_node = dfa->init_node;
1405   re_node_set_init_empty (&eps_via_nodes);
1406 
1407   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1408     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
1409   else
1410     {
1411       prev_idx_match = re_malloc (regmatch_t, nmatch);
1412       if (prev_idx_match == NULL)
1413 	{
1414 	  free_fail_stack_return (fs);
1415 	  return REG_ESPACE;
1416 	}
1417       prev_idx_match_malloced = true;
1418     }
1419   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1420 
1421   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1422     {
1423       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1424 
1425       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1426 	{
1427 	  Idx reg_idx;
1428 	  if (fs)
1429 	    {
1430 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1431 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1432 		  break;
1433 	      if (reg_idx == nmatch)
1434 		{
1435 		  re_node_set_free (&eps_via_nodes);
1436 		  if (prev_idx_match_malloced)
1437 		    re_free (prev_idx_match);
1438 		  return free_fail_stack_return (fs);
1439 		}
1440 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1441 					 &eps_via_nodes);
1442 	    }
1443 	  else
1444 	    {
1445 	      re_node_set_free (&eps_via_nodes);
1446 	      if (prev_idx_match_malloced)
1447 		re_free (prev_idx_match);
1448 	      return REG_NOERROR;
1449 	    }
1450 	}
1451 
1452       /* Proceed to next node.  */
1453       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1454 				    &eps_via_nodes, fs);
1455 
1456       if (__glibc_unlikely (cur_node < 0))
1457 	{
1458 	  if (__glibc_unlikely (cur_node == -2))
1459 	    {
1460 	      re_node_set_free (&eps_via_nodes);
1461 	      if (prev_idx_match_malloced)
1462 		re_free (prev_idx_match);
1463 	      free_fail_stack_return (fs);
1464 	      return REG_ESPACE;
1465 	    }
1466 	  if (fs)
1467 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1468 				       &eps_via_nodes);
1469 	  else
1470 	    {
1471 	      re_node_set_free (&eps_via_nodes);
1472 	      if (prev_idx_match_malloced)
1473 		re_free (prev_idx_match);
1474 	      return REG_NOMATCH;
1475 	    }
1476 	}
1477     }
1478   re_node_set_free (&eps_via_nodes);
1479   if (prev_idx_match_malloced)
1480     re_free (prev_idx_match);
1481   return free_fail_stack_return (fs);
1482 }
1483 
1484 static reg_errcode_t
1485 free_fail_stack_return (struct re_fail_stack_t *fs)
1486 {
1487   if (fs)
1488     {
1489       Idx fs_idx;
1490       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1491 	{
1492 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1493 	  re_free (fs->stack[fs_idx].regs);
1494 	}
1495       re_free (fs->stack);
1496     }
1497   return REG_NOERROR;
1498 }
1499 
1500 static void
1501 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1502 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1503 {
1504   int type = dfa->nodes[cur_node].type;
1505   if (type == OP_OPEN_SUBEXP)
1506     {
1507       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1508 
1509       /* We are at the first node of this sub expression.  */
1510       if (reg_num < nmatch)
1511 	{
1512 	  pmatch[reg_num].rm_so = cur_idx;
1513 	  pmatch[reg_num].rm_eo = -1;
1514 	}
1515     }
1516   else if (type == OP_CLOSE_SUBEXP)
1517     {
1518       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1519       if (reg_num < nmatch)
1520 	{
1521 	  /* We are at the last node of this sub expression.  */
1522 	  if (pmatch[reg_num].rm_so < cur_idx)
1523 	    {
1524 	      pmatch[reg_num].rm_eo = cur_idx;
1525 	      /* This is a non-empty match or we are not inside an optional
1526 		 subexpression.  Accept this right away.  */
1527 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1528 	    }
1529 	  else
1530 	    {
1531 	      if (dfa->nodes[cur_node].opt_subexp
1532 		  && prev_idx_match[reg_num].rm_so != -1)
1533 		/* We transited through an empty match for an optional
1534 		   subexpression, like (a?)*, and this is not the subexp's
1535 		   first match.  Copy back the old content of the registers
1536 		   so that matches of an inner subexpression are undone as
1537 		   well, like in ((a?))*.  */
1538 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1539 	      else
1540 		/* We completed a subexpression, but it may be part of
1541 		   an optional one, so do not update PREV_IDX_MATCH.  */
1542 		pmatch[reg_num].rm_eo = cur_idx;
1543 	    }
1544 	}
1545     }
1546 }
1547 
1548 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1549    and sift the nodes in each states according to the following rules.
1550    Updated state_log will be wrote to STATE_LOG.
1551 
1552    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1553      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1554 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1555 	the LAST_NODE, we throw away the node 'a'.
1556      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1557 	string 's' and transit to 'b':
1558 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1559 	   away the node 'a'.
1560 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1561 	    thrown away, we throw away the node 'a'.
1562      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1563 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1564 	   node 'a'.
1565 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1566 	    we throw away the node 'a'.  */
1567 
1568 #define STATE_NODE_CONTAINS(state,node) \
1569   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1570 
1571 static reg_errcode_t
1572 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1573 {
1574   reg_errcode_t err;
1575   int null_cnt = 0;
1576   Idx str_idx = sctx->last_str_idx;
1577   re_node_set cur_dest;
1578 
1579 #ifdef DEBUG
1580   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1581 #endif
1582 
1583   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
1584      transit to the last_node and the last_node itself.  */
1585   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1586   if (__glibc_unlikely (err != REG_NOERROR))
1587     return err;
1588   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1589   if (__glibc_unlikely (err != REG_NOERROR))
1590     goto free_return;
1591 
1592   /* Then check each states in the state_log.  */
1593   while (str_idx > 0)
1594     {
1595       /* Update counters.  */
1596       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1597       if (null_cnt > mctx->max_mb_elem_len)
1598 	{
1599 	  memset (sctx->sifted_states, '\0',
1600 		  sizeof (re_dfastate_t *) * str_idx);
1601 	  re_node_set_free (&cur_dest);
1602 	  return REG_NOERROR;
1603 	}
1604       re_node_set_empty (&cur_dest);
1605       --str_idx;
1606 
1607       if (mctx->state_log[str_idx])
1608 	{
1609 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1610 	  if (__glibc_unlikely (err != REG_NOERROR))
1611 	    goto free_return;
1612 	}
1613 
1614       /* Add all the nodes which satisfy the following conditions:
1615 	 - It can epsilon transit to a node in CUR_DEST.
1616 	 - It is in CUR_SRC.
1617 	 And update state_log.  */
1618       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1619       if (__glibc_unlikely (err != REG_NOERROR))
1620 	goto free_return;
1621     }
1622   err = REG_NOERROR;
1623  free_return:
1624   re_node_set_free (&cur_dest);
1625   return err;
1626 }
1627 
1628 static reg_errcode_t
1629 __attribute_warn_unused_result__
1630 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1631 		     Idx str_idx, re_node_set *cur_dest)
1632 {
1633   const re_dfa_t *const dfa = mctx->dfa;
1634   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1635   Idx i;
1636 
1637   /* Then build the next sifted state.
1638      We build the next sifted state on 'cur_dest', and update
1639      'sifted_states[str_idx]' with 'cur_dest'.
1640      Note:
1641      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1642      'cur_src' points the node_set of the old 'state_log[str_idx]'
1643      (with the epsilon nodes pre-filtered out).  */
1644   for (i = 0; i < cur_src->nelem; i++)
1645     {
1646       Idx prev_node = cur_src->elems[i];
1647       int naccepted = 0;
1648       bool ok;
1649 
1650 #ifdef DEBUG
1651       re_token_type_t type = dfa->nodes[prev_node].type;
1652       assert (!IS_EPSILON_NODE (type));
1653 #endif
1654 #ifdef RE_ENABLE_I18N
1655       /* If the node may accept "multi byte".  */
1656       if (dfa->nodes[prev_node].accept_mb)
1657 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1658 					 str_idx, sctx->last_str_idx);
1659 #endif /* RE_ENABLE_I18N */
1660 
1661       /* We don't check backreferences here.
1662 	 See update_cur_sifted_state().  */
1663       if (!naccepted
1664 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1665 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1666 				  dfa->nexts[prev_node]))
1667 	naccepted = 1;
1668 
1669       if (naccepted == 0)
1670 	continue;
1671 
1672       if (sctx->limits.nelem)
1673 	{
1674 	  Idx to_idx = str_idx + naccepted;
1675 	  if (check_dst_limits (mctx, &sctx->limits,
1676 				dfa->nexts[prev_node], to_idx,
1677 				prev_node, str_idx))
1678 	    continue;
1679 	}
1680       ok = re_node_set_insert (cur_dest, prev_node);
1681       if (__glibc_unlikely (! ok))
1682 	return REG_ESPACE;
1683     }
1684 
1685   return REG_NOERROR;
1686 }
1687 
1688 /* Helper functions.  */
1689 
1690 static reg_errcode_t
1691 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1692 {
1693   Idx top = mctx->state_log_top;
1694 
1695   if ((next_state_log_idx >= mctx->input.bufs_len
1696        && mctx->input.bufs_len < mctx->input.len)
1697       || (next_state_log_idx >= mctx->input.valid_len
1698 	  && mctx->input.valid_len < mctx->input.len))
1699     {
1700       reg_errcode_t err;
1701       err = extend_buffers (mctx, next_state_log_idx + 1);
1702       if (__glibc_unlikely (err != REG_NOERROR))
1703 	return err;
1704     }
1705 
1706   if (top < next_state_log_idx)
1707     {
1708       memset (mctx->state_log + top + 1, '\0',
1709 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1710       mctx->state_log_top = next_state_log_idx;
1711     }
1712   return REG_NOERROR;
1713 }
1714 
1715 static reg_errcode_t
1716 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1717 		   re_dfastate_t **src, Idx num)
1718 {
1719   Idx st_idx;
1720   reg_errcode_t err;
1721   for (st_idx = 0; st_idx < num; ++st_idx)
1722     {
1723       if (dst[st_idx] == NULL)
1724 	dst[st_idx] = src[st_idx];
1725       else if (src[st_idx] != NULL)
1726 	{
1727 	  re_node_set merged_set;
1728 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1729 					&src[st_idx]->nodes);
1730 	  if (__glibc_unlikely (err != REG_NOERROR))
1731 	    return err;
1732 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1733 	  re_node_set_free (&merged_set);
1734 	  if (__glibc_unlikely (err != REG_NOERROR))
1735 	    return err;
1736 	}
1737     }
1738   return REG_NOERROR;
1739 }
1740 
1741 static reg_errcode_t
1742 update_cur_sifted_state (const re_match_context_t *mctx,
1743 			 re_sift_context_t *sctx, Idx str_idx,
1744 			 re_node_set *dest_nodes)
1745 {
1746   const re_dfa_t *const dfa = mctx->dfa;
1747   reg_errcode_t err = REG_NOERROR;
1748   const re_node_set *candidates;
1749   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1750 		: &mctx->state_log[str_idx]->nodes);
1751 
1752   if (dest_nodes->nelem == 0)
1753     sctx->sifted_states[str_idx] = NULL;
1754   else
1755     {
1756       if (candidates)
1757 	{
1758 	  /* At first, add the nodes which can epsilon transit to a node in
1759 	     DEST_NODE.  */
1760 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1761 	  if (__glibc_unlikely (err != REG_NOERROR))
1762 	    return err;
1763 
1764 	  /* Then, check the limitations in the current sift_context.  */
1765 	  if (sctx->limits.nelem)
1766 	    {
1767 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1768 					 mctx->bkref_ents, str_idx);
1769 	      if (__glibc_unlikely (err != REG_NOERROR))
1770 		return err;
1771 	    }
1772 	}
1773 
1774       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1775       if (__glibc_unlikely (err != REG_NOERROR))
1776 	return err;
1777     }
1778 
1779   if (candidates && mctx->state_log[str_idx]->has_backref)
1780     {
1781       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1782       if (__glibc_unlikely (err != REG_NOERROR))
1783 	return err;
1784     }
1785   return REG_NOERROR;
1786 }
1787 
1788 static reg_errcode_t
1789 __attribute_warn_unused_result__
1790 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1791 		       const re_node_set *candidates)
1792 {
1793   reg_errcode_t err = REG_NOERROR;
1794   Idx i;
1795 
1796   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1797   if (__glibc_unlikely (err != REG_NOERROR))
1798     return err;
1799 
1800   if (!state->inveclosure.alloc)
1801     {
1802       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1803       if (__glibc_unlikely (err != REG_NOERROR))
1804 	return REG_ESPACE;
1805       for (i = 0; i < dest_nodes->nelem; i++)
1806 	{
1807 	  err = re_node_set_merge (&state->inveclosure,
1808 				   dfa->inveclosures + dest_nodes->elems[i]);
1809 	  if (__glibc_unlikely (err != REG_NOERROR))
1810 	    return REG_ESPACE;
1811 	}
1812     }
1813   return re_node_set_add_intersect (dest_nodes, candidates,
1814 				    &state->inveclosure);
1815 }
1816 
1817 static reg_errcode_t
1818 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1819 		       const re_node_set *candidates)
1820 {
1821     Idx ecl_idx;
1822     reg_errcode_t err;
1823     re_node_set *inv_eclosure = dfa->inveclosures + node;
1824     re_node_set except_nodes;
1825     re_node_set_init_empty (&except_nodes);
1826     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1827       {
1828 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1829 	if (cur_node == node)
1830 	  continue;
1831 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1832 	  {
1833 	    Idx edst1 = dfa->edests[cur_node].elems[0];
1834 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1835 			 ? dfa->edests[cur_node].elems[1] : -1);
1836 	    if ((!re_node_set_contains (inv_eclosure, edst1)
1837 		 && re_node_set_contains (dest_nodes, edst1))
1838 		|| (edst2 > 0
1839 		    && !re_node_set_contains (inv_eclosure, edst2)
1840 		    && re_node_set_contains (dest_nodes, edst2)))
1841 	      {
1842 		err = re_node_set_add_intersect (&except_nodes, candidates,
1843 						 dfa->inveclosures + cur_node);
1844 		if (__glibc_unlikely (err != REG_NOERROR))
1845 		  {
1846 		    re_node_set_free (&except_nodes);
1847 		    return err;
1848 		  }
1849 	      }
1850 	  }
1851       }
1852     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1853       {
1854 	Idx cur_node = inv_eclosure->elems[ecl_idx];
1855 	if (!re_node_set_contains (&except_nodes, cur_node))
1856 	  {
1857 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1858 	    re_node_set_remove_at (dest_nodes, idx);
1859 	  }
1860       }
1861     re_node_set_free (&except_nodes);
1862     return REG_NOERROR;
1863 }
1864 
1865 static bool
1866 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1867 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1868 {
1869   const re_dfa_t *const dfa = mctx->dfa;
1870   Idx lim_idx, src_pos, dst_pos;
1871 
1872   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1873   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1874   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1875     {
1876       Idx subexp_idx;
1877       struct re_backref_cache_entry *ent;
1878       ent = mctx->bkref_ents + limits->elems[lim_idx];
1879       subexp_idx = dfa->nodes[ent->node].opr.idx;
1880 
1881       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1882 					   subexp_idx, dst_node, dst_idx,
1883 					   dst_bkref_idx);
1884       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1885 					   subexp_idx, src_node, src_idx,
1886 					   src_bkref_idx);
1887 
1888       /* In case of:
1889 	 <src> <dst> ( <subexp> )
1890 	 ( <subexp> ) <src> <dst>
1891 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
1892       if (src_pos == dst_pos)
1893 	continue; /* This is unrelated limitation.  */
1894       else
1895 	return true;
1896     }
1897   return false;
1898 }
1899 
1900 static int
1901 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1902 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
1903 {
1904   const re_dfa_t *const dfa = mctx->dfa;
1905   const re_node_set *eclosures = dfa->eclosures + from_node;
1906   Idx node_idx;
1907 
1908   /* Else, we are on the boundary: examine the nodes on the epsilon
1909      closure.  */
1910   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1911     {
1912       Idx node = eclosures->elems[node_idx];
1913       switch (dfa->nodes[node].type)
1914 	{
1915 	case OP_BACK_REF:
1916 	  if (bkref_idx != -1)
1917 	    {
1918 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1919 	      do
1920 		{
1921 		  Idx dst;
1922 		  int cpos;
1923 
1924 		  if (ent->node != node)
1925 		    continue;
1926 
1927 		  if (subexp_idx < BITSET_WORD_BITS
1928 		      && !(ent->eps_reachable_subexps_map
1929 			   & ((bitset_word_t) 1 << subexp_idx)))
1930 		    continue;
1931 
1932 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
1933 		     OP_CLOSE_SUBEXP cases below.  But, if the
1934 		     destination node is the same node as the source
1935 		     node, don't recurse because it would cause an
1936 		     infinite loop: a regex that exhibits this behavior
1937 		     is ()\1*\1*  */
1938 		  dst = dfa->edests[node].elems[0];
1939 		  if (dst == from_node)
1940 		    {
1941 		      if (boundaries & 1)
1942 			return -1;
1943 		      else /* if (boundaries & 2) */
1944 			return 0;
1945 		    }
1946 
1947 		  cpos =
1948 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1949 						 dst, bkref_idx);
1950 		  if (cpos == -1 /* && (boundaries & 1) */)
1951 		    return -1;
1952 		  if (cpos == 0 && (boundaries & 2))
1953 		    return 0;
1954 
1955 		  if (subexp_idx < BITSET_WORD_BITS)
1956 		    ent->eps_reachable_subexps_map
1957 		      &= ~((bitset_word_t) 1 << subexp_idx);
1958 		}
1959 	      while (ent++->more);
1960 	    }
1961 	  break;
1962 
1963 	case OP_OPEN_SUBEXP:
1964 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1965 	    return -1;
1966 	  break;
1967 
1968 	case OP_CLOSE_SUBEXP:
1969 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1970 	    return 0;
1971 	  break;
1972 
1973 	default:
1974 	    break;
1975 	}
1976     }
1977 
1978   return (boundaries & 2) ? 1 : 0;
1979 }
1980 
1981 static int
1982 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1983 			   Idx subexp_idx, Idx from_node, Idx str_idx,
1984 			   Idx bkref_idx)
1985 {
1986   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1987   int boundaries;
1988 
1989   /* If we are outside the range of the subexpression, return -1 or 1.  */
1990   if (str_idx < lim->subexp_from)
1991     return -1;
1992 
1993   if (lim->subexp_to < str_idx)
1994     return 1;
1995 
1996   /* If we are within the subexpression, return 0.  */
1997   boundaries = (str_idx == lim->subexp_from);
1998   boundaries |= (str_idx == lim->subexp_to) << 1;
1999   if (boundaries == 0)
2000     return 0;
2001 
2002   /* Else, examine epsilon closure.  */
2003   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2004 				      from_node, bkref_idx);
2005 }
2006 
2007 /* Check the limitations of sub expressions LIMITS, and remove the nodes
2008    which are against limitations from DEST_NODES. */
2009 
2010 static reg_errcode_t
2011 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
2012 		     const re_node_set *candidates, re_node_set *limits,
2013 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2014 {
2015   reg_errcode_t err;
2016   Idx node_idx, lim_idx;
2017 
2018   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2019     {
2020       Idx subexp_idx;
2021       struct re_backref_cache_entry *ent;
2022       ent = bkref_ents + limits->elems[lim_idx];
2023 
2024       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
2025 	continue; /* This is unrelated limitation.  */
2026 
2027       subexp_idx = dfa->nodes[ent->node].opr.idx;
2028       if (ent->subexp_to == str_idx)
2029 	{
2030 	  Idx ops_node = -1;
2031 	  Idx cls_node = -1;
2032 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2033 	    {
2034 	      Idx node = dest_nodes->elems[node_idx];
2035 	      re_token_type_t type = dfa->nodes[node].type;
2036 	      if (type == OP_OPEN_SUBEXP
2037 		  && subexp_idx == dfa->nodes[node].opr.idx)
2038 		ops_node = node;
2039 	      else if (type == OP_CLOSE_SUBEXP
2040 		       && subexp_idx == dfa->nodes[node].opr.idx)
2041 		cls_node = node;
2042 	    }
2043 
2044 	  /* Check the limitation of the open subexpression.  */
2045 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2046 	  if (ops_node >= 0)
2047 	    {
2048 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2049 					   candidates);
2050 	      if (__glibc_unlikely (err != REG_NOERROR))
2051 		return err;
2052 	    }
2053 
2054 	  /* Check the limitation of the close subexpression.  */
2055 	  if (cls_node >= 0)
2056 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2057 	      {
2058 		Idx node = dest_nodes->elems[node_idx];
2059 		if (!re_node_set_contains (dfa->inveclosures + node,
2060 					   cls_node)
2061 		    && !re_node_set_contains (dfa->eclosures + node,
2062 					      cls_node))
2063 		  {
2064 		    /* It is against this limitation.
2065 		       Remove it form the current sifted state.  */
2066 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2067 						 candidates);
2068 		    if (__glibc_unlikely (err != REG_NOERROR))
2069 		      return err;
2070 		    --node_idx;
2071 		  }
2072 	      }
2073 	}
2074       else /* (ent->subexp_to != str_idx)  */
2075 	{
2076 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2077 	    {
2078 	      Idx node = dest_nodes->elems[node_idx];
2079 	      re_token_type_t type = dfa->nodes[node].type;
2080 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2081 		{
2082 		  if (subexp_idx != dfa->nodes[node].opr.idx)
2083 		    continue;
2084 		  /* It is against this limitation.
2085 		     Remove it form the current sifted state.  */
2086 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2087 					       candidates);
2088 		  if (__glibc_unlikely (err != REG_NOERROR))
2089 		    return err;
2090 		}
2091 	    }
2092 	}
2093     }
2094   return REG_NOERROR;
2095 }
2096 
2097 static reg_errcode_t
2098 __attribute_warn_unused_result__
2099 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2100 		   Idx str_idx, const re_node_set *candidates)
2101 {
2102   const re_dfa_t *const dfa = mctx->dfa;
2103   reg_errcode_t err;
2104   Idx node_idx, node;
2105   re_sift_context_t local_sctx;
2106   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2107 
2108   if (first_idx == -1)
2109     return REG_NOERROR;
2110 
2111   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
2112 
2113   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2114     {
2115       Idx enabled_idx;
2116       re_token_type_t type;
2117       struct re_backref_cache_entry *entry;
2118       node = candidates->elems[node_idx];
2119       type = dfa->nodes[node].type;
2120       /* Avoid infinite loop for the REs like "()\1+".  */
2121       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2122 	continue;
2123       if (type != OP_BACK_REF)
2124 	continue;
2125 
2126       entry = mctx->bkref_ents + first_idx;
2127       enabled_idx = first_idx;
2128       do
2129 	{
2130 	  Idx subexp_len;
2131 	  Idx to_idx;
2132 	  Idx dst_node;
2133 	  bool ok;
2134 	  re_dfastate_t *cur_state;
2135 
2136 	  if (entry->node != node)
2137 	    continue;
2138 	  subexp_len = entry->subexp_to - entry->subexp_from;
2139 	  to_idx = str_idx + subexp_len;
2140 	  dst_node = (subexp_len ? dfa->nexts[node]
2141 		      : dfa->edests[node].elems[0]);
2142 
2143 	  if (to_idx > sctx->last_str_idx
2144 	      || sctx->sifted_states[to_idx] == NULL
2145 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2146 	      || check_dst_limits (mctx, &sctx->limits, node,
2147 				   str_idx, dst_node, to_idx))
2148 	    continue;
2149 
2150 	  if (local_sctx.sifted_states == NULL)
2151 	    {
2152 	      local_sctx = *sctx;
2153 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2154 	      if (__glibc_unlikely (err != REG_NOERROR))
2155 		goto free_return;
2156 	    }
2157 	  local_sctx.last_node = node;
2158 	  local_sctx.last_str_idx = str_idx;
2159 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2160 	  if (__glibc_unlikely (! ok))
2161 	    {
2162 	      err = REG_ESPACE;
2163 	      goto free_return;
2164 	    }
2165 	  cur_state = local_sctx.sifted_states[str_idx];
2166 	  err = sift_states_backward (mctx, &local_sctx);
2167 	  if (__glibc_unlikely (err != REG_NOERROR))
2168 	    goto free_return;
2169 	  if (sctx->limited_states != NULL)
2170 	    {
2171 	      err = merge_state_array (dfa, sctx->limited_states,
2172 				       local_sctx.sifted_states,
2173 				       str_idx + 1);
2174 	      if (__glibc_unlikely (err != REG_NOERROR))
2175 		goto free_return;
2176 	    }
2177 	  local_sctx.sifted_states[str_idx] = cur_state;
2178 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
2179 
2180 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
2181 	  entry = mctx->bkref_ents + enabled_idx;
2182 	}
2183       while (enabled_idx++, entry++->more);
2184     }
2185   err = REG_NOERROR;
2186  free_return:
2187   if (local_sctx.sifted_states != NULL)
2188     {
2189       re_node_set_free (&local_sctx.limits);
2190     }
2191 
2192   return err;
2193 }
2194 
2195 
2196 #ifdef RE_ENABLE_I18N
2197 static int
2198 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2199 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
2200 {
2201   const re_dfa_t *const dfa = mctx->dfa;
2202   int naccepted;
2203   /* Check the node can accept "multi byte".  */
2204   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2205   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
2206       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2207 			    dfa->nexts[node_idx]))
2208     /* The node can't accept the "multi byte", or the
2209        destination was already thrown away, then the node
2210        could't accept the current input "multi byte".   */
2211     naccepted = 0;
2212   /* Otherwise, it is sure that the node could accept
2213      'naccepted' bytes input.  */
2214   return naccepted;
2215 }
2216 #endif /* RE_ENABLE_I18N */
2217 
2218 
2219 /* Functions for state transition.  */
2220 
2221 /* Return the next state to which the current state STATE will transit by
2222    accepting the current input byte, and update STATE_LOG if necessary.
2223    If STATE can accept a multibyte char/collating element/back reference
2224    update the destination of STATE_LOG.  */
2225 
2226 static re_dfastate_t *
2227 __attribute_warn_unused_result__
2228 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2229 	       re_dfastate_t *state)
2230 {
2231   re_dfastate_t **trtable;
2232   unsigned char ch;
2233 
2234 #ifdef RE_ENABLE_I18N
2235   /* If the current state can accept multibyte.  */
2236   if (__glibc_unlikely (state->accept_mb))
2237     {
2238       *err = transit_state_mb (mctx, state);
2239       if (__glibc_unlikely (*err != REG_NOERROR))
2240 	return NULL;
2241     }
2242 #endif /* RE_ENABLE_I18N */
2243 
2244   /* Then decide the next state with the single byte.  */
2245 #if 0
2246   if (0)
2247     /* don't use transition table  */
2248     return transit_state_sb (err, mctx, state);
2249 #endif
2250 
2251   /* Use transition table  */
2252   ch = re_string_fetch_byte (&mctx->input);
2253   for (;;)
2254     {
2255       trtable = state->trtable;
2256       if (__glibc_likely (trtable != NULL))
2257 	return trtable[ch];
2258 
2259       trtable = state->word_trtable;
2260       if (__glibc_likely (trtable != NULL))
2261 	{
2262 	  unsigned int context;
2263 	  context
2264 	    = re_string_context_at (&mctx->input,
2265 				    re_string_cur_idx (&mctx->input) - 1,
2266 				    mctx->eflags);
2267 	  if (IS_WORD_CONTEXT (context))
2268 	    return trtable[ch + SBC_MAX];
2269 	  else
2270 	    return trtable[ch];
2271 	}
2272 
2273       if (!build_trtable (mctx->dfa, state))
2274 	{
2275 	  *err = REG_ESPACE;
2276 	  return NULL;
2277 	}
2278 
2279       /* Retry, we now have a transition table.  */
2280     }
2281 }
2282 
2283 /* Update the state_log if we need */
2284 static re_dfastate_t *
2285 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2286 		      re_dfastate_t *next_state)
2287 {
2288   const re_dfa_t *const dfa = mctx->dfa;
2289   Idx cur_idx = re_string_cur_idx (&mctx->input);
2290 
2291   if (cur_idx > mctx->state_log_top)
2292     {
2293       mctx->state_log[cur_idx] = next_state;
2294       mctx->state_log_top = cur_idx;
2295     }
2296   else if (mctx->state_log[cur_idx] == 0)
2297     {
2298       mctx->state_log[cur_idx] = next_state;
2299     }
2300   else
2301     {
2302       re_dfastate_t *pstate;
2303       unsigned int context;
2304       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2305       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
2306 	 the destination of a multibyte char/collating element/
2307 	 back reference.  Then the next state is the union set of
2308 	 these destinations and the results of the transition table.  */
2309       pstate = mctx->state_log[cur_idx];
2310       log_nodes = pstate->entrance_nodes;
2311       if (next_state != NULL)
2312 	{
2313 	  table_nodes = next_state->entrance_nodes;
2314 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
2315 					     log_nodes);
2316 	  if (__glibc_unlikely (*err != REG_NOERROR))
2317 	    return NULL;
2318 	}
2319       else
2320 	next_nodes = *log_nodes;
2321       /* Note: We already add the nodes of the initial state,
2322 	 then we don't need to add them here.  */
2323 
2324       context = re_string_context_at (&mctx->input,
2325 				      re_string_cur_idx (&mctx->input) - 1,
2326 				      mctx->eflags);
2327       next_state = mctx->state_log[cur_idx]
2328 	= re_acquire_state_context (err, dfa, &next_nodes, context);
2329       /* We don't need to check errors here, since the return value of
2330 	 this function is next_state and ERR is already set.  */
2331 
2332       if (table_nodes != NULL)
2333 	re_node_set_free (&next_nodes);
2334     }
2335 
2336   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2337     {
2338       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
2339 	 later.  We must check them here, since the back references in the
2340 	 next state might use them.  */
2341       *err = check_subexp_matching_top (mctx, &next_state->nodes,
2342 					cur_idx);
2343       if (__glibc_unlikely (*err != REG_NOERROR))
2344 	return NULL;
2345 
2346       /* If the next state has back references.  */
2347       if (next_state->has_backref)
2348 	{
2349 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2350 	  if (__glibc_unlikely (*err != REG_NOERROR))
2351 	    return NULL;
2352 	  next_state = mctx->state_log[cur_idx];
2353 	}
2354     }
2355 
2356   return next_state;
2357 }
2358 
2359 /* Skip bytes in the input that correspond to part of a
2360    multi-byte match, then look in the log for a state
2361    from which to restart matching.  */
2362 static re_dfastate_t *
2363 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2364 {
2365   re_dfastate_t *cur_state;
2366   do
2367     {
2368       Idx max = mctx->state_log_top;
2369       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2370 
2371       do
2372 	{
2373 	  if (++cur_str_idx > max)
2374 	    return NULL;
2375 	  re_string_skip_bytes (&mctx->input, 1);
2376 	}
2377       while (mctx->state_log[cur_str_idx] == NULL);
2378 
2379       cur_state = merge_state_with_log (err, mctx, NULL);
2380     }
2381   while (*err == REG_NOERROR && cur_state == NULL);
2382   return cur_state;
2383 }
2384 
2385 /* Helper functions for transit_state.  */
2386 
2387 /* From the node set CUR_NODES, pick up the nodes whose types are
2388    OP_OPEN_SUBEXP and which have corresponding back references in the regular
2389    expression. And register them to use them later for evaluating the
2390    corresponding back references.  */
2391 
2392 static reg_errcode_t
2393 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2394 			   Idx str_idx)
2395 {
2396   const re_dfa_t *const dfa = mctx->dfa;
2397   Idx node_idx;
2398   reg_errcode_t err;
2399 
2400   /* TODO: This isn't efficient.
2401 	   Because there might be more than one nodes whose types are
2402 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2403 	   nodes.
2404 	   E.g. RE: (a){2}  */
2405   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2406     {
2407       Idx node = cur_nodes->elems[node_idx];
2408       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2409 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2410 	  && (dfa->used_bkref_map
2411 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2412 	{
2413 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2414 	  if (__glibc_unlikely (err != REG_NOERROR))
2415 	    return err;
2416 	}
2417     }
2418   return REG_NOERROR;
2419 }
2420 
2421 #if 0
2422 /* Return the next state to which the current state STATE will transit by
2423    accepting the current input byte.  */
2424 
2425 static re_dfastate_t *
2426 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2427 		  re_dfastate_t *state)
2428 {
2429   const re_dfa_t *const dfa = mctx->dfa;
2430   re_node_set next_nodes;
2431   re_dfastate_t *next_state;
2432   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2433   unsigned int context;
2434 
2435   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2436   if (__glibc_unlikely (*err != REG_NOERROR))
2437     return NULL;
2438   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2439     {
2440       Idx cur_node = state->nodes.elems[node_cnt];
2441       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2442 	{
2443 	  *err = re_node_set_merge (&next_nodes,
2444 				    dfa->eclosures + dfa->nexts[cur_node]);
2445 	  if (__glibc_unlikely (*err != REG_NOERROR))
2446 	    {
2447 	      re_node_set_free (&next_nodes);
2448 	      return NULL;
2449 	    }
2450 	}
2451     }
2452   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2453   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2454   /* We don't need to check errors here, since the return value of
2455      this function is next_state and ERR is already set.  */
2456 
2457   re_node_set_free (&next_nodes);
2458   re_string_skip_bytes (&mctx->input, 1);
2459   return next_state;
2460 }
2461 #endif
2462 
2463 #ifdef RE_ENABLE_I18N
2464 static reg_errcode_t
2465 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2466 {
2467   const re_dfa_t *const dfa = mctx->dfa;
2468   reg_errcode_t err;
2469   Idx i;
2470 
2471   for (i = 0; i < pstate->nodes.nelem; ++i)
2472     {
2473       re_node_set dest_nodes, *new_nodes;
2474       Idx cur_node_idx = pstate->nodes.elems[i];
2475       int naccepted;
2476       Idx dest_idx;
2477       unsigned int context;
2478       re_dfastate_t *dest_state;
2479 
2480       if (!dfa->nodes[cur_node_idx].accept_mb)
2481 	continue;
2482 
2483       if (dfa->nodes[cur_node_idx].constraint)
2484 	{
2485 	  context = re_string_context_at (&mctx->input,
2486 					  re_string_cur_idx (&mctx->input),
2487 					  mctx->eflags);
2488 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2489 					   context))
2490 	    continue;
2491 	}
2492 
2493       /* How many bytes the node can accept?  */
2494       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2495 					   re_string_cur_idx (&mctx->input));
2496       if (naccepted == 0)
2497 	continue;
2498 
2499       /* The node can accepts 'naccepted' bytes.  */
2500       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2501       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2502 			       : mctx->max_mb_elem_len);
2503       err = clean_state_log_if_needed (mctx, dest_idx);
2504       if (__glibc_unlikely (err != REG_NOERROR))
2505 	return err;
2506 #ifdef DEBUG
2507       assert (dfa->nexts[cur_node_idx] != -1);
2508 #endif
2509       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2510 
2511       dest_state = mctx->state_log[dest_idx];
2512       if (dest_state == NULL)
2513 	dest_nodes = *new_nodes;
2514       else
2515 	{
2516 	  err = re_node_set_init_union (&dest_nodes,
2517 					dest_state->entrance_nodes, new_nodes);
2518 	  if (__glibc_unlikely (err != REG_NOERROR))
2519 	    return err;
2520 	}
2521       context = re_string_context_at (&mctx->input, dest_idx - 1,
2522 				      mctx->eflags);
2523       mctx->state_log[dest_idx]
2524 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2525       if (dest_state != NULL)
2526 	re_node_set_free (&dest_nodes);
2527       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2528 			    && err != REG_NOERROR))
2529 	return err;
2530     }
2531   return REG_NOERROR;
2532 }
2533 #endif /* RE_ENABLE_I18N */
2534 
2535 static reg_errcode_t
2536 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2537 {
2538   const re_dfa_t *const dfa = mctx->dfa;
2539   reg_errcode_t err;
2540   Idx i;
2541   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2542 
2543   for (i = 0; i < nodes->nelem; ++i)
2544     {
2545       Idx dest_str_idx, prev_nelem, bkc_idx;
2546       Idx node_idx = nodes->elems[i];
2547       unsigned int context;
2548       const re_token_t *node = dfa->nodes + node_idx;
2549       re_node_set *new_dest_nodes;
2550 
2551       /* Check whether 'node' is a backreference or not.  */
2552       if (node->type != OP_BACK_REF)
2553 	continue;
2554 
2555       if (node->constraint)
2556 	{
2557 	  context = re_string_context_at (&mctx->input, cur_str_idx,
2558 					  mctx->eflags);
2559 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2560 	    continue;
2561 	}
2562 
2563       /* 'node' is a backreference.
2564 	 Check the substring which the substring matched.  */
2565       bkc_idx = mctx->nbkref_ents;
2566       err = get_subexp (mctx, node_idx, cur_str_idx);
2567       if (__glibc_unlikely (err != REG_NOERROR))
2568 	goto free_return;
2569 
2570       /* And add the epsilon closures (which is 'new_dest_nodes') of
2571 	 the backreference to appropriate state_log.  */
2572 #ifdef DEBUG
2573       assert (dfa->nexts[node_idx] != -1);
2574 #endif
2575       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2576 	{
2577 	  Idx subexp_len;
2578 	  re_dfastate_t *dest_state;
2579 	  struct re_backref_cache_entry *bkref_ent;
2580 	  bkref_ent = mctx->bkref_ents + bkc_idx;
2581 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2582 	    continue;
2583 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2584 	  new_dest_nodes = (subexp_len == 0
2585 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2586 			    : dfa->eclosures + dfa->nexts[node_idx]);
2587 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2588 			  - bkref_ent->subexp_from);
2589 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2590 					  mctx->eflags);
2591 	  dest_state = mctx->state_log[dest_str_idx];
2592 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2593 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2594 	  /* Add 'new_dest_node' to state_log.  */
2595 	  if (dest_state == NULL)
2596 	    {
2597 	      mctx->state_log[dest_str_idx]
2598 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
2599 					    context);
2600 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2601 				    && err != REG_NOERROR))
2602 		goto free_return;
2603 	    }
2604 	  else
2605 	    {
2606 	      re_node_set dest_nodes;
2607 	      err = re_node_set_init_union (&dest_nodes,
2608 					    dest_state->entrance_nodes,
2609 					    new_dest_nodes);
2610 	      if (__glibc_unlikely (err != REG_NOERROR))
2611 		{
2612 		  re_node_set_free (&dest_nodes);
2613 		  goto free_return;
2614 		}
2615 	      mctx->state_log[dest_str_idx]
2616 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2617 	      re_node_set_free (&dest_nodes);
2618 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2619 				    && err != REG_NOERROR))
2620 		goto free_return;
2621 	    }
2622 	  /* We need to check recursively if the backreference can epsilon
2623 	     transit.  */
2624 	  if (subexp_len == 0
2625 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2626 	    {
2627 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
2628 					       cur_str_idx);
2629 	      if (__glibc_unlikely (err != REG_NOERROR))
2630 		goto free_return;
2631 	      err = transit_state_bkref (mctx, new_dest_nodes);
2632 	      if (__glibc_unlikely (err != REG_NOERROR))
2633 		goto free_return;
2634 	    }
2635 	}
2636     }
2637   err = REG_NOERROR;
2638  free_return:
2639   return err;
2640 }
2641 
2642 /* Enumerate all the candidates which the backreference BKREF_NODE can match
2643    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2644    Note that we might collect inappropriate candidates here.
2645    However, the cost of checking them strictly here is too high, then we
2646    delay these checking for prune_impossible_nodes().  */
2647 
2648 static reg_errcode_t
2649 __attribute_warn_unused_result__
2650 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2651 {
2652   const re_dfa_t *const dfa = mctx->dfa;
2653   Idx subexp_num, sub_top_idx;
2654   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2655   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
2656   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2657   if (cache_idx != -1)
2658     {
2659       const struct re_backref_cache_entry *entry
2660 	= mctx->bkref_ents + cache_idx;
2661       do
2662 	if (entry->node == bkref_node)
2663 	  return REG_NOERROR; /* We already checked it.  */
2664       while (entry++->more);
2665     }
2666 
2667   subexp_num = dfa->nodes[bkref_node].opr.idx;
2668 
2669   /* For each sub expression  */
2670   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2671     {
2672       reg_errcode_t err;
2673       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2674       re_sub_match_last_t *sub_last;
2675       Idx sub_last_idx, sl_str, bkref_str_off;
2676 
2677       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2678 	continue; /* It isn't related.  */
2679 
2680       sl_str = sub_top->str_idx;
2681       bkref_str_off = bkref_str_idx;
2682       /* At first, check the last node of sub expressions we already
2683 	 evaluated.  */
2684       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2685 	{
2686 	  regoff_t sl_str_diff;
2687 	  sub_last = sub_top->lasts[sub_last_idx];
2688 	  sl_str_diff = sub_last->str_idx - sl_str;
2689 	  /* The matched string by the sub expression match with the substring
2690 	     at the back reference?  */
2691 	  if (sl_str_diff > 0)
2692 	    {
2693 	      if (__glibc_unlikely (bkref_str_off + sl_str_diff
2694 				    > mctx->input.valid_len))
2695 		{
2696 		  /* Not enough chars for a successful match.  */
2697 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
2698 		    break;
2699 
2700 		  err = clean_state_log_if_needed (mctx,
2701 						   bkref_str_off
2702 						   + sl_str_diff);
2703 		  if (__glibc_unlikely (err != REG_NOERROR))
2704 		    return err;
2705 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2706 		}
2707 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2708 		/* We don't need to search this sub expression any more.  */
2709 		break;
2710 	    }
2711 	  bkref_str_off += sl_str_diff;
2712 	  sl_str += sl_str_diff;
2713 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2714 				bkref_str_idx);
2715 
2716 	  /* Reload buf, since the preceding call might have reallocated
2717 	     the buffer.  */
2718 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2719 
2720 	  if (err == REG_NOMATCH)
2721 	    continue;
2722 	  if (__glibc_unlikely (err != REG_NOERROR))
2723 	    return err;
2724 	}
2725 
2726       if (sub_last_idx < sub_top->nlasts)
2727 	continue;
2728       if (sub_last_idx > 0)
2729 	++sl_str;
2730       /* Then, search for the other last nodes of the sub expression.  */
2731       for (; sl_str <= bkref_str_idx; ++sl_str)
2732 	{
2733 	  Idx cls_node;
2734 	  regoff_t sl_str_off;
2735 	  const re_node_set *nodes;
2736 	  sl_str_off = sl_str - sub_top->str_idx;
2737 	  /* The matched string by the sub expression match with the substring
2738 	     at the back reference?  */
2739 	  if (sl_str_off > 0)
2740 	    {
2741 	      if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2742 		{
2743 		  /* If we are at the end of the input, we cannot match.  */
2744 		  if (bkref_str_off >= mctx->input.len)
2745 		    break;
2746 
2747 		  err = extend_buffers (mctx, bkref_str_off + 1);
2748 		  if (__glibc_unlikely (err != REG_NOERROR))
2749 		    return err;
2750 
2751 		  buf = (const char *) re_string_get_buffer (&mctx->input);
2752 		}
2753 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
2754 		break; /* We don't need to search this sub expression
2755 			  any more.  */
2756 	    }
2757 	  if (mctx->state_log[sl_str] == NULL)
2758 	    continue;
2759 	  /* Does this state have a ')' of the sub expression?  */
2760 	  nodes = &mctx->state_log[sl_str]->nodes;
2761 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
2762 				       OP_CLOSE_SUBEXP);
2763 	  if (cls_node == -1)
2764 	    continue; /* No.  */
2765 	  if (sub_top->path == NULL)
2766 	    {
2767 	      sub_top->path = calloc (sizeof (state_array_t),
2768 				      sl_str - sub_top->str_idx + 1);
2769 	      if (sub_top->path == NULL)
2770 		return REG_ESPACE;
2771 	    }
2772 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2773 	     in the current context?  */
2774 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
2775 			       sub_top->str_idx, cls_node, sl_str,
2776 			       OP_CLOSE_SUBEXP);
2777 	  if (err == REG_NOMATCH)
2778 	      continue;
2779 	  if (__glibc_unlikely (err != REG_NOERROR))
2780 	      return err;
2781 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2782 	  if (__glibc_unlikely (sub_last == NULL))
2783 	    return REG_ESPACE;
2784 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2785 				bkref_str_idx);
2786 	  buf = (const char *) re_string_get_buffer (&mctx->input);
2787 	  if (err == REG_NOMATCH)
2788 	    continue;
2789 	  if (__glibc_unlikely (err != REG_NOERROR))
2790 	    return err;
2791 	}
2792     }
2793   return REG_NOERROR;
2794 }
2795 
2796 /* Helper functions for get_subexp().  */
2797 
2798 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2799    If it can arrive, register the sub expression expressed with SUB_TOP
2800    and SUB_LAST.  */
2801 
2802 static reg_errcode_t
2803 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2804 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2805 {
2806   reg_errcode_t err;
2807   Idx to_idx;
2808   /* Can the subexpression arrive the back reference?  */
2809   err = check_arrival (mctx, &sub_last->path, sub_last->node,
2810 		       sub_last->str_idx, bkref_node, bkref_str,
2811 		       OP_OPEN_SUBEXP);
2812   if (err != REG_NOERROR)
2813     return err;
2814   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2815 			     sub_last->str_idx);
2816   if (__glibc_unlikely (err != REG_NOERROR))
2817     return err;
2818   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2819   return clean_state_log_if_needed (mctx, to_idx);
2820 }
2821 
2822 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2823    Search '(' if FL_OPEN, or search ')' otherwise.
2824    TODO: This function isn't efficient...
2825 	 Because there might be more than one nodes whose types are
2826 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2827 	 nodes.
2828 	 E.g. RE: (a){2}  */
2829 
2830 static Idx
2831 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2832 		  Idx subexp_idx, int type)
2833 {
2834   Idx cls_idx;
2835   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2836     {
2837       Idx cls_node = nodes->elems[cls_idx];
2838       const re_token_t *node = dfa->nodes + cls_node;
2839       if (node->type == type
2840 	  && node->opr.idx == subexp_idx)
2841 	return cls_node;
2842     }
2843   return -1;
2844 }
2845 
2846 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2847    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
2848    heavily reused.
2849    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
2850 
2851 static reg_errcode_t
2852 __attribute_warn_unused_result__
2853 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2854 	       Idx top_str, Idx last_node, Idx last_str, int type)
2855 {
2856   const re_dfa_t *const dfa = mctx->dfa;
2857   reg_errcode_t err = REG_NOERROR;
2858   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2859   re_dfastate_t *cur_state = NULL;
2860   re_node_set *cur_nodes, next_nodes;
2861   re_dfastate_t **backup_state_log;
2862   unsigned int context;
2863 
2864   subexp_num = dfa->nodes[top_node].opr.idx;
2865   /* Extend the buffer if we need.  */
2866   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2867     {
2868       re_dfastate_t **new_array;
2869       Idx old_alloc = path->alloc;
2870       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2871       Idx new_alloc;
2872       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2873 	return REG_ESPACE;
2874       new_alloc = old_alloc + incr_alloc;
2875       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2876 	return REG_ESPACE;
2877       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2878       if (__glibc_unlikely (new_array == NULL))
2879 	return REG_ESPACE;
2880       path->array = new_array;
2881       path->alloc = new_alloc;
2882       memset (new_array + old_alloc, '\0',
2883 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2884     }
2885 
2886   str_idx = path->next_idx ? path->next_idx : top_str;
2887 
2888   /* Temporary modify MCTX.  */
2889   backup_state_log = mctx->state_log;
2890   backup_cur_idx = mctx->input.cur_idx;
2891   mctx->state_log = path->array;
2892   mctx->input.cur_idx = str_idx;
2893 
2894   /* Setup initial node set.  */
2895   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2896   if (str_idx == top_str)
2897     {
2898       err = re_node_set_init_1 (&next_nodes, top_node);
2899       if (__glibc_unlikely (err != REG_NOERROR))
2900 	return err;
2901       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2902       if (__glibc_unlikely (err != REG_NOERROR))
2903 	{
2904 	  re_node_set_free (&next_nodes);
2905 	  return err;
2906 	}
2907     }
2908   else
2909     {
2910       cur_state = mctx->state_log[str_idx];
2911       if (cur_state && cur_state->has_backref)
2912 	{
2913 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2914 	  if (__glibc_unlikely (err != REG_NOERROR))
2915 	    return err;
2916 	}
2917       else
2918 	re_node_set_init_empty (&next_nodes);
2919     }
2920   if (str_idx == top_str || (cur_state && cur_state->has_backref))
2921     {
2922       if (next_nodes.nelem)
2923 	{
2924 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2925 				    subexp_num, type);
2926 	  if (__glibc_unlikely (err != REG_NOERROR))
2927 	    {
2928 	      re_node_set_free (&next_nodes);
2929 	      return err;
2930 	    }
2931 	}
2932       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2933       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2934 	{
2935 	  re_node_set_free (&next_nodes);
2936 	  return err;
2937 	}
2938       mctx->state_log[str_idx] = cur_state;
2939     }
2940 
2941   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2942     {
2943       re_node_set_empty (&next_nodes);
2944       if (mctx->state_log[str_idx + 1])
2945 	{
2946 	  err = re_node_set_merge (&next_nodes,
2947 				   &mctx->state_log[str_idx + 1]->nodes);
2948 	  if (__glibc_unlikely (err != REG_NOERROR))
2949 	    {
2950 	      re_node_set_free (&next_nodes);
2951 	      return err;
2952 	    }
2953 	}
2954       if (cur_state)
2955 	{
2956 	  err = check_arrival_add_next_nodes (mctx, str_idx,
2957 					      &cur_state->non_eps_nodes,
2958 					      &next_nodes);
2959 	  if (__glibc_unlikely (err != REG_NOERROR))
2960 	    {
2961 	      re_node_set_free (&next_nodes);
2962 	      return err;
2963 	    }
2964 	}
2965       ++str_idx;
2966       if (next_nodes.nelem)
2967 	{
2968 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2969 	  if (__glibc_unlikely (err != REG_NOERROR))
2970 	    {
2971 	      re_node_set_free (&next_nodes);
2972 	      return err;
2973 	    }
2974 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2975 				    subexp_num, type);
2976 	  if (__glibc_unlikely (err != REG_NOERROR))
2977 	    {
2978 	      re_node_set_free (&next_nodes);
2979 	      return err;
2980 	    }
2981 	}
2982       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2983       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2984       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2985 	{
2986 	  re_node_set_free (&next_nodes);
2987 	  return err;
2988 	}
2989       mctx->state_log[str_idx] = cur_state;
2990       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2991     }
2992   re_node_set_free (&next_nodes);
2993   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2994 	       : &mctx->state_log[last_str]->nodes);
2995   path->next_idx = str_idx;
2996 
2997   /* Fix MCTX.  */
2998   mctx->state_log = backup_state_log;
2999   mctx->input.cur_idx = backup_cur_idx;
3000 
3001   /* Then check the current node set has the node LAST_NODE.  */
3002   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
3003     return REG_NOERROR;
3004 
3005   return REG_NOMATCH;
3006 }
3007 
3008 /* Helper functions for check_arrival.  */
3009 
3010 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
3011    to NEXT_NODES.
3012    TODO: This function is similar to the functions transit_state*(),
3013 	 however this function has many additional works.
3014 	 Can't we unify them?  */
3015 
3016 static reg_errcode_t
3017 __attribute_warn_unused_result__
3018 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3019 			      re_node_set *cur_nodes, re_node_set *next_nodes)
3020 {
3021   const re_dfa_t *const dfa = mctx->dfa;
3022   bool ok;
3023   Idx cur_idx;
3024 #ifdef RE_ENABLE_I18N
3025   reg_errcode_t err = REG_NOERROR;
3026 #endif
3027   re_node_set union_set;
3028   re_node_set_init_empty (&union_set);
3029   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3030     {
3031       int naccepted = 0;
3032       Idx cur_node = cur_nodes->elems[cur_idx];
3033 #ifdef DEBUG
3034       re_token_type_t type = dfa->nodes[cur_node].type;
3035       assert (!IS_EPSILON_NODE (type));
3036 #endif
3037 #ifdef RE_ENABLE_I18N
3038       /* If the node may accept "multi byte".  */
3039       if (dfa->nodes[cur_node].accept_mb)
3040 	{
3041 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3042 					       str_idx);
3043 	  if (naccepted > 1)
3044 	    {
3045 	      re_dfastate_t *dest_state;
3046 	      Idx next_node = dfa->nexts[cur_node];
3047 	      Idx next_idx = str_idx + naccepted;
3048 	      dest_state = mctx->state_log[next_idx];
3049 	      re_node_set_empty (&union_set);
3050 	      if (dest_state)
3051 		{
3052 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3053 		  if (__glibc_unlikely (err != REG_NOERROR))
3054 		    {
3055 		      re_node_set_free (&union_set);
3056 		      return err;
3057 		    }
3058 		}
3059 	      ok = re_node_set_insert (&union_set, next_node);
3060 	      if (__glibc_unlikely (! ok))
3061 		{
3062 		  re_node_set_free (&union_set);
3063 		  return REG_ESPACE;
3064 		}
3065 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3066 							    &union_set);
3067 	      if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3068 				    && err != REG_NOERROR))
3069 		{
3070 		  re_node_set_free (&union_set);
3071 		  return err;
3072 		}
3073 	    }
3074 	}
3075 #endif /* RE_ENABLE_I18N */
3076       if (naccepted
3077 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3078 	{
3079 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3080 	  if (__glibc_unlikely (! ok))
3081 	    {
3082 	      re_node_set_free (&union_set);
3083 	      return REG_ESPACE;
3084 	    }
3085 	}
3086     }
3087   re_node_set_free (&union_set);
3088   return REG_NOERROR;
3089 }
3090 
3091 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
3092    CUR_NODES, however exclude the nodes which are:
3093     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3094     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3095 */
3096 
3097 static reg_errcode_t
3098 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3099 			  Idx ex_subexp, int type)
3100 {
3101   reg_errcode_t err;
3102   Idx idx, outside_node;
3103   re_node_set new_nodes;
3104 #ifdef DEBUG
3105   assert (cur_nodes->nelem);
3106 #endif
3107   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3108   if (__glibc_unlikely (err != REG_NOERROR))
3109     return err;
3110   /* Create a new node set NEW_NODES with the nodes which are epsilon
3111      closures of the node in CUR_NODES.  */
3112 
3113   for (idx = 0; idx < cur_nodes->nelem; ++idx)
3114     {
3115       Idx cur_node = cur_nodes->elems[idx];
3116       const re_node_set *eclosure = dfa->eclosures + cur_node;
3117       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3118       if (outside_node == -1)
3119 	{
3120 	  /* There are no problematic nodes, just merge them.  */
3121 	  err = re_node_set_merge (&new_nodes, eclosure);
3122 	  if (__glibc_unlikely (err != REG_NOERROR))
3123 	    {
3124 	      re_node_set_free (&new_nodes);
3125 	      return err;
3126 	    }
3127 	}
3128       else
3129 	{
3130 	  /* There are problematic nodes, re-calculate incrementally.  */
3131 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3132 					      ex_subexp, type);
3133 	  if (__glibc_unlikely (err != REG_NOERROR))
3134 	    {
3135 	      re_node_set_free (&new_nodes);
3136 	      return err;
3137 	    }
3138 	}
3139     }
3140   re_node_set_free (cur_nodes);
3141   *cur_nodes = new_nodes;
3142   return REG_NOERROR;
3143 }
3144 
3145 /* Helper function for check_arrival_expand_ecl.
3146    Check incrementally the epsilon closure of TARGET, and if it isn't
3147    problematic append it to DST_NODES.  */
3148 
3149 static reg_errcode_t
3150 __attribute_warn_unused_result__
3151 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3152 			      Idx target, Idx ex_subexp, int type)
3153 {
3154   Idx cur_node;
3155   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3156     {
3157       bool ok;
3158 
3159       if (dfa->nodes[cur_node].type == type
3160 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
3161 	{
3162 	  if (type == OP_CLOSE_SUBEXP)
3163 	    {
3164 	      ok = re_node_set_insert (dst_nodes, cur_node);
3165 	      if (__glibc_unlikely (! ok))
3166 		return REG_ESPACE;
3167 	    }
3168 	  break;
3169 	}
3170       ok = re_node_set_insert (dst_nodes, cur_node);
3171       if (__glibc_unlikely (! ok))
3172 	return REG_ESPACE;
3173       if (dfa->edests[cur_node].nelem == 0)
3174 	break;
3175       if (dfa->edests[cur_node].nelem == 2)
3176 	{
3177 	  reg_errcode_t err;
3178 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3179 					      dfa->edests[cur_node].elems[1],
3180 					      ex_subexp, type);
3181 	  if (__glibc_unlikely (err != REG_NOERROR))
3182 	    return err;
3183 	}
3184       cur_node = dfa->edests[cur_node].elems[0];
3185     }
3186   return REG_NOERROR;
3187 }
3188 
3189 
3190 /* For all the back references in the current state, calculate the
3191    destination of the back references by the appropriate entry
3192    in MCTX->BKREF_ENTS.  */
3193 
3194 static reg_errcode_t
3195 __attribute_warn_unused_result__
3196 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3197 		    Idx cur_str, Idx subexp_num, int type)
3198 {
3199   const re_dfa_t *const dfa = mctx->dfa;
3200   reg_errcode_t err;
3201   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3202   struct re_backref_cache_entry *ent;
3203 
3204   if (cache_idx_start == -1)
3205     return REG_NOERROR;
3206 
3207  restart:
3208   ent = mctx->bkref_ents + cache_idx_start;
3209   do
3210     {
3211       Idx to_idx, next_node;
3212 
3213       /* Is this entry ENT is appropriate?  */
3214       if (!re_node_set_contains (cur_nodes, ent->node))
3215 	continue; /* No.  */
3216 
3217       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3218       /* Calculate the destination of the back reference, and append it
3219 	 to MCTX->STATE_LOG.  */
3220       if (to_idx == cur_str)
3221 	{
3222 	  /* The backreference did epsilon transit, we must re-check all the
3223 	     node in the current state.  */
3224 	  re_node_set new_dests;
3225 	  reg_errcode_t err2, err3;
3226 	  next_node = dfa->edests[ent->node].elems[0];
3227 	  if (re_node_set_contains (cur_nodes, next_node))
3228 	    continue;
3229 	  err = re_node_set_init_1 (&new_dests, next_node);
3230 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3231 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
3232 	  re_node_set_free (&new_dests);
3233 	  if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3234 				|| err3 != REG_NOERROR))
3235 	    {
3236 	      err = (err != REG_NOERROR ? err
3237 		     : (err2 != REG_NOERROR ? err2 : err3));
3238 	      return err;
3239 	    }
3240 	  /* TODO: It is still inefficient...  */
3241 	  goto restart;
3242 	}
3243       else
3244 	{
3245 	  re_node_set union_set;
3246 	  next_node = dfa->nexts[ent->node];
3247 	  if (mctx->state_log[to_idx])
3248 	    {
3249 	      bool ok;
3250 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3251 					next_node))
3252 		continue;
3253 	      err = re_node_set_init_copy (&union_set,
3254 					   &mctx->state_log[to_idx]->nodes);
3255 	      ok = re_node_set_insert (&union_set, next_node);
3256 	      if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3257 		{
3258 		  re_node_set_free (&union_set);
3259 		  err = err != REG_NOERROR ? err : REG_ESPACE;
3260 		  return err;
3261 		}
3262 	    }
3263 	  else
3264 	    {
3265 	      err = re_node_set_init_1 (&union_set, next_node);
3266 	      if (__glibc_unlikely (err != REG_NOERROR))
3267 		return err;
3268 	    }
3269 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3270 	  re_node_set_free (&union_set);
3271 	  if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3272 				&& err != REG_NOERROR))
3273 	    return err;
3274 	}
3275     }
3276   while (ent++->more);
3277   return REG_NOERROR;
3278 }
3279 
3280 /* Build transition table for the state.
3281    Return true if successful.  */
3282 
3283 static bool
3284 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3285 {
3286   reg_errcode_t err;
3287   Idx i, j;
3288   int ch;
3289   bool need_word_trtable = false;
3290   bitset_word_t elem, mask;
3291   bool dests_node_malloced = false;
3292   bool dest_states_malloced = false;
3293   Idx ndests; /* Number of the destination states from 'state'.  */
3294   re_dfastate_t **trtable;
3295   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
3296   re_node_set follows, *dests_node;
3297   bitset_t *dests_ch;
3298   bitset_t acceptable;
3299 
3300   struct dests_alloc
3301   {
3302     re_node_set dests_node[SBC_MAX];
3303     bitset_t dests_ch[SBC_MAX];
3304   } *dests_alloc;
3305 
3306   /* We build DFA states which corresponds to the destination nodes
3307      from 'state'.  'dests_node[i]' represents the nodes which i-th
3308      destination state contains, and 'dests_ch[i]' represents the
3309      characters which i-th destination state accepts.  */
3310   if (__libc_use_alloca (sizeof (struct dests_alloc)))
3311     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3312   else
3313     {
3314       dests_alloc = re_malloc (struct dests_alloc, 1);
3315       if (__glibc_unlikely (dests_alloc == NULL))
3316 	return false;
3317       dests_node_malloced = true;
3318     }
3319   dests_node = dests_alloc->dests_node;
3320   dests_ch = dests_alloc->dests_ch;
3321 
3322   /* Initialize transition table.  */
3323   state->word_trtable = state->trtable = NULL;
3324 
3325   /* At first, group all nodes belonging to 'state' into several
3326      destinations.  */
3327   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3328   if (__glibc_unlikely (ndests <= 0))
3329     {
3330       if (dests_node_malloced)
3331 	re_free (dests_alloc);
3332       /* Return false in case of an error, true otherwise.  */
3333       if (ndests == 0)
3334 	{
3335 	  state->trtable = (re_dfastate_t **)
3336 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3337           if (__glibc_unlikely (state->trtable == NULL))
3338             return false;
3339 	  return true;
3340 	}
3341       return false;
3342     }
3343 
3344   err = re_node_set_alloc (&follows, ndests + 1);
3345   if (__glibc_unlikely (err != REG_NOERROR))
3346     goto out_free;
3347 
3348   /* Avoid arithmetic overflow in size calculation.  */
3349   size_t ndests_max
3350     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3351        / (3 * sizeof (re_dfastate_t *)));
3352   if (__glibc_unlikely (ndests_max < ndests))
3353     goto out_free;
3354 
3355   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3356 			 + ndests * 3 * sizeof (re_dfastate_t *)))
3357     dest_states = (re_dfastate_t **)
3358       alloca (ndests * 3 * sizeof (re_dfastate_t *));
3359   else
3360     {
3361       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3362       if (__glibc_unlikely (dest_states == NULL))
3363 	{
3364 out_free:
3365 	  if (dest_states_malloced)
3366 	    re_free (dest_states);
3367 	  re_node_set_free (&follows);
3368 	  for (i = 0; i < ndests; ++i)
3369 	    re_node_set_free (dests_node + i);
3370 	  if (dests_node_malloced)
3371 	    re_free (dests_alloc);
3372 	  return false;
3373 	}
3374       dest_states_malloced = true;
3375     }
3376   dest_states_word = dest_states + ndests;
3377   dest_states_nl = dest_states_word + ndests;
3378   bitset_empty (acceptable);
3379 
3380   /* Then build the states for all destinations.  */
3381   for (i = 0; i < ndests; ++i)
3382     {
3383       Idx next_node;
3384       re_node_set_empty (&follows);
3385       /* Merge the follows of this destination states.  */
3386       for (j = 0; j < dests_node[i].nelem; ++j)
3387 	{
3388 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3389 	  if (next_node != -1)
3390 	    {
3391 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3392 	      if (__glibc_unlikely (err != REG_NOERROR))
3393 		goto out_free;
3394 	    }
3395 	}
3396       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3397       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3398 	goto out_free;
3399       /* If the new state has context constraint,
3400 	 build appropriate states for these contexts.  */
3401       if (dest_states[i]->has_constraint)
3402 	{
3403 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3404 							  CONTEXT_WORD);
3405 	  if (__glibc_unlikely (dest_states_word[i] == NULL
3406 				&& err != REG_NOERROR))
3407 	    goto out_free;
3408 
3409 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3410 	    need_word_trtable = true;
3411 
3412 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3413 							CONTEXT_NEWLINE);
3414 	  if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3415 	    goto out_free;
3416 	}
3417       else
3418 	{
3419 	  dest_states_word[i] = dest_states[i];
3420 	  dest_states_nl[i] = dest_states[i];
3421 	}
3422       bitset_merge (acceptable, dests_ch[i]);
3423     }
3424 
3425   if (!__glibc_unlikely (need_word_trtable))
3426     {
3427       /* We don't care about whether the following character is a word
3428 	 character, or we are in a single-byte character set so we can
3429 	 discern by looking at the character code: allocate a
3430 	 256-entry transition table.  */
3431       trtable = state->trtable =
3432 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3433       if (__glibc_unlikely (trtable == NULL))
3434 	goto out_free;
3435 
3436       /* For all characters ch...:  */
3437       for (i = 0; i < BITSET_WORDS; ++i)
3438 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3439 	     elem;
3440 	     mask <<= 1, elem >>= 1, ++ch)
3441 	  if (__glibc_unlikely (elem & 1))
3442 	    {
3443 	      /* There must be exactly one destination which accepts
3444 		 character ch.  See group_nodes_into_DFAstates.  */
3445 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3446 		;
3447 
3448 	      /* j-th destination accepts the word character ch.  */
3449 	      if (dfa->word_char[i] & mask)
3450 		trtable[ch] = dest_states_word[j];
3451 	      else
3452 		trtable[ch] = dest_states[j];
3453 	    }
3454     }
3455   else
3456     {
3457       /* We care about whether the following character is a word
3458 	 character, and we are in a multi-byte character set: discern
3459 	 by looking at the character code: build two 256-entry
3460 	 transition tables, one starting at trtable[0] and one
3461 	 starting at trtable[SBC_MAX].  */
3462       trtable = state->word_trtable =
3463 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3464       if (__glibc_unlikely (trtable == NULL))
3465 	goto out_free;
3466 
3467       /* For all characters ch...:  */
3468       for (i = 0; i < BITSET_WORDS; ++i)
3469 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3470 	     elem;
3471 	     mask <<= 1, elem >>= 1, ++ch)
3472 	  if (__glibc_unlikely (elem & 1))
3473 	    {
3474 	      /* There must be exactly one destination which accepts
3475 		 character ch.  See group_nodes_into_DFAstates.  */
3476 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3477 		;
3478 
3479 	      /* j-th destination accepts the word character ch.  */
3480 	      trtable[ch] = dest_states[j];
3481 	      trtable[ch + SBC_MAX] = dest_states_word[j];
3482 	    }
3483     }
3484 
3485   /* new line */
3486   if (bitset_contain (acceptable, NEWLINE_CHAR))
3487     {
3488       /* The current state accepts newline character.  */
3489       for (j = 0; j < ndests; ++j)
3490 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3491 	  {
3492 	    /* k-th destination accepts newline character.  */
3493 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
3494 	    if (need_word_trtable)
3495 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3496 	    /* There must be only one destination which accepts
3497 	       newline.  See group_nodes_into_DFAstates.  */
3498 	    break;
3499 	  }
3500     }
3501 
3502   if (dest_states_malloced)
3503     re_free (dest_states);
3504 
3505   re_node_set_free (&follows);
3506   for (i = 0; i < ndests; ++i)
3507     re_node_set_free (dests_node + i);
3508 
3509   if (dests_node_malloced)
3510     re_free (dests_alloc);
3511 
3512   return true;
3513 }
3514 
3515 /* Group all nodes belonging to STATE into several destinations.
3516    Then for all destinations, set the nodes belonging to the destination
3517    to DESTS_NODE[i] and set the characters accepted by the destination
3518    to DEST_CH[i].  This function return the number of destinations.  */
3519 
3520 static Idx
3521 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3522 			    re_node_set *dests_node, bitset_t *dests_ch)
3523 {
3524   reg_errcode_t err;
3525   bool ok;
3526   Idx i, j, k;
3527   Idx ndests; /* Number of the destinations from 'state'.  */
3528   bitset_t accepts; /* Characters a node can accept.  */
3529   const re_node_set *cur_nodes = &state->nodes;
3530   bitset_empty (accepts);
3531   ndests = 0;
3532 
3533   /* For all the nodes belonging to 'state',  */
3534   for (i = 0; i < cur_nodes->nelem; ++i)
3535     {
3536       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3537       re_token_type_t type = node->type;
3538       unsigned int constraint = node->constraint;
3539 
3540       /* Enumerate all single byte character this node can accept.  */
3541       if (type == CHARACTER)
3542 	bitset_set (accepts, node->opr.c);
3543       else if (type == SIMPLE_BRACKET)
3544 	{
3545 	  bitset_merge (accepts, node->opr.sbcset);
3546 	}
3547       else if (type == OP_PERIOD)
3548 	{
3549 #ifdef RE_ENABLE_I18N
3550 	  if (dfa->mb_cur_max > 1)
3551 	    bitset_merge (accepts, dfa->sb_char);
3552 	  else
3553 #endif
3554 	    bitset_set_all (accepts);
3555 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3556 	    bitset_clear (accepts, '\n');
3557 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3558 	    bitset_clear (accepts, '\0');
3559 	}
3560 #ifdef RE_ENABLE_I18N
3561       else if (type == OP_UTF8_PERIOD)
3562 	{
3563 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3564 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3565 	  else
3566 	    bitset_merge (accepts, utf8_sb_map);
3567 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
3568 	    bitset_clear (accepts, '\n');
3569 	  if (dfa->syntax & RE_DOT_NOT_NULL)
3570 	    bitset_clear (accepts, '\0');
3571 	}
3572 #endif
3573       else
3574 	continue;
3575 
3576       /* Check the 'accepts' and sift the characters which are not
3577 	 match it the context.  */
3578       if (constraint)
3579 	{
3580 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
3581 	    {
3582 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3583 	      bitset_empty (accepts);
3584 	      if (accepts_newline)
3585 		bitset_set (accepts, NEWLINE_CHAR);
3586 	      else
3587 		continue;
3588 	    }
3589 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
3590 	    {
3591 	      bitset_empty (accepts);
3592 	      continue;
3593 	    }
3594 
3595 	  if (constraint & NEXT_WORD_CONSTRAINT)
3596 	    {
3597 	      bitset_word_t any_set = 0;
3598 	      if (type == CHARACTER && !node->word_char)
3599 		{
3600 		  bitset_empty (accepts);
3601 		  continue;
3602 		}
3603 #ifdef RE_ENABLE_I18N
3604 	      if (dfa->mb_cur_max > 1)
3605 		for (j = 0; j < BITSET_WORDS; ++j)
3606 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3607 	      else
3608 #endif
3609 		for (j = 0; j < BITSET_WORDS; ++j)
3610 		  any_set |= (accepts[j] &= dfa->word_char[j]);
3611 	      if (!any_set)
3612 		continue;
3613 	    }
3614 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
3615 	    {
3616 	      bitset_word_t any_set = 0;
3617 	      if (type == CHARACTER && node->word_char)
3618 		{
3619 		  bitset_empty (accepts);
3620 		  continue;
3621 		}
3622 #ifdef RE_ENABLE_I18N
3623 	      if (dfa->mb_cur_max > 1)
3624 		for (j = 0; j < BITSET_WORDS; ++j)
3625 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3626 	      else
3627 #endif
3628 		for (j = 0; j < BITSET_WORDS; ++j)
3629 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
3630 	      if (!any_set)
3631 		continue;
3632 	    }
3633 	}
3634 
3635       /* Then divide 'accepts' into DFA states, or create a new
3636 	 state.  Above, we make sure that accepts is not empty.  */
3637       for (j = 0; j < ndests; ++j)
3638 	{
3639 	  bitset_t intersec; /* Intersection sets, see below.  */
3640 	  bitset_t remains;
3641 	  /* Flags, see below.  */
3642 	  bitset_word_t has_intersec, not_subset, not_consumed;
3643 
3644 	  /* Optimization, skip if this state doesn't accept the character.  */
3645 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3646 	    continue;
3647 
3648 	  /* Enumerate the intersection set of this state and 'accepts'.  */
3649 	  has_intersec = 0;
3650 	  for (k = 0; k < BITSET_WORDS; ++k)
3651 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3652 	  /* And skip if the intersection set is empty.  */
3653 	  if (!has_intersec)
3654 	    continue;
3655 
3656 	  /* Then check if this state is a subset of 'accepts'.  */
3657 	  not_subset = not_consumed = 0;
3658 	  for (k = 0; k < BITSET_WORDS; ++k)
3659 	    {
3660 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3661 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3662 	    }
3663 
3664 	  /* If this state isn't a subset of 'accepts', create a
3665 	     new group state, which has the 'remains'. */
3666 	  if (not_subset)
3667 	    {
3668 	      bitset_copy (dests_ch[ndests], remains);
3669 	      bitset_copy (dests_ch[j], intersec);
3670 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3671 	      if (__glibc_unlikely (err != REG_NOERROR))
3672 		goto error_return;
3673 	      ++ndests;
3674 	    }
3675 
3676 	  /* Put the position in the current group. */
3677 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3678 	  if (__glibc_unlikely (! ok))
3679 	    goto error_return;
3680 
3681 	  /* If all characters are consumed, go to next node. */
3682 	  if (!not_consumed)
3683 	    break;
3684 	}
3685       /* Some characters remain, create a new group. */
3686       if (j == ndests)
3687 	{
3688 	  bitset_copy (dests_ch[ndests], accepts);
3689 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3690 	  if (__glibc_unlikely (err != REG_NOERROR))
3691 	    goto error_return;
3692 	  ++ndests;
3693 	  bitset_empty (accepts);
3694 	}
3695     }
3696   return ndests;
3697  error_return:
3698   for (j = 0; j < ndests; ++j)
3699     re_node_set_free (dests_node + j);
3700   return -1;
3701 }
3702 
3703 #ifdef RE_ENABLE_I18N
3704 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3705    Return the number of the bytes the node accepts.
3706    STR_IDX is the current index of the input string.
3707 
3708    This function handles the nodes which can accept one character, or
3709    one collating element like '.', '[a-z]', opposite to the other nodes
3710    can only accept one byte.  */
3711 
3712 # ifdef _LIBC
3713 #  include <locale/weight.h>
3714 # endif
3715 
3716 static int
3717 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3718 			 const re_string_t *input, Idx str_idx)
3719 {
3720   const re_token_t *node = dfa->nodes + node_idx;
3721   int char_len, elem_len;
3722   Idx i;
3723 
3724   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3725     {
3726       unsigned char c = re_string_byte_at (input, str_idx), d;
3727       if (__glibc_likely (c < 0xc2))
3728 	return 0;
3729 
3730       if (str_idx + 2 > input->len)
3731 	return 0;
3732 
3733       d = re_string_byte_at (input, str_idx + 1);
3734       if (c < 0xe0)
3735 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
3736       else if (c < 0xf0)
3737 	{
3738 	  char_len = 3;
3739 	  if (c == 0xe0 && d < 0xa0)
3740 	    return 0;
3741 	}
3742       else if (c < 0xf8)
3743 	{
3744 	  char_len = 4;
3745 	  if (c == 0xf0 && d < 0x90)
3746 	    return 0;
3747 	}
3748       else if (c < 0xfc)
3749 	{
3750 	  char_len = 5;
3751 	  if (c == 0xf8 && d < 0x88)
3752 	    return 0;
3753 	}
3754       else if (c < 0xfe)
3755 	{
3756 	  char_len = 6;
3757 	  if (c == 0xfc && d < 0x84)
3758 	    return 0;
3759 	}
3760       else
3761 	return 0;
3762 
3763       if (str_idx + char_len > input->len)
3764 	return 0;
3765 
3766       for (i = 1; i < char_len; ++i)
3767 	{
3768 	  d = re_string_byte_at (input, str_idx + i);
3769 	  if (d < 0x80 || d > 0xbf)
3770 	    return 0;
3771 	}
3772       return char_len;
3773     }
3774 
3775   char_len = re_string_char_size_at (input, str_idx);
3776   if (node->type == OP_PERIOD)
3777     {
3778       if (char_len <= 1)
3779 	return 0;
3780       /* FIXME: I don't think this if is needed, as both '\n'
3781 	 and '\0' are char_len == 1.  */
3782       /* '.' accepts any one character except the following two cases.  */
3783       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3784 	   re_string_byte_at (input, str_idx) == '\n') ||
3785 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
3786 	   re_string_byte_at (input, str_idx) == '\0'))
3787 	return 0;
3788       return char_len;
3789     }
3790 
3791   elem_len = re_string_elem_size_at (input, str_idx);
3792   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3793     return 0;
3794 
3795   if (node->type == COMPLEX_BRACKET)
3796     {
3797       const re_charset_t *cset = node->opr.mbcset;
3798 # ifdef _LIBC
3799       const unsigned char *pin
3800 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3801       Idx j;
3802       uint32_t nrules;
3803 # endif /* _LIBC */
3804       int match_len = 0;
3805       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3806 		    ? re_string_wchar_at (input, str_idx) : 0);
3807 
3808       /* match with multibyte character?  */
3809       for (i = 0; i < cset->nmbchars; ++i)
3810 	if (wc == cset->mbchars[i])
3811 	  {
3812 	    match_len = char_len;
3813 	    goto check_node_accept_bytes_match;
3814 	  }
3815       /* match with character_class?  */
3816       for (i = 0; i < cset->nchar_classes; ++i)
3817 	{
3818 	  wctype_t wt = cset->char_classes[i];
3819 	  if (__iswctype (wc, wt))
3820 	    {
3821 	      match_len = char_len;
3822 	      goto check_node_accept_bytes_match;
3823 	    }
3824 	}
3825 
3826 # ifdef _LIBC
3827       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3828       if (nrules != 0)
3829 	{
3830 	  unsigned int in_collseq = 0;
3831 	  const int32_t *table, *indirect;
3832 	  const unsigned char *weights, *extra;
3833 	  const char *collseqwc;
3834 
3835 	  /* match with collating_symbol?  */
3836 	  if (cset->ncoll_syms)
3837 	    extra = (const unsigned char *)
3838 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3839 	  for (i = 0; i < cset->ncoll_syms; ++i)
3840 	    {
3841 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
3842 	      /* Compare the length of input collating element and
3843 		 the length of current collating element.  */
3844 	      if (*coll_sym != elem_len)
3845 		continue;
3846 	      /* Compare each bytes.  */
3847 	      for (j = 0; j < *coll_sym; j++)
3848 		if (pin[j] != coll_sym[1 + j])
3849 		  break;
3850 	      if (j == *coll_sym)
3851 		{
3852 		  /* Match if every bytes is equal.  */
3853 		  match_len = j;
3854 		  goto check_node_accept_bytes_match;
3855 		}
3856 	    }
3857 
3858 	  if (cset->nranges)
3859 	    {
3860 	      if (elem_len <= char_len)
3861 		{
3862 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3863 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
3864 		}
3865 	      else
3866 		in_collseq = find_collation_sequence_value (pin, elem_len);
3867 	    }
3868 	  /* match with range expression?  */
3869 	  /* FIXME: Implement rational ranges here, too.  */
3870 	  for (i = 0; i < cset->nranges; ++i)
3871 	    if (cset->range_starts[i] <= in_collseq
3872 		&& in_collseq <= cset->range_ends[i])
3873 	      {
3874 		match_len = elem_len;
3875 		goto check_node_accept_bytes_match;
3876 	      }
3877 
3878 	  /* match with equivalence_class?  */
3879 	  if (cset->nequiv_classes)
3880 	    {
3881 	      const unsigned char *cp = pin;
3882 	      table = (const int32_t *)
3883 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3884 	      weights = (const unsigned char *)
3885 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3886 	      extra = (const unsigned char *)
3887 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3888 	      indirect = (const int32_t *)
3889 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3890 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3891 	      int32_t rule = idx >> 24;
3892 	      idx &= 0xffffff;
3893 	      if (idx > 0)
3894 		{
3895 		  size_t weight_len = weights[idx];
3896 		  for (i = 0; i < cset->nequiv_classes; ++i)
3897 		    {
3898 		      int32_t equiv_class_idx = cset->equiv_classes[i];
3899 		      int32_t equiv_class_rule = equiv_class_idx >> 24;
3900 		      equiv_class_idx &= 0xffffff;
3901 		      if (weights[equiv_class_idx] == weight_len
3902 			  && equiv_class_rule == rule
3903 			  && memcmp (weights + idx + 1,
3904 				     weights + equiv_class_idx + 1,
3905 				     weight_len) == 0)
3906 			{
3907 			  match_len = elem_len;
3908 			  goto check_node_accept_bytes_match;
3909 			}
3910 		    }
3911 		}
3912 	    }
3913 	}
3914       else
3915 # endif /* _LIBC */
3916 	{
3917 	  /* match with range expression?  */
3918 	  for (i = 0; i < cset->nranges; ++i)
3919 	    {
3920 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3921 		{
3922 		  match_len = char_len;
3923 		  goto check_node_accept_bytes_match;
3924 		}
3925 	    }
3926 	}
3927     check_node_accept_bytes_match:
3928       if (!cset->non_match)
3929 	return match_len;
3930       else
3931 	{
3932 	  if (match_len > 0)
3933 	    return 0;
3934 	  else
3935 	    return (elem_len > char_len) ? elem_len : char_len;
3936 	}
3937     }
3938   return 0;
3939 }
3940 
3941 # ifdef _LIBC
3942 static unsigned int
3943 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3944 {
3945   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3946   if (nrules == 0)
3947     {
3948       if (mbs_len == 1)
3949 	{
3950 	  /* No valid character.  Match it as a single byte character.  */
3951 	  const unsigned char *collseq = (const unsigned char *)
3952 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3953 	  return collseq[mbs[0]];
3954 	}
3955       return UINT_MAX;
3956     }
3957   else
3958     {
3959       int32_t idx;
3960       const unsigned char *extra = (const unsigned char *)
3961 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3962       int32_t extrasize = (const unsigned char *)
3963 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3964 
3965       for (idx = 0; idx < extrasize;)
3966 	{
3967 	  int mbs_cnt;
3968 	  bool found = false;
3969 	  int32_t elem_mbs_len;
3970 	  /* Skip the name of collating element name.  */
3971 	  idx = idx + extra[idx] + 1;
3972 	  elem_mbs_len = extra[idx++];
3973 	  if (mbs_len == elem_mbs_len)
3974 	    {
3975 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3976 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3977 		  break;
3978 	      if (mbs_cnt == elem_mbs_len)
3979 		/* Found the entry.  */
3980 		found = true;
3981 	    }
3982 	  /* Skip the byte sequence of the collating element.  */
3983 	  idx += elem_mbs_len;
3984 	  /* Adjust for the alignment.  */
3985 	  idx = (idx + 3) & ~3;
3986 	  /* Skip the collation sequence value.  */
3987 	  idx += sizeof (uint32_t);
3988 	  /* Skip the wide char sequence of the collating element.  */
3989 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3990 	  /* If we found the entry, return the sequence value.  */
3991 	  if (found)
3992 	    return *(uint32_t *) (extra + idx);
3993 	  /* Skip the collation sequence value.  */
3994 	  idx += sizeof (uint32_t);
3995 	}
3996       return UINT_MAX;
3997     }
3998 }
3999 # endif /* _LIBC */
4000 #endif /* RE_ENABLE_I18N */
4001 
4002 /* Check whether the node accepts the byte which is IDX-th
4003    byte of the INPUT.  */
4004 
4005 static bool
4006 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
4007 		   Idx idx)
4008 {
4009   unsigned char ch;
4010   ch = re_string_byte_at (&mctx->input, idx);
4011   switch (node->type)
4012     {
4013     case CHARACTER:
4014       if (node->opr.c != ch)
4015         return false;
4016       break;
4017 
4018     case SIMPLE_BRACKET:
4019       if (!bitset_contain (node->opr.sbcset, ch))
4020         return false;
4021       break;
4022 
4023 #ifdef RE_ENABLE_I18N
4024     case OP_UTF8_PERIOD:
4025       if (ch >= ASCII_CHARS)
4026         return false;
4027       FALLTHROUGH;
4028 #endif
4029     case OP_PERIOD:
4030       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4031 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4032 	return false;
4033       break;
4034 
4035     default:
4036       return false;
4037     }
4038 
4039   if (node->constraint)
4040     {
4041       /* The node has constraints.  Check whether the current context
4042 	 satisfies the constraints.  */
4043       unsigned int context = re_string_context_at (&mctx->input, idx,
4044 						   mctx->eflags);
4045       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4046 	return false;
4047     }
4048 
4049   return true;
4050 }
4051 
4052 /* Extend the buffers, if the buffers have run out.  */
4053 
4054 static reg_errcode_t
4055 __attribute_warn_unused_result__
4056 extend_buffers (re_match_context_t *mctx, int min_len)
4057 {
4058   reg_errcode_t ret;
4059   re_string_t *pstr = &mctx->input;
4060 
4061   /* Avoid overflow.  */
4062   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4063 			<= pstr->bufs_len))
4064     return REG_ESPACE;
4065 
4066   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4067   ret = re_string_realloc_buffers (pstr,
4068 				   MAX (min_len,
4069 					MIN (pstr->len, pstr->bufs_len * 2)));
4070   if (__glibc_unlikely (ret != REG_NOERROR))
4071     return ret;
4072 
4073   if (mctx->state_log != NULL)
4074     {
4075       /* And double the length of state_log.  */
4076       /* XXX We have no indication of the size of this buffer.  If this
4077 	 allocation fail we have no indication that the state_log array
4078 	 does not have the right size.  */
4079       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
4080 					      pstr->bufs_len + 1);
4081       if (__glibc_unlikely (new_array == NULL))
4082 	return REG_ESPACE;
4083       mctx->state_log = new_array;
4084     }
4085 
4086   /* Then reconstruct the buffers.  */
4087   if (pstr->icase)
4088     {
4089 #ifdef RE_ENABLE_I18N
4090       if (pstr->mb_cur_max > 1)
4091 	{
4092 	  ret = build_wcs_upper_buffer (pstr);
4093 	  if (__glibc_unlikely (ret != REG_NOERROR))
4094 	    return ret;
4095 	}
4096       else
4097 #endif /* RE_ENABLE_I18N  */
4098 	build_upper_buffer (pstr);
4099     }
4100   else
4101     {
4102 #ifdef RE_ENABLE_I18N
4103       if (pstr->mb_cur_max > 1)
4104 	build_wcs_buffer (pstr);
4105       else
4106 #endif /* RE_ENABLE_I18N  */
4107 	{
4108 	  if (pstr->trans != NULL)
4109 	    re_string_translate_buffer (pstr);
4110 	}
4111     }
4112   return REG_NOERROR;
4113 }
4114 
4115 
4116 /* Functions for matching context.  */
4117 
4118 /* Initialize MCTX.  */
4119 
4120 static reg_errcode_t
4121 __attribute_warn_unused_result__
4122 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4123 {
4124   mctx->eflags = eflags;
4125   mctx->match_last = -1;
4126   if (n > 0)
4127     {
4128       /* Avoid overflow.  */
4129       size_t max_object_size =
4130 	MAX (sizeof (struct re_backref_cache_entry),
4131 	     sizeof (re_sub_match_top_t *));
4132       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4133 	return REG_ESPACE;
4134 
4135       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4136       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4137       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4138 	return REG_ESPACE;
4139     }
4140   /* Already zero-ed by the caller.
4141      else
4142        mctx->bkref_ents = NULL;
4143      mctx->nbkref_ents = 0;
4144      mctx->nsub_tops = 0;  */
4145   mctx->abkref_ents = n;
4146   mctx->max_mb_elem_len = 1;
4147   mctx->asub_tops = n;
4148   return REG_NOERROR;
4149 }
4150 
4151 /* Clean the entries which depend on the current input in MCTX.
4152    This function must be invoked when the matcher changes the start index
4153    of the input, or changes the input string.  */
4154 
4155 static void
4156 match_ctx_clean (re_match_context_t *mctx)
4157 {
4158   Idx st_idx;
4159   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4160     {
4161       Idx sl_idx;
4162       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4163       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4164 	{
4165 	  re_sub_match_last_t *last = top->lasts[sl_idx];
4166 	  re_free (last->path.array);
4167 	  re_free (last);
4168 	}
4169       re_free (top->lasts);
4170       if (top->path)
4171 	{
4172 	  re_free (top->path->array);
4173 	  re_free (top->path);
4174 	}
4175       re_free (top);
4176     }
4177 
4178   mctx->nsub_tops = 0;
4179   mctx->nbkref_ents = 0;
4180 }
4181 
4182 /* Free all the memory associated with MCTX.  */
4183 
4184 static void
4185 match_ctx_free (re_match_context_t *mctx)
4186 {
4187   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
4188   match_ctx_clean (mctx);
4189   re_free (mctx->sub_tops);
4190   re_free (mctx->bkref_ents);
4191 }
4192 
4193 /* Add a new backreference entry to MCTX.
4194    Note that we assume that caller never call this function with duplicate
4195    entry, and call with STR_IDX which isn't smaller than any existing entry.
4196 */
4197 
4198 static reg_errcode_t
4199 __attribute_warn_unused_result__
4200 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4201 		     Idx to)
4202 {
4203   if (mctx->nbkref_ents >= mctx->abkref_ents)
4204     {
4205       struct re_backref_cache_entry* new_entry;
4206       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4207 			      mctx->abkref_ents * 2);
4208       if (__glibc_unlikely (new_entry == NULL))
4209 	{
4210 	  re_free (mctx->bkref_ents);
4211 	  return REG_ESPACE;
4212 	}
4213       mctx->bkref_ents = new_entry;
4214       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4215 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4216       mctx->abkref_ents *= 2;
4217     }
4218   if (mctx->nbkref_ents > 0
4219       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4220     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4221 
4222   mctx->bkref_ents[mctx->nbkref_ents].node = node;
4223   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4224   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4225   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4226 
4227   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
4228      If bit N is clear, means that this entry won't epsilon-transition to
4229      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
4230      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4231      such node.
4232 
4233      A backreference does not epsilon-transition unless it is empty, so set
4234      to all zeros if FROM != TO.  */
4235   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4236     = (from == to ? -1 : 0);
4237 
4238   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4239   if (mctx->max_mb_elem_len < to - from)
4240     mctx->max_mb_elem_len = to - from;
4241   return REG_NOERROR;
4242 }
4243 
4244 /* Return the first entry with the same str_idx, or -1 if none is
4245    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
4246 
4247 static Idx
4248 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4249 {
4250   Idx left, right, mid, last;
4251   last = right = mctx->nbkref_ents;
4252   for (left = 0; left < right;)
4253     {
4254       mid = (left + right) / 2;
4255       if (mctx->bkref_ents[mid].str_idx < str_idx)
4256 	left = mid + 1;
4257       else
4258 	right = mid;
4259     }
4260   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4261     return left;
4262   else
4263     return -1;
4264 }
4265 
4266 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4267    at STR_IDX.  */
4268 
4269 static reg_errcode_t
4270 __attribute_warn_unused_result__
4271 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4272 {
4273 #ifdef DEBUG
4274   assert (mctx->sub_tops != NULL);
4275   assert (mctx->asub_tops > 0);
4276 #endif
4277   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4278     {
4279       Idx new_asub_tops = mctx->asub_tops * 2;
4280       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4281 						   re_sub_match_top_t *,
4282 						   new_asub_tops);
4283       if (__glibc_unlikely (new_array == NULL))
4284 	return REG_ESPACE;
4285       mctx->sub_tops = new_array;
4286       mctx->asub_tops = new_asub_tops;
4287     }
4288   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4289   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4290     return REG_ESPACE;
4291   mctx->sub_tops[mctx->nsub_tops]->node = node;
4292   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4293   return REG_NOERROR;
4294 }
4295 
4296 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4297    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
4298 
4299 static re_sub_match_last_t *
4300 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4301 {
4302   re_sub_match_last_t *new_entry;
4303   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4304     {
4305       Idx new_alasts = 2 * subtop->alasts + 1;
4306       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4307 						    re_sub_match_last_t *,
4308 						    new_alasts);
4309       if (__glibc_unlikely (new_array == NULL))
4310 	return NULL;
4311       subtop->lasts = new_array;
4312       subtop->alasts = new_alasts;
4313     }
4314   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4315   if (__glibc_likely (new_entry != NULL))
4316     {
4317       subtop->lasts[subtop->nlasts] = new_entry;
4318       new_entry->node = node;
4319       new_entry->str_idx = str_idx;
4320       ++subtop->nlasts;
4321     }
4322   return new_entry;
4323 }
4324 
4325 static void
4326 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4327 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4328 {
4329   sctx->sifted_states = sifted_sts;
4330   sctx->limited_states = limited_sts;
4331   sctx->last_node = last_node;
4332   sctx->last_str_idx = last_str_idx;
4333   re_node_set_init_empty (&sctx->limits);
4334 }
4335