xref: /dflybsd-src/contrib/grep/lib/regexec.c (revision 91b9ed38d3db6a8a8ac5b66da1d43e6e331e259a)
195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*09d4459fSDaniel Fojt    Copyright (C) 2002-2020 Free Software Foundation, Inc.
395b7b453SJohn Marino    This file is part of the GNU C Library.
495b7b453SJohn Marino    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino 
6680a9cb8SJohn Marino    The GNU C Library is free software; you can redistribute it and/or
7680a9cb8SJohn Marino    modify it under the terms of the GNU General Public
8680a9cb8SJohn Marino    License as published by the Free Software Foundation; either
9680a9cb8SJohn Marino    version 3 of the License, or (at your option) any later version.
1095b7b453SJohn Marino 
11680a9cb8SJohn Marino    The GNU C Library is distributed in the hope that it will be useful,
1295b7b453SJohn Marino    but WITHOUT ANY WARRANTY; without even the implied warranty of
13680a9cb8SJohn Marino    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14680a9cb8SJohn Marino    General Public License for more details.
1595b7b453SJohn Marino 
16680a9cb8SJohn Marino    You should have received a copy of the GNU General Public
17680a9cb8SJohn Marino    License along with the GNU C Library; if not, see
18*09d4459fSDaniel Fojt    <https://www.gnu.org/licenses/>.  */
1995b7b453SJohn Marino 
2095b7b453SJohn Marino static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21*09d4459fSDaniel Fojt 				     Idx n);
22*09d4459fSDaniel Fojt static void match_ctx_clean (re_match_context_t *mctx);
23*09d4459fSDaniel Fojt static void match_ctx_free (re_match_context_t *cache);
2495b7b453SJohn Marino static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25*09d4459fSDaniel Fojt 					  Idx str_idx, Idx from, Idx to);
26*09d4459fSDaniel Fojt static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
2795b7b453SJohn Marino static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28*09d4459fSDaniel Fojt 					   Idx str_idx);
2995b7b453SJohn Marino static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30*09d4459fSDaniel Fojt 						    Idx node, Idx str_idx);
3195b7b453SJohn Marino static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
3295b7b453SJohn Marino 			   re_dfastate_t **limited_sts, Idx last_node,
33*09d4459fSDaniel Fojt 			   Idx last_str_idx);
3495b7b453SJohn Marino static reg_errcode_t re_search_internal (const regex_t *preg,
3595b7b453SJohn Marino 					 const char *string, Idx length,
3695b7b453SJohn Marino 					 Idx start, Idx last_start, Idx stop,
3795b7b453SJohn Marino 					 size_t nmatch, regmatch_t pmatch[],
38*09d4459fSDaniel Fojt 					 int eflags);
3995b7b453SJohn Marino static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
4095b7b453SJohn Marino 				  const char *string1, Idx length1,
4195b7b453SJohn Marino 				  const char *string2, Idx length2,
4295b7b453SJohn Marino 				  Idx start, regoff_t range,
4395b7b453SJohn Marino 				  struct re_registers *regs,
44*09d4459fSDaniel Fojt 				  Idx stop, bool ret_len);
4595b7b453SJohn Marino static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
4695b7b453SJohn Marino 				const char *string, Idx length, Idx start,
4795b7b453SJohn Marino 				regoff_t range, Idx stop,
4895b7b453SJohn Marino 				struct re_registers *regs,
49*09d4459fSDaniel Fojt 				bool ret_len);
50cf28ed85SJohn Marino static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51*09d4459fSDaniel Fojt                               Idx nregs, int regs_allocated);
52*09d4459fSDaniel Fojt static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
5395b7b453SJohn Marino static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54*09d4459fSDaniel Fojt 			   Idx *p_match_first);
5595b7b453SJohn Marino static Idx check_halt_state_context (const re_match_context_t *mctx,
56*09d4459fSDaniel Fojt 				     const re_dfastate_t *state, Idx idx);
5795b7b453SJohn Marino static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
5895b7b453SJohn Marino 			 regmatch_t *prev_idx_match, Idx cur_node,
59*09d4459fSDaniel Fojt 			 Idx cur_idx, Idx nmatch);
6095b7b453SJohn Marino static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
6195b7b453SJohn Marino 				      Idx str_idx, Idx dest_node, Idx nregs,
6295b7b453SJohn Marino 				      regmatch_t *regs,
63*09d4459fSDaniel Fojt 				      re_node_set *eps_via_nodes);
6495b7b453SJohn Marino static reg_errcode_t set_regs (const regex_t *preg,
6595b7b453SJohn Marino 			       const re_match_context_t *mctx,
6695b7b453SJohn Marino 			       size_t nmatch, regmatch_t *pmatch,
67*09d4459fSDaniel Fojt 			       bool fl_backtrack);
68*09d4459fSDaniel Fojt static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
6995b7b453SJohn Marino 
7095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
7195b7b453SJohn Marino static int sift_states_iter_mb (const re_match_context_t *mctx,
7295b7b453SJohn Marino 				re_sift_context_t *sctx,
73*09d4459fSDaniel Fojt 				Idx node_idx, Idx str_idx, Idx max_str_idx);
7495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
7595b7b453SJohn Marino static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76*09d4459fSDaniel Fojt 					   re_sift_context_t *sctx);
7795b7b453SJohn Marino static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
7895b7b453SJohn Marino 					  re_sift_context_t *sctx, Idx str_idx,
79*09d4459fSDaniel Fojt 					  re_node_set *cur_dest);
8095b7b453SJohn Marino static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
8195b7b453SJohn Marino 					      re_sift_context_t *sctx,
8295b7b453SJohn Marino 					      Idx str_idx,
83*09d4459fSDaniel Fojt 					      re_node_set *dest_nodes);
8495b7b453SJohn Marino static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
8595b7b453SJohn Marino 					    re_node_set *dest_nodes,
86*09d4459fSDaniel Fojt 					    const re_node_set *candidates);
8795b7b453SJohn Marino static bool check_dst_limits (const re_match_context_t *mctx,
8895b7b453SJohn Marino 			      const re_node_set *limits,
8995b7b453SJohn Marino 			      Idx dst_node, Idx dst_idx, Idx src_node,
90*09d4459fSDaniel Fojt 			      Idx src_idx);
9195b7b453SJohn Marino static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
9295b7b453SJohn Marino 					int boundaries, Idx subexp_idx,
93*09d4459fSDaniel Fojt 					Idx from_node, Idx bkref_idx);
9495b7b453SJohn Marino static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
9595b7b453SJohn Marino 				      Idx limit, Idx subexp_idx,
9695b7b453SJohn Marino 				      Idx node, Idx str_idx,
97*09d4459fSDaniel Fojt 				      Idx bkref_idx);
9895b7b453SJohn Marino static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
9995b7b453SJohn Marino 					  re_node_set *dest_nodes,
10095b7b453SJohn Marino 					  const re_node_set *candidates,
10195b7b453SJohn Marino 					  re_node_set *limits,
10295b7b453SJohn Marino 					  struct re_backref_cache_entry *bkref_ents,
103*09d4459fSDaniel Fojt 					  Idx str_idx);
10495b7b453SJohn Marino static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
10595b7b453SJohn Marino 					re_sift_context_t *sctx,
106*09d4459fSDaniel Fojt 					Idx str_idx, const re_node_set *candidates);
10795b7b453SJohn Marino static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
10895b7b453SJohn Marino 					re_dfastate_t **dst,
109*09d4459fSDaniel Fojt 					re_dfastate_t **src, Idx num);
11095b7b453SJohn Marino static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111*09d4459fSDaniel Fojt 					 re_match_context_t *mctx);
11295b7b453SJohn Marino static re_dfastate_t *transit_state (reg_errcode_t *err,
11395b7b453SJohn Marino 				     re_match_context_t *mctx,
114*09d4459fSDaniel Fojt 				     re_dfastate_t *state);
11595b7b453SJohn Marino static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
11695b7b453SJohn Marino 					    re_match_context_t *mctx,
117*09d4459fSDaniel Fojt 					    re_dfastate_t *next_state);
11895b7b453SJohn Marino static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
11995b7b453SJohn Marino 						re_node_set *cur_nodes,
120*09d4459fSDaniel Fojt 						Idx str_idx);
12195b7b453SJohn Marino #if 0
12295b7b453SJohn Marino static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
12395b7b453SJohn Marino 					re_match_context_t *mctx,
124*09d4459fSDaniel Fojt 					re_dfastate_t *pstate);
12595b7b453SJohn Marino #endif
12695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
12795b7b453SJohn Marino static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128*09d4459fSDaniel Fojt 				       re_dfastate_t *pstate);
12995b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
13095b7b453SJohn Marino static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131*09d4459fSDaniel Fojt 					  const re_node_set *nodes);
13295b7b453SJohn Marino static reg_errcode_t get_subexp (re_match_context_t *mctx,
133*09d4459fSDaniel Fojt 				 Idx bkref_node, Idx bkref_str_idx);
13495b7b453SJohn Marino static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
13595b7b453SJohn Marino 				     const re_sub_match_top_t *sub_top,
13695b7b453SJohn Marino 				     re_sub_match_last_t *sub_last,
137*09d4459fSDaniel Fojt 				     Idx bkref_node, Idx bkref_str);
13895b7b453SJohn Marino static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139*09d4459fSDaniel Fojt 			     Idx subexp_idx, int type);
14095b7b453SJohn Marino static reg_errcode_t check_arrival (re_match_context_t *mctx,
14195b7b453SJohn Marino 				    state_array_t *path, Idx top_node,
14295b7b453SJohn Marino 				    Idx top_str, Idx last_node, Idx last_str,
143*09d4459fSDaniel Fojt 				    int type);
14495b7b453SJohn Marino static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
14595b7b453SJohn Marino 						   Idx str_idx,
14695b7b453SJohn Marino 						   re_node_set *cur_nodes,
147*09d4459fSDaniel Fojt 						   re_node_set *next_nodes);
14895b7b453SJohn Marino static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
14995b7b453SJohn Marino 					       re_node_set *cur_nodes,
150*09d4459fSDaniel Fojt 					       Idx ex_subexp, int type);
15195b7b453SJohn Marino static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
15295b7b453SJohn Marino 						   re_node_set *dst_nodes,
15395b7b453SJohn Marino 						   Idx target, Idx ex_subexp,
154*09d4459fSDaniel Fojt 						   int type);
15595b7b453SJohn Marino static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
15695b7b453SJohn Marino 					 re_node_set *cur_nodes, Idx cur_str,
157*09d4459fSDaniel Fojt 					 Idx subexp_num, int type);
158*09d4459fSDaniel Fojt static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
15995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
16095b7b453SJohn Marino static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161*09d4459fSDaniel Fojt 				    const re_string_t *input, Idx idx);
16295b7b453SJohn Marino # ifdef _LIBC
16395b7b453SJohn Marino static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164*09d4459fSDaniel Fojt 						   size_t name_len);
16595b7b453SJohn Marino # endif /* _LIBC */
16695b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
16795b7b453SJohn Marino static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
16895b7b453SJohn Marino 				       const re_dfastate_t *state,
16995b7b453SJohn Marino 				       re_node_set *states_node,
170*09d4459fSDaniel Fojt 				       bitset_t *states_ch);
17195b7b453SJohn Marino static bool check_node_accept (const re_match_context_t *mctx,
172*09d4459fSDaniel Fojt 			       const re_token_t *node, Idx idx);
173*09d4459fSDaniel Fojt static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
17495b7b453SJohn Marino 
17595b7b453SJohn Marino /* Entry point for POSIX code.  */
17695b7b453SJohn Marino 
17795b7b453SJohn Marino /* regexec searches for a given pattern, specified by PREG, in the
17895b7b453SJohn Marino    string STRING.
17995b7b453SJohn Marino 
18095b7b453SJohn Marino    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181cf28ed85SJohn Marino    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
18295b7b453SJohn Marino    least NMATCH elements, and we set them to the offsets of the
18395b7b453SJohn Marino    corresponding matched substrings.
18495b7b453SJohn Marino 
185cf28ed85SJohn Marino    EFLAGS specifies "execution flags" which affect matching: if
18695b7b453SJohn Marino    REG_NOTBOL is set, then ^ does not match at the beginning of the
18795b7b453SJohn Marino    string; if REG_NOTEOL is set, then $ does not match at the end.
18895b7b453SJohn Marino 
18995b7b453SJohn Marino    We return 0 if we find a match and REG_NOMATCH if not.  */
19095b7b453SJohn Marino 
19195b7b453SJohn Marino int
regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[],int eflags)192*09d4459fSDaniel Fojt regexec (const regex_t *__restrict preg, const char *__restrict string,
193*09d4459fSDaniel Fojt 	 size_t nmatch, regmatch_t pmatch[], int eflags)
19495b7b453SJohn Marino {
19595b7b453SJohn Marino   reg_errcode_t err;
19695b7b453SJohn Marino   Idx start, length;
197cf28ed85SJohn Marino   re_dfa_t *dfa = preg->buffer;
19895b7b453SJohn Marino 
19995b7b453SJohn Marino   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
20095b7b453SJohn Marino     return REG_BADPAT;
20195b7b453SJohn Marino 
20295b7b453SJohn Marino   if (eflags & REG_STARTEND)
20395b7b453SJohn Marino     {
20495b7b453SJohn Marino       start = pmatch[0].rm_so;
20595b7b453SJohn Marino       length = pmatch[0].rm_eo;
20695b7b453SJohn Marino     }
20795b7b453SJohn Marino   else
20895b7b453SJohn Marino     {
20995b7b453SJohn Marino       start = 0;
21095b7b453SJohn Marino       length = strlen (string);
21195b7b453SJohn Marino     }
21295b7b453SJohn Marino 
213680a9cb8SJohn Marino   lock_lock (dfa->lock);
21495b7b453SJohn Marino   if (preg->no_sub)
21595b7b453SJohn Marino     err = re_search_internal (preg, string, length, start, length,
21695b7b453SJohn Marino 			      length, 0, NULL, eflags);
21795b7b453SJohn Marino   else
21895b7b453SJohn Marino     err = re_search_internal (preg, string, length, start, length,
21995b7b453SJohn Marino 			      length, nmatch, pmatch, eflags);
220680a9cb8SJohn Marino   lock_unlock (dfa->lock);
22195b7b453SJohn Marino   return err != REG_NOERROR;
22295b7b453SJohn Marino }
22395b7b453SJohn Marino 
22495b7b453SJohn Marino #ifdef _LIBC
225*09d4459fSDaniel Fojt libc_hidden_def (__regexec)
226*09d4459fSDaniel Fojt 
22795b7b453SJohn Marino # include <shlib-compat.h>
22895b7b453SJohn Marino versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
22995b7b453SJohn Marino 
23095b7b453SJohn Marino # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
23195b7b453SJohn Marino __typeof__ (__regexec) __compat_regexec;
23295b7b453SJohn Marino 
23395b7b453SJohn Marino int
23495b7b453SJohn Marino attribute_compat_text_section
__compat_regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[],int eflags)235*09d4459fSDaniel Fojt __compat_regexec (const regex_t *__restrict preg,
236*09d4459fSDaniel Fojt 		  const char *__restrict string, size_t nmatch,
23795b7b453SJohn Marino 		  regmatch_t pmatch[], int eflags)
23895b7b453SJohn Marino {
23995b7b453SJohn Marino   return regexec (preg, string, nmatch, pmatch,
24095b7b453SJohn Marino 		  eflags & (REG_NOTBOL | REG_NOTEOL));
24195b7b453SJohn Marino }
24295b7b453SJohn Marino compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
24395b7b453SJohn Marino # endif
24495b7b453SJohn Marino #endif
24595b7b453SJohn Marino 
24695b7b453SJohn Marino /* Entry points for GNU code.  */
24795b7b453SJohn Marino 
24895b7b453SJohn Marino /* re_match, re_search, re_match_2, re_search_2
24995b7b453SJohn Marino 
25095b7b453SJohn Marino    The former two functions operate on STRING with length LENGTH,
25195b7b453SJohn Marino    while the later two operate on concatenation of STRING1 and STRING2
25295b7b453SJohn Marino    with lengths LENGTH1 and LENGTH2, respectively.
25395b7b453SJohn Marino 
25495b7b453SJohn Marino    re_match() matches the compiled pattern in BUFP against the string,
25595b7b453SJohn Marino    starting at index START.
25695b7b453SJohn Marino 
25795b7b453SJohn Marino    re_search() first tries matching at index START, then it tries to match
25895b7b453SJohn Marino    starting from index START + 1, and so on.  The last start position tried
25995b7b453SJohn Marino    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
26095b7b453SJohn Marino    way as re_match().)
26195b7b453SJohn Marino 
26295b7b453SJohn Marino    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
26395b7b453SJohn Marino    the first STOP characters of the concatenation of the strings should be
26495b7b453SJohn Marino    concerned.
26595b7b453SJohn Marino 
26695b7b453SJohn Marino    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
26795b7b453SJohn Marino    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
26895b7b453SJohn Marino    computed relative to the concatenation, not relative to the individual
26995b7b453SJohn Marino    strings.)
27095b7b453SJohn Marino 
27195b7b453SJohn Marino    On success, re_match* functions return the length of the match, re_search*
27295b7b453SJohn Marino    return the position of the start of the match.  Return value -1 means no
27395b7b453SJohn Marino    match was found and -2 indicates an internal error.  */
27495b7b453SJohn Marino 
27595b7b453SJohn Marino regoff_t
re_match(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,struct re_registers * regs)276*09d4459fSDaniel Fojt re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277*09d4459fSDaniel Fojt 	  Idx start, struct re_registers *regs)
27895b7b453SJohn Marino {
27995b7b453SJohn Marino   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
28095b7b453SJohn Marino }
28195b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_match,re_match)28295b7b453SJohn Marino weak_alias (__re_match, re_match)
28395b7b453SJohn Marino #endif
28495b7b453SJohn Marino 
28595b7b453SJohn Marino regoff_t
286*09d4459fSDaniel Fojt re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287*09d4459fSDaniel Fojt 	   Idx start, regoff_t range, struct re_registers *regs)
28895b7b453SJohn Marino {
28995b7b453SJohn Marino   return re_search_stub (bufp, string, length, start, range, length, regs,
29095b7b453SJohn Marino 			 false);
29195b7b453SJohn Marino }
29295b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_search,re_search)29395b7b453SJohn Marino weak_alias (__re_search, re_search)
29495b7b453SJohn Marino #endif
29595b7b453SJohn Marino 
29695b7b453SJohn Marino regoff_t
297*09d4459fSDaniel Fojt re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298*09d4459fSDaniel Fojt 	    const char *string2, Idx length2, Idx start,
299*09d4459fSDaniel Fojt 	    struct re_registers *regs, Idx stop)
30095b7b453SJohn Marino {
30195b7b453SJohn Marino   return re_search_2_stub (bufp, string1, length1, string2, length2,
30295b7b453SJohn Marino 			   start, 0, regs, stop, true);
30395b7b453SJohn Marino }
30495b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_match_2,re_match_2)30595b7b453SJohn Marino weak_alias (__re_match_2, re_match_2)
30695b7b453SJohn Marino #endif
30795b7b453SJohn Marino 
30895b7b453SJohn Marino regoff_t
309*09d4459fSDaniel Fojt re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310*09d4459fSDaniel Fojt 	     const char *string2, Idx length2, Idx start, regoff_t range,
311*09d4459fSDaniel Fojt 	     struct re_registers *regs, Idx stop)
31295b7b453SJohn Marino {
31395b7b453SJohn Marino   return re_search_2_stub (bufp, string1, length1, string2, length2,
31495b7b453SJohn Marino 			   start, range, regs, stop, false);
31595b7b453SJohn Marino }
31695b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_search_2,re_search_2)31795b7b453SJohn Marino weak_alias (__re_search_2, re_search_2)
31895b7b453SJohn Marino #endif
31995b7b453SJohn Marino 
32095b7b453SJohn Marino static regoff_t
321*09d4459fSDaniel Fojt re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322*09d4459fSDaniel Fojt 		  Idx length1, const char *string2, Idx length2, Idx start,
323*09d4459fSDaniel Fojt 		  regoff_t range, struct re_registers *regs,
32495b7b453SJohn Marino 		  Idx stop, bool ret_len)
32595b7b453SJohn Marino {
32695b7b453SJohn Marino   const char *str;
32795b7b453SJohn Marino   regoff_t rval;
328*09d4459fSDaniel Fojt   Idx len;
32995b7b453SJohn Marino   char *s = NULL;
33095b7b453SJohn Marino 
331*09d4459fSDaniel Fojt   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
332*09d4459fSDaniel Fojt 			 || INT_ADD_WRAPV (length1, length2, &len))))
33395b7b453SJohn Marino     return -2;
33495b7b453SJohn Marino 
33595b7b453SJohn Marino   /* Concatenate the strings.  */
33695b7b453SJohn Marino   if (length2 > 0)
33795b7b453SJohn Marino     if (length1 > 0)
33895b7b453SJohn Marino       {
33995b7b453SJohn Marino 	s = re_malloc (char, len);
34095b7b453SJohn Marino 
341*09d4459fSDaniel Fojt 	if (__glibc_unlikely (s == NULL))
34295b7b453SJohn Marino 	  return -2;
34395b7b453SJohn Marino #ifdef _LIBC
34495b7b453SJohn Marino 	memcpy (__mempcpy (s, string1, length1), string2, length2);
34595b7b453SJohn Marino #else
34695b7b453SJohn Marino 	memcpy (s, string1, length1);
34795b7b453SJohn Marino 	memcpy (s + length1, string2, length2);
34895b7b453SJohn Marino #endif
34995b7b453SJohn Marino 	str = s;
35095b7b453SJohn Marino       }
35195b7b453SJohn Marino     else
35295b7b453SJohn Marino       str = string2;
35395b7b453SJohn Marino   else
35495b7b453SJohn Marino     str = string1;
35595b7b453SJohn Marino 
35695b7b453SJohn Marino   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
35795b7b453SJohn Marino 			 ret_len);
35895b7b453SJohn Marino   re_free (s);
35995b7b453SJohn Marino   return rval;
36095b7b453SJohn Marino }
36195b7b453SJohn Marino 
36295b7b453SJohn Marino /* The parameters have the same meaning as those of re_search.
36395b7b453SJohn Marino    Additional parameters:
36495b7b453SJohn Marino    If RET_LEN is true the length of the match is returned (re_match style);
36595b7b453SJohn Marino    otherwise the position of the match is returned.  */
36695b7b453SJohn Marino 
36795b7b453SJohn Marino static regoff_t
re_search_stub(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,regoff_t range,Idx stop,struct re_registers * regs,bool ret_len)368*09d4459fSDaniel Fojt re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
36995b7b453SJohn Marino 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
37095b7b453SJohn Marino 		bool ret_len)
37195b7b453SJohn Marino {
37295b7b453SJohn Marino   reg_errcode_t result;
37395b7b453SJohn Marino   regmatch_t *pmatch;
37495b7b453SJohn Marino   Idx nregs;
37595b7b453SJohn Marino   regoff_t rval;
37695b7b453SJohn Marino   int eflags = 0;
377cf28ed85SJohn Marino   re_dfa_t *dfa = bufp->buffer;
37895b7b453SJohn Marino   Idx last_start = start + range;
37995b7b453SJohn Marino 
38095b7b453SJohn Marino   /* Check for out-of-range.  */
381*09d4459fSDaniel Fojt   if (__glibc_unlikely (start < 0 || start > length))
38295b7b453SJohn Marino     return -1;
383*09d4459fSDaniel Fojt   if (__glibc_unlikely (length < last_start
384*09d4459fSDaniel Fojt 			|| (0 <= range && last_start < start)))
38595b7b453SJohn Marino     last_start = length;
386*09d4459fSDaniel Fojt   else if (__glibc_unlikely (last_start < 0
387*09d4459fSDaniel Fojt 			     || (range < 0 && start <= last_start)))
38895b7b453SJohn Marino     last_start = 0;
38995b7b453SJohn Marino 
390680a9cb8SJohn Marino   lock_lock (dfa->lock);
39195b7b453SJohn Marino 
39295b7b453SJohn Marino   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
39395b7b453SJohn Marino   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
39495b7b453SJohn Marino 
39595b7b453SJohn Marino   /* Compile fastmap if we haven't yet.  */
39695b7b453SJohn Marino   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
39795b7b453SJohn Marino     re_compile_fastmap (bufp);
39895b7b453SJohn Marino 
399*09d4459fSDaniel Fojt   if (__glibc_unlikely (bufp->no_sub))
40095b7b453SJohn Marino     regs = NULL;
40195b7b453SJohn Marino 
40295b7b453SJohn Marino   /* We need at least 1 register.  */
40395b7b453SJohn Marino   if (regs == NULL)
40495b7b453SJohn Marino     nregs = 1;
405*09d4459fSDaniel Fojt   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406*09d4459fSDaniel Fojt 			     && regs->num_regs <= bufp->re_nsub))
40795b7b453SJohn Marino     {
40895b7b453SJohn Marino       nregs = regs->num_regs;
409*09d4459fSDaniel Fojt       if (__glibc_unlikely (nregs < 1))
41095b7b453SJohn Marino 	{
41195b7b453SJohn Marino 	  /* Nothing can be copied to regs.  */
41295b7b453SJohn Marino 	  regs = NULL;
41395b7b453SJohn Marino 	  nregs = 1;
41495b7b453SJohn Marino 	}
41595b7b453SJohn Marino     }
41695b7b453SJohn Marino   else
41795b7b453SJohn Marino     nregs = bufp->re_nsub + 1;
41895b7b453SJohn Marino   pmatch = re_malloc (regmatch_t, nregs);
419*09d4459fSDaniel Fojt   if (__glibc_unlikely (pmatch == NULL))
42095b7b453SJohn Marino     {
42195b7b453SJohn Marino       rval = -2;
42295b7b453SJohn Marino       goto out;
42395b7b453SJohn Marino     }
42495b7b453SJohn Marino 
42595b7b453SJohn Marino   result = re_search_internal (bufp, string, length, start, last_start, stop,
42695b7b453SJohn Marino 			       nregs, pmatch, eflags);
42795b7b453SJohn Marino 
42895b7b453SJohn Marino   rval = 0;
42995b7b453SJohn Marino 
430a8597f6cSJohn Marino   /* I hope we needn't fill their regs with -1's when no match was found.  */
43195b7b453SJohn Marino   if (result != REG_NOERROR)
432cf28ed85SJohn Marino     rval = result == REG_NOMATCH ? -1 : -2;
43395b7b453SJohn Marino   else if (regs != NULL)
43495b7b453SJohn Marino     {
43595b7b453SJohn Marino       /* If caller wants register contents data back, copy them.  */
43695b7b453SJohn Marino       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
43795b7b453SJohn Marino 					   bufp->regs_allocated);
438*09d4459fSDaniel Fojt       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
43995b7b453SJohn Marino 	rval = -2;
44095b7b453SJohn Marino     }
44195b7b453SJohn Marino 
442*09d4459fSDaniel Fojt   if (__glibc_likely (rval == 0))
44395b7b453SJohn Marino     {
44495b7b453SJohn Marino       if (ret_len)
44595b7b453SJohn Marino 	{
446*09d4459fSDaniel Fojt 	  DEBUG_ASSERT (pmatch[0].rm_so == start);
44795b7b453SJohn Marino 	  rval = pmatch[0].rm_eo - start;
44895b7b453SJohn Marino 	}
44995b7b453SJohn Marino       else
45095b7b453SJohn Marino 	rval = pmatch[0].rm_so;
45195b7b453SJohn Marino     }
45295b7b453SJohn Marino   re_free (pmatch);
45395b7b453SJohn Marino  out:
454680a9cb8SJohn Marino   lock_unlock (dfa->lock);
45595b7b453SJohn Marino   return rval;
45695b7b453SJohn Marino }
45795b7b453SJohn Marino 
458cf28ed85SJohn Marino static unsigned
re_copy_regs(struct re_registers * regs,regmatch_t * pmatch,Idx nregs,int regs_allocated)45995b7b453SJohn Marino re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
46095b7b453SJohn Marino 	      int regs_allocated)
46195b7b453SJohn Marino {
46295b7b453SJohn Marino   int rval = REGS_REALLOCATE;
46395b7b453SJohn Marino   Idx i;
46495b7b453SJohn Marino   Idx need_regs = nregs + 1;
465cf28ed85SJohn Marino   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
46695b7b453SJohn Marino      uses.  */
46795b7b453SJohn Marino 
46895b7b453SJohn Marino   /* Have the register data arrays been allocated?  */
46995b7b453SJohn Marino   if (regs_allocated == REGS_UNALLOCATED)
47095b7b453SJohn Marino     { /* No.  So allocate them with malloc.  */
47195b7b453SJohn Marino       regs->start = re_malloc (regoff_t, need_regs);
472*09d4459fSDaniel Fojt       if (__glibc_unlikely (regs->start == NULL))
47395b7b453SJohn Marino 	return REGS_UNALLOCATED;
47495b7b453SJohn Marino       regs->end = re_malloc (regoff_t, need_regs);
475*09d4459fSDaniel Fojt       if (__glibc_unlikely (regs->end == NULL))
47695b7b453SJohn Marino 	{
47795b7b453SJohn Marino 	  re_free (regs->start);
47895b7b453SJohn Marino 	  return REGS_UNALLOCATED;
47995b7b453SJohn Marino 	}
48095b7b453SJohn Marino       regs->num_regs = need_regs;
48195b7b453SJohn Marino     }
48295b7b453SJohn Marino   else if (regs_allocated == REGS_REALLOCATE)
48395b7b453SJohn Marino     { /* Yes.  If we need more elements than were already
48495b7b453SJohn Marino 	 allocated, reallocate them.  If we need fewer, just
48595b7b453SJohn Marino 	 leave it alone.  */
486*09d4459fSDaniel Fojt       if (__glibc_unlikely (need_regs > regs->num_regs))
48795b7b453SJohn Marino 	{
48895b7b453SJohn Marino 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
48995b7b453SJohn Marino 	  regoff_t *new_end;
490*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (new_start == NULL))
49195b7b453SJohn Marino 	    return REGS_UNALLOCATED;
49295b7b453SJohn Marino 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
493*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (new_end == NULL))
49495b7b453SJohn Marino 	    {
49595b7b453SJohn Marino 	      re_free (new_start);
49695b7b453SJohn Marino 	      return REGS_UNALLOCATED;
49795b7b453SJohn Marino 	    }
49895b7b453SJohn Marino 	  regs->start = new_start;
49995b7b453SJohn Marino 	  regs->end = new_end;
50095b7b453SJohn Marino 	  regs->num_regs = need_regs;
50195b7b453SJohn Marino 	}
50295b7b453SJohn Marino     }
50395b7b453SJohn Marino   else
50495b7b453SJohn Marino     {
505*09d4459fSDaniel Fojt       DEBUG_ASSERT (regs_allocated == REGS_FIXED);
50695b7b453SJohn Marino       /* This function may not be called with REGS_FIXED and nregs too big.  */
507*09d4459fSDaniel Fojt       DEBUG_ASSERT (nregs <= regs->num_regs);
50895b7b453SJohn Marino       rval = REGS_FIXED;
50995b7b453SJohn Marino     }
51095b7b453SJohn Marino 
51195b7b453SJohn Marino   /* Copy the regs.  */
51295b7b453SJohn Marino   for (i = 0; i < nregs; ++i)
51395b7b453SJohn Marino     {
51495b7b453SJohn Marino       regs->start[i] = pmatch[i].rm_so;
51595b7b453SJohn Marino       regs->end[i] = pmatch[i].rm_eo;
51695b7b453SJohn Marino     }
51795b7b453SJohn Marino   for ( ; i < regs->num_regs; ++i)
51895b7b453SJohn Marino     regs->start[i] = regs->end[i] = -1;
51995b7b453SJohn Marino 
52095b7b453SJohn Marino   return rval;
52195b7b453SJohn Marino }
52295b7b453SJohn Marino 
52395b7b453SJohn Marino /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
52495b7b453SJohn Marino    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
52595b7b453SJohn Marino    this memory for recording register information.  STARTS and ENDS
52695b7b453SJohn Marino    must be allocated using the malloc library routine, and must each
52795b7b453SJohn Marino    be at least NUM_REGS * sizeof (regoff_t) bytes long.
52895b7b453SJohn Marino 
52995b7b453SJohn Marino    If NUM_REGS == 0, then subsequent matches should allocate their own
53095b7b453SJohn Marino    register data.
53195b7b453SJohn Marino 
53295b7b453SJohn Marino    Unless this function is called, the first search or match using
53395b7b453SJohn Marino    PATTERN_BUFFER will allocate its own register data, without
53495b7b453SJohn Marino    freeing the old data.  */
53595b7b453SJohn Marino 
53695b7b453SJohn Marino void
re_set_registers(struct re_pattern_buffer * bufp,struct re_registers * regs,__re_size_t num_regs,regoff_t * starts,regoff_t * ends)537*09d4459fSDaniel Fojt re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
538*09d4459fSDaniel Fojt 		  __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
53995b7b453SJohn Marino {
54095b7b453SJohn Marino   if (num_regs)
54195b7b453SJohn Marino     {
54295b7b453SJohn Marino       bufp->regs_allocated = REGS_REALLOCATE;
54395b7b453SJohn Marino       regs->num_regs = num_regs;
54495b7b453SJohn Marino       regs->start = starts;
54595b7b453SJohn Marino       regs->end = ends;
54695b7b453SJohn Marino     }
54795b7b453SJohn Marino   else
54895b7b453SJohn Marino     {
54995b7b453SJohn Marino       bufp->regs_allocated = REGS_UNALLOCATED;
55095b7b453SJohn Marino       regs->num_regs = 0;
55195b7b453SJohn Marino       regs->start = regs->end = NULL;
55295b7b453SJohn Marino     }
55395b7b453SJohn Marino }
55495b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)55595b7b453SJohn Marino weak_alias (__re_set_registers, re_set_registers)
55695b7b453SJohn Marino #endif
55795b7b453SJohn Marino 
55895b7b453SJohn Marino /* Entry points compatible with 4.2 BSD regex library.  We don't define
55995b7b453SJohn Marino    them unless specifically requested.  */
56095b7b453SJohn Marino 
56195b7b453SJohn Marino #if defined _REGEX_RE_COMP || defined _LIBC
56295b7b453SJohn Marino int
56395b7b453SJohn Marino # ifdef _LIBC
56495b7b453SJohn Marino weak_function
56595b7b453SJohn Marino # endif
566*09d4459fSDaniel Fojt re_exec (const char *s)
56795b7b453SJohn Marino {
56895b7b453SJohn Marino   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
56995b7b453SJohn Marino }
57095b7b453SJohn Marino #endif /* _REGEX_RE_COMP */
57195b7b453SJohn Marino 
57295b7b453SJohn Marino /* Internal entry point.  */
57395b7b453SJohn Marino 
57495b7b453SJohn Marino /* Searches for a compiled pattern PREG in the string STRING, whose
57595b7b453SJohn Marino    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
57695b7b453SJohn Marino    meaning as with regexec.  LAST_START is START + RANGE, where
57795b7b453SJohn Marino    START and RANGE have the same meaning as with re_search.
57895b7b453SJohn Marino    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
57995b7b453SJohn Marino    otherwise return the error code.
58095b7b453SJohn Marino    Note: We assume front end functions already check ranges.
58195b7b453SJohn Marino    (0 <= LAST_START && LAST_START <= LENGTH)  */
58295b7b453SJohn Marino 
58395b7b453SJohn Marino static reg_errcode_t
584cf28ed85SJohn Marino __attribute_warn_unused_result__
re_search_internal(const regex_t * preg,const char * string,Idx length,Idx start,Idx last_start,Idx stop,size_t nmatch,regmatch_t pmatch[],int eflags)585*09d4459fSDaniel Fojt re_search_internal (const regex_t *preg, const char *string, Idx length,
586*09d4459fSDaniel Fojt 		    Idx start, Idx last_start, Idx stop, size_t nmatch,
587*09d4459fSDaniel Fojt 		    regmatch_t pmatch[], int eflags)
58895b7b453SJohn Marino {
58995b7b453SJohn Marino   reg_errcode_t err;
590cf28ed85SJohn Marino   const re_dfa_t *dfa = preg->buffer;
59195b7b453SJohn Marino   Idx left_lim, right_lim;
59295b7b453SJohn Marino   int incr;
59395b7b453SJohn Marino   bool fl_longest_match;
59495b7b453SJohn Marino   int match_kind;
59595b7b453SJohn Marino   Idx match_first;
596*09d4459fSDaniel Fojt   Idx match_last = -1;
59795b7b453SJohn Marino   Idx extra_nmatch;
59895b7b453SJohn Marino   bool sb;
59995b7b453SJohn Marino   int ch;
60095b7b453SJohn Marino   re_match_context_t mctx = { .dfa = dfa };
60195b7b453SJohn Marino   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
60295b7b453SJohn Marino 		    && start != last_start && !preg->can_be_null)
60395b7b453SJohn Marino 		   ? preg->fastmap : NULL);
60495b7b453SJohn Marino   RE_TRANSLATE_TYPE t = preg->translate;
60595b7b453SJohn Marino 
60695b7b453SJohn Marino   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
60795b7b453SJohn Marino   nmatch -= extra_nmatch;
60895b7b453SJohn Marino 
60995b7b453SJohn Marino   /* Check if the DFA haven't been compiled.  */
610*09d4459fSDaniel Fojt   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
611*09d4459fSDaniel Fojt 			|| dfa->init_state_word == NULL
612*09d4459fSDaniel Fojt 			|| dfa->init_state_nl == NULL
613*09d4459fSDaniel Fojt 			|| dfa->init_state_begbuf == NULL))
61495b7b453SJohn Marino     return REG_NOMATCH;
61595b7b453SJohn Marino 
61695b7b453SJohn Marino   /* We assume front-end functions already check them.  */
617*09d4459fSDaniel Fojt   DEBUG_ASSERT (0 <= last_start && last_start <= length);
61895b7b453SJohn Marino 
61995b7b453SJohn Marino   /* If initial states with non-begbuf contexts have no elements,
62095b7b453SJohn Marino      the regex must be anchored.  If preg->newline_anchor is set,
62195b7b453SJohn Marino      we'll never use init_state_nl, so do not check it.  */
62295b7b453SJohn Marino   if (dfa->init_state->nodes.nelem == 0
62395b7b453SJohn Marino       && dfa->init_state_word->nodes.nelem == 0
62495b7b453SJohn Marino       && (dfa->init_state_nl->nodes.nelem == 0
62595b7b453SJohn Marino 	  || !preg->newline_anchor))
62695b7b453SJohn Marino     {
62795b7b453SJohn Marino       if (start != 0 && last_start != 0)
62895b7b453SJohn Marino         return REG_NOMATCH;
62995b7b453SJohn Marino       start = last_start = 0;
63095b7b453SJohn Marino     }
63195b7b453SJohn Marino 
63295b7b453SJohn Marino   /* We must check the longest matching, if nmatch > 0.  */
63395b7b453SJohn Marino   fl_longest_match = (nmatch != 0 || dfa->nbackref);
63495b7b453SJohn Marino 
63595b7b453SJohn Marino   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
63695b7b453SJohn Marino 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
63795b7b453SJohn Marino 			    dfa);
638*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
63995b7b453SJohn Marino     goto free_return;
64095b7b453SJohn Marino   mctx.input.stop = stop;
64195b7b453SJohn Marino   mctx.input.raw_stop = stop;
64295b7b453SJohn Marino   mctx.input.newline_anchor = preg->newline_anchor;
64395b7b453SJohn Marino 
64495b7b453SJohn Marino   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
645*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
64695b7b453SJohn Marino     goto free_return;
64795b7b453SJohn Marino 
64895b7b453SJohn Marino   /* We will log all the DFA states through which the dfa pass,
64995b7b453SJohn Marino      if nmatch > 1, or this dfa has "multibyte node", which is a
65095b7b453SJohn Marino      back-reference or a node which can accept multibyte character or
65195b7b453SJohn Marino      multi character collating element.  */
65295b7b453SJohn Marino   if (nmatch > 1 || dfa->has_mb_node)
65395b7b453SJohn Marino     {
65495b7b453SJohn Marino       /* Avoid overflow.  */
655*09d4459fSDaniel Fojt       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
656*09d4459fSDaniel Fojt 			     <= mctx.input.bufs_len)))
65795b7b453SJohn Marino 	{
65895b7b453SJohn Marino 	  err = REG_ESPACE;
65995b7b453SJohn Marino 	  goto free_return;
66095b7b453SJohn Marino 	}
66195b7b453SJohn Marino 
66295b7b453SJohn Marino       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
663*09d4459fSDaniel Fojt       if (__glibc_unlikely (mctx.state_log == NULL))
66495b7b453SJohn Marino 	{
66595b7b453SJohn Marino 	  err = REG_ESPACE;
66695b7b453SJohn Marino 	  goto free_return;
66795b7b453SJohn Marino 	}
66895b7b453SJohn Marino     }
66995b7b453SJohn Marino 
67095b7b453SJohn Marino   match_first = start;
67195b7b453SJohn Marino   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
67295b7b453SJohn Marino 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
67395b7b453SJohn Marino 
674680a9cb8SJohn Marino   /* Check incrementally whether the input string matches.  */
67595b7b453SJohn Marino   incr = (last_start < start) ? -1 : 1;
67695b7b453SJohn Marino   left_lim = (last_start < start) ? last_start : start;
67795b7b453SJohn Marino   right_lim = (last_start < start) ? start : last_start;
67895b7b453SJohn Marino   sb = dfa->mb_cur_max == 1;
67995b7b453SJohn Marino   match_kind =
68095b7b453SJohn Marino     (fastmap
68195b7b453SJohn Marino      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
68295b7b453SJohn Marino 	| (start <= last_start ? 2 : 0)
68395b7b453SJohn Marino 	| (t != NULL ? 1 : 0))
68495b7b453SJohn Marino      : 8);
68595b7b453SJohn Marino 
68695b7b453SJohn Marino   for (;; match_first += incr)
68795b7b453SJohn Marino     {
68895b7b453SJohn Marino       err = REG_NOMATCH;
68995b7b453SJohn Marino       if (match_first < left_lim || right_lim < match_first)
69095b7b453SJohn Marino 	goto free_return;
69195b7b453SJohn Marino 
69295b7b453SJohn Marino       /* Advance as rapidly as possible through the string, until we
69395b7b453SJohn Marino 	 find a plausible place to start matching.  This may be done
69495b7b453SJohn Marino 	 with varying efficiency, so there are various possibilities:
69595b7b453SJohn Marino 	 only the most common of them are specialized, in order to
69695b7b453SJohn Marino 	 save on code size.  We use a switch statement for speed.  */
69795b7b453SJohn Marino       switch (match_kind)
69895b7b453SJohn Marino 	{
69995b7b453SJohn Marino 	case 8:
70095b7b453SJohn Marino 	  /* No fastmap.  */
70195b7b453SJohn Marino 	  break;
70295b7b453SJohn Marino 
70395b7b453SJohn Marino 	case 7:
70495b7b453SJohn Marino 	  /* Fastmap with single-byte translation, match forward.  */
705*09d4459fSDaniel Fojt 	  while (__glibc_likely (match_first < right_lim)
70695b7b453SJohn Marino 		 && !fastmap[t[(unsigned char) string[match_first]]])
70795b7b453SJohn Marino 	    ++match_first;
70895b7b453SJohn Marino 	  goto forward_match_found_start_or_reached_end;
70995b7b453SJohn Marino 
71095b7b453SJohn Marino 	case 6:
71195b7b453SJohn Marino 	  /* Fastmap without translation, match forward.  */
712*09d4459fSDaniel Fojt 	  while (__glibc_likely (match_first < right_lim)
71395b7b453SJohn Marino 		 && !fastmap[(unsigned char) string[match_first]])
71495b7b453SJohn Marino 	    ++match_first;
71595b7b453SJohn Marino 
71695b7b453SJohn Marino 	forward_match_found_start_or_reached_end:
717*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (match_first == right_lim))
71895b7b453SJohn Marino 	    {
71995b7b453SJohn Marino 	      ch = match_first >= length
72095b7b453SJohn Marino 		       ? 0 : (unsigned char) string[match_first];
72195b7b453SJohn Marino 	      if (!fastmap[t ? t[ch] : ch])
72295b7b453SJohn Marino 		goto free_return;
72395b7b453SJohn Marino 	    }
72495b7b453SJohn Marino 	  break;
72595b7b453SJohn Marino 
72695b7b453SJohn Marino 	case 4:
72795b7b453SJohn Marino 	case 5:
72895b7b453SJohn Marino 	  /* Fastmap without multi-byte translation, match backwards.  */
72995b7b453SJohn Marino 	  while (match_first >= left_lim)
73095b7b453SJohn Marino 	    {
73195b7b453SJohn Marino 	      ch = match_first >= length
73295b7b453SJohn Marino 		       ? 0 : (unsigned char) string[match_first];
73395b7b453SJohn Marino 	      if (fastmap[t ? t[ch] : ch])
73495b7b453SJohn Marino 		break;
73595b7b453SJohn Marino 	      --match_first;
73695b7b453SJohn Marino 	    }
73795b7b453SJohn Marino 	  if (match_first < left_lim)
73895b7b453SJohn Marino 	    goto free_return;
73995b7b453SJohn Marino 	  break;
74095b7b453SJohn Marino 
74195b7b453SJohn Marino 	default:
74295b7b453SJohn Marino 	  /* In this case, we can't determine easily the current byte,
74395b7b453SJohn Marino 	     since it might be a component byte of a multibyte
74495b7b453SJohn Marino 	     character.  Then we use the constructed buffer instead.  */
74595b7b453SJohn Marino 	  for (;;)
74695b7b453SJohn Marino 	    {
74795b7b453SJohn Marino 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
74895b7b453SJohn Marino 		 buffers.  */
74995b7b453SJohn Marino 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
750*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (offset
751*09d4459fSDaniel Fojt 				    >= (__re_size_t) mctx.input.valid_raw_len))
75295b7b453SJohn Marino 		{
75395b7b453SJohn Marino 		  err = re_string_reconstruct (&mctx.input, match_first,
75495b7b453SJohn Marino 					       eflags);
755*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
75695b7b453SJohn Marino 		    goto free_return;
75795b7b453SJohn Marino 
75895b7b453SJohn Marino 		  offset = match_first - mctx.input.raw_mbs_idx;
75995b7b453SJohn Marino 		}
76095b7b453SJohn Marino 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
76195b7b453SJohn Marino 		 Note that MATCH_FIRST must not be smaller than 0.  */
76295b7b453SJohn Marino 	      ch = (match_first >= length
76395b7b453SJohn Marino 		    ? 0 : re_string_byte_at (&mctx.input, offset));
76495b7b453SJohn Marino 	      if (fastmap[ch])
76595b7b453SJohn Marino 		break;
76695b7b453SJohn Marino 	      match_first += incr;
76795b7b453SJohn Marino 	      if (match_first < left_lim || match_first > right_lim)
76895b7b453SJohn Marino 		{
76995b7b453SJohn Marino 		  err = REG_NOMATCH;
77095b7b453SJohn Marino 		  goto free_return;
77195b7b453SJohn Marino 		}
77295b7b453SJohn Marino 	    }
77395b7b453SJohn Marino 	  break;
77495b7b453SJohn Marino 	}
77595b7b453SJohn Marino 
77695b7b453SJohn Marino       /* Reconstruct the buffers so that the matcher can assume that
77795b7b453SJohn Marino 	 the matching starts from the beginning of the buffer.  */
77895b7b453SJohn Marino       err = re_string_reconstruct (&mctx.input, match_first, eflags);
779*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
78095b7b453SJohn Marino 	goto free_return;
78195b7b453SJohn Marino 
78295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
78395b7b453SJohn Marino      /* Don't consider this char as a possible match start if it part,
78495b7b453SJohn Marino 	yet isn't the head, of a multibyte character.  */
78595b7b453SJohn Marino       if (!sb && !re_string_first_byte (&mctx.input, 0))
78695b7b453SJohn Marino 	continue;
78795b7b453SJohn Marino #endif
78895b7b453SJohn Marino 
78995b7b453SJohn Marino       /* It seems to be appropriate one, then use the matcher.  */
79095b7b453SJohn Marino       /* We assume that the matching starts from 0.  */
79195b7b453SJohn Marino       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
79295b7b453SJohn Marino       match_last = check_matching (&mctx, fl_longest_match,
79395b7b453SJohn Marino 				   start <= last_start ? &match_first : NULL);
794*09d4459fSDaniel Fojt       if (match_last != -1)
79595b7b453SJohn Marino 	{
796*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (match_last == -2))
79795b7b453SJohn Marino 	    {
79895b7b453SJohn Marino 	      err = REG_ESPACE;
79995b7b453SJohn Marino 	      goto free_return;
80095b7b453SJohn Marino 	    }
80195b7b453SJohn Marino 	  else
80295b7b453SJohn Marino 	    {
80395b7b453SJohn Marino 	      mctx.match_last = match_last;
80495b7b453SJohn Marino 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
80595b7b453SJohn Marino 		{
80695b7b453SJohn Marino 		  re_dfastate_t *pstate = mctx.state_log[match_last];
80795b7b453SJohn Marino 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
80895b7b453SJohn Marino 							     match_last);
80995b7b453SJohn Marino 		}
81095b7b453SJohn Marino 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
81195b7b453SJohn Marino 		  || dfa->nbackref)
81295b7b453SJohn Marino 		{
81395b7b453SJohn Marino 		  err = prune_impossible_nodes (&mctx);
81495b7b453SJohn Marino 		  if (err == REG_NOERROR)
81595b7b453SJohn Marino 		    break;
816*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOMATCH))
81795b7b453SJohn Marino 		    goto free_return;
818*09d4459fSDaniel Fojt 		  match_last = -1;
81995b7b453SJohn Marino 		}
82095b7b453SJohn Marino 	      else
82195b7b453SJohn Marino 		break; /* We found a match.  */
82295b7b453SJohn Marino 	    }
82395b7b453SJohn Marino 	}
82495b7b453SJohn Marino 
82595b7b453SJohn Marino       match_ctx_clean (&mctx);
82695b7b453SJohn Marino     }
82795b7b453SJohn Marino 
828*09d4459fSDaniel Fojt   DEBUG_ASSERT (match_last != -1);
829*09d4459fSDaniel Fojt   DEBUG_ASSERT (err == REG_NOERROR);
83095b7b453SJohn Marino 
83195b7b453SJohn Marino   /* Set pmatch[] if we need.  */
83295b7b453SJohn Marino   if (nmatch > 0)
83395b7b453SJohn Marino     {
83495b7b453SJohn Marino       Idx reg_idx;
83595b7b453SJohn Marino 
83695b7b453SJohn Marino       /* Initialize registers.  */
83795b7b453SJohn Marino       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
83895b7b453SJohn Marino 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
83995b7b453SJohn Marino 
84095b7b453SJohn Marino       /* Set the points where matching start/end.  */
84195b7b453SJohn Marino       pmatch[0].rm_so = 0;
84295b7b453SJohn Marino       pmatch[0].rm_eo = mctx.match_last;
84395b7b453SJohn Marino       /* FIXME: This function should fail if mctx.match_last exceeds
84495b7b453SJohn Marino 	 the maximum possible regoff_t value.  We need a new error
84595b7b453SJohn Marino 	 code REG_OVERFLOW.  */
84695b7b453SJohn Marino 
84795b7b453SJohn Marino       if (!preg->no_sub && nmatch > 1)
84895b7b453SJohn Marino 	{
84995b7b453SJohn Marino 	  err = set_regs (preg, &mctx, nmatch, pmatch,
85095b7b453SJohn Marino 			  dfa->has_plural_match && dfa->nbackref > 0);
851*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
85295b7b453SJohn Marino 	    goto free_return;
85395b7b453SJohn Marino 	}
85495b7b453SJohn Marino 
855cf28ed85SJohn Marino       /* At last, add the offset to each register, since we slid
85695b7b453SJohn Marino 	 the buffers so that we could assume that the matching starts
85795b7b453SJohn Marino 	 from 0.  */
85895b7b453SJohn Marino       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
85995b7b453SJohn Marino 	if (pmatch[reg_idx].rm_so != -1)
86095b7b453SJohn Marino 	  {
86195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
862*09d4459fSDaniel Fojt 	    if (__glibc_unlikely (mctx.input.offsets_needed != 0))
86395b7b453SJohn Marino 	      {
86495b7b453SJohn Marino 		pmatch[reg_idx].rm_so =
86595b7b453SJohn Marino 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
86695b7b453SJohn Marino 		   ? mctx.input.valid_raw_len
86795b7b453SJohn Marino 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
86895b7b453SJohn Marino 		pmatch[reg_idx].rm_eo =
86995b7b453SJohn Marino 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
87095b7b453SJohn Marino 		   ? mctx.input.valid_raw_len
87195b7b453SJohn Marino 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
87295b7b453SJohn Marino 	      }
87395b7b453SJohn Marino #else
874*09d4459fSDaniel Fojt 	    DEBUG_ASSERT (mctx.input.offsets_needed == 0);
87595b7b453SJohn Marino #endif
87695b7b453SJohn Marino 	    pmatch[reg_idx].rm_so += match_first;
87795b7b453SJohn Marino 	    pmatch[reg_idx].rm_eo += match_first;
87895b7b453SJohn Marino 	  }
87995b7b453SJohn Marino       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
88095b7b453SJohn Marino 	{
88195b7b453SJohn Marino 	  pmatch[nmatch + reg_idx].rm_so = -1;
88295b7b453SJohn Marino 	  pmatch[nmatch + reg_idx].rm_eo = -1;
88395b7b453SJohn Marino 	}
88495b7b453SJohn Marino 
88595b7b453SJohn Marino       if (dfa->subexp_map)
88695b7b453SJohn Marino 	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
88795b7b453SJohn Marino 	  if (dfa->subexp_map[reg_idx] != reg_idx)
88895b7b453SJohn Marino 	    {
88995b7b453SJohn Marino 	      pmatch[reg_idx + 1].rm_so
89095b7b453SJohn Marino 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
89195b7b453SJohn Marino 	      pmatch[reg_idx + 1].rm_eo
89295b7b453SJohn Marino 		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
89395b7b453SJohn Marino 	    }
89495b7b453SJohn Marino     }
89595b7b453SJohn Marino 
89695b7b453SJohn Marino  free_return:
89795b7b453SJohn Marino   re_free (mctx.state_log);
89895b7b453SJohn Marino   if (dfa->nbackref)
89995b7b453SJohn Marino     match_ctx_free (&mctx);
90095b7b453SJohn Marino   re_string_destruct (&mctx.input);
90195b7b453SJohn Marino   return err;
90295b7b453SJohn Marino }
90395b7b453SJohn Marino 
90495b7b453SJohn Marino static reg_errcode_t
905cf28ed85SJohn Marino __attribute_warn_unused_result__
prune_impossible_nodes(re_match_context_t * mctx)90695b7b453SJohn Marino prune_impossible_nodes (re_match_context_t *mctx)
90795b7b453SJohn Marino {
90895b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
90995b7b453SJohn Marino   Idx halt_node, match_last;
91095b7b453SJohn Marino   reg_errcode_t ret;
91195b7b453SJohn Marino   re_dfastate_t **sifted_states;
91295b7b453SJohn Marino   re_dfastate_t **lim_states = NULL;
91395b7b453SJohn Marino   re_sift_context_t sctx;
914*09d4459fSDaniel Fojt   DEBUG_ASSERT (mctx->state_log != NULL);
91595b7b453SJohn Marino   match_last = mctx->match_last;
91695b7b453SJohn Marino   halt_node = mctx->last_node;
91795b7b453SJohn Marino 
91895b7b453SJohn Marino   /* Avoid overflow.  */
919*09d4459fSDaniel Fojt   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
920*09d4459fSDaniel Fojt 			<= match_last))
92195b7b453SJohn Marino     return REG_ESPACE;
92295b7b453SJohn Marino 
92395b7b453SJohn Marino   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
924*09d4459fSDaniel Fojt   if (__glibc_unlikely (sifted_states == NULL))
92595b7b453SJohn Marino     {
92695b7b453SJohn Marino       ret = REG_ESPACE;
92795b7b453SJohn Marino       goto free_return;
92895b7b453SJohn Marino     }
92995b7b453SJohn Marino   if (dfa->nbackref)
93095b7b453SJohn Marino     {
93195b7b453SJohn Marino       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
932*09d4459fSDaniel Fojt       if (__glibc_unlikely (lim_states == NULL))
93395b7b453SJohn Marino 	{
93495b7b453SJohn Marino 	  ret = REG_ESPACE;
93595b7b453SJohn Marino 	  goto free_return;
93695b7b453SJohn Marino 	}
93795b7b453SJohn Marino       while (1)
93895b7b453SJohn Marino 	{
93995b7b453SJohn Marino 	  memset (lim_states, '\0',
94095b7b453SJohn Marino 		  sizeof (re_dfastate_t *) * (match_last + 1));
94195b7b453SJohn Marino 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
94295b7b453SJohn Marino 			 match_last);
94395b7b453SJohn Marino 	  ret = sift_states_backward (mctx, &sctx);
94495b7b453SJohn Marino 	  re_node_set_free (&sctx.limits);
945*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (ret != REG_NOERROR))
94695b7b453SJohn Marino 	      goto free_return;
94795b7b453SJohn Marino 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
94895b7b453SJohn Marino 	    break;
94995b7b453SJohn Marino 	  do
95095b7b453SJohn Marino 	    {
95195b7b453SJohn Marino 	      --match_last;
952*09d4459fSDaniel Fojt 	      if (match_last < 0)
95395b7b453SJohn Marino 		{
95495b7b453SJohn Marino 		  ret = REG_NOMATCH;
95595b7b453SJohn Marino 		  goto free_return;
95695b7b453SJohn Marino 		}
95795b7b453SJohn Marino 	    } while (mctx->state_log[match_last] == NULL
95895b7b453SJohn Marino 		     || !mctx->state_log[match_last]->halt);
95995b7b453SJohn Marino 	  halt_node = check_halt_state_context (mctx,
96095b7b453SJohn Marino 						mctx->state_log[match_last],
96195b7b453SJohn Marino 						match_last);
96295b7b453SJohn Marino 	}
96395b7b453SJohn Marino       ret = merge_state_array (dfa, sifted_states, lim_states,
96495b7b453SJohn Marino 			       match_last + 1);
96595b7b453SJohn Marino       re_free (lim_states);
96695b7b453SJohn Marino       lim_states = NULL;
967*09d4459fSDaniel Fojt       if (__glibc_unlikely (ret != REG_NOERROR))
96895b7b453SJohn Marino 	goto free_return;
96995b7b453SJohn Marino     }
97095b7b453SJohn Marino   else
97195b7b453SJohn Marino     {
97295b7b453SJohn Marino       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
97395b7b453SJohn Marino       ret = sift_states_backward (mctx, &sctx);
97495b7b453SJohn Marino       re_node_set_free (&sctx.limits);
975*09d4459fSDaniel Fojt       if (__glibc_unlikely (ret != REG_NOERROR))
97695b7b453SJohn Marino 	goto free_return;
97795b7b453SJohn Marino       if (sifted_states[0] == NULL)
97895b7b453SJohn Marino 	{
97995b7b453SJohn Marino 	  ret = REG_NOMATCH;
98095b7b453SJohn Marino 	  goto free_return;
98195b7b453SJohn Marino 	}
98295b7b453SJohn Marino     }
98395b7b453SJohn Marino   re_free (mctx->state_log);
98495b7b453SJohn Marino   mctx->state_log = sifted_states;
98595b7b453SJohn Marino   sifted_states = NULL;
98695b7b453SJohn Marino   mctx->last_node = halt_node;
98795b7b453SJohn Marino   mctx->match_last = match_last;
98895b7b453SJohn Marino   ret = REG_NOERROR;
98995b7b453SJohn Marino  free_return:
99095b7b453SJohn Marino   re_free (sifted_states);
99195b7b453SJohn Marino   re_free (lim_states);
99295b7b453SJohn Marino   return ret;
99395b7b453SJohn Marino }
99495b7b453SJohn Marino 
99595b7b453SJohn Marino /* Acquire an initial state and return it.
99695b7b453SJohn Marino    We must select appropriate initial state depending on the context,
99795b7b453SJohn Marino    since initial states may have constraints like "\<", "^", etc..  */
99895b7b453SJohn Marino 
99995b7b453SJohn Marino static inline re_dfastate_t *
1000*09d4459fSDaniel Fojt __attribute__ ((always_inline))
acquire_init_state_context(reg_errcode_t * err,const re_match_context_t * mctx,Idx idx)100195b7b453SJohn Marino acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
100295b7b453SJohn Marino 			    Idx idx)
100395b7b453SJohn Marino {
100495b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
100595b7b453SJohn Marino   if (dfa->init_state->has_constraint)
100695b7b453SJohn Marino     {
100795b7b453SJohn Marino       unsigned int context;
100895b7b453SJohn Marino       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
100995b7b453SJohn Marino       if (IS_WORD_CONTEXT (context))
101095b7b453SJohn Marino 	return dfa->init_state_word;
101195b7b453SJohn Marino       else if (IS_ORDINARY_CONTEXT (context))
101295b7b453SJohn Marino 	return dfa->init_state;
101395b7b453SJohn Marino       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
101495b7b453SJohn Marino 	return dfa->init_state_begbuf;
101595b7b453SJohn Marino       else if (IS_NEWLINE_CONTEXT (context))
101695b7b453SJohn Marino 	return dfa->init_state_nl;
101795b7b453SJohn Marino       else if (IS_BEGBUF_CONTEXT (context))
101895b7b453SJohn Marino 	{
101995b7b453SJohn Marino 	  /* It is relatively rare case, then calculate on demand.  */
102095b7b453SJohn Marino 	  return re_acquire_state_context (err, dfa,
102195b7b453SJohn Marino 					   dfa->init_state->entrance_nodes,
102295b7b453SJohn Marino 					   context);
102395b7b453SJohn Marino 	}
102495b7b453SJohn Marino       else
102595b7b453SJohn Marino 	/* Must not happen?  */
102695b7b453SJohn Marino 	return dfa->init_state;
102795b7b453SJohn Marino     }
102895b7b453SJohn Marino   else
102995b7b453SJohn Marino     return dfa->init_state;
103095b7b453SJohn Marino }
103195b7b453SJohn Marino 
103295b7b453SJohn Marino /* Check whether the regular expression match input string INPUT or not,
1033*09d4459fSDaniel Fojt    and return the index where the matching end.  Return -1 if
1034*09d4459fSDaniel Fojt    there is no match, and return -2 in case of an error.
103595b7b453SJohn Marino    FL_LONGEST_MATCH means we want the POSIX longest matching.
103695b7b453SJohn Marino    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
103795b7b453SJohn Marino    next place where we may want to try matching.
1038cf28ed85SJohn Marino    Note that the matcher assumes that the matching starts from the current
103995b7b453SJohn Marino    index of the buffer.  */
104095b7b453SJohn Marino 
104195b7b453SJohn Marino static Idx
1042*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_matching(re_match_context_t * mctx,bool fl_longest_match,Idx * p_match_first)104395b7b453SJohn Marino check_matching (re_match_context_t *mctx, bool fl_longest_match,
104495b7b453SJohn Marino 		Idx *p_match_first)
104595b7b453SJohn Marino {
104695b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
104795b7b453SJohn Marino   reg_errcode_t err;
104895b7b453SJohn Marino   Idx match = 0;
1049*09d4459fSDaniel Fojt   Idx match_last = -1;
105095b7b453SJohn Marino   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
105195b7b453SJohn Marino   re_dfastate_t *cur_state;
105295b7b453SJohn Marino   bool at_init_state = p_match_first != NULL;
105395b7b453SJohn Marino   Idx next_start_idx = cur_str_idx;
105495b7b453SJohn Marino 
105595b7b453SJohn Marino   err = REG_NOERROR;
105695b7b453SJohn Marino   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
105795b7b453SJohn Marino   /* An initial state must not be NULL (invalid).  */
1058*09d4459fSDaniel Fojt   if (__glibc_unlikely (cur_state == NULL))
105995b7b453SJohn Marino     {
1060*09d4459fSDaniel Fojt       DEBUG_ASSERT (err == REG_ESPACE);
1061*09d4459fSDaniel Fojt       return -2;
106295b7b453SJohn Marino     }
106395b7b453SJohn Marino 
106495b7b453SJohn Marino   if (mctx->state_log != NULL)
106595b7b453SJohn Marino     {
106695b7b453SJohn Marino       mctx->state_log[cur_str_idx] = cur_state;
106795b7b453SJohn Marino 
106895b7b453SJohn Marino       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
106995b7b453SJohn Marino 	 later.  E.g. Processing back references.  */
1070*09d4459fSDaniel Fojt       if (__glibc_unlikely (dfa->nbackref))
107195b7b453SJohn Marino 	{
107295b7b453SJohn Marino 	  at_init_state = false;
107395b7b453SJohn Marino 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1074*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
107595b7b453SJohn Marino 	    return err;
107695b7b453SJohn Marino 
107795b7b453SJohn Marino 	  if (cur_state->has_backref)
107895b7b453SJohn Marino 	    {
107995b7b453SJohn Marino 	      err = transit_state_bkref (mctx, &cur_state->nodes);
1080*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
108195b7b453SJohn Marino 		return err;
108295b7b453SJohn Marino 	    }
108395b7b453SJohn Marino 	}
108495b7b453SJohn Marino     }
108595b7b453SJohn Marino 
108695b7b453SJohn Marino   /* If the RE accepts NULL string.  */
1087*09d4459fSDaniel Fojt   if (__glibc_unlikely (cur_state->halt))
108895b7b453SJohn Marino     {
108995b7b453SJohn Marino       if (!cur_state->has_constraint
109095b7b453SJohn Marino 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
109195b7b453SJohn Marino 	{
109295b7b453SJohn Marino 	  if (!fl_longest_match)
109395b7b453SJohn Marino 	    return cur_str_idx;
109495b7b453SJohn Marino 	  else
109595b7b453SJohn Marino 	    {
109695b7b453SJohn Marino 	      match_last = cur_str_idx;
109795b7b453SJohn Marino 	      match = 1;
109895b7b453SJohn Marino 	    }
109995b7b453SJohn Marino 	}
110095b7b453SJohn Marino     }
110195b7b453SJohn Marino 
110295b7b453SJohn Marino   while (!re_string_eoi (&mctx->input))
110395b7b453SJohn Marino     {
110495b7b453SJohn Marino       re_dfastate_t *old_state = cur_state;
110595b7b453SJohn Marino       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
110695b7b453SJohn Marino 
1107*09d4459fSDaniel Fojt       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1108cf28ed85SJohn Marino 	   && mctx->input.bufs_len < mctx->input.len)
1109*09d4459fSDaniel Fojt 	  || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
111095b7b453SJohn Marino 	      && mctx->input.valid_len < mctx->input.len))
111195b7b453SJohn Marino 	{
1112680a9cb8SJohn Marino 	  err = extend_buffers (mctx, next_char_idx + 1);
1113*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
111495b7b453SJohn Marino 	    {
1115*09d4459fSDaniel Fojt 	      DEBUG_ASSERT (err == REG_ESPACE);
1116*09d4459fSDaniel Fojt 	      return -2;
111795b7b453SJohn Marino 	    }
111895b7b453SJohn Marino 	}
111995b7b453SJohn Marino 
112095b7b453SJohn Marino       cur_state = transit_state (&err, mctx, cur_state);
112195b7b453SJohn Marino       if (mctx->state_log != NULL)
112295b7b453SJohn Marino 	cur_state = merge_state_with_log (&err, mctx, cur_state);
112395b7b453SJohn Marino 
112495b7b453SJohn Marino       if (cur_state == NULL)
112595b7b453SJohn Marino 	{
112695b7b453SJohn Marino 	  /* Reached the invalid state or an error.  Try to recover a valid
112795b7b453SJohn Marino 	     state using the state log, if available and if we have not
112895b7b453SJohn Marino 	     already found a valid (even if not the longest) match.  */
1129*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
1130*09d4459fSDaniel Fojt 	    return -2;
113195b7b453SJohn Marino 
113295b7b453SJohn Marino 	  if (mctx->state_log == NULL
113395b7b453SJohn Marino 	      || (match && !fl_longest_match)
113495b7b453SJohn Marino 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
113595b7b453SJohn Marino 	    break;
113695b7b453SJohn Marino 	}
113795b7b453SJohn Marino 
1138*09d4459fSDaniel Fojt       if (__glibc_unlikely (at_init_state))
113995b7b453SJohn Marino 	{
114095b7b453SJohn Marino 	  if (old_state == cur_state)
114195b7b453SJohn Marino 	    next_start_idx = next_char_idx;
114295b7b453SJohn Marino 	  else
114395b7b453SJohn Marino 	    at_init_state = false;
114495b7b453SJohn Marino 	}
114595b7b453SJohn Marino 
114695b7b453SJohn Marino       if (cur_state->halt)
114795b7b453SJohn Marino 	{
114895b7b453SJohn Marino 	  /* Reached a halt state.
114995b7b453SJohn Marino 	     Check the halt state can satisfy the current context.  */
115095b7b453SJohn Marino 	  if (!cur_state->has_constraint
115195b7b453SJohn Marino 	      || check_halt_state_context (mctx, cur_state,
115295b7b453SJohn Marino 					   re_string_cur_idx (&mctx->input)))
115395b7b453SJohn Marino 	    {
115495b7b453SJohn Marino 	      /* We found an appropriate halt state.  */
115595b7b453SJohn Marino 	      match_last = re_string_cur_idx (&mctx->input);
115695b7b453SJohn Marino 	      match = 1;
115795b7b453SJohn Marino 
115895b7b453SJohn Marino 	      /* We found a match, do not modify match_first below.  */
115995b7b453SJohn Marino 	      p_match_first = NULL;
116095b7b453SJohn Marino 	      if (!fl_longest_match)
116195b7b453SJohn Marino 		break;
116295b7b453SJohn Marino 	    }
116395b7b453SJohn Marino 	}
116495b7b453SJohn Marino     }
116595b7b453SJohn Marino 
116695b7b453SJohn Marino   if (p_match_first)
116795b7b453SJohn Marino     *p_match_first += next_start_idx;
116895b7b453SJohn Marino 
116995b7b453SJohn Marino   return match_last;
117095b7b453SJohn Marino }
117195b7b453SJohn Marino 
117295b7b453SJohn Marino /* Check NODE match the current context.  */
117395b7b453SJohn Marino 
117495b7b453SJohn Marino static bool
check_halt_node_context(const re_dfa_t * dfa,Idx node,unsigned int context)117595b7b453SJohn Marino check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
117695b7b453SJohn Marino {
117795b7b453SJohn Marino   re_token_type_t type = dfa->nodes[node].type;
117895b7b453SJohn Marino   unsigned int constraint = dfa->nodes[node].constraint;
117995b7b453SJohn Marino   if (type != END_OF_RE)
118095b7b453SJohn Marino     return false;
118195b7b453SJohn Marino   if (!constraint)
118295b7b453SJohn Marino     return true;
118395b7b453SJohn Marino   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
118495b7b453SJohn Marino     return false;
118595b7b453SJohn Marino   return true;
118695b7b453SJohn Marino }
118795b7b453SJohn Marino 
118895b7b453SJohn Marino /* Check the halt state STATE match the current context.
118995b7b453SJohn Marino    Return 0 if not match, if the node, STATE has, is a halt node and
119095b7b453SJohn Marino    match the context, return the node.  */
119195b7b453SJohn Marino 
119295b7b453SJohn Marino static Idx
check_halt_state_context(const re_match_context_t * mctx,const re_dfastate_t * state,Idx idx)119395b7b453SJohn Marino check_halt_state_context (const re_match_context_t *mctx,
119495b7b453SJohn Marino 			  const re_dfastate_t *state, Idx idx)
119595b7b453SJohn Marino {
119695b7b453SJohn Marino   Idx i;
119795b7b453SJohn Marino   unsigned int context;
1198*09d4459fSDaniel Fojt   DEBUG_ASSERT (state->halt);
119995b7b453SJohn Marino   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
120095b7b453SJohn Marino   for (i = 0; i < state->nodes.nelem; ++i)
120195b7b453SJohn Marino     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
120295b7b453SJohn Marino       return state->nodes.elems[i];
120395b7b453SJohn Marino   return 0;
120495b7b453SJohn Marino }
120595b7b453SJohn Marino 
120695b7b453SJohn Marino /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
120795b7b453SJohn Marino    corresponding to the DFA).
120895b7b453SJohn Marino    Return the destination node, and update EPS_VIA_NODES;
1209*09d4459fSDaniel Fojt    return -1 in case of errors.  */
121095b7b453SJohn Marino 
121195b7b453SJohn Marino static Idx
proceed_next_node(const re_match_context_t * mctx,Idx nregs,regmatch_t * regs,Idx * pidx,Idx node,re_node_set * eps_via_nodes,struct re_fail_stack_t * fs)121295b7b453SJohn Marino proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
121395b7b453SJohn Marino 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
121495b7b453SJohn Marino 		   struct re_fail_stack_t *fs)
121595b7b453SJohn Marino {
121695b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
121795b7b453SJohn Marino   Idx i;
121895b7b453SJohn Marino   bool ok;
121995b7b453SJohn Marino   if (IS_EPSILON_NODE (dfa->nodes[node].type))
122095b7b453SJohn Marino     {
122195b7b453SJohn Marino       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
122295b7b453SJohn Marino       re_node_set *edests = &dfa->edests[node];
122395b7b453SJohn Marino       Idx dest_node;
122495b7b453SJohn Marino       ok = re_node_set_insert (eps_via_nodes, node);
1225*09d4459fSDaniel Fojt       if (__glibc_unlikely (! ok))
1226*09d4459fSDaniel Fojt 	return -2;
1227*09d4459fSDaniel Fojt       /* Pick up a valid destination, or return -1 if none
122895b7b453SJohn Marino 	 is found.  */
1229*09d4459fSDaniel Fojt       for (dest_node = -1, i = 0; i < edests->nelem; ++i)
123095b7b453SJohn Marino 	{
123195b7b453SJohn Marino 	  Idx candidate = edests->elems[i];
123295b7b453SJohn Marino 	  if (!re_node_set_contains (cur_nodes, candidate))
123395b7b453SJohn Marino 	    continue;
1234*09d4459fSDaniel Fojt           if (dest_node == -1)
123595b7b453SJohn Marino 	    dest_node = candidate;
123695b7b453SJohn Marino 
123795b7b453SJohn Marino 	  else
123895b7b453SJohn Marino 	    {
123995b7b453SJohn Marino 	      /* In order to avoid infinite loop like "(a*)*", return the second
124095b7b453SJohn Marino 		 epsilon-transition if the first was already considered.  */
124195b7b453SJohn Marino 	      if (re_node_set_contains (eps_via_nodes, dest_node))
124295b7b453SJohn Marino 		return candidate;
124395b7b453SJohn Marino 
124495b7b453SJohn Marino 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
124595b7b453SJohn Marino 	      else if (fs != NULL
124695b7b453SJohn Marino 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
124795b7b453SJohn Marino 					   eps_via_nodes))
1248*09d4459fSDaniel Fojt 		return -2;
124995b7b453SJohn Marino 
125095b7b453SJohn Marino 	      /* We know we are going to exit.  */
125195b7b453SJohn Marino 	      break;
125295b7b453SJohn Marino 	    }
125395b7b453SJohn Marino 	}
125495b7b453SJohn Marino       return dest_node;
125595b7b453SJohn Marino     }
125695b7b453SJohn Marino   else
125795b7b453SJohn Marino     {
125895b7b453SJohn Marino       Idx naccepted = 0;
125995b7b453SJohn Marino       re_token_type_t type = dfa->nodes[node].type;
126095b7b453SJohn Marino 
126195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
126295b7b453SJohn Marino       if (dfa->nodes[node].accept_mb)
126395b7b453SJohn Marino 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
126495b7b453SJohn Marino       else
126595b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
126695b7b453SJohn Marino       if (type == OP_BACK_REF)
126795b7b453SJohn Marino 	{
126895b7b453SJohn Marino 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1269*09d4459fSDaniel Fojt 	  if (subexp_idx < nregs)
127095b7b453SJohn Marino 	    naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
127195b7b453SJohn Marino 	  if (fs != NULL)
127295b7b453SJohn Marino 	    {
1273*09d4459fSDaniel Fojt 	      if (subexp_idx >= nregs
1274*09d4459fSDaniel Fojt 		  || regs[subexp_idx].rm_so == -1
1275*09d4459fSDaniel Fojt 		  || regs[subexp_idx].rm_eo == -1)
1276*09d4459fSDaniel Fojt 		return -1;
127795b7b453SJohn Marino 	      else if (naccepted)
127895b7b453SJohn Marino 		{
127995b7b453SJohn Marino 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
1280*09d4459fSDaniel Fojt 		  if (mctx->input.valid_len - *pidx < naccepted
1281*09d4459fSDaniel Fojt 		      || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1282*09d4459fSDaniel Fojt 				  naccepted)
1283*09d4459fSDaniel Fojt 			  != 0))
1284*09d4459fSDaniel Fojt 		    return -1;
128595b7b453SJohn Marino 		}
128695b7b453SJohn Marino 	    }
128795b7b453SJohn Marino 
128895b7b453SJohn Marino 	  if (naccepted == 0)
128995b7b453SJohn Marino 	    {
129095b7b453SJohn Marino 	      Idx dest_node;
129195b7b453SJohn Marino 	      ok = re_node_set_insert (eps_via_nodes, node);
1292*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (! ok))
1293*09d4459fSDaniel Fojt 		return -2;
129495b7b453SJohn Marino 	      dest_node = dfa->edests[node].elems[0];
129595b7b453SJohn Marino 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
129695b7b453SJohn Marino 					dest_node))
129795b7b453SJohn Marino 		return dest_node;
129895b7b453SJohn Marino 	    }
129995b7b453SJohn Marino 	}
130095b7b453SJohn Marino 
130195b7b453SJohn Marino       if (naccepted != 0
130295b7b453SJohn Marino 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
130395b7b453SJohn Marino 	{
130495b7b453SJohn Marino 	  Idx dest_node = dfa->nexts[node];
130595b7b453SJohn Marino 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
130695b7b453SJohn Marino 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
130795b7b453SJohn Marino 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
130895b7b453SJohn Marino 					       dest_node)))
1309*09d4459fSDaniel Fojt 	    return -1;
131095b7b453SJohn Marino 	  re_node_set_empty (eps_via_nodes);
131195b7b453SJohn Marino 	  return dest_node;
131295b7b453SJohn Marino 	}
131395b7b453SJohn Marino     }
1314*09d4459fSDaniel Fojt   return -1;
131595b7b453SJohn Marino }
131695b7b453SJohn Marino 
131795b7b453SJohn Marino static reg_errcode_t
1318*09d4459fSDaniel Fojt __attribute_warn_unused_result__
push_fail_stack(struct re_fail_stack_t * fs,Idx str_idx,Idx dest_node,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)131995b7b453SJohn Marino push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
132095b7b453SJohn Marino 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
132195b7b453SJohn Marino {
132295b7b453SJohn Marino   reg_errcode_t err;
132395b7b453SJohn Marino   Idx num = fs->num++;
132495b7b453SJohn Marino   if (fs->num == fs->alloc)
132595b7b453SJohn Marino     {
132695b7b453SJohn Marino       struct re_fail_stack_ent_t *new_array;
1327*09d4459fSDaniel Fojt       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1328*09d4459fSDaniel Fojt                               fs->alloc * 2);
132995b7b453SJohn Marino       if (new_array == NULL)
133095b7b453SJohn Marino 	return REG_ESPACE;
133195b7b453SJohn Marino       fs->alloc *= 2;
133295b7b453SJohn Marino       fs->stack = new_array;
133395b7b453SJohn Marino     }
133495b7b453SJohn Marino   fs->stack[num].idx = str_idx;
133595b7b453SJohn Marino   fs->stack[num].node = dest_node;
133695b7b453SJohn Marino   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
133795b7b453SJohn Marino   if (fs->stack[num].regs == NULL)
133895b7b453SJohn Marino     return REG_ESPACE;
133995b7b453SJohn Marino   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
134095b7b453SJohn Marino   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
134195b7b453SJohn Marino   return err;
134295b7b453SJohn Marino }
134395b7b453SJohn Marino 
134495b7b453SJohn Marino static Idx
pop_fail_stack(struct re_fail_stack_t * fs,Idx * pidx,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)134595b7b453SJohn Marino pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
134695b7b453SJohn Marino 		regmatch_t *regs, re_node_set *eps_via_nodes)
134795b7b453SJohn Marino {
134895b7b453SJohn Marino   Idx num = --fs->num;
1349*09d4459fSDaniel Fojt   DEBUG_ASSERT (num >= 0);
135095b7b453SJohn Marino   *pidx = fs->stack[num].idx;
135195b7b453SJohn Marino   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
135295b7b453SJohn Marino   re_node_set_free (eps_via_nodes);
135395b7b453SJohn Marino   re_free (fs->stack[num].regs);
135495b7b453SJohn Marino   *eps_via_nodes = fs->stack[num].eps_via_nodes;
135595b7b453SJohn Marino   return fs->stack[num].node;
135695b7b453SJohn Marino }
135795b7b453SJohn Marino 
135895b7b453SJohn Marino /* Set the positions where the subexpressions are starts/ends to registers
135995b7b453SJohn Marino    PMATCH.
136095b7b453SJohn Marino    Note: We assume that pmatch[0] is already set, and
136195b7b453SJohn Marino    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
136295b7b453SJohn Marino 
136395b7b453SJohn Marino static reg_errcode_t
1364*09d4459fSDaniel Fojt __attribute_warn_unused_result__
set_regs(const regex_t * preg,const re_match_context_t * mctx,size_t nmatch,regmatch_t * pmatch,bool fl_backtrack)136595b7b453SJohn Marino set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
136695b7b453SJohn Marino 	  regmatch_t *pmatch, bool fl_backtrack)
136795b7b453SJohn Marino {
1368cf28ed85SJohn Marino   const re_dfa_t *dfa = preg->buffer;
136995b7b453SJohn Marino   Idx idx, cur_node;
137095b7b453SJohn Marino   re_node_set eps_via_nodes;
137195b7b453SJohn Marino   struct re_fail_stack_t *fs;
137295b7b453SJohn Marino   struct re_fail_stack_t fs_body = { 0, 2, NULL };
137395b7b453SJohn Marino   regmatch_t *prev_idx_match;
137495b7b453SJohn Marino   bool prev_idx_match_malloced = false;
137595b7b453SJohn Marino 
1376*09d4459fSDaniel Fojt   DEBUG_ASSERT (nmatch > 1);
1377*09d4459fSDaniel Fojt   DEBUG_ASSERT (mctx->state_log != NULL);
137895b7b453SJohn Marino   if (fl_backtrack)
137995b7b453SJohn Marino     {
138095b7b453SJohn Marino       fs = &fs_body;
138195b7b453SJohn Marino       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
138295b7b453SJohn Marino       if (fs->stack == NULL)
138395b7b453SJohn Marino 	return REG_ESPACE;
138495b7b453SJohn Marino     }
138595b7b453SJohn Marino   else
138695b7b453SJohn Marino     fs = NULL;
138795b7b453SJohn Marino 
138895b7b453SJohn Marino   cur_node = dfa->init_node;
138995b7b453SJohn Marino   re_node_set_init_empty (&eps_via_nodes);
139095b7b453SJohn Marino 
139195b7b453SJohn Marino   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
139295b7b453SJohn Marino     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
139395b7b453SJohn Marino   else
139495b7b453SJohn Marino     {
139595b7b453SJohn Marino       prev_idx_match = re_malloc (regmatch_t, nmatch);
139695b7b453SJohn Marino       if (prev_idx_match == NULL)
139795b7b453SJohn Marino 	{
139895b7b453SJohn Marino 	  free_fail_stack_return (fs);
139995b7b453SJohn Marino 	  return REG_ESPACE;
140095b7b453SJohn Marino 	}
140195b7b453SJohn Marino       prev_idx_match_malloced = true;
140295b7b453SJohn Marino     }
140395b7b453SJohn Marino   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
140495b7b453SJohn Marino 
140595b7b453SJohn Marino   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
140695b7b453SJohn Marino     {
140795b7b453SJohn Marino       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
140895b7b453SJohn Marino 
140995b7b453SJohn Marino       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
141095b7b453SJohn Marino 	{
141195b7b453SJohn Marino 	  Idx reg_idx;
141295b7b453SJohn Marino 	  if (fs)
141395b7b453SJohn Marino 	    {
141495b7b453SJohn Marino 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
141595b7b453SJohn Marino 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
141695b7b453SJohn Marino 		  break;
141795b7b453SJohn Marino 	      if (reg_idx == nmatch)
141895b7b453SJohn Marino 		{
141995b7b453SJohn Marino 		  re_node_set_free (&eps_via_nodes);
142095b7b453SJohn Marino 		  if (prev_idx_match_malloced)
142195b7b453SJohn Marino 		    re_free (prev_idx_match);
142295b7b453SJohn Marino 		  return free_fail_stack_return (fs);
142395b7b453SJohn Marino 		}
142495b7b453SJohn Marino 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
142595b7b453SJohn Marino 					 &eps_via_nodes);
142695b7b453SJohn Marino 	    }
142795b7b453SJohn Marino 	  else
142895b7b453SJohn Marino 	    {
142995b7b453SJohn Marino 	      re_node_set_free (&eps_via_nodes);
143095b7b453SJohn Marino 	      if (prev_idx_match_malloced)
143195b7b453SJohn Marino 		re_free (prev_idx_match);
143295b7b453SJohn Marino 	      return REG_NOERROR;
143395b7b453SJohn Marino 	    }
143495b7b453SJohn Marino 	}
143595b7b453SJohn Marino 
143695b7b453SJohn Marino       /* Proceed to next node.  */
143795b7b453SJohn Marino       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
143895b7b453SJohn Marino 				    &eps_via_nodes, fs);
143995b7b453SJohn Marino 
1440*09d4459fSDaniel Fojt       if (__glibc_unlikely (cur_node < 0))
144195b7b453SJohn Marino 	{
1442*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (cur_node == -2))
144395b7b453SJohn Marino 	    {
144495b7b453SJohn Marino 	      re_node_set_free (&eps_via_nodes);
144595b7b453SJohn Marino 	      if (prev_idx_match_malloced)
144695b7b453SJohn Marino 		re_free (prev_idx_match);
144795b7b453SJohn Marino 	      free_fail_stack_return (fs);
144895b7b453SJohn Marino 	      return REG_ESPACE;
144995b7b453SJohn Marino 	    }
145095b7b453SJohn Marino 	  if (fs)
145195b7b453SJohn Marino 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
145295b7b453SJohn Marino 				       &eps_via_nodes);
145395b7b453SJohn Marino 	  else
145495b7b453SJohn Marino 	    {
145595b7b453SJohn Marino 	      re_node_set_free (&eps_via_nodes);
145695b7b453SJohn Marino 	      if (prev_idx_match_malloced)
145795b7b453SJohn Marino 		re_free (prev_idx_match);
145895b7b453SJohn Marino 	      return REG_NOMATCH;
145995b7b453SJohn Marino 	    }
146095b7b453SJohn Marino 	}
146195b7b453SJohn Marino     }
146295b7b453SJohn Marino   re_node_set_free (&eps_via_nodes);
146395b7b453SJohn Marino   if (prev_idx_match_malloced)
146495b7b453SJohn Marino     re_free (prev_idx_match);
146595b7b453SJohn Marino   return free_fail_stack_return (fs);
146695b7b453SJohn Marino }
146795b7b453SJohn Marino 
146895b7b453SJohn Marino static reg_errcode_t
free_fail_stack_return(struct re_fail_stack_t * fs)146995b7b453SJohn Marino free_fail_stack_return (struct re_fail_stack_t *fs)
147095b7b453SJohn Marino {
147195b7b453SJohn Marino   if (fs)
147295b7b453SJohn Marino     {
147395b7b453SJohn Marino       Idx fs_idx;
147495b7b453SJohn Marino       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
147595b7b453SJohn Marino 	{
147695b7b453SJohn Marino 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
147795b7b453SJohn Marino 	  re_free (fs->stack[fs_idx].regs);
147895b7b453SJohn Marino 	}
147995b7b453SJohn Marino       re_free (fs->stack);
148095b7b453SJohn Marino     }
148195b7b453SJohn Marino   return REG_NOERROR;
148295b7b453SJohn Marino }
148395b7b453SJohn Marino 
148495b7b453SJohn Marino static void
update_regs(const re_dfa_t * dfa,regmatch_t * pmatch,regmatch_t * prev_idx_match,Idx cur_node,Idx cur_idx,Idx nmatch)148595b7b453SJohn Marino update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
148695b7b453SJohn Marino 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
148795b7b453SJohn Marino {
148895b7b453SJohn Marino   int type = dfa->nodes[cur_node].type;
148995b7b453SJohn Marino   if (type == OP_OPEN_SUBEXP)
149095b7b453SJohn Marino     {
149195b7b453SJohn Marino       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
149295b7b453SJohn Marino 
149395b7b453SJohn Marino       /* We are at the first node of this sub expression.  */
149495b7b453SJohn Marino       if (reg_num < nmatch)
149595b7b453SJohn Marino 	{
149695b7b453SJohn Marino 	  pmatch[reg_num].rm_so = cur_idx;
149795b7b453SJohn Marino 	  pmatch[reg_num].rm_eo = -1;
149895b7b453SJohn Marino 	}
149995b7b453SJohn Marino     }
150095b7b453SJohn Marino   else if (type == OP_CLOSE_SUBEXP)
150195b7b453SJohn Marino     {
150295b7b453SJohn Marino       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
150395b7b453SJohn Marino       if (reg_num < nmatch)
150495b7b453SJohn Marino 	{
150595b7b453SJohn Marino 	  /* We are at the last node of this sub expression.  */
150695b7b453SJohn Marino 	  if (pmatch[reg_num].rm_so < cur_idx)
150795b7b453SJohn Marino 	    {
150895b7b453SJohn Marino 	      pmatch[reg_num].rm_eo = cur_idx;
150995b7b453SJohn Marino 	      /* This is a non-empty match or we are not inside an optional
151095b7b453SJohn Marino 		 subexpression.  Accept this right away.  */
151195b7b453SJohn Marino 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
151295b7b453SJohn Marino 	    }
151395b7b453SJohn Marino 	  else
151495b7b453SJohn Marino 	    {
151595b7b453SJohn Marino 	      if (dfa->nodes[cur_node].opt_subexp
151695b7b453SJohn Marino 		  && prev_idx_match[reg_num].rm_so != -1)
151795b7b453SJohn Marino 		/* We transited through an empty match for an optional
151895b7b453SJohn Marino 		   subexpression, like (a?)*, and this is not the subexp's
151995b7b453SJohn Marino 		   first match.  Copy back the old content of the registers
152095b7b453SJohn Marino 		   so that matches of an inner subexpression are undone as
152195b7b453SJohn Marino 		   well, like in ((a?))*.  */
152295b7b453SJohn Marino 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
152395b7b453SJohn Marino 	      else
152495b7b453SJohn Marino 		/* We completed a subexpression, but it may be part of
152595b7b453SJohn Marino 		   an optional one, so do not update PREV_IDX_MATCH.  */
152695b7b453SJohn Marino 		pmatch[reg_num].rm_eo = cur_idx;
152795b7b453SJohn Marino 	    }
152895b7b453SJohn Marino 	}
152995b7b453SJohn Marino     }
153095b7b453SJohn Marino }
153195b7b453SJohn Marino 
153295b7b453SJohn Marino /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
153395b7b453SJohn Marino    and sift the nodes in each states according to the following rules.
153495b7b453SJohn Marino    Updated state_log will be wrote to STATE_LOG.
153595b7b453SJohn Marino 
1536cf28ed85SJohn Marino    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
153795b7b453SJohn Marino      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1538cf28ed85SJohn Marino 	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1539cf28ed85SJohn Marino 	the LAST_NODE, we throw away the node 'a'.
1540cf28ed85SJohn Marino      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1541cf28ed85SJohn Marino 	string 's' and transit to 'b':
154295b7b453SJohn Marino 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1543cf28ed85SJohn Marino 	   away the node 'a'.
154495b7b453SJohn Marino 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1545cf28ed85SJohn Marino 	    thrown away, we throw away the node 'a'.
154695b7b453SJohn Marino      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
154795b7b453SJohn Marino 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1548cf28ed85SJohn Marino 	   node 'a'.
154995b7b453SJohn Marino 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1550cf28ed85SJohn Marino 	    we throw away the node 'a'.  */
155195b7b453SJohn Marino 
155295b7b453SJohn Marino #define STATE_NODE_CONTAINS(state,node) \
155395b7b453SJohn Marino   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
155495b7b453SJohn Marino 
155595b7b453SJohn Marino static reg_errcode_t
sift_states_backward(const re_match_context_t * mctx,re_sift_context_t * sctx)155695b7b453SJohn Marino sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
155795b7b453SJohn Marino {
155895b7b453SJohn Marino   reg_errcode_t err;
155995b7b453SJohn Marino   int null_cnt = 0;
156095b7b453SJohn Marino   Idx str_idx = sctx->last_str_idx;
156195b7b453SJohn Marino   re_node_set cur_dest;
156295b7b453SJohn Marino 
1563*09d4459fSDaniel Fojt   DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
156495b7b453SJohn Marino 
156595b7b453SJohn Marino   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
156695b7b453SJohn Marino      transit to the last_node and the last_node itself.  */
156795b7b453SJohn Marino   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1568*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
156995b7b453SJohn Marino     return err;
157095b7b453SJohn Marino   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1571*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
157295b7b453SJohn Marino     goto free_return;
157395b7b453SJohn Marino 
157495b7b453SJohn Marino   /* Then check each states in the state_log.  */
157595b7b453SJohn Marino   while (str_idx > 0)
157695b7b453SJohn Marino     {
157795b7b453SJohn Marino       /* Update counters.  */
157895b7b453SJohn Marino       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
157995b7b453SJohn Marino       if (null_cnt > mctx->max_mb_elem_len)
158095b7b453SJohn Marino 	{
158195b7b453SJohn Marino 	  memset (sctx->sifted_states, '\0',
158295b7b453SJohn Marino 		  sizeof (re_dfastate_t *) * str_idx);
158395b7b453SJohn Marino 	  re_node_set_free (&cur_dest);
158495b7b453SJohn Marino 	  return REG_NOERROR;
158595b7b453SJohn Marino 	}
158695b7b453SJohn Marino       re_node_set_empty (&cur_dest);
158795b7b453SJohn Marino       --str_idx;
158895b7b453SJohn Marino 
158995b7b453SJohn Marino       if (mctx->state_log[str_idx])
159095b7b453SJohn Marino 	{
159195b7b453SJohn Marino 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1592*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
159395b7b453SJohn Marino 	    goto free_return;
159495b7b453SJohn Marino 	}
159595b7b453SJohn Marino 
159695b7b453SJohn Marino       /* Add all the nodes which satisfy the following conditions:
159795b7b453SJohn Marino 	 - It can epsilon transit to a node in CUR_DEST.
159895b7b453SJohn Marino 	 - It is in CUR_SRC.
159995b7b453SJohn Marino 	 And update state_log.  */
160095b7b453SJohn Marino       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
160295b7b453SJohn Marino 	goto free_return;
160395b7b453SJohn Marino     }
160495b7b453SJohn Marino   err = REG_NOERROR;
160595b7b453SJohn Marino  free_return:
160695b7b453SJohn Marino   re_node_set_free (&cur_dest);
160795b7b453SJohn Marino   return err;
160895b7b453SJohn Marino }
160995b7b453SJohn Marino 
161095b7b453SJohn Marino static reg_errcode_t
1611*09d4459fSDaniel Fojt __attribute_warn_unused_result__
build_sifted_states(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * cur_dest)161295b7b453SJohn Marino build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
161395b7b453SJohn Marino 		     Idx str_idx, re_node_set *cur_dest)
161495b7b453SJohn Marino {
161595b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
161695b7b453SJohn Marino   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
161795b7b453SJohn Marino   Idx i;
161895b7b453SJohn Marino 
161995b7b453SJohn Marino   /* Then build the next sifted state.
1620cf28ed85SJohn Marino      We build the next sifted state on 'cur_dest', and update
1621cf28ed85SJohn Marino      'sifted_states[str_idx]' with 'cur_dest'.
162295b7b453SJohn Marino      Note:
1623cf28ed85SJohn Marino      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1624cf28ed85SJohn Marino      'cur_src' points the node_set of the old 'state_log[str_idx]'
162595b7b453SJohn Marino      (with the epsilon nodes pre-filtered out).  */
162695b7b453SJohn Marino   for (i = 0; i < cur_src->nelem; i++)
162795b7b453SJohn Marino     {
162895b7b453SJohn Marino       Idx prev_node = cur_src->elems[i];
162995b7b453SJohn Marino       int naccepted = 0;
163095b7b453SJohn Marino       bool ok;
1631*09d4459fSDaniel Fojt       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
163295b7b453SJohn Marino 
163395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
1634cf28ed85SJohn Marino       /* If the node may accept "multi byte".  */
163595b7b453SJohn Marino       if (dfa->nodes[prev_node].accept_mb)
163695b7b453SJohn Marino 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
163795b7b453SJohn Marino 					 str_idx, sctx->last_str_idx);
163895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
163995b7b453SJohn Marino 
164095b7b453SJohn Marino       /* We don't check backreferences here.
164195b7b453SJohn Marino 	 See update_cur_sifted_state().  */
164295b7b453SJohn Marino       if (!naccepted
164395b7b453SJohn Marino 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
164495b7b453SJohn Marino 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
164595b7b453SJohn Marino 				  dfa->nexts[prev_node]))
164695b7b453SJohn Marino 	naccepted = 1;
164795b7b453SJohn Marino 
164895b7b453SJohn Marino       if (naccepted == 0)
164995b7b453SJohn Marino 	continue;
165095b7b453SJohn Marino 
165195b7b453SJohn Marino       if (sctx->limits.nelem)
165295b7b453SJohn Marino 	{
165395b7b453SJohn Marino 	  Idx to_idx = str_idx + naccepted;
165495b7b453SJohn Marino 	  if (check_dst_limits (mctx, &sctx->limits,
165595b7b453SJohn Marino 				dfa->nexts[prev_node], to_idx,
165695b7b453SJohn Marino 				prev_node, str_idx))
165795b7b453SJohn Marino 	    continue;
165895b7b453SJohn Marino 	}
165995b7b453SJohn Marino       ok = re_node_set_insert (cur_dest, prev_node);
1660*09d4459fSDaniel Fojt       if (__glibc_unlikely (! ok))
166195b7b453SJohn Marino 	return REG_ESPACE;
166295b7b453SJohn Marino     }
166395b7b453SJohn Marino 
166495b7b453SJohn Marino   return REG_NOERROR;
166595b7b453SJohn Marino }
166695b7b453SJohn Marino 
166795b7b453SJohn Marino /* Helper functions.  */
166895b7b453SJohn Marino 
166995b7b453SJohn Marino static reg_errcode_t
clean_state_log_if_needed(re_match_context_t * mctx,Idx next_state_log_idx)167095b7b453SJohn Marino clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
167195b7b453SJohn Marino {
167295b7b453SJohn Marino   Idx top = mctx->state_log_top;
167395b7b453SJohn Marino 
1674cf28ed85SJohn Marino   if ((next_state_log_idx >= mctx->input.bufs_len
1675cf28ed85SJohn Marino        && mctx->input.bufs_len < mctx->input.len)
167695b7b453SJohn Marino       || (next_state_log_idx >= mctx->input.valid_len
167795b7b453SJohn Marino 	  && mctx->input.valid_len < mctx->input.len))
167895b7b453SJohn Marino     {
167995b7b453SJohn Marino       reg_errcode_t err;
1680680a9cb8SJohn Marino       err = extend_buffers (mctx, next_state_log_idx + 1);
1681*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
168295b7b453SJohn Marino 	return err;
168395b7b453SJohn Marino     }
168495b7b453SJohn Marino 
168595b7b453SJohn Marino   if (top < next_state_log_idx)
168695b7b453SJohn Marino     {
168795b7b453SJohn Marino       memset (mctx->state_log + top + 1, '\0',
168895b7b453SJohn Marino 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
168995b7b453SJohn Marino       mctx->state_log_top = next_state_log_idx;
169095b7b453SJohn Marino     }
169195b7b453SJohn Marino   return REG_NOERROR;
169295b7b453SJohn Marino }
169395b7b453SJohn Marino 
169495b7b453SJohn Marino static reg_errcode_t
merge_state_array(const re_dfa_t * dfa,re_dfastate_t ** dst,re_dfastate_t ** src,Idx num)169595b7b453SJohn Marino merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
169695b7b453SJohn Marino 		   re_dfastate_t **src, Idx num)
169795b7b453SJohn Marino {
169895b7b453SJohn Marino   Idx st_idx;
169995b7b453SJohn Marino   reg_errcode_t err;
170095b7b453SJohn Marino   for (st_idx = 0; st_idx < num; ++st_idx)
170195b7b453SJohn Marino     {
170295b7b453SJohn Marino       if (dst[st_idx] == NULL)
170395b7b453SJohn Marino 	dst[st_idx] = src[st_idx];
170495b7b453SJohn Marino       else if (src[st_idx] != NULL)
170595b7b453SJohn Marino 	{
170695b7b453SJohn Marino 	  re_node_set merged_set;
170795b7b453SJohn Marino 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
170895b7b453SJohn Marino 					&src[st_idx]->nodes);
1709*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
171095b7b453SJohn Marino 	    return err;
171195b7b453SJohn Marino 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
171295b7b453SJohn Marino 	  re_node_set_free (&merged_set);
1713*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
171495b7b453SJohn Marino 	    return err;
171595b7b453SJohn Marino 	}
171695b7b453SJohn Marino     }
171795b7b453SJohn Marino   return REG_NOERROR;
171895b7b453SJohn Marino }
171995b7b453SJohn Marino 
172095b7b453SJohn Marino static reg_errcode_t
update_cur_sifted_state(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * dest_nodes)172195b7b453SJohn Marino update_cur_sifted_state (const re_match_context_t *mctx,
172295b7b453SJohn Marino 			 re_sift_context_t *sctx, Idx str_idx,
172395b7b453SJohn Marino 			 re_node_set *dest_nodes)
172495b7b453SJohn Marino {
172595b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
172695b7b453SJohn Marino   reg_errcode_t err = REG_NOERROR;
172795b7b453SJohn Marino   const re_node_set *candidates;
172895b7b453SJohn Marino   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
172995b7b453SJohn Marino 		: &mctx->state_log[str_idx]->nodes);
173095b7b453SJohn Marino 
173195b7b453SJohn Marino   if (dest_nodes->nelem == 0)
173295b7b453SJohn Marino     sctx->sifted_states[str_idx] = NULL;
173395b7b453SJohn Marino   else
173495b7b453SJohn Marino     {
173595b7b453SJohn Marino       if (candidates)
173695b7b453SJohn Marino 	{
173795b7b453SJohn Marino 	  /* At first, add the nodes which can epsilon transit to a node in
173895b7b453SJohn Marino 	     DEST_NODE.  */
173995b7b453SJohn Marino 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1740*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
174195b7b453SJohn Marino 	    return err;
174295b7b453SJohn Marino 
174395b7b453SJohn Marino 	  /* Then, check the limitations in the current sift_context.  */
174495b7b453SJohn Marino 	  if (sctx->limits.nelem)
174595b7b453SJohn Marino 	    {
174695b7b453SJohn Marino 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
174795b7b453SJohn Marino 					 mctx->bkref_ents, str_idx);
1748*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
174995b7b453SJohn Marino 		return err;
175095b7b453SJohn Marino 	    }
175195b7b453SJohn Marino 	}
175295b7b453SJohn Marino 
175395b7b453SJohn Marino       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1754*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
175595b7b453SJohn Marino 	return err;
175695b7b453SJohn Marino     }
175795b7b453SJohn Marino 
175895b7b453SJohn Marino   if (candidates && mctx->state_log[str_idx]->has_backref)
175995b7b453SJohn Marino     {
176095b7b453SJohn Marino       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1761*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
176295b7b453SJohn Marino 	return err;
176395b7b453SJohn Marino     }
176495b7b453SJohn Marino   return REG_NOERROR;
176595b7b453SJohn Marino }
176695b7b453SJohn Marino 
176795b7b453SJohn Marino static reg_errcode_t
1768*09d4459fSDaniel Fojt __attribute_warn_unused_result__
add_epsilon_src_nodes(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates)176995b7b453SJohn Marino add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
177095b7b453SJohn Marino 		       const re_node_set *candidates)
177195b7b453SJohn Marino {
177295b7b453SJohn Marino   reg_errcode_t err = REG_NOERROR;
177395b7b453SJohn Marino   Idx i;
177495b7b453SJohn Marino 
177595b7b453SJohn Marino   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1776*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
177795b7b453SJohn Marino     return err;
177895b7b453SJohn Marino 
177995b7b453SJohn Marino   if (!state->inveclosure.alloc)
178095b7b453SJohn Marino     {
178195b7b453SJohn Marino       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1782*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
178395b7b453SJohn Marino 	return REG_ESPACE;
178495b7b453SJohn Marino       for (i = 0; i < dest_nodes->nelem; i++)
178595b7b453SJohn Marino 	{
178695b7b453SJohn Marino 	  err = re_node_set_merge (&state->inveclosure,
178795b7b453SJohn Marino 				   dfa->inveclosures + dest_nodes->elems[i]);
1788*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
178995b7b453SJohn Marino 	    return REG_ESPACE;
179095b7b453SJohn Marino 	}
179195b7b453SJohn Marino     }
179295b7b453SJohn Marino   return re_node_set_add_intersect (dest_nodes, candidates,
179395b7b453SJohn Marino 				    &state->inveclosure);
179495b7b453SJohn Marino }
179595b7b453SJohn Marino 
179695b7b453SJohn Marino static reg_errcode_t
sub_epsilon_src_nodes(const re_dfa_t * dfa,Idx node,re_node_set * dest_nodes,const re_node_set * candidates)179795b7b453SJohn Marino sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
179895b7b453SJohn Marino 		       const re_node_set *candidates)
179995b7b453SJohn Marino {
180095b7b453SJohn Marino     Idx ecl_idx;
180195b7b453SJohn Marino     reg_errcode_t err;
180295b7b453SJohn Marino     re_node_set *inv_eclosure = dfa->inveclosures + node;
180395b7b453SJohn Marino     re_node_set except_nodes;
180495b7b453SJohn Marino     re_node_set_init_empty (&except_nodes);
180595b7b453SJohn Marino     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
180695b7b453SJohn Marino       {
180795b7b453SJohn Marino 	Idx cur_node = inv_eclosure->elems[ecl_idx];
180895b7b453SJohn Marino 	if (cur_node == node)
180995b7b453SJohn Marino 	  continue;
181095b7b453SJohn Marino 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
181195b7b453SJohn Marino 	  {
181295b7b453SJohn Marino 	    Idx edst1 = dfa->edests[cur_node].elems[0];
181395b7b453SJohn Marino 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1814*09d4459fSDaniel Fojt 			 ? dfa->edests[cur_node].elems[1] : -1);
181595b7b453SJohn Marino 	    if ((!re_node_set_contains (inv_eclosure, edst1)
181695b7b453SJohn Marino 		 && re_node_set_contains (dest_nodes, edst1))
1817*09d4459fSDaniel Fojt 		|| (edst2 > 0
181895b7b453SJohn Marino 		    && !re_node_set_contains (inv_eclosure, edst2)
181995b7b453SJohn Marino 		    && re_node_set_contains (dest_nodes, edst2)))
182095b7b453SJohn Marino 	      {
182195b7b453SJohn Marino 		err = re_node_set_add_intersect (&except_nodes, candidates,
182295b7b453SJohn Marino 						 dfa->inveclosures + cur_node);
1823*09d4459fSDaniel Fojt 		if (__glibc_unlikely (err != REG_NOERROR))
182495b7b453SJohn Marino 		  {
182595b7b453SJohn Marino 		    re_node_set_free (&except_nodes);
182695b7b453SJohn Marino 		    return err;
182795b7b453SJohn Marino 		  }
182895b7b453SJohn Marino 	      }
182995b7b453SJohn Marino 	  }
183095b7b453SJohn Marino       }
183195b7b453SJohn Marino     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
183295b7b453SJohn Marino       {
183395b7b453SJohn Marino 	Idx cur_node = inv_eclosure->elems[ecl_idx];
183495b7b453SJohn Marino 	if (!re_node_set_contains (&except_nodes, cur_node))
183595b7b453SJohn Marino 	  {
183695b7b453SJohn Marino 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
183795b7b453SJohn Marino 	    re_node_set_remove_at (dest_nodes, idx);
183895b7b453SJohn Marino 	  }
183995b7b453SJohn Marino       }
184095b7b453SJohn Marino     re_node_set_free (&except_nodes);
184195b7b453SJohn Marino     return REG_NOERROR;
184295b7b453SJohn Marino }
184395b7b453SJohn Marino 
184495b7b453SJohn Marino static bool
check_dst_limits(const re_match_context_t * mctx,const re_node_set * limits,Idx dst_node,Idx dst_idx,Idx src_node,Idx src_idx)184595b7b453SJohn Marino check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
184695b7b453SJohn Marino 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
184795b7b453SJohn Marino {
184895b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
184995b7b453SJohn Marino   Idx lim_idx, src_pos, dst_pos;
185095b7b453SJohn Marino 
185195b7b453SJohn Marino   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
185295b7b453SJohn Marino   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
185395b7b453SJohn Marino   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
185495b7b453SJohn Marino     {
185595b7b453SJohn Marino       Idx subexp_idx;
185695b7b453SJohn Marino       struct re_backref_cache_entry *ent;
185795b7b453SJohn Marino       ent = mctx->bkref_ents + limits->elems[lim_idx];
185895b7b453SJohn Marino       subexp_idx = dfa->nodes[ent->node].opr.idx;
185995b7b453SJohn Marino 
186095b7b453SJohn Marino       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
186195b7b453SJohn Marino 					   subexp_idx, dst_node, dst_idx,
186295b7b453SJohn Marino 					   dst_bkref_idx);
186395b7b453SJohn Marino       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
186495b7b453SJohn Marino 					   subexp_idx, src_node, src_idx,
186595b7b453SJohn Marino 					   src_bkref_idx);
186695b7b453SJohn Marino 
186795b7b453SJohn Marino       /* In case of:
186895b7b453SJohn Marino 	 <src> <dst> ( <subexp> )
186995b7b453SJohn Marino 	 ( <subexp> ) <src> <dst>
187095b7b453SJohn Marino 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
187195b7b453SJohn Marino       if (src_pos == dst_pos)
187295b7b453SJohn Marino 	continue; /* This is unrelated limitation.  */
187395b7b453SJohn Marino       else
187495b7b453SJohn Marino 	return true;
187595b7b453SJohn Marino     }
187695b7b453SJohn Marino   return false;
187795b7b453SJohn Marino }
187895b7b453SJohn Marino 
187995b7b453SJohn Marino static int
check_dst_limits_calc_pos_1(const re_match_context_t * mctx,int boundaries,Idx subexp_idx,Idx from_node,Idx bkref_idx)188095b7b453SJohn Marino check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
188195b7b453SJohn Marino 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
188295b7b453SJohn Marino {
188395b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
188495b7b453SJohn Marino   const re_node_set *eclosures = dfa->eclosures + from_node;
188595b7b453SJohn Marino   Idx node_idx;
188695b7b453SJohn Marino 
188795b7b453SJohn Marino   /* Else, we are on the boundary: examine the nodes on the epsilon
188895b7b453SJohn Marino      closure.  */
188995b7b453SJohn Marino   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
189095b7b453SJohn Marino     {
189195b7b453SJohn Marino       Idx node = eclosures->elems[node_idx];
189295b7b453SJohn Marino       switch (dfa->nodes[node].type)
189395b7b453SJohn Marino 	{
189495b7b453SJohn Marino 	case OP_BACK_REF:
1895*09d4459fSDaniel Fojt 	  if (bkref_idx != -1)
189695b7b453SJohn Marino 	    {
189795b7b453SJohn Marino 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
189895b7b453SJohn Marino 	      do
189995b7b453SJohn Marino 		{
190095b7b453SJohn Marino 		  Idx dst;
190195b7b453SJohn Marino 		  int cpos;
190295b7b453SJohn Marino 
190395b7b453SJohn Marino 		  if (ent->node != node)
190495b7b453SJohn Marino 		    continue;
190595b7b453SJohn Marino 
190695b7b453SJohn Marino 		  if (subexp_idx < BITSET_WORD_BITS
190795b7b453SJohn Marino 		      && !(ent->eps_reachable_subexps_map
190895b7b453SJohn Marino 			   & ((bitset_word_t) 1 << subexp_idx)))
190995b7b453SJohn Marino 		    continue;
191095b7b453SJohn Marino 
191195b7b453SJohn Marino 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
191295b7b453SJohn Marino 		     OP_CLOSE_SUBEXP cases below.  But, if the
191395b7b453SJohn Marino 		     destination node is the same node as the source
191495b7b453SJohn Marino 		     node, don't recurse because it would cause an
191595b7b453SJohn Marino 		     infinite loop: a regex that exhibits this behavior
191695b7b453SJohn Marino 		     is ()\1*\1*  */
191795b7b453SJohn Marino 		  dst = dfa->edests[node].elems[0];
191895b7b453SJohn Marino 		  if (dst == from_node)
191995b7b453SJohn Marino 		    {
192095b7b453SJohn Marino 		      if (boundaries & 1)
192195b7b453SJohn Marino 			return -1;
192295b7b453SJohn Marino 		      else /* if (boundaries & 2) */
192395b7b453SJohn Marino 			return 0;
192495b7b453SJohn Marino 		    }
192595b7b453SJohn Marino 
192695b7b453SJohn Marino 		  cpos =
192795b7b453SJohn Marino 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
192895b7b453SJohn Marino 						 dst, bkref_idx);
192995b7b453SJohn Marino 		  if (cpos == -1 /* && (boundaries & 1) */)
193095b7b453SJohn Marino 		    return -1;
193195b7b453SJohn Marino 		  if (cpos == 0 && (boundaries & 2))
193295b7b453SJohn Marino 		    return 0;
193395b7b453SJohn Marino 
193495b7b453SJohn Marino 		  if (subexp_idx < BITSET_WORD_BITS)
193595b7b453SJohn Marino 		    ent->eps_reachable_subexps_map
193695b7b453SJohn Marino 		      &= ~((bitset_word_t) 1 << subexp_idx);
193795b7b453SJohn Marino 		}
193895b7b453SJohn Marino 	      while (ent++->more);
193995b7b453SJohn Marino 	    }
194095b7b453SJohn Marino 	  break;
194195b7b453SJohn Marino 
194295b7b453SJohn Marino 	case OP_OPEN_SUBEXP:
194395b7b453SJohn Marino 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
194495b7b453SJohn Marino 	    return -1;
194595b7b453SJohn Marino 	  break;
194695b7b453SJohn Marino 
194795b7b453SJohn Marino 	case OP_CLOSE_SUBEXP:
194895b7b453SJohn Marino 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
194995b7b453SJohn Marino 	    return 0;
195095b7b453SJohn Marino 	  break;
195195b7b453SJohn Marino 
195295b7b453SJohn Marino 	default:
195395b7b453SJohn Marino 	    break;
195495b7b453SJohn Marino 	}
195595b7b453SJohn Marino     }
195695b7b453SJohn Marino 
195795b7b453SJohn Marino   return (boundaries & 2) ? 1 : 0;
195895b7b453SJohn Marino }
195995b7b453SJohn Marino 
196095b7b453SJohn Marino static int
check_dst_limits_calc_pos(const re_match_context_t * mctx,Idx limit,Idx subexp_idx,Idx from_node,Idx str_idx,Idx bkref_idx)196195b7b453SJohn Marino check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
196295b7b453SJohn Marino 			   Idx subexp_idx, Idx from_node, Idx str_idx,
196395b7b453SJohn Marino 			   Idx bkref_idx)
196495b7b453SJohn Marino {
196595b7b453SJohn Marino   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
196695b7b453SJohn Marino   int boundaries;
196795b7b453SJohn Marino 
196895b7b453SJohn Marino   /* If we are outside the range of the subexpression, return -1 or 1.  */
196995b7b453SJohn Marino   if (str_idx < lim->subexp_from)
197095b7b453SJohn Marino     return -1;
197195b7b453SJohn Marino 
197295b7b453SJohn Marino   if (lim->subexp_to < str_idx)
197395b7b453SJohn Marino     return 1;
197495b7b453SJohn Marino 
197595b7b453SJohn Marino   /* If we are within the subexpression, return 0.  */
197695b7b453SJohn Marino   boundaries = (str_idx == lim->subexp_from);
197795b7b453SJohn Marino   boundaries |= (str_idx == lim->subexp_to) << 1;
197895b7b453SJohn Marino   if (boundaries == 0)
197995b7b453SJohn Marino     return 0;
198095b7b453SJohn Marino 
198195b7b453SJohn Marino   /* Else, examine epsilon closure.  */
198295b7b453SJohn Marino   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
198395b7b453SJohn Marino 				      from_node, bkref_idx);
198495b7b453SJohn Marino }
198595b7b453SJohn Marino 
198695b7b453SJohn Marino /* Check the limitations of sub expressions LIMITS, and remove the nodes
198795b7b453SJohn Marino    which are against limitations from DEST_NODES. */
198895b7b453SJohn Marino 
198995b7b453SJohn Marino static reg_errcode_t
check_subexp_limits(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates,re_node_set * limits,struct re_backref_cache_entry * bkref_ents,Idx str_idx)199095b7b453SJohn Marino check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
199195b7b453SJohn Marino 		     const re_node_set *candidates, re_node_set *limits,
199295b7b453SJohn Marino 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
199395b7b453SJohn Marino {
199495b7b453SJohn Marino   reg_errcode_t err;
199595b7b453SJohn Marino   Idx node_idx, lim_idx;
199695b7b453SJohn Marino 
199795b7b453SJohn Marino   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
199895b7b453SJohn Marino     {
199995b7b453SJohn Marino       Idx subexp_idx;
200095b7b453SJohn Marino       struct re_backref_cache_entry *ent;
200195b7b453SJohn Marino       ent = bkref_ents + limits->elems[lim_idx];
200295b7b453SJohn Marino 
200395b7b453SJohn Marino       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
200495b7b453SJohn Marino 	continue; /* This is unrelated limitation.  */
200595b7b453SJohn Marino 
200695b7b453SJohn Marino       subexp_idx = dfa->nodes[ent->node].opr.idx;
200795b7b453SJohn Marino       if (ent->subexp_to == str_idx)
200895b7b453SJohn Marino 	{
2009*09d4459fSDaniel Fojt 	  Idx ops_node = -1;
2010*09d4459fSDaniel Fojt 	  Idx cls_node = -1;
201195b7b453SJohn Marino 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
201295b7b453SJohn Marino 	    {
201395b7b453SJohn Marino 	      Idx node = dest_nodes->elems[node_idx];
201495b7b453SJohn Marino 	      re_token_type_t type = dfa->nodes[node].type;
201595b7b453SJohn Marino 	      if (type == OP_OPEN_SUBEXP
201695b7b453SJohn Marino 		  && subexp_idx == dfa->nodes[node].opr.idx)
201795b7b453SJohn Marino 		ops_node = node;
201895b7b453SJohn Marino 	      else if (type == OP_CLOSE_SUBEXP
201995b7b453SJohn Marino 		       && subexp_idx == dfa->nodes[node].opr.idx)
202095b7b453SJohn Marino 		cls_node = node;
202195b7b453SJohn Marino 	    }
202295b7b453SJohn Marino 
202395b7b453SJohn Marino 	  /* Check the limitation of the open subexpression.  */
202495b7b453SJohn Marino 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
2025*09d4459fSDaniel Fojt 	  if (ops_node >= 0)
202695b7b453SJohn Marino 	    {
202795b7b453SJohn Marino 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
202895b7b453SJohn Marino 					   candidates);
2029*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
203095b7b453SJohn Marino 		return err;
203195b7b453SJohn Marino 	    }
203295b7b453SJohn Marino 
203395b7b453SJohn Marino 	  /* Check the limitation of the close subexpression.  */
2034*09d4459fSDaniel Fojt 	  if (cls_node >= 0)
203595b7b453SJohn Marino 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
203695b7b453SJohn Marino 	      {
203795b7b453SJohn Marino 		Idx node = dest_nodes->elems[node_idx];
203895b7b453SJohn Marino 		if (!re_node_set_contains (dfa->inveclosures + node,
203995b7b453SJohn Marino 					   cls_node)
204095b7b453SJohn Marino 		    && !re_node_set_contains (dfa->eclosures + node,
204195b7b453SJohn Marino 					      cls_node))
204295b7b453SJohn Marino 		  {
204395b7b453SJohn Marino 		    /* It is against this limitation.
204495b7b453SJohn Marino 		       Remove it form the current sifted state.  */
204595b7b453SJohn Marino 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
204695b7b453SJohn Marino 						 candidates);
2047*09d4459fSDaniel Fojt 		    if (__glibc_unlikely (err != REG_NOERROR))
204895b7b453SJohn Marino 		      return err;
204995b7b453SJohn Marino 		    --node_idx;
205095b7b453SJohn Marino 		  }
205195b7b453SJohn Marino 	      }
205295b7b453SJohn Marino 	}
205395b7b453SJohn Marino       else /* (ent->subexp_to != str_idx)  */
205495b7b453SJohn Marino 	{
205595b7b453SJohn Marino 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
205695b7b453SJohn Marino 	    {
205795b7b453SJohn Marino 	      Idx node = dest_nodes->elems[node_idx];
205895b7b453SJohn Marino 	      re_token_type_t type = dfa->nodes[node].type;
205995b7b453SJohn Marino 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
206095b7b453SJohn Marino 		{
206195b7b453SJohn Marino 		  if (subexp_idx != dfa->nodes[node].opr.idx)
206295b7b453SJohn Marino 		    continue;
206395b7b453SJohn Marino 		  /* It is against this limitation.
206495b7b453SJohn Marino 		     Remove it form the current sifted state.  */
206595b7b453SJohn Marino 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
206695b7b453SJohn Marino 					       candidates);
2067*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
206895b7b453SJohn Marino 		    return err;
206995b7b453SJohn Marino 		}
207095b7b453SJohn Marino 	    }
207195b7b453SJohn Marino 	}
207295b7b453SJohn Marino     }
207395b7b453SJohn Marino   return REG_NOERROR;
207495b7b453SJohn Marino }
207595b7b453SJohn Marino 
207695b7b453SJohn Marino static reg_errcode_t
2077*09d4459fSDaniel Fojt __attribute_warn_unused_result__
sift_states_bkref(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,const re_node_set * candidates)207895b7b453SJohn Marino sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
207995b7b453SJohn Marino 		   Idx str_idx, const re_node_set *candidates)
208095b7b453SJohn Marino {
208195b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
208295b7b453SJohn Marino   reg_errcode_t err;
208395b7b453SJohn Marino   Idx node_idx, node;
208495b7b453SJohn Marino   re_sift_context_t local_sctx;
208595b7b453SJohn Marino   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
208695b7b453SJohn Marino 
2087*09d4459fSDaniel Fojt   if (first_idx == -1)
208895b7b453SJohn Marino     return REG_NOERROR;
208995b7b453SJohn Marino 
209095b7b453SJohn Marino   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
209195b7b453SJohn Marino 
209295b7b453SJohn Marino   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
209395b7b453SJohn Marino     {
209495b7b453SJohn Marino       Idx enabled_idx;
209595b7b453SJohn Marino       re_token_type_t type;
209695b7b453SJohn Marino       struct re_backref_cache_entry *entry;
209795b7b453SJohn Marino       node = candidates->elems[node_idx];
209895b7b453SJohn Marino       type = dfa->nodes[node].type;
209995b7b453SJohn Marino       /* Avoid infinite loop for the REs like "()\1+".  */
210095b7b453SJohn Marino       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
210195b7b453SJohn Marino 	continue;
210295b7b453SJohn Marino       if (type != OP_BACK_REF)
210395b7b453SJohn Marino 	continue;
210495b7b453SJohn Marino 
210595b7b453SJohn Marino       entry = mctx->bkref_ents + first_idx;
210695b7b453SJohn Marino       enabled_idx = first_idx;
210795b7b453SJohn Marino       do
210895b7b453SJohn Marino 	{
210995b7b453SJohn Marino 	  Idx subexp_len;
211095b7b453SJohn Marino 	  Idx to_idx;
211195b7b453SJohn Marino 	  Idx dst_node;
211295b7b453SJohn Marino 	  bool ok;
211395b7b453SJohn Marino 	  re_dfastate_t *cur_state;
211495b7b453SJohn Marino 
211595b7b453SJohn Marino 	  if (entry->node != node)
211695b7b453SJohn Marino 	    continue;
211795b7b453SJohn Marino 	  subexp_len = entry->subexp_to - entry->subexp_from;
211895b7b453SJohn Marino 	  to_idx = str_idx + subexp_len;
211995b7b453SJohn Marino 	  dst_node = (subexp_len ? dfa->nexts[node]
212095b7b453SJohn Marino 		      : dfa->edests[node].elems[0]);
212195b7b453SJohn Marino 
212295b7b453SJohn Marino 	  if (to_idx > sctx->last_str_idx
212395b7b453SJohn Marino 	      || sctx->sifted_states[to_idx] == NULL
212495b7b453SJohn Marino 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
212595b7b453SJohn Marino 	      || check_dst_limits (mctx, &sctx->limits, node,
212695b7b453SJohn Marino 				   str_idx, dst_node, to_idx))
212795b7b453SJohn Marino 	    continue;
212895b7b453SJohn Marino 
212995b7b453SJohn Marino 	  if (local_sctx.sifted_states == NULL)
213095b7b453SJohn Marino 	    {
213195b7b453SJohn Marino 	      local_sctx = *sctx;
213295b7b453SJohn Marino 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2133*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
213495b7b453SJohn Marino 		goto free_return;
213595b7b453SJohn Marino 	    }
213695b7b453SJohn Marino 	  local_sctx.last_node = node;
213795b7b453SJohn Marino 	  local_sctx.last_str_idx = str_idx;
213895b7b453SJohn Marino 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2139*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (! ok))
214095b7b453SJohn Marino 	    {
214195b7b453SJohn Marino 	      err = REG_ESPACE;
214295b7b453SJohn Marino 	      goto free_return;
214395b7b453SJohn Marino 	    }
214495b7b453SJohn Marino 	  cur_state = local_sctx.sifted_states[str_idx];
214595b7b453SJohn Marino 	  err = sift_states_backward (mctx, &local_sctx);
2146*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
214795b7b453SJohn Marino 	    goto free_return;
214895b7b453SJohn Marino 	  if (sctx->limited_states != NULL)
214995b7b453SJohn Marino 	    {
215095b7b453SJohn Marino 	      err = merge_state_array (dfa, sctx->limited_states,
215195b7b453SJohn Marino 				       local_sctx.sifted_states,
215295b7b453SJohn Marino 				       str_idx + 1);
2153*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
215495b7b453SJohn Marino 		goto free_return;
215595b7b453SJohn Marino 	    }
215695b7b453SJohn Marino 	  local_sctx.sifted_states[str_idx] = cur_state;
215795b7b453SJohn Marino 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
215895b7b453SJohn Marino 
215995b7b453SJohn Marino 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
216095b7b453SJohn Marino 	  entry = mctx->bkref_ents + enabled_idx;
216195b7b453SJohn Marino 	}
216295b7b453SJohn Marino       while (enabled_idx++, entry++->more);
216395b7b453SJohn Marino     }
216495b7b453SJohn Marino   err = REG_NOERROR;
216595b7b453SJohn Marino  free_return:
216695b7b453SJohn Marino   if (local_sctx.sifted_states != NULL)
216795b7b453SJohn Marino     {
216895b7b453SJohn Marino       re_node_set_free (&local_sctx.limits);
216995b7b453SJohn Marino     }
217095b7b453SJohn Marino 
217195b7b453SJohn Marino   return err;
217295b7b453SJohn Marino }
217395b7b453SJohn Marino 
217495b7b453SJohn Marino 
217595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
217695b7b453SJohn Marino static int
sift_states_iter_mb(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx node_idx,Idx str_idx,Idx max_str_idx)217795b7b453SJohn Marino sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
217895b7b453SJohn Marino 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
217995b7b453SJohn Marino {
218095b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
218195b7b453SJohn Marino   int naccepted;
2182cf28ed85SJohn Marino   /* Check the node can accept "multi byte".  */
218395b7b453SJohn Marino   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2184*09d4459fSDaniel Fojt   if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2185*09d4459fSDaniel Fojt       && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
218695b7b453SJohn Marino 			       dfa->nexts[node_idx]))
2187cf28ed85SJohn Marino     /* The node can't accept the "multi byte", or the
218895b7b453SJohn Marino        destination was already thrown away, then the node
2189*09d4459fSDaniel Fojt        couldn't accept the current input "multi byte".   */
219095b7b453SJohn Marino     naccepted = 0;
219195b7b453SJohn Marino   /* Otherwise, it is sure that the node could accept
2192cf28ed85SJohn Marino      'naccepted' bytes input.  */
219395b7b453SJohn Marino   return naccepted;
219495b7b453SJohn Marino }
219595b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
219695b7b453SJohn Marino 
219795b7b453SJohn Marino 
219895b7b453SJohn Marino /* Functions for state transition.  */
219995b7b453SJohn Marino 
220095b7b453SJohn Marino /* Return the next state to which the current state STATE will transit by
220195b7b453SJohn Marino    accepting the current input byte, and update STATE_LOG if necessary.
220295b7b453SJohn Marino    If STATE can accept a multibyte char/collating element/back reference
220395b7b453SJohn Marino    update the destination of STATE_LOG.  */
220495b7b453SJohn Marino 
220595b7b453SJohn Marino static re_dfastate_t *
2206*09d4459fSDaniel Fojt __attribute_warn_unused_result__
transit_state(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * state)220795b7b453SJohn Marino transit_state (reg_errcode_t *err, re_match_context_t *mctx,
220895b7b453SJohn Marino 	       re_dfastate_t *state)
220995b7b453SJohn Marino {
221095b7b453SJohn Marino   re_dfastate_t **trtable;
221195b7b453SJohn Marino   unsigned char ch;
221295b7b453SJohn Marino 
221395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
221495b7b453SJohn Marino   /* If the current state can accept multibyte.  */
2215*09d4459fSDaniel Fojt   if (__glibc_unlikely (state->accept_mb))
221695b7b453SJohn Marino     {
221795b7b453SJohn Marino       *err = transit_state_mb (mctx, state);
2218*09d4459fSDaniel Fojt       if (__glibc_unlikely (*err != REG_NOERROR))
221995b7b453SJohn Marino 	return NULL;
222095b7b453SJohn Marino     }
222195b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
222295b7b453SJohn Marino 
222395b7b453SJohn Marino   /* Then decide the next state with the single byte.  */
222495b7b453SJohn Marino #if 0
222595b7b453SJohn Marino   if (0)
222695b7b453SJohn Marino     /* don't use transition table  */
222795b7b453SJohn Marino     return transit_state_sb (err, mctx, state);
222895b7b453SJohn Marino #endif
222995b7b453SJohn Marino 
223095b7b453SJohn Marino   /* Use transition table  */
223195b7b453SJohn Marino   ch = re_string_fetch_byte (&mctx->input);
223295b7b453SJohn Marino   for (;;)
223395b7b453SJohn Marino     {
223495b7b453SJohn Marino       trtable = state->trtable;
2235*09d4459fSDaniel Fojt       if (__glibc_likely (trtable != NULL))
223695b7b453SJohn Marino 	return trtable[ch];
223795b7b453SJohn Marino 
223895b7b453SJohn Marino       trtable = state->word_trtable;
2239*09d4459fSDaniel Fojt       if (__glibc_likely (trtable != NULL))
224095b7b453SJohn Marino 	{
224195b7b453SJohn Marino 	  unsigned int context;
224295b7b453SJohn Marino 	  context
224395b7b453SJohn Marino 	    = re_string_context_at (&mctx->input,
224495b7b453SJohn Marino 				    re_string_cur_idx (&mctx->input) - 1,
224595b7b453SJohn Marino 				    mctx->eflags);
224695b7b453SJohn Marino 	  if (IS_WORD_CONTEXT (context))
224795b7b453SJohn Marino 	    return trtable[ch + SBC_MAX];
224895b7b453SJohn Marino 	  else
224995b7b453SJohn Marino 	    return trtable[ch];
225095b7b453SJohn Marino 	}
225195b7b453SJohn Marino 
225295b7b453SJohn Marino       if (!build_trtable (mctx->dfa, state))
225395b7b453SJohn Marino 	{
225495b7b453SJohn Marino 	  *err = REG_ESPACE;
225595b7b453SJohn Marino 	  return NULL;
225695b7b453SJohn Marino 	}
225795b7b453SJohn Marino 
225895b7b453SJohn Marino       /* Retry, we now have a transition table.  */
225995b7b453SJohn Marino     }
226095b7b453SJohn Marino }
226195b7b453SJohn Marino 
226295b7b453SJohn Marino /* Update the state_log if we need */
226395b7b453SJohn Marino static re_dfastate_t *
merge_state_with_log(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * next_state)226495b7b453SJohn Marino merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
226595b7b453SJohn Marino 		      re_dfastate_t *next_state)
226695b7b453SJohn Marino {
226795b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
226895b7b453SJohn Marino   Idx cur_idx = re_string_cur_idx (&mctx->input);
226995b7b453SJohn Marino 
227095b7b453SJohn Marino   if (cur_idx > mctx->state_log_top)
227195b7b453SJohn Marino     {
227295b7b453SJohn Marino       mctx->state_log[cur_idx] = next_state;
227395b7b453SJohn Marino       mctx->state_log_top = cur_idx;
227495b7b453SJohn Marino     }
227595b7b453SJohn Marino   else if (mctx->state_log[cur_idx] == 0)
227695b7b453SJohn Marino     {
227795b7b453SJohn Marino       mctx->state_log[cur_idx] = next_state;
227895b7b453SJohn Marino     }
227995b7b453SJohn Marino   else
228095b7b453SJohn Marino     {
228195b7b453SJohn Marino       re_dfastate_t *pstate;
228295b7b453SJohn Marino       unsigned int context;
228395b7b453SJohn Marino       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
228495b7b453SJohn Marino       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
228595b7b453SJohn Marino 	 the destination of a multibyte char/collating element/
228695b7b453SJohn Marino 	 back reference.  Then the next state is the union set of
228795b7b453SJohn Marino 	 these destinations and the results of the transition table.  */
228895b7b453SJohn Marino       pstate = mctx->state_log[cur_idx];
228995b7b453SJohn Marino       log_nodes = pstate->entrance_nodes;
229095b7b453SJohn Marino       if (next_state != NULL)
229195b7b453SJohn Marino 	{
229295b7b453SJohn Marino 	  table_nodes = next_state->entrance_nodes;
229395b7b453SJohn Marino 	  *err = re_node_set_init_union (&next_nodes, table_nodes,
229495b7b453SJohn Marino 					     log_nodes);
2295*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
229695b7b453SJohn Marino 	    return NULL;
229795b7b453SJohn Marino 	}
229895b7b453SJohn Marino       else
229995b7b453SJohn Marino 	next_nodes = *log_nodes;
230095b7b453SJohn Marino       /* Note: We already add the nodes of the initial state,
230195b7b453SJohn Marino 	 then we don't need to add them here.  */
230295b7b453SJohn Marino 
230395b7b453SJohn Marino       context = re_string_context_at (&mctx->input,
230495b7b453SJohn Marino 				      re_string_cur_idx (&mctx->input) - 1,
230595b7b453SJohn Marino 				      mctx->eflags);
230695b7b453SJohn Marino       next_state = mctx->state_log[cur_idx]
230795b7b453SJohn Marino 	= re_acquire_state_context (err, dfa, &next_nodes, context);
230895b7b453SJohn Marino       /* We don't need to check errors here, since the return value of
230995b7b453SJohn Marino 	 this function is next_state and ERR is already set.  */
231095b7b453SJohn Marino 
231195b7b453SJohn Marino       if (table_nodes != NULL)
231295b7b453SJohn Marino 	re_node_set_free (&next_nodes);
231395b7b453SJohn Marino     }
231495b7b453SJohn Marino 
2315*09d4459fSDaniel Fojt   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
231695b7b453SJohn Marino     {
231795b7b453SJohn Marino       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
231895b7b453SJohn Marino 	 later.  We must check them here, since the back references in the
231995b7b453SJohn Marino 	 next state might use them.  */
232095b7b453SJohn Marino       *err = check_subexp_matching_top (mctx, &next_state->nodes,
232195b7b453SJohn Marino 					cur_idx);
2322*09d4459fSDaniel Fojt       if (__glibc_unlikely (*err != REG_NOERROR))
232395b7b453SJohn Marino 	return NULL;
232495b7b453SJohn Marino 
232595b7b453SJohn Marino       /* If the next state has back references.  */
232695b7b453SJohn Marino       if (next_state->has_backref)
232795b7b453SJohn Marino 	{
232895b7b453SJohn Marino 	  *err = transit_state_bkref (mctx, &next_state->nodes);
2329*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
233095b7b453SJohn Marino 	    return NULL;
233195b7b453SJohn Marino 	  next_state = mctx->state_log[cur_idx];
233295b7b453SJohn Marino 	}
233395b7b453SJohn Marino     }
233495b7b453SJohn Marino 
233595b7b453SJohn Marino   return next_state;
233695b7b453SJohn Marino }
233795b7b453SJohn Marino 
233895b7b453SJohn Marino /* Skip bytes in the input that correspond to part of a
233995b7b453SJohn Marino    multi-byte match, then look in the log for a state
234095b7b453SJohn Marino    from which to restart matching.  */
234195b7b453SJohn Marino static re_dfastate_t *
find_recover_state(reg_errcode_t * err,re_match_context_t * mctx)234295b7b453SJohn Marino find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
234395b7b453SJohn Marino {
234495b7b453SJohn Marino   re_dfastate_t *cur_state;
234595b7b453SJohn Marino   do
234695b7b453SJohn Marino     {
234795b7b453SJohn Marino       Idx max = mctx->state_log_top;
234895b7b453SJohn Marino       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
234995b7b453SJohn Marino 
235095b7b453SJohn Marino       do
235195b7b453SJohn Marino 	{
235295b7b453SJohn Marino 	  if (++cur_str_idx > max)
235395b7b453SJohn Marino 	    return NULL;
235495b7b453SJohn Marino 	  re_string_skip_bytes (&mctx->input, 1);
235595b7b453SJohn Marino 	}
235695b7b453SJohn Marino       while (mctx->state_log[cur_str_idx] == NULL);
235795b7b453SJohn Marino 
235895b7b453SJohn Marino       cur_state = merge_state_with_log (err, mctx, NULL);
235995b7b453SJohn Marino     }
236095b7b453SJohn Marino   while (*err == REG_NOERROR && cur_state == NULL);
236195b7b453SJohn Marino   return cur_state;
236295b7b453SJohn Marino }
236395b7b453SJohn Marino 
236495b7b453SJohn Marino /* Helper functions for transit_state.  */
236595b7b453SJohn Marino 
236695b7b453SJohn Marino /* From the node set CUR_NODES, pick up the nodes whose types are
236795b7b453SJohn Marino    OP_OPEN_SUBEXP and which have corresponding back references in the regular
236895b7b453SJohn Marino    expression. And register them to use them later for evaluating the
2369cf28ed85SJohn Marino    corresponding back references.  */
237095b7b453SJohn Marino 
237195b7b453SJohn Marino static reg_errcode_t
check_subexp_matching_top(re_match_context_t * mctx,re_node_set * cur_nodes,Idx str_idx)237295b7b453SJohn Marino check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
237395b7b453SJohn Marino 			   Idx str_idx)
237495b7b453SJohn Marino {
237595b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
237695b7b453SJohn Marino   Idx node_idx;
237795b7b453SJohn Marino   reg_errcode_t err;
237895b7b453SJohn Marino 
237995b7b453SJohn Marino   /* TODO: This isn't efficient.
238095b7b453SJohn Marino 	   Because there might be more than one nodes whose types are
238195b7b453SJohn Marino 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
238295b7b453SJohn Marino 	   nodes.
238395b7b453SJohn Marino 	   E.g. RE: (a){2}  */
238495b7b453SJohn Marino   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
238595b7b453SJohn Marino     {
238695b7b453SJohn Marino       Idx node = cur_nodes->elems[node_idx];
238795b7b453SJohn Marino       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
238895b7b453SJohn Marino 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
238995b7b453SJohn Marino 	  && (dfa->used_bkref_map
239095b7b453SJohn Marino 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
239195b7b453SJohn Marino 	{
239295b7b453SJohn Marino 	  err = match_ctx_add_subtop (mctx, node, str_idx);
2393*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
239495b7b453SJohn Marino 	    return err;
239595b7b453SJohn Marino 	}
239695b7b453SJohn Marino     }
239795b7b453SJohn Marino   return REG_NOERROR;
239895b7b453SJohn Marino }
239995b7b453SJohn Marino 
240095b7b453SJohn Marino #if 0
240195b7b453SJohn Marino /* Return the next state to which the current state STATE will transit by
240295b7b453SJohn Marino    accepting the current input byte.  */
240395b7b453SJohn Marino 
240495b7b453SJohn Marino static re_dfastate_t *
240595b7b453SJohn Marino transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
240695b7b453SJohn Marino 		  re_dfastate_t *state)
240795b7b453SJohn Marino {
240895b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
240995b7b453SJohn Marino   re_node_set next_nodes;
241095b7b453SJohn Marino   re_dfastate_t *next_state;
241195b7b453SJohn Marino   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
241295b7b453SJohn Marino   unsigned int context;
241395b7b453SJohn Marino 
241495b7b453SJohn Marino   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2415*09d4459fSDaniel Fojt   if (__glibc_unlikely (*err != REG_NOERROR))
241695b7b453SJohn Marino     return NULL;
241795b7b453SJohn Marino   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
241895b7b453SJohn Marino     {
241995b7b453SJohn Marino       Idx cur_node = state->nodes.elems[node_cnt];
242095b7b453SJohn Marino       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
242195b7b453SJohn Marino 	{
242295b7b453SJohn Marino 	  *err = re_node_set_merge (&next_nodes,
242395b7b453SJohn Marino 				    dfa->eclosures + dfa->nexts[cur_node]);
2424*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (*err != REG_NOERROR))
242595b7b453SJohn Marino 	    {
242695b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
242795b7b453SJohn Marino 	      return NULL;
242895b7b453SJohn Marino 	    }
242995b7b453SJohn Marino 	}
243095b7b453SJohn Marino     }
243195b7b453SJohn Marino   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
243295b7b453SJohn Marino   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
243395b7b453SJohn Marino   /* We don't need to check errors here, since the return value of
243495b7b453SJohn Marino      this function is next_state and ERR is already set.  */
243595b7b453SJohn Marino 
243695b7b453SJohn Marino   re_node_set_free (&next_nodes);
243795b7b453SJohn Marino   re_string_skip_bytes (&mctx->input, 1);
243895b7b453SJohn Marino   return next_state;
243995b7b453SJohn Marino }
244095b7b453SJohn Marino #endif
244195b7b453SJohn Marino 
244295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
244395b7b453SJohn Marino static reg_errcode_t
transit_state_mb(re_match_context_t * mctx,re_dfastate_t * pstate)244495b7b453SJohn Marino transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
244595b7b453SJohn Marino {
244695b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
244795b7b453SJohn Marino   reg_errcode_t err;
244895b7b453SJohn Marino   Idx i;
244995b7b453SJohn Marino 
245095b7b453SJohn Marino   for (i = 0; i < pstate->nodes.nelem; ++i)
245195b7b453SJohn Marino     {
245295b7b453SJohn Marino       re_node_set dest_nodes, *new_nodes;
245395b7b453SJohn Marino       Idx cur_node_idx = pstate->nodes.elems[i];
245495b7b453SJohn Marino       int naccepted;
245595b7b453SJohn Marino       Idx dest_idx;
245695b7b453SJohn Marino       unsigned int context;
245795b7b453SJohn Marino       re_dfastate_t *dest_state;
245895b7b453SJohn Marino 
245995b7b453SJohn Marino       if (!dfa->nodes[cur_node_idx].accept_mb)
246095b7b453SJohn Marino 	continue;
246195b7b453SJohn Marino 
246295b7b453SJohn Marino       if (dfa->nodes[cur_node_idx].constraint)
246395b7b453SJohn Marino 	{
246495b7b453SJohn Marino 	  context = re_string_context_at (&mctx->input,
246595b7b453SJohn Marino 					  re_string_cur_idx (&mctx->input),
246695b7b453SJohn Marino 					  mctx->eflags);
246795b7b453SJohn Marino 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
246895b7b453SJohn Marino 					   context))
246995b7b453SJohn Marino 	    continue;
247095b7b453SJohn Marino 	}
247195b7b453SJohn Marino 
247295b7b453SJohn Marino       /* How many bytes the node can accept?  */
247395b7b453SJohn Marino       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
247495b7b453SJohn Marino 					   re_string_cur_idx (&mctx->input));
247595b7b453SJohn Marino       if (naccepted == 0)
247695b7b453SJohn Marino 	continue;
247795b7b453SJohn Marino 
2478cf28ed85SJohn Marino       /* The node can accepts 'naccepted' bytes.  */
247995b7b453SJohn Marino       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
248095b7b453SJohn Marino       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
248195b7b453SJohn Marino 			       : mctx->max_mb_elem_len);
248295b7b453SJohn Marino       err = clean_state_log_if_needed (mctx, dest_idx);
2483*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
248495b7b453SJohn Marino 	return err;
2485*09d4459fSDaniel Fojt       DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
248695b7b453SJohn Marino       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
248795b7b453SJohn Marino 
248895b7b453SJohn Marino       dest_state = mctx->state_log[dest_idx];
248995b7b453SJohn Marino       if (dest_state == NULL)
249095b7b453SJohn Marino 	dest_nodes = *new_nodes;
249195b7b453SJohn Marino       else
249295b7b453SJohn Marino 	{
249395b7b453SJohn Marino 	  err = re_node_set_init_union (&dest_nodes,
249495b7b453SJohn Marino 					dest_state->entrance_nodes, new_nodes);
2495*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
249695b7b453SJohn Marino 	    return err;
249795b7b453SJohn Marino 	}
249895b7b453SJohn Marino       context = re_string_context_at (&mctx->input, dest_idx - 1,
249995b7b453SJohn Marino 				      mctx->eflags);
250095b7b453SJohn Marino       mctx->state_log[dest_idx]
250195b7b453SJohn Marino 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
250295b7b453SJohn Marino       if (dest_state != NULL)
250395b7b453SJohn Marino 	re_node_set_free (&dest_nodes);
2504*09d4459fSDaniel Fojt       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2505*09d4459fSDaniel Fojt 			    && err != REG_NOERROR))
250695b7b453SJohn Marino 	return err;
250795b7b453SJohn Marino     }
250895b7b453SJohn Marino   return REG_NOERROR;
250995b7b453SJohn Marino }
251095b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
251195b7b453SJohn Marino 
251295b7b453SJohn Marino static reg_errcode_t
transit_state_bkref(re_match_context_t * mctx,const re_node_set * nodes)251395b7b453SJohn Marino transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
251495b7b453SJohn Marino {
251595b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
251695b7b453SJohn Marino   reg_errcode_t err;
251795b7b453SJohn Marino   Idx i;
251895b7b453SJohn Marino   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
251995b7b453SJohn Marino 
252095b7b453SJohn Marino   for (i = 0; i < nodes->nelem; ++i)
252195b7b453SJohn Marino     {
252295b7b453SJohn Marino       Idx dest_str_idx, prev_nelem, bkc_idx;
252395b7b453SJohn Marino       Idx node_idx = nodes->elems[i];
252495b7b453SJohn Marino       unsigned int context;
252595b7b453SJohn Marino       const re_token_t *node = dfa->nodes + node_idx;
252695b7b453SJohn Marino       re_node_set *new_dest_nodes;
252795b7b453SJohn Marino 
2528cf28ed85SJohn Marino       /* Check whether 'node' is a backreference or not.  */
252995b7b453SJohn Marino       if (node->type != OP_BACK_REF)
253095b7b453SJohn Marino 	continue;
253195b7b453SJohn Marino 
253295b7b453SJohn Marino       if (node->constraint)
253395b7b453SJohn Marino 	{
253495b7b453SJohn Marino 	  context = re_string_context_at (&mctx->input, cur_str_idx,
253595b7b453SJohn Marino 					  mctx->eflags);
253695b7b453SJohn Marino 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
253795b7b453SJohn Marino 	    continue;
253895b7b453SJohn Marino 	}
253995b7b453SJohn Marino 
2540cf28ed85SJohn Marino       /* 'node' is a backreference.
254195b7b453SJohn Marino 	 Check the substring which the substring matched.  */
254295b7b453SJohn Marino       bkc_idx = mctx->nbkref_ents;
254395b7b453SJohn Marino       err = get_subexp (mctx, node_idx, cur_str_idx);
2544*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
254595b7b453SJohn Marino 	goto free_return;
254695b7b453SJohn Marino 
2547cf28ed85SJohn Marino       /* And add the epsilon closures (which is 'new_dest_nodes') of
254895b7b453SJohn Marino 	 the backreference to appropriate state_log.  */
2549*09d4459fSDaniel Fojt       DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
255095b7b453SJohn Marino       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
255195b7b453SJohn Marino 	{
255295b7b453SJohn Marino 	  Idx subexp_len;
255395b7b453SJohn Marino 	  re_dfastate_t *dest_state;
255495b7b453SJohn Marino 	  struct re_backref_cache_entry *bkref_ent;
255595b7b453SJohn Marino 	  bkref_ent = mctx->bkref_ents + bkc_idx;
255695b7b453SJohn Marino 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
255795b7b453SJohn Marino 	    continue;
255895b7b453SJohn Marino 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
255995b7b453SJohn Marino 	  new_dest_nodes = (subexp_len == 0
256095b7b453SJohn Marino 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
256195b7b453SJohn Marino 			    : dfa->eclosures + dfa->nexts[node_idx]);
256295b7b453SJohn Marino 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
256395b7b453SJohn Marino 			  - bkref_ent->subexp_from);
256495b7b453SJohn Marino 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
256595b7b453SJohn Marino 					  mctx->eflags);
256695b7b453SJohn Marino 	  dest_state = mctx->state_log[dest_str_idx];
256795b7b453SJohn Marino 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
256895b7b453SJohn Marino 			: mctx->state_log[cur_str_idx]->nodes.nelem);
2569cf28ed85SJohn Marino 	  /* Add 'new_dest_node' to state_log.  */
257095b7b453SJohn Marino 	  if (dest_state == NULL)
257195b7b453SJohn Marino 	    {
257295b7b453SJohn Marino 	      mctx->state_log[dest_str_idx]
257395b7b453SJohn Marino 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
257495b7b453SJohn Marino 					    context);
2575*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576*09d4459fSDaniel Fojt 				    && err != REG_NOERROR))
257795b7b453SJohn Marino 		goto free_return;
257895b7b453SJohn Marino 	    }
257995b7b453SJohn Marino 	  else
258095b7b453SJohn Marino 	    {
258195b7b453SJohn Marino 	      re_node_set dest_nodes;
258295b7b453SJohn Marino 	      err = re_node_set_init_union (&dest_nodes,
258395b7b453SJohn Marino 					    dest_state->entrance_nodes,
258495b7b453SJohn Marino 					    new_dest_nodes);
2585*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
258695b7b453SJohn Marino 		{
258795b7b453SJohn Marino 		  re_node_set_free (&dest_nodes);
258895b7b453SJohn Marino 		  goto free_return;
258995b7b453SJohn Marino 		}
259095b7b453SJohn Marino 	      mctx->state_log[dest_str_idx]
259195b7b453SJohn Marino 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
259295b7b453SJohn Marino 	      re_node_set_free (&dest_nodes);
2593*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2594*09d4459fSDaniel Fojt 				    && err != REG_NOERROR))
259595b7b453SJohn Marino 		goto free_return;
259695b7b453SJohn Marino 	    }
259795b7b453SJohn Marino 	  /* We need to check recursively if the backreference can epsilon
259895b7b453SJohn Marino 	     transit.  */
259995b7b453SJohn Marino 	  if (subexp_len == 0
260095b7b453SJohn Marino 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
260195b7b453SJohn Marino 	    {
260295b7b453SJohn Marino 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
260395b7b453SJohn Marino 					       cur_str_idx);
2604*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
260595b7b453SJohn Marino 		goto free_return;
260695b7b453SJohn Marino 	      err = transit_state_bkref (mctx, new_dest_nodes);
2607*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
260895b7b453SJohn Marino 		goto free_return;
260995b7b453SJohn Marino 	    }
261095b7b453SJohn Marino 	}
261195b7b453SJohn Marino     }
261295b7b453SJohn Marino   err = REG_NOERROR;
261395b7b453SJohn Marino  free_return:
261495b7b453SJohn Marino   return err;
261595b7b453SJohn Marino }
261695b7b453SJohn Marino 
261795b7b453SJohn Marino /* Enumerate all the candidates which the backreference BKREF_NODE can match
261895b7b453SJohn Marino    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
261995b7b453SJohn Marino    Note that we might collect inappropriate candidates here.
262095b7b453SJohn Marino    However, the cost of checking them strictly here is too high, then we
262195b7b453SJohn Marino    delay these checking for prune_impossible_nodes().  */
262295b7b453SJohn Marino 
262395b7b453SJohn Marino static reg_errcode_t
2624*09d4459fSDaniel Fojt __attribute_warn_unused_result__
get_subexp(re_match_context_t * mctx,Idx bkref_node,Idx bkref_str_idx)262595b7b453SJohn Marino get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
262695b7b453SJohn Marino {
262795b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
262895b7b453SJohn Marino   Idx subexp_num, sub_top_idx;
262995b7b453SJohn Marino   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
263095b7b453SJohn Marino   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
263195b7b453SJohn Marino   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2632*09d4459fSDaniel Fojt   if (cache_idx != -1)
263395b7b453SJohn Marino     {
263495b7b453SJohn Marino       const struct re_backref_cache_entry *entry
263595b7b453SJohn Marino 	= mctx->bkref_ents + cache_idx;
263695b7b453SJohn Marino       do
263795b7b453SJohn Marino 	if (entry->node == bkref_node)
263895b7b453SJohn Marino 	  return REG_NOERROR; /* We already checked it.  */
263995b7b453SJohn Marino       while (entry++->more);
264095b7b453SJohn Marino     }
264195b7b453SJohn Marino 
264295b7b453SJohn Marino   subexp_num = dfa->nodes[bkref_node].opr.idx;
264395b7b453SJohn Marino 
264495b7b453SJohn Marino   /* For each sub expression  */
264595b7b453SJohn Marino   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
264695b7b453SJohn Marino     {
264795b7b453SJohn Marino       reg_errcode_t err;
264895b7b453SJohn Marino       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
264995b7b453SJohn Marino       re_sub_match_last_t *sub_last;
265095b7b453SJohn Marino       Idx sub_last_idx, sl_str, bkref_str_off;
265195b7b453SJohn Marino 
265295b7b453SJohn Marino       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
265395b7b453SJohn Marino 	continue; /* It isn't related.  */
265495b7b453SJohn Marino 
265595b7b453SJohn Marino       sl_str = sub_top->str_idx;
265695b7b453SJohn Marino       bkref_str_off = bkref_str_idx;
265795b7b453SJohn Marino       /* At first, check the last node of sub expressions we already
265895b7b453SJohn Marino 	 evaluated.  */
265995b7b453SJohn Marino       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
266095b7b453SJohn Marino 	{
266195b7b453SJohn Marino 	  regoff_t sl_str_diff;
266295b7b453SJohn Marino 	  sub_last = sub_top->lasts[sub_last_idx];
266395b7b453SJohn Marino 	  sl_str_diff = sub_last->str_idx - sl_str;
266495b7b453SJohn Marino 	  /* The matched string by the sub expression match with the substring
266595b7b453SJohn Marino 	     at the back reference?  */
266695b7b453SJohn Marino 	  if (sl_str_diff > 0)
266795b7b453SJohn Marino 	    {
2668*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (bkref_str_off + sl_str_diff
2669*09d4459fSDaniel Fojt 				    > mctx->input.valid_len))
267095b7b453SJohn Marino 		{
267195b7b453SJohn Marino 		  /* Not enough chars for a successful match.  */
267295b7b453SJohn Marino 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
267395b7b453SJohn Marino 		    break;
267495b7b453SJohn Marino 
267595b7b453SJohn Marino 		  err = clean_state_log_if_needed (mctx,
267695b7b453SJohn Marino 						   bkref_str_off
267795b7b453SJohn Marino 						   + sl_str_diff);
2678*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
267995b7b453SJohn Marino 		    return err;
268095b7b453SJohn Marino 		  buf = (const char *) re_string_get_buffer (&mctx->input);
268195b7b453SJohn Marino 		}
268295b7b453SJohn Marino 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
268395b7b453SJohn Marino 		/* We don't need to search this sub expression any more.  */
268495b7b453SJohn Marino 		break;
268595b7b453SJohn Marino 	    }
268695b7b453SJohn Marino 	  bkref_str_off += sl_str_diff;
268795b7b453SJohn Marino 	  sl_str += sl_str_diff;
268895b7b453SJohn Marino 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
268995b7b453SJohn Marino 				bkref_str_idx);
269095b7b453SJohn Marino 
269195b7b453SJohn Marino 	  /* Reload buf, since the preceding call might have reallocated
269295b7b453SJohn Marino 	     the buffer.  */
269395b7b453SJohn Marino 	  buf = (const char *) re_string_get_buffer (&mctx->input);
269495b7b453SJohn Marino 
269595b7b453SJohn Marino 	  if (err == REG_NOMATCH)
269695b7b453SJohn Marino 	    continue;
2697*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
269895b7b453SJohn Marino 	    return err;
269995b7b453SJohn Marino 	}
270095b7b453SJohn Marino 
270195b7b453SJohn Marino       if (sub_last_idx < sub_top->nlasts)
270295b7b453SJohn Marino 	continue;
270395b7b453SJohn Marino       if (sub_last_idx > 0)
270495b7b453SJohn Marino 	++sl_str;
270595b7b453SJohn Marino       /* Then, search for the other last nodes of the sub expression.  */
270695b7b453SJohn Marino       for (; sl_str <= bkref_str_idx; ++sl_str)
270795b7b453SJohn Marino 	{
270895b7b453SJohn Marino 	  Idx cls_node;
270995b7b453SJohn Marino 	  regoff_t sl_str_off;
271095b7b453SJohn Marino 	  const re_node_set *nodes;
271195b7b453SJohn Marino 	  sl_str_off = sl_str - sub_top->str_idx;
271295b7b453SJohn Marino 	  /* The matched string by the sub expression match with the substring
271395b7b453SJohn Marino 	     at the back reference?  */
271495b7b453SJohn Marino 	  if (sl_str_off > 0)
271595b7b453SJohn Marino 	    {
2716*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
271795b7b453SJohn Marino 		{
271895b7b453SJohn Marino 		  /* If we are at the end of the input, we cannot match.  */
271995b7b453SJohn Marino 		  if (bkref_str_off >= mctx->input.len)
272095b7b453SJohn Marino 		    break;
272195b7b453SJohn Marino 
2722680a9cb8SJohn Marino 		  err = extend_buffers (mctx, bkref_str_off + 1);
2723*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
272495b7b453SJohn Marino 		    return err;
272595b7b453SJohn Marino 
272695b7b453SJohn Marino 		  buf = (const char *) re_string_get_buffer (&mctx->input);
272795b7b453SJohn Marino 		}
272895b7b453SJohn Marino 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
272995b7b453SJohn Marino 		break; /* We don't need to search this sub expression
273095b7b453SJohn Marino 			  any more.  */
273195b7b453SJohn Marino 	    }
273295b7b453SJohn Marino 	  if (mctx->state_log[sl_str] == NULL)
273395b7b453SJohn Marino 	    continue;
273495b7b453SJohn Marino 	  /* Does this state have a ')' of the sub expression?  */
273595b7b453SJohn Marino 	  nodes = &mctx->state_log[sl_str]->nodes;
273695b7b453SJohn Marino 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
273795b7b453SJohn Marino 				       OP_CLOSE_SUBEXP);
2738*09d4459fSDaniel Fojt 	  if (cls_node == -1)
273995b7b453SJohn Marino 	    continue; /* No.  */
274095b7b453SJohn Marino 	  if (sub_top->path == NULL)
274195b7b453SJohn Marino 	    {
274295b7b453SJohn Marino 	      sub_top->path = calloc (sizeof (state_array_t),
274395b7b453SJohn Marino 				      sl_str - sub_top->str_idx + 1);
274495b7b453SJohn Marino 	      if (sub_top->path == NULL)
274595b7b453SJohn Marino 		return REG_ESPACE;
274695b7b453SJohn Marino 	    }
274795b7b453SJohn Marino 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
274895b7b453SJohn Marino 	     in the current context?  */
274995b7b453SJohn Marino 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
275095b7b453SJohn Marino 			       sub_top->str_idx, cls_node, sl_str,
275195b7b453SJohn Marino 			       OP_CLOSE_SUBEXP);
275295b7b453SJohn Marino 	  if (err == REG_NOMATCH)
275395b7b453SJohn Marino 	      continue;
2754*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
275595b7b453SJohn Marino 	      return err;
275695b7b453SJohn Marino 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2757*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (sub_last == NULL))
275895b7b453SJohn Marino 	    return REG_ESPACE;
275995b7b453SJohn Marino 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
276095b7b453SJohn Marino 				bkref_str_idx);
2761*09d4459fSDaniel Fojt 	  buf = (const char *) re_string_get_buffer (&mctx->input);
276295b7b453SJohn Marino 	  if (err == REG_NOMATCH)
276395b7b453SJohn Marino 	    continue;
2764*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
2765*09d4459fSDaniel Fojt 	    return err;
276695b7b453SJohn Marino 	}
276795b7b453SJohn Marino     }
276895b7b453SJohn Marino   return REG_NOERROR;
276995b7b453SJohn Marino }
277095b7b453SJohn Marino 
277195b7b453SJohn Marino /* Helper functions for get_subexp().  */
277295b7b453SJohn Marino 
277395b7b453SJohn Marino /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
277495b7b453SJohn Marino    If it can arrive, register the sub expression expressed with SUB_TOP
277595b7b453SJohn Marino    and SUB_LAST.  */
277695b7b453SJohn Marino 
277795b7b453SJohn Marino static reg_errcode_t
get_subexp_sub(re_match_context_t * mctx,const re_sub_match_top_t * sub_top,re_sub_match_last_t * sub_last,Idx bkref_node,Idx bkref_str)277895b7b453SJohn Marino get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
277995b7b453SJohn Marino 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
278095b7b453SJohn Marino {
278195b7b453SJohn Marino   reg_errcode_t err;
278295b7b453SJohn Marino   Idx to_idx;
278395b7b453SJohn Marino   /* Can the subexpression arrive the back reference?  */
278495b7b453SJohn Marino   err = check_arrival (mctx, &sub_last->path, sub_last->node,
278595b7b453SJohn Marino 		       sub_last->str_idx, bkref_node, bkref_str,
278695b7b453SJohn Marino 		       OP_OPEN_SUBEXP);
278795b7b453SJohn Marino   if (err != REG_NOERROR)
278895b7b453SJohn Marino     return err;
278995b7b453SJohn Marino   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
279095b7b453SJohn Marino 			     sub_last->str_idx);
2791*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
279295b7b453SJohn Marino     return err;
279395b7b453SJohn Marino   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
279495b7b453SJohn Marino   return clean_state_log_if_needed (mctx, to_idx);
279595b7b453SJohn Marino }
279695b7b453SJohn Marino 
279795b7b453SJohn Marino /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
279895b7b453SJohn Marino    Search '(' if FL_OPEN, or search ')' otherwise.
279995b7b453SJohn Marino    TODO: This function isn't efficient...
280095b7b453SJohn Marino 	 Because there might be more than one nodes whose types are
280195b7b453SJohn Marino 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
280295b7b453SJohn Marino 	 nodes.
280395b7b453SJohn Marino 	 E.g. RE: (a){2}  */
280495b7b453SJohn Marino 
280595b7b453SJohn Marino static Idx
find_subexp_node(const re_dfa_t * dfa,const re_node_set * nodes,Idx subexp_idx,int type)280695b7b453SJohn Marino find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
280795b7b453SJohn Marino 		  Idx subexp_idx, int type)
280895b7b453SJohn Marino {
280995b7b453SJohn Marino   Idx cls_idx;
281095b7b453SJohn Marino   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
281195b7b453SJohn Marino     {
281295b7b453SJohn Marino       Idx cls_node = nodes->elems[cls_idx];
281395b7b453SJohn Marino       const re_token_t *node = dfa->nodes + cls_node;
281495b7b453SJohn Marino       if (node->type == type
281595b7b453SJohn Marino 	  && node->opr.idx == subexp_idx)
281695b7b453SJohn Marino 	return cls_node;
281795b7b453SJohn Marino     }
2818*09d4459fSDaniel Fojt   return -1;
281995b7b453SJohn Marino }
282095b7b453SJohn Marino 
282195b7b453SJohn Marino /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
282295b7b453SJohn Marino    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
282395b7b453SJohn Marino    heavily reused.
282495b7b453SJohn Marino    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
282595b7b453SJohn Marino 
282695b7b453SJohn Marino static reg_errcode_t
2827*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival(re_match_context_t * mctx,state_array_t * path,Idx top_node,Idx top_str,Idx last_node,Idx last_str,int type)282895b7b453SJohn Marino check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
282995b7b453SJohn Marino 	       Idx top_str, Idx last_node, Idx last_str, int type)
283095b7b453SJohn Marino {
283195b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
283295b7b453SJohn Marino   reg_errcode_t err = REG_NOERROR;
283395b7b453SJohn Marino   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
283495b7b453SJohn Marino   re_dfastate_t *cur_state = NULL;
283595b7b453SJohn Marino   re_node_set *cur_nodes, next_nodes;
283695b7b453SJohn Marino   re_dfastate_t **backup_state_log;
283795b7b453SJohn Marino   unsigned int context;
283895b7b453SJohn Marino 
283995b7b453SJohn Marino   subexp_num = dfa->nodes[top_node].opr.idx;
284095b7b453SJohn Marino   /* Extend the buffer if we need.  */
2841*09d4459fSDaniel Fojt   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
284295b7b453SJohn Marino     {
284395b7b453SJohn Marino       re_dfastate_t **new_array;
284495b7b453SJohn Marino       Idx old_alloc = path->alloc;
2845cf28ed85SJohn Marino       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2846cf28ed85SJohn Marino       Idx new_alloc;
2847*09d4459fSDaniel Fojt       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2848cf28ed85SJohn Marino 	return REG_ESPACE;
2849cf28ed85SJohn Marino       new_alloc = old_alloc + incr_alloc;
2850*09d4459fSDaniel Fojt       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
285195b7b453SJohn Marino 	return REG_ESPACE;
285295b7b453SJohn Marino       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2853*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
285495b7b453SJohn Marino 	return REG_ESPACE;
285595b7b453SJohn Marino       path->array = new_array;
285695b7b453SJohn Marino       path->alloc = new_alloc;
285795b7b453SJohn Marino       memset (new_array + old_alloc, '\0',
285895b7b453SJohn Marino 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
285995b7b453SJohn Marino     }
286095b7b453SJohn Marino 
286195b7b453SJohn Marino   str_idx = path->next_idx ? path->next_idx : top_str;
286295b7b453SJohn Marino 
286395b7b453SJohn Marino   /* Temporary modify MCTX.  */
286495b7b453SJohn Marino   backup_state_log = mctx->state_log;
286595b7b453SJohn Marino   backup_cur_idx = mctx->input.cur_idx;
286695b7b453SJohn Marino   mctx->state_log = path->array;
286795b7b453SJohn Marino   mctx->input.cur_idx = str_idx;
286895b7b453SJohn Marino 
286995b7b453SJohn Marino   /* Setup initial node set.  */
287095b7b453SJohn Marino   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
287195b7b453SJohn Marino   if (str_idx == top_str)
287295b7b453SJohn Marino     {
287395b7b453SJohn Marino       err = re_node_set_init_1 (&next_nodes, top_node);
2874*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
287595b7b453SJohn Marino 	return err;
287695b7b453SJohn Marino       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2877*09d4459fSDaniel Fojt       if (__glibc_unlikely (err != REG_NOERROR))
287895b7b453SJohn Marino 	{
287995b7b453SJohn Marino 	  re_node_set_free (&next_nodes);
288095b7b453SJohn Marino 	  return err;
288195b7b453SJohn Marino 	}
288295b7b453SJohn Marino     }
288395b7b453SJohn Marino   else
288495b7b453SJohn Marino     {
288595b7b453SJohn Marino       cur_state = mctx->state_log[str_idx];
288695b7b453SJohn Marino       if (cur_state && cur_state->has_backref)
288795b7b453SJohn Marino 	{
288895b7b453SJohn Marino 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2889*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
289095b7b453SJohn Marino 	    return err;
289195b7b453SJohn Marino 	}
289295b7b453SJohn Marino       else
289395b7b453SJohn Marino 	re_node_set_init_empty (&next_nodes);
289495b7b453SJohn Marino     }
289595b7b453SJohn Marino   if (str_idx == top_str || (cur_state && cur_state->has_backref))
289695b7b453SJohn Marino     {
289795b7b453SJohn Marino       if (next_nodes.nelem)
289895b7b453SJohn Marino 	{
289995b7b453SJohn Marino 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
290095b7b453SJohn Marino 				    subexp_num, type);
2901*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
290295b7b453SJohn Marino 	    {
290395b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
290495b7b453SJohn Marino 	      return err;
290595b7b453SJohn Marino 	    }
290695b7b453SJohn Marino 	}
290795b7b453SJohn Marino       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2908*09d4459fSDaniel Fojt       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
290995b7b453SJohn Marino 	{
291095b7b453SJohn Marino 	  re_node_set_free (&next_nodes);
291195b7b453SJohn Marino 	  return err;
291295b7b453SJohn Marino 	}
291395b7b453SJohn Marino       mctx->state_log[str_idx] = cur_state;
291495b7b453SJohn Marino     }
291595b7b453SJohn Marino 
291695b7b453SJohn Marino   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
291795b7b453SJohn Marino     {
291895b7b453SJohn Marino       re_node_set_empty (&next_nodes);
291995b7b453SJohn Marino       if (mctx->state_log[str_idx + 1])
292095b7b453SJohn Marino 	{
292195b7b453SJohn Marino 	  err = re_node_set_merge (&next_nodes,
292295b7b453SJohn Marino 				   &mctx->state_log[str_idx + 1]->nodes);
2923*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
292495b7b453SJohn Marino 	    {
292595b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
292695b7b453SJohn Marino 	      return err;
292795b7b453SJohn Marino 	    }
292895b7b453SJohn Marino 	}
292995b7b453SJohn Marino       if (cur_state)
293095b7b453SJohn Marino 	{
293195b7b453SJohn Marino 	  err = check_arrival_add_next_nodes (mctx, str_idx,
293295b7b453SJohn Marino 					      &cur_state->non_eps_nodes,
293395b7b453SJohn Marino 					      &next_nodes);
2934*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
293595b7b453SJohn Marino 	    {
293695b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
293795b7b453SJohn Marino 	      return err;
293895b7b453SJohn Marino 	    }
293995b7b453SJohn Marino 	}
294095b7b453SJohn Marino       ++str_idx;
294195b7b453SJohn Marino       if (next_nodes.nelem)
294295b7b453SJohn Marino 	{
294395b7b453SJohn Marino 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2944*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
294595b7b453SJohn Marino 	    {
294695b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
294795b7b453SJohn Marino 	      return err;
294895b7b453SJohn Marino 	    }
294995b7b453SJohn Marino 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
295095b7b453SJohn Marino 				    subexp_num, type);
2951*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
295295b7b453SJohn Marino 	    {
295395b7b453SJohn Marino 	      re_node_set_free (&next_nodes);
295495b7b453SJohn Marino 	      return err;
295595b7b453SJohn Marino 	    }
295695b7b453SJohn Marino 	}
295795b7b453SJohn Marino       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
295895b7b453SJohn Marino       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2959*09d4459fSDaniel Fojt       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
296095b7b453SJohn Marino 	{
296195b7b453SJohn Marino 	  re_node_set_free (&next_nodes);
296295b7b453SJohn Marino 	  return err;
296395b7b453SJohn Marino 	}
296495b7b453SJohn Marino       mctx->state_log[str_idx] = cur_state;
296595b7b453SJohn Marino       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
296695b7b453SJohn Marino     }
296795b7b453SJohn Marino   re_node_set_free (&next_nodes);
296895b7b453SJohn Marino   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
296995b7b453SJohn Marino 	       : &mctx->state_log[last_str]->nodes);
297095b7b453SJohn Marino   path->next_idx = str_idx;
297195b7b453SJohn Marino 
297295b7b453SJohn Marino   /* Fix MCTX.  */
297395b7b453SJohn Marino   mctx->state_log = backup_state_log;
297495b7b453SJohn Marino   mctx->input.cur_idx = backup_cur_idx;
297595b7b453SJohn Marino 
297695b7b453SJohn Marino   /* Then check the current node set has the node LAST_NODE.  */
297795b7b453SJohn Marino   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
297895b7b453SJohn Marino     return REG_NOERROR;
297995b7b453SJohn Marino 
298095b7b453SJohn Marino   return REG_NOMATCH;
298195b7b453SJohn Marino }
298295b7b453SJohn Marino 
298395b7b453SJohn Marino /* Helper functions for check_arrival.  */
298495b7b453SJohn Marino 
298595b7b453SJohn Marino /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
298695b7b453SJohn Marino    to NEXT_NODES.
298795b7b453SJohn Marino    TODO: This function is similar to the functions transit_state*(),
298895b7b453SJohn Marino 	 however this function has many additional works.
298995b7b453SJohn Marino 	 Can't we unify them?  */
299095b7b453SJohn Marino 
299195b7b453SJohn Marino static reg_errcode_t
2992*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival_add_next_nodes(re_match_context_t * mctx,Idx str_idx,re_node_set * cur_nodes,re_node_set * next_nodes)299395b7b453SJohn Marino check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
299495b7b453SJohn Marino 			      re_node_set *cur_nodes, re_node_set *next_nodes)
299595b7b453SJohn Marino {
299695b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
299795b7b453SJohn Marino   bool ok;
299895b7b453SJohn Marino   Idx cur_idx;
299995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
300095b7b453SJohn Marino   reg_errcode_t err = REG_NOERROR;
300195b7b453SJohn Marino #endif
300295b7b453SJohn Marino   re_node_set union_set;
300395b7b453SJohn Marino   re_node_set_init_empty (&union_set);
300495b7b453SJohn Marino   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
300595b7b453SJohn Marino     {
300695b7b453SJohn Marino       int naccepted = 0;
300795b7b453SJohn Marino       Idx cur_node = cur_nodes->elems[cur_idx];
3008*09d4459fSDaniel Fojt       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
3009*09d4459fSDaniel Fojt 
301095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
3011cf28ed85SJohn Marino       /* If the node may accept "multi byte".  */
301295b7b453SJohn Marino       if (dfa->nodes[cur_node].accept_mb)
301395b7b453SJohn Marino 	{
301495b7b453SJohn Marino 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
301595b7b453SJohn Marino 					       str_idx);
301695b7b453SJohn Marino 	  if (naccepted > 1)
301795b7b453SJohn Marino 	    {
301895b7b453SJohn Marino 	      re_dfastate_t *dest_state;
301995b7b453SJohn Marino 	      Idx next_node = dfa->nexts[cur_node];
302095b7b453SJohn Marino 	      Idx next_idx = str_idx + naccepted;
302195b7b453SJohn Marino 	      dest_state = mctx->state_log[next_idx];
302295b7b453SJohn Marino 	      re_node_set_empty (&union_set);
302395b7b453SJohn Marino 	      if (dest_state)
302495b7b453SJohn Marino 		{
302595b7b453SJohn Marino 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
3026*09d4459fSDaniel Fojt 		  if (__glibc_unlikely (err != REG_NOERROR))
302795b7b453SJohn Marino 		    {
302895b7b453SJohn Marino 		      re_node_set_free (&union_set);
302995b7b453SJohn Marino 		      return err;
303095b7b453SJohn Marino 		    }
303195b7b453SJohn Marino 		}
303295b7b453SJohn Marino 	      ok = re_node_set_insert (&union_set, next_node);
3033*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (! ok))
303495b7b453SJohn Marino 		{
303595b7b453SJohn Marino 		  re_node_set_free (&union_set);
303695b7b453SJohn Marino 		  return REG_ESPACE;
303795b7b453SJohn Marino 		}
303895b7b453SJohn Marino 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
303995b7b453SJohn Marino 							    &union_set);
3040*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3041*09d4459fSDaniel Fojt 				    && err != REG_NOERROR))
304295b7b453SJohn Marino 		{
304395b7b453SJohn Marino 		  re_node_set_free (&union_set);
304495b7b453SJohn Marino 		  return err;
304595b7b453SJohn Marino 		}
304695b7b453SJohn Marino 	    }
304795b7b453SJohn Marino 	}
304895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
304995b7b453SJohn Marino       if (naccepted
305095b7b453SJohn Marino 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
305195b7b453SJohn Marino 	{
305295b7b453SJohn Marino 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3053*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (! ok))
305495b7b453SJohn Marino 	    {
305595b7b453SJohn Marino 	      re_node_set_free (&union_set);
305695b7b453SJohn Marino 	      return REG_ESPACE;
305795b7b453SJohn Marino 	    }
305895b7b453SJohn Marino 	}
305995b7b453SJohn Marino     }
306095b7b453SJohn Marino   re_node_set_free (&union_set);
306195b7b453SJohn Marino   return REG_NOERROR;
306295b7b453SJohn Marino }
306395b7b453SJohn Marino 
306495b7b453SJohn Marino /* For all the nodes in CUR_NODES, add the epsilon closures of them to
306595b7b453SJohn Marino    CUR_NODES, however exclude the nodes which are:
306695b7b453SJohn Marino     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
306795b7b453SJohn Marino     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
306895b7b453SJohn Marino */
306995b7b453SJohn Marino 
307095b7b453SJohn Marino static reg_errcode_t
check_arrival_expand_ecl(const re_dfa_t * dfa,re_node_set * cur_nodes,Idx ex_subexp,int type)307195b7b453SJohn Marino check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
307295b7b453SJohn Marino 			  Idx ex_subexp, int type)
307395b7b453SJohn Marino {
307495b7b453SJohn Marino   reg_errcode_t err;
307595b7b453SJohn Marino   Idx idx, outside_node;
307695b7b453SJohn Marino   re_node_set new_nodes;
3077*09d4459fSDaniel Fojt   DEBUG_ASSERT (cur_nodes->nelem);
307895b7b453SJohn Marino   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3079*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
308095b7b453SJohn Marino     return err;
308195b7b453SJohn Marino   /* Create a new node set NEW_NODES with the nodes which are epsilon
308295b7b453SJohn Marino      closures of the node in CUR_NODES.  */
308395b7b453SJohn Marino 
308495b7b453SJohn Marino   for (idx = 0; idx < cur_nodes->nelem; ++idx)
308595b7b453SJohn Marino     {
308695b7b453SJohn Marino       Idx cur_node = cur_nodes->elems[idx];
308795b7b453SJohn Marino       const re_node_set *eclosure = dfa->eclosures + cur_node;
308895b7b453SJohn Marino       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3089*09d4459fSDaniel Fojt       if (outside_node == -1)
309095b7b453SJohn Marino 	{
309195b7b453SJohn Marino 	  /* There are no problematic nodes, just merge them.  */
309295b7b453SJohn Marino 	  err = re_node_set_merge (&new_nodes, eclosure);
3093*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
309495b7b453SJohn Marino 	    {
309595b7b453SJohn Marino 	      re_node_set_free (&new_nodes);
309695b7b453SJohn Marino 	      return err;
309795b7b453SJohn Marino 	    }
309895b7b453SJohn Marino 	}
309995b7b453SJohn Marino       else
310095b7b453SJohn Marino 	{
310195b7b453SJohn Marino 	  /* There are problematic nodes, re-calculate incrementally.  */
310295b7b453SJohn Marino 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
310395b7b453SJohn Marino 					      ex_subexp, type);
3104*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
310595b7b453SJohn Marino 	    {
310695b7b453SJohn Marino 	      re_node_set_free (&new_nodes);
310795b7b453SJohn Marino 	      return err;
310895b7b453SJohn Marino 	    }
310995b7b453SJohn Marino 	}
311095b7b453SJohn Marino     }
311195b7b453SJohn Marino   re_node_set_free (cur_nodes);
311295b7b453SJohn Marino   *cur_nodes = new_nodes;
311395b7b453SJohn Marino   return REG_NOERROR;
311495b7b453SJohn Marino }
311595b7b453SJohn Marino 
311695b7b453SJohn Marino /* Helper function for check_arrival_expand_ecl.
311795b7b453SJohn Marino    Check incrementally the epsilon closure of TARGET, and if it isn't
311895b7b453SJohn Marino    problematic append it to DST_NODES.  */
311995b7b453SJohn Marino 
312095b7b453SJohn Marino static reg_errcode_t
3121*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival_expand_ecl_sub(const re_dfa_t * dfa,re_node_set * dst_nodes,Idx target,Idx ex_subexp,int type)312295b7b453SJohn Marino check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
312395b7b453SJohn Marino 			      Idx target, Idx ex_subexp, int type)
312495b7b453SJohn Marino {
312595b7b453SJohn Marino   Idx cur_node;
312695b7b453SJohn Marino   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
312795b7b453SJohn Marino     {
312895b7b453SJohn Marino       bool ok;
312995b7b453SJohn Marino 
313095b7b453SJohn Marino       if (dfa->nodes[cur_node].type == type
313195b7b453SJohn Marino 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
313295b7b453SJohn Marino 	{
313395b7b453SJohn Marino 	  if (type == OP_CLOSE_SUBEXP)
313495b7b453SJohn Marino 	    {
313595b7b453SJohn Marino 	      ok = re_node_set_insert (dst_nodes, cur_node);
3136*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (! ok))
313795b7b453SJohn Marino 		return REG_ESPACE;
313895b7b453SJohn Marino 	    }
313995b7b453SJohn Marino 	  break;
314095b7b453SJohn Marino 	}
314195b7b453SJohn Marino       ok = re_node_set_insert (dst_nodes, cur_node);
3142*09d4459fSDaniel Fojt       if (__glibc_unlikely (! ok))
314395b7b453SJohn Marino 	return REG_ESPACE;
314495b7b453SJohn Marino       if (dfa->edests[cur_node].nelem == 0)
314595b7b453SJohn Marino 	break;
314695b7b453SJohn Marino       if (dfa->edests[cur_node].nelem == 2)
314795b7b453SJohn Marino 	{
314895b7b453SJohn Marino 	  reg_errcode_t err;
314995b7b453SJohn Marino 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
315095b7b453SJohn Marino 					      dfa->edests[cur_node].elems[1],
315195b7b453SJohn Marino 					      ex_subexp, type);
3152*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
315395b7b453SJohn Marino 	    return err;
315495b7b453SJohn Marino 	}
315595b7b453SJohn Marino       cur_node = dfa->edests[cur_node].elems[0];
315695b7b453SJohn Marino     }
315795b7b453SJohn Marino   return REG_NOERROR;
315895b7b453SJohn Marino }
315995b7b453SJohn Marino 
316095b7b453SJohn Marino 
316195b7b453SJohn Marino /* For all the back references in the current state, calculate the
316295b7b453SJohn Marino    destination of the back references by the appropriate entry
316395b7b453SJohn Marino    in MCTX->BKREF_ENTS.  */
316495b7b453SJohn Marino 
316595b7b453SJohn Marino static reg_errcode_t
3166*09d4459fSDaniel Fojt __attribute_warn_unused_result__
expand_bkref_cache(re_match_context_t * mctx,re_node_set * cur_nodes,Idx cur_str,Idx subexp_num,int type)316795b7b453SJohn Marino expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
316895b7b453SJohn Marino 		    Idx cur_str, Idx subexp_num, int type)
316995b7b453SJohn Marino {
317095b7b453SJohn Marino   const re_dfa_t *const dfa = mctx->dfa;
317195b7b453SJohn Marino   reg_errcode_t err;
317295b7b453SJohn Marino   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
317395b7b453SJohn Marino   struct re_backref_cache_entry *ent;
317495b7b453SJohn Marino 
3175*09d4459fSDaniel Fojt   if (cache_idx_start == -1)
317695b7b453SJohn Marino     return REG_NOERROR;
317795b7b453SJohn Marino 
317895b7b453SJohn Marino  restart:
317995b7b453SJohn Marino   ent = mctx->bkref_ents + cache_idx_start;
318095b7b453SJohn Marino   do
318195b7b453SJohn Marino     {
318295b7b453SJohn Marino       Idx to_idx, next_node;
318395b7b453SJohn Marino 
318495b7b453SJohn Marino       /* Is this entry ENT is appropriate?  */
318595b7b453SJohn Marino       if (!re_node_set_contains (cur_nodes, ent->node))
318695b7b453SJohn Marino 	continue; /* No.  */
318795b7b453SJohn Marino 
318895b7b453SJohn Marino       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
318995b7b453SJohn Marino       /* Calculate the destination of the back reference, and append it
319095b7b453SJohn Marino 	 to MCTX->STATE_LOG.  */
319195b7b453SJohn Marino       if (to_idx == cur_str)
319295b7b453SJohn Marino 	{
319395b7b453SJohn Marino 	  /* The backreference did epsilon transit, we must re-check all the
319495b7b453SJohn Marino 	     node in the current state.  */
319595b7b453SJohn Marino 	  re_node_set new_dests;
319695b7b453SJohn Marino 	  reg_errcode_t err2, err3;
319795b7b453SJohn Marino 	  next_node = dfa->edests[ent->node].elems[0];
319895b7b453SJohn Marino 	  if (re_node_set_contains (cur_nodes, next_node))
319995b7b453SJohn Marino 	    continue;
320095b7b453SJohn Marino 	  err = re_node_set_init_1 (&new_dests, next_node);
320195b7b453SJohn Marino 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
320295b7b453SJohn Marino 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
320395b7b453SJohn Marino 	  re_node_set_free (&new_dests);
3204*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3205*09d4459fSDaniel Fojt 				|| err3 != REG_NOERROR))
320695b7b453SJohn Marino 	    {
320795b7b453SJohn Marino 	      err = (err != REG_NOERROR ? err
320895b7b453SJohn Marino 		     : (err2 != REG_NOERROR ? err2 : err3));
320995b7b453SJohn Marino 	      return err;
321095b7b453SJohn Marino 	    }
321195b7b453SJohn Marino 	  /* TODO: It is still inefficient...  */
321295b7b453SJohn Marino 	  goto restart;
321395b7b453SJohn Marino 	}
321495b7b453SJohn Marino       else
321595b7b453SJohn Marino 	{
321695b7b453SJohn Marino 	  re_node_set union_set;
321795b7b453SJohn Marino 	  next_node = dfa->nexts[ent->node];
321895b7b453SJohn Marino 	  if (mctx->state_log[to_idx])
321995b7b453SJohn Marino 	    {
322095b7b453SJohn Marino 	      bool ok;
322195b7b453SJohn Marino 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
322295b7b453SJohn Marino 					next_node))
322395b7b453SJohn Marino 		continue;
322495b7b453SJohn Marino 	      err = re_node_set_init_copy (&union_set,
322595b7b453SJohn Marino 					   &mctx->state_log[to_idx]->nodes);
322695b7b453SJohn Marino 	      ok = re_node_set_insert (&union_set, next_node);
3227*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR || ! ok))
322895b7b453SJohn Marino 		{
322995b7b453SJohn Marino 		  re_node_set_free (&union_set);
323095b7b453SJohn Marino 		  err = err != REG_NOERROR ? err : REG_ESPACE;
323195b7b453SJohn Marino 		  return err;
323295b7b453SJohn Marino 		}
323395b7b453SJohn Marino 	    }
323495b7b453SJohn Marino 	  else
323595b7b453SJohn Marino 	    {
323695b7b453SJohn Marino 	      err = re_node_set_init_1 (&union_set, next_node);
3237*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
323895b7b453SJohn Marino 		return err;
323995b7b453SJohn Marino 	    }
324095b7b453SJohn Marino 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
324195b7b453SJohn Marino 	  re_node_set_free (&union_set);
3242*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3243*09d4459fSDaniel Fojt 				&& err != REG_NOERROR))
324495b7b453SJohn Marino 	    return err;
324595b7b453SJohn Marino 	}
324695b7b453SJohn Marino     }
324795b7b453SJohn Marino   while (ent++->more);
324895b7b453SJohn Marino   return REG_NOERROR;
324995b7b453SJohn Marino }
325095b7b453SJohn Marino 
325195b7b453SJohn Marino /* Build transition table for the state.
325295b7b453SJohn Marino    Return true if successful.  */
325395b7b453SJohn Marino 
325495b7b453SJohn Marino static bool
build_trtable(const re_dfa_t * dfa,re_dfastate_t * state)325595b7b453SJohn Marino build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
325695b7b453SJohn Marino {
325795b7b453SJohn Marino   reg_errcode_t err;
325895b7b453SJohn Marino   Idx i, j;
325995b7b453SJohn Marino   int ch;
326095b7b453SJohn Marino   bool need_word_trtable = false;
326195b7b453SJohn Marino   bitset_word_t elem, mask;
326295b7b453SJohn Marino   bool dests_node_malloced = false;
326395b7b453SJohn Marino   bool dest_states_malloced = false;
3264cf28ed85SJohn Marino   Idx ndests; /* Number of the destination states from 'state'.  */
326595b7b453SJohn Marino   re_dfastate_t **trtable;
326695b7b453SJohn Marino   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
326795b7b453SJohn Marino   re_node_set follows, *dests_node;
326895b7b453SJohn Marino   bitset_t *dests_ch;
326995b7b453SJohn Marino   bitset_t acceptable;
327095b7b453SJohn Marino 
327195b7b453SJohn Marino   struct dests_alloc
327295b7b453SJohn Marino   {
327395b7b453SJohn Marino     re_node_set dests_node[SBC_MAX];
327495b7b453SJohn Marino     bitset_t dests_ch[SBC_MAX];
327595b7b453SJohn Marino   } *dests_alloc;
327695b7b453SJohn Marino 
327795b7b453SJohn Marino   /* We build DFA states which corresponds to the destination nodes
3278cf28ed85SJohn Marino      from 'state'.  'dests_node[i]' represents the nodes which i-th
3279cf28ed85SJohn Marino      destination state contains, and 'dests_ch[i]' represents the
328095b7b453SJohn Marino      characters which i-th destination state accepts.  */
328195b7b453SJohn Marino   if (__libc_use_alloca (sizeof (struct dests_alloc)))
328295b7b453SJohn Marino     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
328395b7b453SJohn Marino   else
328495b7b453SJohn Marino     {
328595b7b453SJohn Marino       dests_alloc = re_malloc (struct dests_alloc, 1);
3286*09d4459fSDaniel Fojt       if (__glibc_unlikely (dests_alloc == NULL))
328795b7b453SJohn Marino 	return false;
328895b7b453SJohn Marino       dests_node_malloced = true;
328995b7b453SJohn Marino     }
329095b7b453SJohn Marino   dests_node = dests_alloc->dests_node;
329195b7b453SJohn Marino   dests_ch = dests_alloc->dests_ch;
329295b7b453SJohn Marino 
3293cf28ed85SJohn Marino   /* Initialize transition table.  */
329495b7b453SJohn Marino   state->word_trtable = state->trtable = NULL;
329595b7b453SJohn Marino 
3296cf28ed85SJohn Marino   /* At first, group all nodes belonging to 'state' into several
329795b7b453SJohn Marino      destinations.  */
329895b7b453SJohn Marino   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3299*09d4459fSDaniel Fojt   if (__glibc_unlikely (ndests <= 0))
330095b7b453SJohn Marino     {
330195b7b453SJohn Marino       if (dests_node_malloced)
3302*09d4459fSDaniel Fojt 	re_free (dests_alloc);
3303cf28ed85SJohn Marino       /* Return false in case of an error, true otherwise.  */
330495b7b453SJohn Marino       if (ndests == 0)
330595b7b453SJohn Marino 	{
330695b7b453SJohn Marino 	  state->trtable = (re_dfastate_t **)
330795b7b453SJohn Marino 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
3308*09d4459fSDaniel Fojt           if (__glibc_unlikely (state->trtable == NULL))
3309200fbe8dSJohn Marino             return false;
331095b7b453SJohn Marino 	  return true;
331195b7b453SJohn Marino 	}
331295b7b453SJohn Marino       return false;
331395b7b453SJohn Marino     }
331495b7b453SJohn Marino 
331595b7b453SJohn Marino   err = re_node_set_alloc (&follows, ndests + 1);
3316*09d4459fSDaniel Fojt   if (__glibc_unlikely (err != REG_NOERROR))
331795b7b453SJohn Marino     goto out_free;
331895b7b453SJohn Marino 
331995b7b453SJohn Marino   /* Avoid arithmetic overflow in size calculation.  */
3320*09d4459fSDaniel Fojt   size_t ndests_max
3321*09d4459fSDaniel Fojt     = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3322*09d4459fSDaniel Fojt        / (3 * sizeof (re_dfastate_t *)));
3323*09d4459fSDaniel Fojt   if (__glibc_unlikely (ndests_max < ndests))
332495b7b453SJohn Marino     goto out_free;
332595b7b453SJohn Marino 
332695b7b453SJohn Marino   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
332795b7b453SJohn Marino 			 + ndests * 3 * sizeof (re_dfastate_t *)))
332895b7b453SJohn Marino     dest_states = (re_dfastate_t **)
332995b7b453SJohn Marino       alloca (ndests * 3 * sizeof (re_dfastate_t *));
333095b7b453SJohn Marino   else
333195b7b453SJohn Marino     {
3332*09d4459fSDaniel Fojt       dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3333*09d4459fSDaniel Fojt       if (__glibc_unlikely (dest_states == NULL))
333495b7b453SJohn Marino 	{
333595b7b453SJohn Marino out_free:
333695b7b453SJohn Marino 	  if (dest_states_malloced)
3337*09d4459fSDaniel Fojt 	    re_free (dest_states);
333895b7b453SJohn Marino 	  re_node_set_free (&follows);
333995b7b453SJohn Marino 	  for (i = 0; i < ndests; ++i)
334095b7b453SJohn Marino 	    re_node_set_free (dests_node + i);
334195b7b453SJohn Marino 	  if (dests_node_malloced)
3342*09d4459fSDaniel Fojt 	    re_free (dests_alloc);
334395b7b453SJohn Marino 	  return false;
334495b7b453SJohn Marino 	}
334595b7b453SJohn Marino       dest_states_malloced = true;
334695b7b453SJohn Marino     }
334795b7b453SJohn Marino   dest_states_word = dest_states + ndests;
334895b7b453SJohn Marino   dest_states_nl = dest_states_word + ndests;
334995b7b453SJohn Marino   bitset_empty (acceptable);
335095b7b453SJohn Marino 
335195b7b453SJohn Marino   /* Then build the states for all destinations.  */
335295b7b453SJohn Marino   for (i = 0; i < ndests; ++i)
335395b7b453SJohn Marino     {
335495b7b453SJohn Marino       Idx next_node;
335595b7b453SJohn Marino       re_node_set_empty (&follows);
335695b7b453SJohn Marino       /* Merge the follows of this destination states.  */
335795b7b453SJohn Marino       for (j = 0; j < dests_node[i].nelem; ++j)
335895b7b453SJohn Marino 	{
335995b7b453SJohn Marino 	  next_node = dfa->nexts[dests_node[i].elems[j]];
3360*09d4459fSDaniel Fojt 	  if (next_node != -1)
336195b7b453SJohn Marino 	    {
336295b7b453SJohn Marino 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3363*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
336495b7b453SJohn Marino 		goto out_free;
336595b7b453SJohn Marino 	    }
336695b7b453SJohn Marino 	}
336795b7b453SJohn Marino       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3368*09d4459fSDaniel Fojt       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
336995b7b453SJohn Marino 	goto out_free;
337095b7b453SJohn Marino       /* If the new state has context constraint,
337195b7b453SJohn Marino 	 build appropriate states for these contexts.  */
337295b7b453SJohn Marino       if (dest_states[i]->has_constraint)
337395b7b453SJohn Marino 	{
337495b7b453SJohn Marino 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
337595b7b453SJohn Marino 							  CONTEXT_WORD);
3376*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (dest_states_word[i] == NULL
3377*09d4459fSDaniel Fojt 				&& err != REG_NOERROR))
337895b7b453SJohn Marino 	    goto out_free;
337995b7b453SJohn Marino 
338095b7b453SJohn Marino 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
338195b7b453SJohn Marino 	    need_word_trtable = true;
338295b7b453SJohn Marino 
338395b7b453SJohn Marino 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
338495b7b453SJohn Marino 							CONTEXT_NEWLINE);
3385*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
338695b7b453SJohn Marino 	    goto out_free;
338795b7b453SJohn Marino 	}
338895b7b453SJohn Marino       else
338995b7b453SJohn Marino 	{
339095b7b453SJohn Marino 	  dest_states_word[i] = dest_states[i];
339195b7b453SJohn Marino 	  dest_states_nl[i] = dest_states[i];
339295b7b453SJohn Marino 	}
339395b7b453SJohn Marino       bitset_merge (acceptable, dests_ch[i]);
339495b7b453SJohn Marino     }
339595b7b453SJohn Marino 
3396*09d4459fSDaniel Fojt   if (!__glibc_unlikely (need_word_trtable))
339795b7b453SJohn Marino     {
339895b7b453SJohn Marino       /* We don't care about whether the following character is a word
339995b7b453SJohn Marino 	 character, or we are in a single-byte character set so we can
340095b7b453SJohn Marino 	 discern by looking at the character code: allocate a
340195b7b453SJohn Marino 	 256-entry transition table.  */
340295b7b453SJohn Marino       trtable = state->trtable =
340395b7b453SJohn Marino 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3404*09d4459fSDaniel Fojt       if (__glibc_unlikely (trtable == NULL))
340595b7b453SJohn Marino 	goto out_free;
340695b7b453SJohn Marino 
340795b7b453SJohn Marino       /* For all characters ch...:  */
340895b7b453SJohn Marino       for (i = 0; i < BITSET_WORDS; ++i)
340995b7b453SJohn Marino 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
341095b7b453SJohn Marino 	     elem;
341195b7b453SJohn Marino 	     mask <<= 1, elem >>= 1, ++ch)
3412*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (elem & 1))
341395b7b453SJohn Marino 	    {
341495b7b453SJohn Marino 	      /* There must be exactly one destination which accepts
341595b7b453SJohn Marino 		 character ch.  See group_nodes_into_DFAstates.  */
341695b7b453SJohn Marino 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
341795b7b453SJohn Marino 		;
341895b7b453SJohn Marino 
341995b7b453SJohn Marino 	      /* j-th destination accepts the word character ch.  */
342095b7b453SJohn Marino 	      if (dfa->word_char[i] & mask)
342195b7b453SJohn Marino 		trtable[ch] = dest_states_word[j];
342295b7b453SJohn Marino 	      else
342395b7b453SJohn Marino 		trtable[ch] = dest_states[j];
342495b7b453SJohn Marino 	    }
342595b7b453SJohn Marino     }
342695b7b453SJohn Marino   else
342795b7b453SJohn Marino     {
342895b7b453SJohn Marino       /* We care about whether the following character is a word
342995b7b453SJohn Marino 	 character, and we are in a multi-byte character set: discern
343095b7b453SJohn Marino 	 by looking at the character code: build two 256-entry
343195b7b453SJohn Marino 	 transition tables, one starting at trtable[0] and one
343295b7b453SJohn Marino 	 starting at trtable[SBC_MAX].  */
343395b7b453SJohn Marino       trtable = state->word_trtable =
343495b7b453SJohn Marino 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3435*09d4459fSDaniel Fojt       if (__glibc_unlikely (trtable == NULL))
343695b7b453SJohn Marino 	goto out_free;
343795b7b453SJohn Marino 
343895b7b453SJohn Marino       /* For all characters ch...:  */
343995b7b453SJohn Marino       for (i = 0; i < BITSET_WORDS; ++i)
344095b7b453SJohn Marino 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
344195b7b453SJohn Marino 	     elem;
344295b7b453SJohn Marino 	     mask <<= 1, elem >>= 1, ++ch)
3443*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (elem & 1))
344495b7b453SJohn Marino 	    {
344595b7b453SJohn Marino 	      /* There must be exactly one destination which accepts
344695b7b453SJohn Marino 		 character ch.  See group_nodes_into_DFAstates.  */
344795b7b453SJohn Marino 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
344895b7b453SJohn Marino 		;
344995b7b453SJohn Marino 
345095b7b453SJohn Marino 	      /* j-th destination accepts the word character ch.  */
345195b7b453SJohn Marino 	      trtable[ch] = dest_states[j];
345295b7b453SJohn Marino 	      trtable[ch + SBC_MAX] = dest_states_word[j];
345395b7b453SJohn Marino 	    }
345495b7b453SJohn Marino     }
345595b7b453SJohn Marino 
345695b7b453SJohn Marino   /* new line */
345795b7b453SJohn Marino   if (bitset_contain (acceptable, NEWLINE_CHAR))
345895b7b453SJohn Marino     {
345995b7b453SJohn Marino       /* The current state accepts newline character.  */
346095b7b453SJohn Marino       for (j = 0; j < ndests; ++j)
346195b7b453SJohn Marino 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
346295b7b453SJohn Marino 	  {
346395b7b453SJohn Marino 	    /* k-th destination accepts newline character.  */
346495b7b453SJohn Marino 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
346595b7b453SJohn Marino 	    if (need_word_trtable)
346695b7b453SJohn Marino 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
346795b7b453SJohn Marino 	    /* There must be only one destination which accepts
346895b7b453SJohn Marino 	       newline.  See group_nodes_into_DFAstates.  */
346995b7b453SJohn Marino 	    break;
347095b7b453SJohn Marino 	  }
347195b7b453SJohn Marino     }
347295b7b453SJohn Marino 
347395b7b453SJohn Marino   if (dest_states_malloced)
3474*09d4459fSDaniel Fojt     re_free (dest_states);
347595b7b453SJohn Marino 
347695b7b453SJohn Marino   re_node_set_free (&follows);
347795b7b453SJohn Marino   for (i = 0; i < ndests; ++i)
347895b7b453SJohn Marino     re_node_set_free (dests_node + i);
347995b7b453SJohn Marino 
348095b7b453SJohn Marino   if (dests_node_malloced)
3481*09d4459fSDaniel Fojt     re_free (dests_alloc);
348295b7b453SJohn Marino 
348395b7b453SJohn Marino   return true;
348495b7b453SJohn Marino }
348595b7b453SJohn Marino 
348695b7b453SJohn Marino /* Group all nodes belonging to STATE into several destinations.
348795b7b453SJohn Marino    Then for all destinations, set the nodes belonging to the destination
348895b7b453SJohn Marino    to DESTS_NODE[i] and set the characters accepted by the destination
348995b7b453SJohn Marino    to DEST_CH[i].  This function return the number of destinations.  */
349095b7b453SJohn Marino 
349195b7b453SJohn Marino static Idx
group_nodes_into_DFAstates(const re_dfa_t * dfa,const re_dfastate_t * state,re_node_set * dests_node,bitset_t * dests_ch)349295b7b453SJohn Marino group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
349395b7b453SJohn Marino 			    re_node_set *dests_node, bitset_t *dests_ch)
349495b7b453SJohn Marino {
349595b7b453SJohn Marino   reg_errcode_t err;
349695b7b453SJohn Marino   bool ok;
349795b7b453SJohn Marino   Idx i, j, k;
3498cf28ed85SJohn Marino   Idx ndests; /* Number of the destinations from 'state'.  */
349995b7b453SJohn Marino   bitset_t accepts; /* Characters a node can accept.  */
350095b7b453SJohn Marino   const re_node_set *cur_nodes = &state->nodes;
350195b7b453SJohn Marino   bitset_empty (accepts);
350295b7b453SJohn Marino   ndests = 0;
350395b7b453SJohn Marino 
3504cf28ed85SJohn Marino   /* For all the nodes belonging to 'state',  */
350595b7b453SJohn Marino   for (i = 0; i < cur_nodes->nelem; ++i)
350695b7b453SJohn Marino     {
350795b7b453SJohn Marino       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
350895b7b453SJohn Marino       re_token_type_t type = node->type;
350995b7b453SJohn Marino       unsigned int constraint = node->constraint;
351095b7b453SJohn Marino 
351195b7b453SJohn Marino       /* Enumerate all single byte character this node can accept.  */
351295b7b453SJohn Marino       if (type == CHARACTER)
351395b7b453SJohn Marino 	bitset_set (accepts, node->opr.c);
351495b7b453SJohn Marino       else if (type == SIMPLE_BRACKET)
351595b7b453SJohn Marino 	{
351695b7b453SJohn Marino 	  bitset_merge (accepts, node->opr.sbcset);
351795b7b453SJohn Marino 	}
351895b7b453SJohn Marino       else if (type == OP_PERIOD)
351995b7b453SJohn Marino 	{
352095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
352195b7b453SJohn Marino 	  if (dfa->mb_cur_max > 1)
352295b7b453SJohn Marino 	    bitset_merge (accepts, dfa->sb_char);
352395b7b453SJohn Marino 	  else
352495b7b453SJohn Marino #endif
352595b7b453SJohn Marino 	    bitset_set_all (accepts);
352695b7b453SJohn Marino 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
352795b7b453SJohn Marino 	    bitset_clear (accepts, '\n');
352895b7b453SJohn Marino 	  if (dfa->syntax & RE_DOT_NOT_NULL)
352995b7b453SJohn Marino 	    bitset_clear (accepts, '\0');
353095b7b453SJohn Marino 	}
353195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
353295b7b453SJohn Marino       else if (type == OP_UTF8_PERIOD)
353395b7b453SJohn Marino 	{
353495b7b453SJohn Marino 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
353595b7b453SJohn Marino 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
353695b7b453SJohn Marino 	  else
353795b7b453SJohn Marino 	    bitset_merge (accepts, utf8_sb_map);
353895b7b453SJohn Marino 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
353995b7b453SJohn Marino 	    bitset_clear (accepts, '\n');
354095b7b453SJohn Marino 	  if (dfa->syntax & RE_DOT_NOT_NULL)
354195b7b453SJohn Marino 	    bitset_clear (accepts, '\0');
354295b7b453SJohn Marino 	}
354395b7b453SJohn Marino #endif
354495b7b453SJohn Marino       else
354595b7b453SJohn Marino 	continue;
354695b7b453SJohn Marino 
3547cf28ed85SJohn Marino       /* Check the 'accepts' and sift the characters which are not
354895b7b453SJohn Marino 	 match it the context.  */
354995b7b453SJohn Marino       if (constraint)
355095b7b453SJohn Marino 	{
355195b7b453SJohn Marino 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
355295b7b453SJohn Marino 	    {
355395b7b453SJohn Marino 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
355495b7b453SJohn Marino 	      bitset_empty (accepts);
355595b7b453SJohn Marino 	      if (accepts_newline)
355695b7b453SJohn Marino 		bitset_set (accepts, NEWLINE_CHAR);
355795b7b453SJohn Marino 	      else
355895b7b453SJohn Marino 		continue;
355995b7b453SJohn Marino 	    }
356095b7b453SJohn Marino 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
356195b7b453SJohn Marino 	    {
356295b7b453SJohn Marino 	      bitset_empty (accepts);
356395b7b453SJohn Marino 	      continue;
356495b7b453SJohn Marino 	    }
356595b7b453SJohn Marino 
356695b7b453SJohn Marino 	  if (constraint & NEXT_WORD_CONSTRAINT)
356795b7b453SJohn Marino 	    {
356895b7b453SJohn Marino 	      bitset_word_t any_set = 0;
356995b7b453SJohn Marino 	      if (type == CHARACTER && !node->word_char)
357095b7b453SJohn Marino 		{
357195b7b453SJohn Marino 		  bitset_empty (accepts);
357295b7b453SJohn Marino 		  continue;
357395b7b453SJohn Marino 		}
357495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
357595b7b453SJohn Marino 	      if (dfa->mb_cur_max > 1)
357695b7b453SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
357795b7b453SJohn Marino 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
357895b7b453SJohn Marino 	      else
357995b7b453SJohn Marino #endif
358095b7b453SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
358195b7b453SJohn Marino 		  any_set |= (accepts[j] &= dfa->word_char[j]);
358295b7b453SJohn Marino 	      if (!any_set)
358395b7b453SJohn Marino 		continue;
358495b7b453SJohn Marino 	    }
358595b7b453SJohn Marino 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
358695b7b453SJohn Marino 	    {
358795b7b453SJohn Marino 	      bitset_word_t any_set = 0;
358895b7b453SJohn Marino 	      if (type == CHARACTER && node->word_char)
358995b7b453SJohn Marino 		{
359095b7b453SJohn Marino 		  bitset_empty (accepts);
359195b7b453SJohn Marino 		  continue;
359295b7b453SJohn Marino 		}
359395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
359495b7b453SJohn Marino 	      if (dfa->mb_cur_max > 1)
359595b7b453SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
359695b7b453SJohn Marino 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
359795b7b453SJohn Marino 	      else
359895b7b453SJohn Marino #endif
359995b7b453SJohn Marino 		for (j = 0; j < BITSET_WORDS; ++j)
360095b7b453SJohn Marino 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
360195b7b453SJohn Marino 	      if (!any_set)
360295b7b453SJohn Marino 		continue;
360395b7b453SJohn Marino 	    }
360495b7b453SJohn Marino 	}
360595b7b453SJohn Marino 
3606cf28ed85SJohn Marino       /* Then divide 'accepts' into DFA states, or create a new
360795b7b453SJohn Marino 	 state.  Above, we make sure that accepts is not empty.  */
360895b7b453SJohn Marino       for (j = 0; j < ndests; ++j)
360995b7b453SJohn Marino 	{
361095b7b453SJohn Marino 	  bitset_t intersec; /* Intersection sets, see below.  */
361195b7b453SJohn Marino 	  bitset_t remains;
361295b7b453SJohn Marino 	  /* Flags, see below.  */
361395b7b453SJohn Marino 	  bitset_word_t has_intersec, not_subset, not_consumed;
361495b7b453SJohn Marino 
361595b7b453SJohn Marino 	  /* Optimization, skip if this state doesn't accept the character.  */
361695b7b453SJohn Marino 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
361795b7b453SJohn Marino 	    continue;
361895b7b453SJohn Marino 
3619cf28ed85SJohn Marino 	  /* Enumerate the intersection set of this state and 'accepts'.  */
362095b7b453SJohn Marino 	  has_intersec = 0;
362195b7b453SJohn Marino 	  for (k = 0; k < BITSET_WORDS; ++k)
362295b7b453SJohn Marino 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
362395b7b453SJohn Marino 	  /* And skip if the intersection set is empty.  */
362495b7b453SJohn Marino 	  if (!has_intersec)
362595b7b453SJohn Marino 	    continue;
362695b7b453SJohn Marino 
3627cf28ed85SJohn Marino 	  /* Then check if this state is a subset of 'accepts'.  */
362895b7b453SJohn Marino 	  not_subset = not_consumed = 0;
362995b7b453SJohn Marino 	  for (k = 0; k < BITSET_WORDS; ++k)
363095b7b453SJohn Marino 	    {
363195b7b453SJohn Marino 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
363295b7b453SJohn Marino 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
363395b7b453SJohn Marino 	    }
363495b7b453SJohn Marino 
3635cf28ed85SJohn Marino 	  /* If this state isn't a subset of 'accepts', create a
3636cf28ed85SJohn Marino 	     new group state, which has the 'remains'. */
363795b7b453SJohn Marino 	  if (not_subset)
363895b7b453SJohn Marino 	    {
363995b7b453SJohn Marino 	      bitset_copy (dests_ch[ndests], remains);
364095b7b453SJohn Marino 	      bitset_copy (dests_ch[j], intersec);
364195b7b453SJohn Marino 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3642*09d4459fSDaniel Fojt 	      if (__glibc_unlikely (err != REG_NOERROR))
364395b7b453SJohn Marino 		goto error_return;
364495b7b453SJohn Marino 	      ++ndests;
364595b7b453SJohn Marino 	    }
364695b7b453SJohn Marino 
364795b7b453SJohn Marino 	  /* Put the position in the current group. */
364895b7b453SJohn Marino 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3649*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (! ok))
365095b7b453SJohn Marino 	    goto error_return;
365195b7b453SJohn Marino 
365295b7b453SJohn Marino 	  /* If all characters are consumed, go to next node. */
365395b7b453SJohn Marino 	  if (!not_consumed)
365495b7b453SJohn Marino 	    break;
365595b7b453SJohn Marino 	}
365695b7b453SJohn Marino       /* Some characters remain, create a new group. */
365795b7b453SJohn Marino       if (j == ndests)
365895b7b453SJohn Marino 	{
365995b7b453SJohn Marino 	  bitset_copy (dests_ch[ndests], accepts);
366095b7b453SJohn Marino 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3661*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (err != REG_NOERROR))
366295b7b453SJohn Marino 	    goto error_return;
366395b7b453SJohn Marino 	  ++ndests;
366495b7b453SJohn Marino 	  bitset_empty (accepts);
366595b7b453SJohn Marino 	}
366695b7b453SJohn Marino     }
3667*09d4459fSDaniel Fojt   assume (ndests <= SBC_MAX);
366895b7b453SJohn Marino   return ndests;
366995b7b453SJohn Marino  error_return:
367095b7b453SJohn Marino   for (j = 0; j < ndests; ++j)
367195b7b453SJohn Marino     re_node_set_free (dests_node + j);
3672*09d4459fSDaniel Fojt   return -1;
367395b7b453SJohn Marino }
367495b7b453SJohn Marino 
367595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
3676cf28ed85SJohn Marino /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
367795b7b453SJohn Marino    Return the number of the bytes the node accepts.
367895b7b453SJohn Marino    STR_IDX is the current index of the input string.
367995b7b453SJohn Marino 
368095b7b453SJohn Marino    This function handles the nodes which can accept one character, or
368195b7b453SJohn Marino    one collating element like '.', '[a-z]', opposite to the other nodes
368295b7b453SJohn Marino    can only accept one byte.  */
368395b7b453SJohn Marino 
3684dc7c36e4SJohn Marino # ifdef _LIBC
3685dc7c36e4SJohn Marino #  include <locale/weight.h>
3686dc7c36e4SJohn Marino # endif
3687dc7c36e4SJohn Marino 
368895b7b453SJohn Marino static int
check_node_accept_bytes(const re_dfa_t * dfa,Idx node_idx,const re_string_t * input,Idx str_idx)368995b7b453SJohn Marino check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
369095b7b453SJohn Marino 			 const re_string_t *input, Idx str_idx)
369195b7b453SJohn Marino {
369295b7b453SJohn Marino   const re_token_t *node = dfa->nodes + node_idx;
369395b7b453SJohn Marino   int char_len, elem_len;
369495b7b453SJohn Marino   Idx i;
369595b7b453SJohn Marino 
3696*09d4459fSDaniel Fojt   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
369795b7b453SJohn Marino     {
369895b7b453SJohn Marino       unsigned char c = re_string_byte_at (input, str_idx), d;
3699*09d4459fSDaniel Fojt       if (__glibc_likely (c < 0xc2))
370095b7b453SJohn Marino 	return 0;
370195b7b453SJohn Marino 
370295b7b453SJohn Marino       if (str_idx + 2 > input->len)
370395b7b453SJohn Marino 	return 0;
370495b7b453SJohn Marino 
370595b7b453SJohn Marino       d = re_string_byte_at (input, str_idx + 1);
370695b7b453SJohn Marino       if (c < 0xe0)
370795b7b453SJohn Marino 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
370895b7b453SJohn Marino       else if (c < 0xf0)
370995b7b453SJohn Marino 	{
371095b7b453SJohn Marino 	  char_len = 3;
371195b7b453SJohn Marino 	  if (c == 0xe0 && d < 0xa0)
371295b7b453SJohn Marino 	    return 0;
371395b7b453SJohn Marino 	}
371495b7b453SJohn Marino       else if (c < 0xf8)
371595b7b453SJohn Marino 	{
371695b7b453SJohn Marino 	  char_len = 4;
371795b7b453SJohn Marino 	  if (c == 0xf0 && d < 0x90)
371895b7b453SJohn Marino 	    return 0;
371995b7b453SJohn Marino 	}
372095b7b453SJohn Marino       else if (c < 0xfc)
372195b7b453SJohn Marino 	{
372295b7b453SJohn Marino 	  char_len = 5;
372395b7b453SJohn Marino 	  if (c == 0xf8 && d < 0x88)
372495b7b453SJohn Marino 	    return 0;
372595b7b453SJohn Marino 	}
372695b7b453SJohn Marino       else if (c < 0xfe)
372795b7b453SJohn Marino 	{
372895b7b453SJohn Marino 	  char_len = 6;
372995b7b453SJohn Marino 	  if (c == 0xfc && d < 0x84)
373095b7b453SJohn Marino 	    return 0;
373195b7b453SJohn Marino 	}
373295b7b453SJohn Marino       else
373395b7b453SJohn Marino 	return 0;
373495b7b453SJohn Marino 
373595b7b453SJohn Marino       if (str_idx + char_len > input->len)
373695b7b453SJohn Marino 	return 0;
373795b7b453SJohn Marino 
373895b7b453SJohn Marino       for (i = 1; i < char_len; ++i)
373995b7b453SJohn Marino 	{
374095b7b453SJohn Marino 	  d = re_string_byte_at (input, str_idx + i);
374195b7b453SJohn Marino 	  if (d < 0x80 || d > 0xbf)
374295b7b453SJohn Marino 	    return 0;
374395b7b453SJohn Marino 	}
374495b7b453SJohn Marino       return char_len;
374595b7b453SJohn Marino     }
374695b7b453SJohn Marino 
374795b7b453SJohn Marino   char_len = re_string_char_size_at (input, str_idx);
374895b7b453SJohn Marino   if (node->type == OP_PERIOD)
374995b7b453SJohn Marino     {
375095b7b453SJohn Marino       if (char_len <= 1)
375195b7b453SJohn Marino 	return 0;
375295b7b453SJohn Marino       /* FIXME: I don't think this if is needed, as both '\n'
375395b7b453SJohn Marino 	 and '\0' are char_len == 1.  */
375495b7b453SJohn Marino       /* '.' accepts any one character except the following two cases.  */
3755*09d4459fSDaniel Fojt       if ((!(dfa->syntax & RE_DOT_NEWLINE)
3756*09d4459fSDaniel Fojt 	   && re_string_byte_at (input, str_idx) == '\n')
3757*09d4459fSDaniel Fojt 	  || ((dfa->syntax & RE_DOT_NOT_NULL)
3758*09d4459fSDaniel Fojt 	      && re_string_byte_at (input, str_idx) == '\0'))
375995b7b453SJohn Marino 	return 0;
376095b7b453SJohn Marino       return char_len;
376195b7b453SJohn Marino     }
376295b7b453SJohn Marino 
376395b7b453SJohn Marino   elem_len = re_string_elem_size_at (input, str_idx);
376495b7b453SJohn Marino   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
376595b7b453SJohn Marino     return 0;
376695b7b453SJohn Marino 
376795b7b453SJohn Marino   if (node->type == COMPLEX_BRACKET)
376895b7b453SJohn Marino     {
376995b7b453SJohn Marino       const re_charset_t *cset = node->opr.mbcset;
377095b7b453SJohn Marino # ifdef _LIBC
377195b7b453SJohn Marino       const unsigned char *pin
377295b7b453SJohn Marino 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
377395b7b453SJohn Marino       Idx j;
377495b7b453SJohn Marino       uint32_t nrules;
377595b7b453SJohn Marino # endif /* _LIBC */
377695b7b453SJohn Marino       int match_len = 0;
377795b7b453SJohn Marino       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
377895b7b453SJohn Marino 		    ? re_string_wchar_at (input, str_idx) : 0);
377995b7b453SJohn Marino 
378095b7b453SJohn Marino       /* match with multibyte character?  */
378195b7b453SJohn Marino       for (i = 0; i < cset->nmbchars; ++i)
378295b7b453SJohn Marino 	if (wc == cset->mbchars[i])
378395b7b453SJohn Marino 	  {
378495b7b453SJohn Marino 	    match_len = char_len;
378595b7b453SJohn Marino 	    goto check_node_accept_bytes_match;
378695b7b453SJohn Marino 	  }
378795b7b453SJohn Marino       /* match with character_class?  */
378895b7b453SJohn Marino       for (i = 0; i < cset->nchar_classes; ++i)
378995b7b453SJohn Marino 	{
379095b7b453SJohn Marino 	  wctype_t wt = cset->char_classes[i];
379195b7b453SJohn Marino 	  if (__iswctype (wc, wt))
379295b7b453SJohn Marino 	    {
379395b7b453SJohn Marino 	      match_len = char_len;
379495b7b453SJohn Marino 	      goto check_node_accept_bytes_match;
379595b7b453SJohn Marino 	    }
379695b7b453SJohn Marino 	}
379795b7b453SJohn Marino 
379895b7b453SJohn Marino # ifdef _LIBC
379995b7b453SJohn Marino       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
380095b7b453SJohn Marino       if (nrules != 0)
380195b7b453SJohn Marino 	{
380295b7b453SJohn Marino 	  unsigned int in_collseq = 0;
380395b7b453SJohn Marino 	  const int32_t *table, *indirect;
380495b7b453SJohn Marino 	  const unsigned char *weights, *extra;
380595b7b453SJohn Marino 	  const char *collseqwc;
380695b7b453SJohn Marino 
380795b7b453SJohn Marino 	  /* match with collating_symbol?  */
380895b7b453SJohn Marino 	  if (cset->ncoll_syms)
380995b7b453SJohn Marino 	    extra = (const unsigned char *)
381095b7b453SJohn Marino 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
381195b7b453SJohn Marino 	  for (i = 0; i < cset->ncoll_syms; ++i)
381295b7b453SJohn Marino 	    {
381395b7b453SJohn Marino 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
381495b7b453SJohn Marino 	      /* Compare the length of input collating element and
381595b7b453SJohn Marino 		 the length of current collating element.  */
381695b7b453SJohn Marino 	      if (*coll_sym != elem_len)
381795b7b453SJohn Marino 		continue;
381895b7b453SJohn Marino 	      /* Compare each bytes.  */
381995b7b453SJohn Marino 	      for (j = 0; j < *coll_sym; j++)
382095b7b453SJohn Marino 		if (pin[j] != coll_sym[1 + j])
382195b7b453SJohn Marino 		  break;
382295b7b453SJohn Marino 	      if (j == *coll_sym)
382395b7b453SJohn Marino 		{
382495b7b453SJohn Marino 		  /* Match if every bytes is equal.  */
382595b7b453SJohn Marino 		  match_len = j;
382695b7b453SJohn Marino 		  goto check_node_accept_bytes_match;
382795b7b453SJohn Marino 		}
382895b7b453SJohn Marino 	    }
382995b7b453SJohn Marino 
383095b7b453SJohn Marino 	  if (cset->nranges)
383195b7b453SJohn Marino 	    {
383295b7b453SJohn Marino 	      if (elem_len <= char_len)
383395b7b453SJohn Marino 		{
383495b7b453SJohn Marino 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
383595b7b453SJohn Marino 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
383695b7b453SJohn Marino 		}
383795b7b453SJohn Marino 	      else
383895b7b453SJohn Marino 		in_collseq = find_collation_sequence_value (pin, elem_len);
383995b7b453SJohn Marino 	    }
384095b7b453SJohn Marino 	  /* match with range expression?  */
3841680a9cb8SJohn Marino 	  /* FIXME: Implement rational ranges here, too.  */
384295b7b453SJohn Marino 	  for (i = 0; i < cset->nranges; ++i)
384395b7b453SJohn Marino 	    if (cset->range_starts[i] <= in_collseq
384495b7b453SJohn Marino 		&& in_collseq <= cset->range_ends[i])
384595b7b453SJohn Marino 	      {
384695b7b453SJohn Marino 		match_len = elem_len;
384795b7b453SJohn Marino 		goto check_node_accept_bytes_match;
384895b7b453SJohn Marino 	      }
384995b7b453SJohn Marino 
385095b7b453SJohn Marino 	  /* match with equivalence_class?  */
385195b7b453SJohn Marino 	  if (cset->nequiv_classes)
385295b7b453SJohn Marino 	    {
385395b7b453SJohn Marino 	      const unsigned char *cp = pin;
385495b7b453SJohn Marino 	      table = (const int32_t *)
385595b7b453SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
385695b7b453SJohn Marino 	      weights = (const unsigned char *)
385795b7b453SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
385895b7b453SJohn Marino 	      extra = (const unsigned char *)
385995b7b453SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
386095b7b453SJohn Marino 	      indirect = (const int32_t *)
386195b7b453SJohn Marino 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3862dc7c36e4SJohn Marino 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3863*09d4459fSDaniel Fojt 	      int32_t rule = idx >> 24;
3864*09d4459fSDaniel Fojt 	      idx &= 0xffffff;
386595b7b453SJohn Marino 	      if (idx > 0)
3866*09d4459fSDaniel Fojt 		{
3867*09d4459fSDaniel Fojt 		  size_t weight_len = weights[idx];
386895b7b453SJohn Marino 		  for (i = 0; i < cset->nequiv_classes; ++i)
386995b7b453SJohn Marino 		    {
387095b7b453SJohn Marino 		      int32_t equiv_class_idx = cset->equiv_classes[i];
3871*09d4459fSDaniel Fojt 		      int32_t equiv_class_rule = equiv_class_idx >> 24;
387295b7b453SJohn Marino 		      equiv_class_idx &= 0xffffff;
3873*09d4459fSDaniel Fojt 		      if (weights[equiv_class_idx] == weight_len
3874*09d4459fSDaniel Fojt 			  && equiv_class_rule == rule
3875*09d4459fSDaniel Fojt 			  && memcmp (weights + idx + 1,
3876*09d4459fSDaniel Fojt 				     weights + equiv_class_idx + 1,
3877*09d4459fSDaniel Fojt 				     weight_len) == 0)
387895b7b453SJohn Marino 			{
387995b7b453SJohn Marino 			  match_len = elem_len;
388095b7b453SJohn Marino 			  goto check_node_accept_bytes_match;
388195b7b453SJohn Marino 			}
388295b7b453SJohn Marino 		    }
388395b7b453SJohn Marino 		}
388495b7b453SJohn Marino 	    }
388595b7b453SJohn Marino 	}
388695b7b453SJohn Marino       else
388795b7b453SJohn Marino # endif /* _LIBC */
388895b7b453SJohn Marino 	{
388995b7b453SJohn Marino 	  /* match with range expression?  */
389095b7b453SJohn Marino 	  for (i = 0; i < cset->nranges; ++i)
389195b7b453SJohn Marino 	    {
3892680a9cb8SJohn Marino 	      if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
389395b7b453SJohn Marino 		{
389495b7b453SJohn Marino 		  match_len = char_len;
389595b7b453SJohn Marino 		  goto check_node_accept_bytes_match;
389695b7b453SJohn Marino 		}
389795b7b453SJohn Marino 	    }
389895b7b453SJohn Marino 	}
389995b7b453SJohn Marino     check_node_accept_bytes_match:
390095b7b453SJohn Marino       if (!cset->non_match)
390195b7b453SJohn Marino 	return match_len;
390295b7b453SJohn Marino       else
390395b7b453SJohn Marino 	{
390495b7b453SJohn Marino 	  if (match_len > 0)
390595b7b453SJohn Marino 	    return 0;
390695b7b453SJohn Marino 	  else
390795b7b453SJohn Marino 	    return (elem_len > char_len) ? elem_len : char_len;
390895b7b453SJohn Marino 	}
390995b7b453SJohn Marino     }
391095b7b453SJohn Marino   return 0;
391195b7b453SJohn Marino }
391295b7b453SJohn Marino 
391395b7b453SJohn Marino # ifdef _LIBC
391495b7b453SJohn Marino static unsigned int
find_collation_sequence_value(const unsigned char * mbs,size_t mbs_len)391595b7b453SJohn Marino find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
391695b7b453SJohn Marino {
391795b7b453SJohn Marino   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
391895b7b453SJohn Marino   if (nrules == 0)
391995b7b453SJohn Marino     {
392095b7b453SJohn Marino       if (mbs_len == 1)
392195b7b453SJohn Marino 	{
392295b7b453SJohn Marino 	  /* No valid character.  Match it as a single byte character.  */
392395b7b453SJohn Marino 	  const unsigned char *collseq = (const unsigned char *)
392495b7b453SJohn Marino 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
392595b7b453SJohn Marino 	  return collseq[mbs[0]];
392695b7b453SJohn Marino 	}
392795b7b453SJohn Marino       return UINT_MAX;
392895b7b453SJohn Marino     }
392995b7b453SJohn Marino   else
393095b7b453SJohn Marino     {
393195b7b453SJohn Marino       int32_t idx;
393295b7b453SJohn Marino       const unsigned char *extra = (const unsigned char *)
393395b7b453SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
393495b7b453SJohn Marino       int32_t extrasize = (const unsigned char *)
393595b7b453SJohn Marino 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
393695b7b453SJohn Marino 
393795b7b453SJohn Marino       for (idx = 0; idx < extrasize;)
393895b7b453SJohn Marino 	{
393995b7b453SJohn Marino 	  int mbs_cnt;
394095b7b453SJohn Marino 	  bool found = false;
394195b7b453SJohn Marino 	  int32_t elem_mbs_len;
394295b7b453SJohn Marino 	  /* Skip the name of collating element name.  */
394395b7b453SJohn Marino 	  idx = idx + extra[idx] + 1;
394495b7b453SJohn Marino 	  elem_mbs_len = extra[idx++];
394595b7b453SJohn Marino 	  if (mbs_len == elem_mbs_len)
394695b7b453SJohn Marino 	    {
394795b7b453SJohn Marino 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
394895b7b453SJohn Marino 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
394995b7b453SJohn Marino 		  break;
395095b7b453SJohn Marino 	      if (mbs_cnt == elem_mbs_len)
395195b7b453SJohn Marino 		/* Found the entry.  */
395295b7b453SJohn Marino 		found = true;
395395b7b453SJohn Marino 	    }
395495b7b453SJohn Marino 	  /* Skip the byte sequence of the collating element.  */
395595b7b453SJohn Marino 	  idx += elem_mbs_len;
395695b7b453SJohn Marino 	  /* Adjust for the alignment.  */
395795b7b453SJohn Marino 	  idx = (idx + 3) & ~3;
395895b7b453SJohn Marino 	  /* Skip the collation sequence value.  */
395995b7b453SJohn Marino 	  idx += sizeof (uint32_t);
396095b7b453SJohn Marino 	  /* Skip the wide char sequence of the collating element.  */
3961cf28ed85SJohn Marino 	  idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
396295b7b453SJohn Marino 	  /* If we found the entry, return the sequence value.  */
396395b7b453SJohn Marino 	  if (found)
396495b7b453SJohn Marino 	    return *(uint32_t *) (extra + idx);
396595b7b453SJohn Marino 	  /* Skip the collation sequence value.  */
396695b7b453SJohn Marino 	  idx += sizeof (uint32_t);
396795b7b453SJohn Marino 	}
396895b7b453SJohn Marino       return UINT_MAX;
396995b7b453SJohn Marino     }
397095b7b453SJohn Marino }
397195b7b453SJohn Marino # endif /* _LIBC */
397295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
397395b7b453SJohn Marino 
397495b7b453SJohn Marino /* Check whether the node accepts the byte which is IDX-th
397595b7b453SJohn Marino    byte of the INPUT.  */
397695b7b453SJohn Marino 
397795b7b453SJohn Marino static bool
check_node_accept(const re_match_context_t * mctx,const re_token_t * node,Idx idx)397895b7b453SJohn Marino check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
397995b7b453SJohn Marino 		   Idx idx)
398095b7b453SJohn Marino {
398195b7b453SJohn Marino   unsigned char ch;
398295b7b453SJohn Marino   ch = re_string_byte_at (&mctx->input, idx);
398395b7b453SJohn Marino   switch (node->type)
398495b7b453SJohn Marino     {
398595b7b453SJohn Marino     case CHARACTER:
398695b7b453SJohn Marino       if (node->opr.c != ch)
398795b7b453SJohn Marino         return false;
398895b7b453SJohn Marino       break;
398995b7b453SJohn Marino 
399095b7b453SJohn Marino     case SIMPLE_BRACKET:
399195b7b453SJohn Marino       if (!bitset_contain (node->opr.sbcset, ch))
399295b7b453SJohn Marino         return false;
399395b7b453SJohn Marino       break;
399495b7b453SJohn Marino 
399595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
399695b7b453SJohn Marino     case OP_UTF8_PERIOD:
399795b7b453SJohn Marino       if (ch >= ASCII_CHARS)
399895b7b453SJohn Marino         return false;
3999*09d4459fSDaniel Fojt       FALLTHROUGH;
400095b7b453SJohn Marino #endif
400195b7b453SJohn Marino     case OP_PERIOD:
400295b7b453SJohn Marino       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
400395b7b453SJohn Marino 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
400495b7b453SJohn Marino 	return false;
400595b7b453SJohn Marino       break;
400695b7b453SJohn Marino 
400795b7b453SJohn Marino     default:
400895b7b453SJohn Marino       return false;
400995b7b453SJohn Marino     }
401095b7b453SJohn Marino 
401195b7b453SJohn Marino   if (node->constraint)
401295b7b453SJohn Marino     {
401395b7b453SJohn Marino       /* The node has constraints.  Check whether the current context
401495b7b453SJohn Marino 	 satisfies the constraints.  */
401595b7b453SJohn Marino       unsigned int context = re_string_context_at (&mctx->input, idx,
401695b7b453SJohn Marino 						   mctx->eflags);
401795b7b453SJohn Marino       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
401895b7b453SJohn Marino 	return false;
401995b7b453SJohn Marino     }
402095b7b453SJohn Marino 
402195b7b453SJohn Marino   return true;
402295b7b453SJohn Marino }
402395b7b453SJohn Marino 
402495b7b453SJohn Marino /* Extend the buffers, if the buffers have run out.  */
402595b7b453SJohn Marino 
402695b7b453SJohn Marino static reg_errcode_t
4027*09d4459fSDaniel Fojt __attribute_warn_unused_result__
extend_buffers(re_match_context_t * mctx,int min_len)4028680a9cb8SJohn Marino extend_buffers (re_match_context_t *mctx, int min_len)
402995b7b453SJohn Marino {
403095b7b453SJohn Marino   reg_errcode_t ret;
403195b7b453SJohn Marino   re_string_t *pstr = &mctx->input;
403295b7b453SJohn Marino 
403395b7b453SJohn Marino   /* Avoid overflow.  */
4034*09d4459fSDaniel Fojt   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4035*09d4459fSDaniel Fojt 			<= pstr->bufs_len))
403695b7b453SJohn Marino     return REG_ESPACE;
403795b7b453SJohn Marino 
4038680a9cb8SJohn Marino   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
4039680a9cb8SJohn Marino   ret = re_string_realloc_buffers (pstr,
4040680a9cb8SJohn Marino 				   MAX (min_len,
4041680a9cb8SJohn Marino 					MIN (pstr->len, pstr->bufs_len * 2)));
4042*09d4459fSDaniel Fojt   if (__glibc_unlikely (ret != REG_NOERROR))
404395b7b453SJohn Marino     return ret;
404495b7b453SJohn Marino 
404595b7b453SJohn Marino   if (mctx->state_log != NULL)
404695b7b453SJohn Marino     {
404795b7b453SJohn Marino       /* And double the length of state_log.  */
404895b7b453SJohn Marino       /* XXX We have no indication of the size of this buffer.  If this
404995b7b453SJohn Marino 	 allocation fail we have no indication that the state_log array
405095b7b453SJohn Marino 	 does not have the right size.  */
405195b7b453SJohn Marino       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
405295b7b453SJohn Marino 					      pstr->bufs_len + 1);
4053*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
405495b7b453SJohn Marino 	return REG_ESPACE;
405595b7b453SJohn Marino       mctx->state_log = new_array;
405695b7b453SJohn Marino     }
405795b7b453SJohn Marino 
405895b7b453SJohn Marino   /* Then reconstruct the buffers.  */
405995b7b453SJohn Marino   if (pstr->icase)
406095b7b453SJohn Marino     {
406195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
406295b7b453SJohn Marino       if (pstr->mb_cur_max > 1)
406395b7b453SJohn Marino 	{
406495b7b453SJohn Marino 	  ret = build_wcs_upper_buffer (pstr);
4065*09d4459fSDaniel Fojt 	  if (__glibc_unlikely (ret != REG_NOERROR))
406695b7b453SJohn Marino 	    return ret;
406795b7b453SJohn Marino 	}
406895b7b453SJohn Marino       else
406995b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
407095b7b453SJohn Marino 	build_upper_buffer (pstr);
407195b7b453SJohn Marino     }
407295b7b453SJohn Marino   else
407395b7b453SJohn Marino     {
407495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
407595b7b453SJohn Marino       if (pstr->mb_cur_max > 1)
407695b7b453SJohn Marino 	build_wcs_buffer (pstr);
407795b7b453SJohn Marino       else
407895b7b453SJohn Marino #endif /* RE_ENABLE_I18N  */
407995b7b453SJohn Marino 	{
408095b7b453SJohn Marino 	  if (pstr->trans != NULL)
408195b7b453SJohn Marino 	    re_string_translate_buffer (pstr);
408295b7b453SJohn Marino 	}
408395b7b453SJohn Marino     }
408495b7b453SJohn Marino   return REG_NOERROR;
408595b7b453SJohn Marino }
408695b7b453SJohn Marino 
408795b7b453SJohn Marino 
408895b7b453SJohn Marino /* Functions for matching context.  */
408995b7b453SJohn Marino 
409095b7b453SJohn Marino /* Initialize MCTX.  */
409195b7b453SJohn Marino 
409295b7b453SJohn Marino static reg_errcode_t
4093*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_init(re_match_context_t * mctx,int eflags,Idx n)409495b7b453SJohn Marino match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
409595b7b453SJohn Marino {
409695b7b453SJohn Marino   mctx->eflags = eflags;
4097*09d4459fSDaniel Fojt   mctx->match_last = -1;
409895b7b453SJohn Marino   if (n > 0)
409995b7b453SJohn Marino     {
410095b7b453SJohn Marino       /* Avoid overflow.  */
410195b7b453SJohn Marino       size_t max_object_size =
410295b7b453SJohn Marino 	MAX (sizeof (struct re_backref_cache_entry),
410395b7b453SJohn Marino 	     sizeof (re_sub_match_top_t *));
4104*09d4459fSDaniel Fojt       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
410595b7b453SJohn Marino 	return REG_ESPACE;
410695b7b453SJohn Marino 
410795b7b453SJohn Marino       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
410895b7b453SJohn Marino       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4109*09d4459fSDaniel Fojt       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
411095b7b453SJohn Marino 	return REG_ESPACE;
411195b7b453SJohn Marino     }
411295b7b453SJohn Marino   /* Already zero-ed by the caller.
411395b7b453SJohn Marino      else
411495b7b453SJohn Marino        mctx->bkref_ents = NULL;
411595b7b453SJohn Marino      mctx->nbkref_ents = 0;
411695b7b453SJohn Marino      mctx->nsub_tops = 0;  */
411795b7b453SJohn Marino   mctx->abkref_ents = n;
411895b7b453SJohn Marino   mctx->max_mb_elem_len = 1;
411995b7b453SJohn Marino   mctx->asub_tops = n;
412095b7b453SJohn Marino   return REG_NOERROR;
412195b7b453SJohn Marino }
412295b7b453SJohn Marino 
412395b7b453SJohn Marino /* Clean the entries which depend on the current input in MCTX.
412495b7b453SJohn Marino    This function must be invoked when the matcher changes the start index
412595b7b453SJohn Marino    of the input, or changes the input string.  */
412695b7b453SJohn Marino 
412795b7b453SJohn Marino static void
match_ctx_clean(re_match_context_t * mctx)412895b7b453SJohn Marino match_ctx_clean (re_match_context_t *mctx)
412995b7b453SJohn Marino {
413095b7b453SJohn Marino   Idx st_idx;
413195b7b453SJohn Marino   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
413295b7b453SJohn Marino     {
413395b7b453SJohn Marino       Idx sl_idx;
413495b7b453SJohn Marino       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
413595b7b453SJohn Marino       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
413695b7b453SJohn Marino 	{
413795b7b453SJohn Marino 	  re_sub_match_last_t *last = top->lasts[sl_idx];
413895b7b453SJohn Marino 	  re_free (last->path.array);
413995b7b453SJohn Marino 	  re_free (last);
414095b7b453SJohn Marino 	}
414195b7b453SJohn Marino       re_free (top->lasts);
414295b7b453SJohn Marino       if (top->path)
414395b7b453SJohn Marino 	{
414495b7b453SJohn Marino 	  re_free (top->path->array);
414595b7b453SJohn Marino 	  re_free (top->path);
414695b7b453SJohn Marino 	}
4147*09d4459fSDaniel Fojt       re_free (top);
414895b7b453SJohn Marino     }
414995b7b453SJohn Marino 
415095b7b453SJohn Marino   mctx->nsub_tops = 0;
415195b7b453SJohn Marino   mctx->nbkref_ents = 0;
415295b7b453SJohn Marino }
415395b7b453SJohn Marino 
415495b7b453SJohn Marino /* Free all the memory associated with MCTX.  */
415595b7b453SJohn Marino 
415695b7b453SJohn Marino static void
match_ctx_free(re_match_context_t * mctx)415795b7b453SJohn Marino match_ctx_free (re_match_context_t *mctx)
415895b7b453SJohn Marino {
415995b7b453SJohn Marino   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
416095b7b453SJohn Marino   match_ctx_clean (mctx);
416195b7b453SJohn Marino   re_free (mctx->sub_tops);
416295b7b453SJohn Marino   re_free (mctx->bkref_ents);
416395b7b453SJohn Marino }
416495b7b453SJohn Marino 
416595b7b453SJohn Marino /* Add a new backreference entry to MCTX.
416695b7b453SJohn Marino    Note that we assume that caller never call this function with duplicate
416795b7b453SJohn Marino    entry, and call with STR_IDX which isn't smaller than any existing entry.
416895b7b453SJohn Marino */
416995b7b453SJohn Marino 
417095b7b453SJohn Marino static reg_errcode_t
4171*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_entry(re_match_context_t * mctx,Idx node,Idx str_idx,Idx from,Idx to)417295b7b453SJohn Marino match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
417395b7b453SJohn Marino 		     Idx to)
417495b7b453SJohn Marino {
417595b7b453SJohn Marino   if (mctx->nbkref_ents >= mctx->abkref_ents)
417695b7b453SJohn Marino     {
417795b7b453SJohn Marino       struct re_backref_cache_entry* new_entry;
417895b7b453SJohn Marino       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
417995b7b453SJohn Marino 			      mctx->abkref_ents * 2);
4180*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_entry == NULL))
418195b7b453SJohn Marino 	{
418295b7b453SJohn Marino 	  re_free (mctx->bkref_ents);
418395b7b453SJohn Marino 	  return REG_ESPACE;
418495b7b453SJohn Marino 	}
418595b7b453SJohn Marino       mctx->bkref_ents = new_entry;
418695b7b453SJohn Marino       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
418795b7b453SJohn Marino 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
418895b7b453SJohn Marino       mctx->abkref_ents *= 2;
418995b7b453SJohn Marino     }
419095b7b453SJohn Marino   if (mctx->nbkref_ents > 0
419195b7b453SJohn Marino       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
419295b7b453SJohn Marino     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
419395b7b453SJohn Marino 
419495b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].node = node;
419595b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
419695b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
419795b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
419895b7b453SJohn Marino 
419995b7b453SJohn Marino   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
420095b7b453SJohn Marino      If bit N is clear, means that this entry won't epsilon-transition to
420195b7b453SJohn Marino      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
420295b7b453SJohn Marino      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
420395b7b453SJohn Marino      such node.
420495b7b453SJohn Marino 
420595b7b453SJohn Marino      A backreference does not epsilon-transition unless it is empty, so set
420695b7b453SJohn Marino      to all zeros if FROM != TO.  */
420795b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
420895b7b453SJohn Marino     = (from == to ? -1 : 0);
420995b7b453SJohn Marino 
421095b7b453SJohn Marino   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
421195b7b453SJohn Marino   if (mctx->max_mb_elem_len < to - from)
421295b7b453SJohn Marino     mctx->max_mb_elem_len = to - from;
421395b7b453SJohn Marino   return REG_NOERROR;
421495b7b453SJohn Marino }
421595b7b453SJohn Marino 
4216*09d4459fSDaniel Fojt /* Return the first entry with the same str_idx, or -1 if none is
421795b7b453SJohn Marino    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
421895b7b453SJohn Marino 
421995b7b453SJohn Marino static Idx
search_cur_bkref_entry(const re_match_context_t * mctx,Idx str_idx)422095b7b453SJohn Marino search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
422195b7b453SJohn Marino {
422295b7b453SJohn Marino   Idx left, right, mid, last;
422395b7b453SJohn Marino   last = right = mctx->nbkref_ents;
422495b7b453SJohn Marino   for (left = 0; left < right;)
422595b7b453SJohn Marino     {
422695b7b453SJohn Marino       mid = (left + right) / 2;
422795b7b453SJohn Marino       if (mctx->bkref_ents[mid].str_idx < str_idx)
422895b7b453SJohn Marino 	left = mid + 1;
422995b7b453SJohn Marino       else
423095b7b453SJohn Marino 	right = mid;
423195b7b453SJohn Marino     }
423295b7b453SJohn Marino   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
423395b7b453SJohn Marino     return left;
423495b7b453SJohn Marino   else
4235*09d4459fSDaniel Fojt     return -1;
423695b7b453SJohn Marino }
423795b7b453SJohn Marino 
423895b7b453SJohn Marino /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
423995b7b453SJohn Marino    at STR_IDX.  */
424095b7b453SJohn Marino 
424195b7b453SJohn Marino static reg_errcode_t
4242*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_subtop(re_match_context_t * mctx,Idx node,Idx str_idx)424395b7b453SJohn Marino match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
424495b7b453SJohn Marino {
4245*09d4459fSDaniel Fojt   DEBUG_ASSERT (mctx->sub_tops != NULL);
4246*09d4459fSDaniel Fojt   DEBUG_ASSERT (mctx->asub_tops > 0);
4247*09d4459fSDaniel Fojt   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
424895b7b453SJohn Marino     {
424995b7b453SJohn Marino       Idx new_asub_tops = mctx->asub_tops * 2;
425095b7b453SJohn Marino       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
425195b7b453SJohn Marino 						   re_sub_match_top_t *,
425295b7b453SJohn Marino 						   new_asub_tops);
4253*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
425495b7b453SJohn Marino 	return REG_ESPACE;
425595b7b453SJohn Marino       mctx->sub_tops = new_array;
425695b7b453SJohn Marino       mctx->asub_tops = new_asub_tops;
425795b7b453SJohn Marino     }
425895b7b453SJohn Marino   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4259*09d4459fSDaniel Fojt   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
426095b7b453SJohn Marino     return REG_ESPACE;
426195b7b453SJohn Marino   mctx->sub_tops[mctx->nsub_tops]->node = node;
426295b7b453SJohn Marino   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
426395b7b453SJohn Marino   return REG_NOERROR;
426495b7b453SJohn Marino }
426595b7b453SJohn Marino 
426695b7b453SJohn Marino /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
426795b7b453SJohn Marino    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
426895b7b453SJohn Marino 
426995b7b453SJohn Marino static re_sub_match_last_t *
match_ctx_add_sublast(re_sub_match_top_t * subtop,Idx node,Idx str_idx)427095b7b453SJohn Marino match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
427195b7b453SJohn Marino {
427295b7b453SJohn Marino   re_sub_match_last_t *new_entry;
4273*09d4459fSDaniel Fojt   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
427495b7b453SJohn Marino     {
427595b7b453SJohn Marino       Idx new_alasts = 2 * subtop->alasts + 1;
427695b7b453SJohn Marino       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
427795b7b453SJohn Marino 						    re_sub_match_last_t *,
427895b7b453SJohn Marino 						    new_alasts);
4279*09d4459fSDaniel Fojt       if (__glibc_unlikely (new_array == NULL))
428095b7b453SJohn Marino 	return NULL;
428195b7b453SJohn Marino       subtop->lasts = new_array;
428295b7b453SJohn Marino       subtop->alasts = new_alasts;
428395b7b453SJohn Marino     }
428495b7b453SJohn Marino   new_entry = calloc (1, sizeof (re_sub_match_last_t));
4285*09d4459fSDaniel Fojt   if (__glibc_likely (new_entry != NULL))
428695b7b453SJohn Marino     {
428795b7b453SJohn Marino       subtop->lasts[subtop->nlasts] = new_entry;
428895b7b453SJohn Marino       new_entry->node = node;
428995b7b453SJohn Marino       new_entry->str_idx = str_idx;
429095b7b453SJohn Marino       ++subtop->nlasts;
429195b7b453SJohn Marino     }
429295b7b453SJohn Marino   return new_entry;
429395b7b453SJohn Marino }
429495b7b453SJohn Marino 
429595b7b453SJohn Marino static void
sift_ctx_init(re_sift_context_t * sctx,re_dfastate_t ** sifted_sts,re_dfastate_t ** limited_sts,Idx last_node,Idx last_str_idx)429695b7b453SJohn Marino sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
429795b7b453SJohn Marino 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
429895b7b453SJohn Marino {
429995b7b453SJohn Marino   sctx->sifted_states = sifted_sts;
430095b7b453SJohn Marino   sctx->limited_states = limited_sts;
430195b7b453SJohn Marino   sctx->last_node = last_node;
430295b7b453SJohn Marino   sctx->last_str_idx = last_str_idx;
430395b7b453SJohn Marino   re_node_set_init_empty (&sctx->limits);
430495b7b453SJohn Marino }
4305