195b7b453SJohn Marino /* Extended regular expression matching and search library.
2*09d4459fSDaniel Fojt Copyright (C) 2002-2020 Free Software Foundation, Inc.
395b7b453SJohn Marino This file is part of the GNU C Library.
495b7b453SJohn Marino Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
595b7b453SJohn Marino
6680a9cb8SJohn Marino The GNU C Library is free software; you can redistribute it and/or
7680a9cb8SJohn Marino modify it under the terms of the GNU General Public
8680a9cb8SJohn Marino License as published by the Free Software Foundation; either
9680a9cb8SJohn Marino version 3 of the License, or (at your option) any later version.
1095b7b453SJohn Marino
11680a9cb8SJohn Marino The GNU C Library is distributed in the hope that it will be useful,
1295b7b453SJohn Marino but WITHOUT ANY WARRANTY; without even the implied warranty of
13680a9cb8SJohn Marino MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14680a9cb8SJohn Marino General Public License for more details.
1595b7b453SJohn Marino
16680a9cb8SJohn Marino You should have received a copy of the GNU General Public
17680a9cb8SJohn Marino License along with the GNU C Library; if not, see
18*09d4459fSDaniel Fojt <https://www.gnu.org/licenses/>. */
1995b7b453SJohn Marino
2095b7b453SJohn Marino static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21*09d4459fSDaniel Fojt Idx n);
22*09d4459fSDaniel Fojt static void match_ctx_clean (re_match_context_t *mctx);
23*09d4459fSDaniel Fojt static void match_ctx_free (re_match_context_t *cache);
2495b7b453SJohn Marino static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25*09d4459fSDaniel Fojt Idx str_idx, Idx from, Idx to);
26*09d4459fSDaniel Fojt static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
2795b7b453SJohn Marino static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28*09d4459fSDaniel Fojt Idx str_idx);
2995b7b453SJohn Marino static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30*09d4459fSDaniel Fojt Idx node, Idx str_idx);
3195b7b453SJohn Marino static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
3295b7b453SJohn Marino re_dfastate_t **limited_sts, Idx last_node,
33*09d4459fSDaniel Fojt Idx last_str_idx);
3495b7b453SJohn Marino static reg_errcode_t re_search_internal (const regex_t *preg,
3595b7b453SJohn Marino const char *string, Idx length,
3695b7b453SJohn Marino Idx start, Idx last_start, Idx stop,
3795b7b453SJohn Marino size_t nmatch, regmatch_t pmatch[],
38*09d4459fSDaniel Fojt int eflags);
3995b7b453SJohn Marino static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
4095b7b453SJohn Marino const char *string1, Idx length1,
4195b7b453SJohn Marino const char *string2, Idx length2,
4295b7b453SJohn Marino Idx start, regoff_t range,
4395b7b453SJohn Marino struct re_registers *regs,
44*09d4459fSDaniel Fojt Idx stop, bool ret_len);
4595b7b453SJohn Marino static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
4695b7b453SJohn Marino const char *string, Idx length, Idx start,
4795b7b453SJohn Marino regoff_t range, Idx stop,
4895b7b453SJohn Marino struct re_registers *regs,
49*09d4459fSDaniel Fojt bool ret_len);
50cf28ed85SJohn Marino static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51*09d4459fSDaniel Fojt Idx nregs, int regs_allocated);
52*09d4459fSDaniel Fojt static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
5395b7b453SJohn Marino static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54*09d4459fSDaniel Fojt Idx *p_match_first);
5595b7b453SJohn Marino static Idx check_halt_state_context (const re_match_context_t *mctx,
56*09d4459fSDaniel Fojt const re_dfastate_t *state, Idx idx);
5795b7b453SJohn Marino static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
5895b7b453SJohn Marino regmatch_t *prev_idx_match, Idx cur_node,
59*09d4459fSDaniel Fojt Idx cur_idx, Idx nmatch);
6095b7b453SJohn Marino static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
6195b7b453SJohn Marino Idx str_idx, Idx dest_node, Idx nregs,
6295b7b453SJohn Marino regmatch_t *regs,
63*09d4459fSDaniel Fojt re_node_set *eps_via_nodes);
6495b7b453SJohn Marino static reg_errcode_t set_regs (const regex_t *preg,
6595b7b453SJohn Marino const re_match_context_t *mctx,
6695b7b453SJohn Marino size_t nmatch, regmatch_t *pmatch,
67*09d4459fSDaniel Fojt bool fl_backtrack);
68*09d4459fSDaniel Fojt static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
6995b7b453SJohn Marino
7095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
7195b7b453SJohn Marino static int sift_states_iter_mb (const re_match_context_t *mctx,
7295b7b453SJohn Marino re_sift_context_t *sctx,
73*09d4459fSDaniel Fojt Idx node_idx, Idx str_idx, Idx max_str_idx);
7495b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
7595b7b453SJohn Marino static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76*09d4459fSDaniel Fojt re_sift_context_t *sctx);
7795b7b453SJohn Marino static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
7895b7b453SJohn Marino re_sift_context_t *sctx, Idx str_idx,
79*09d4459fSDaniel Fojt re_node_set *cur_dest);
8095b7b453SJohn Marino static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
8195b7b453SJohn Marino re_sift_context_t *sctx,
8295b7b453SJohn Marino Idx str_idx,
83*09d4459fSDaniel Fojt re_node_set *dest_nodes);
8495b7b453SJohn Marino static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
8595b7b453SJohn Marino re_node_set *dest_nodes,
86*09d4459fSDaniel Fojt const re_node_set *candidates);
8795b7b453SJohn Marino static bool check_dst_limits (const re_match_context_t *mctx,
8895b7b453SJohn Marino const re_node_set *limits,
8995b7b453SJohn Marino Idx dst_node, Idx dst_idx, Idx src_node,
90*09d4459fSDaniel Fojt Idx src_idx);
9195b7b453SJohn Marino static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
9295b7b453SJohn Marino int boundaries, Idx subexp_idx,
93*09d4459fSDaniel Fojt Idx from_node, Idx bkref_idx);
9495b7b453SJohn Marino static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
9595b7b453SJohn Marino Idx limit, Idx subexp_idx,
9695b7b453SJohn Marino Idx node, Idx str_idx,
97*09d4459fSDaniel Fojt Idx bkref_idx);
9895b7b453SJohn Marino static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
9995b7b453SJohn Marino re_node_set *dest_nodes,
10095b7b453SJohn Marino const re_node_set *candidates,
10195b7b453SJohn Marino re_node_set *limits,
10295b7b453SJohn Marino struct re_backref_cache_entry *bkref_ents,
103*09d4459fSDaniel Fojt Idx str_idx);
10495b7b453SJohn Marino static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
10595b7b453SJohn Marino re_sift_context_t *sctx,
106*09d4459fSDaniel Fojt Idx str_idx, const re_node_set *candidates);
10795b7b453SJohn Marino static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
10895b7b453SJohn Marino re_dfastate_t **dst,
109*09d4459fSDaniel Fojt re_dfastate_t **src, Idx num);
11095b7b453SJohn Marino static re_dfastate_t *find_recover_state (reg_errcode_t *err,
111*09d4459fSDaniel Fojt re_match_context_t *mctx);
11295b7b453SJohn Marino static re_dfastate_t *transit_state (reg_errcode_t *err,
11395b7b453SJohn Marino re_match_context_t *mctx,
114*09d4459fSDaniel Fojt re_dfastate_t *state);
11595b7b453SJohn Marino static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
11695b7b453SJohn Marino re_match_context_t *mctx,
117*09d4459fSDaniel Fojt re_dfastate_t *next_state);
11895b7b453SJohn Marino static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
11995b7b453SJohn Marino re_node_set *cur_nodes,
120*09d4459fSDaniel Fojt Idx str_idx);
12195b7b453SJohn Marino #if 0
12295b7b453SJohn Marino static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
12395b7b453SJohn Marino re_match_context_t *mctx,
124*09d4459fSDaniel Fojt re_dfastate_t *pstate);
12595b7b453SJohn Marino #endif
12695b7b453SJohn Marino #ifdef RE_ENABLE_I18N
12795b7b453SJohn Marino static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128*09d4459fSDaniel Fojt re_dfastate_t *pstate);
12995b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
13095b7b453SJohn Marino static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131*09d4459fSDaniel Fojt const re_node_set *nodes);
13295b7b453SJohn Marino static reg_errcode_t get_subexp (re_match_context_t *mctx,
133*09d4459fSDaniel Fojt Idx bkref_node, Idx bkref_str_idx);
13495b7b453SJohn Marino static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
13595b7b453SJohn Marino const re_sub_match_top_t *sub_top,
13695b7b453SJohn Marino re_sub_match_last_t *sub_last,
137*09d4459fSDaniel Fojt Idx bkref_node, Idx bkref_str);
13895b7b453SJohn Marino static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
139*09d4459fSDaniel Fojt Idx subexp_idx, int type);
14095b7b453SJohn Marino static reg_errcode_t check_arrival (re_match_context_t *mctx,
14195b7b453SJohn Marino state_array_t *path, Idx top_node,
14295b7b453SJohn Marino Idx top_str, Idx last_node, Idx last_str,
143*09d4459fSDaniel Fojt int type);
14495b7b453SJohn Marino static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
14595b7b453SJohn Marino Idx str_idx,
14695b7b453SJohn Marino re_node_set *cur_nodes,
147*09d4459fSDaniel Fojt re_node_set *next_nodes);
14895b7b453SJohn Marino static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
14995b7b453SJohn Marino re_node_set *cur_nodes,
150*09d4459fSDaniel Fojt Idx ex_subexp, int type);
15195b7b453SJohn Marino static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
15295b7b453SJohn Marino re_node_set *dst_nodes,
15395b7b453SJohn Marino Idx target, Idx ex_subexp,
154*09d4459fSDaniel Fojt int type);
15595b7b453SJohn Marino static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
15695b7b453SJohn Marino re_node_set *cur_nodes, Idx cur_str,
157*09d4459fSDaniel Fojt Idx subexp_num, int type);
158*09d4459fSDaniel Fojt static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
15995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
16095b7b453SJohn Marino static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161*09d4459fSDaniel Fojt const re_string_t *input, Idx idx);
16295b7b453SJohn Marino # ifdef _LIBC
16395b7b453SJohn Marino static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164*09d4459fSDaniel Fojt size_t name_len);
16595b7b453SJohn Marino # endif /* _LIBC */
16695b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
16795b7b453SJohn Marino static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
16895b7b453SJohn Marino const re_dfastate_t *state,
16995b7b453SJohn Marino re_node_set *states_node,
170*09d4459fSDaniel Fojt bitset_t *states_ch);
17195b7b453SJohn Marino static bool check_node_accept (const re_match_context_t *mctx,
172*09d4459fSDaniel Fojt const re_token_t *node, Idx idx);
173*09d4459fSDaniel Fojt static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
17495b7b453SJohn Marino
17595b7b453SJohn Marino /* Entry point for POSIX code. */
17695b7b453SJohn Marino
17795b7b453SJohn Marino /* regexec searches for a given pattern, specified by PREG, in the
17895b7b453SJohn Marino string STRING.
17995b7b453SJohn Marino
18095b7b453SJohn Marino If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181cf28ed85SJohn Marino 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
18295b7b453SJohn Marino least NMATCH elements, and we set them to the offsets of the
18395b7b453SJohn Marino corresponding matched substrings.
18495b7b453SJohn Marino
185cf28ed85SJohn Marino EFLAGS specifies "execution flags" which affect matching: if
18695b7b453SJohn Marino REG_NOTBOL is set, then ^ does not match at the beginning of the
18795b7b453SJohn Marino string; if REG_NOTEOL is set, then $ does not match at the end.
18895b7b453SJohn Marino
18995b7b453SJohn Marino We return 0 if we find a match and REG_NOMATCH if not. */
19095b7b453SJohn Marino
19195b7b453SJohn Marino int
regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[],int eflags)192*09d4459fSDaniel Fojt regexec (const regex_t *__restrict preg, const char *__restrict string,
193*09d4459fSDaniel Fojt size_t nmatch, regmatch_t pmatch[], int eflags)
19495b7b453SJohn Marino {
19595b7b453SJohn Marino reg_errcode_t err;
19695b7b453SJohn Marino Idx start, length;
197cf28ed85SJohn Marino re_dfa_t *dfa = preg->buffer;
19895b7b453SJohn Marino
19995b7b453SJohn Marino if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
20095b7b453SJohn Marino return REG_BADPAT;
20195b7b453SJohn Marino
20295b7b453SJohn Marino if (eflags & REG_STARTEND)
20395b7b453SJohn Marino {
20495b7b453SJohn Marino start = pmatch[0].rm_so;
20595b7b453SJohn Marino length = pmatch[0].rm_eo;
20695b7b453SJohn Marino }
20795b7b453SJohn Marino else
20895b7b453SJohn Marino {
20995b7b453SJohn Marino start = 0;
21095b7b453SJohn Marino length = strlen (string);
21195b7b453SJohn Marino }
21295b7b453SJohn Marino
213680a9cb8SJohn Marino lock_lock (dfa->lock);
21495b7b453SJohn Marino if (preg->no_sub)
21595b7b453SJohn Marino err = re_search_internal (preg, string, length, start, length,
21695b7b453SJohn Marino length, 0, NULL, eflags);
21795b7b453SJohn Marino else
21895b7b453SJohn Marino err = re_search_internal (preg, string, length, start, length,
21995b7b453SJohn Marino length, nmatch, pmatch, eflags);
220680a9cb8SJohn Marino lock_unlock (dfa->lock);
22195b7b453SJohn Marino return err != REG_NOERROR;
22295b7b453SJohn Marino }
22395b7b453SJohn Marino
22495b7b453SJohn Marino #ifdef _LIBC
225*09d4459fSDaniel Fojt libc_hidden_def (__regexec)
226*09d4459fSDaniel Fojt
22795b7b453SJohn Marino # include <shlib-compat.h>
22895b7b453SJohn Marino versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
22995b7b453SJohn Marino
23095b7b453SJohn Marino # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
23195b7b453SJohn Marino __typeof__ (__regexec) __compat_regexec;
23295b7b453SJohn Marino
23395b7b453SJohn Marino int
23495b7b453SJohn Marino attribute_compat_text_section
__compat_regexec(const regex_t * __restrict preg,const char * __restrict string,size_t nmatch,regmatch_t pmatch[],int eflags)235*09d4459fSDaniel Fojt __compat_regexec (const regex_t *__restrict preg,
236*09d4459fSDaniel Fojt const char *__restrict string, size_t nmatch,
23795b7b453SJohn Marino regmatch_t pmatch[], int eflags)
23895b7b453SJohn Marino {
23995b7b453SJohn Marino return regexec (preg, string, nmatch, pmatch,
24095b7b453SJohn Marino eflags & (REG_NOTBOL | REG_NOTEOL));
24195b7b453SJohn Marino }
24295b7b453SJohn Marino compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
24395b7b453SJohn Marino # endif
24495b7b453SJohn Marino #endif
24595b7b453SJohn Marino
24695b7b453SJohn Marino /* Entry points for GNU code. */
24795b7b453SJohn Marino
24895b7b453SJohn Marino /* re_match, re_search, re_match_2, re_search_2
24995b7b453SJohn Marino
25095b7b453SJohn Marino The former two functions operate on STRING with length LENGTH,
25195b7b453SJohn Marino while the later two operate on concatenation of STRING1 and STRING2
25295b7b453SJohn Marino with lengths LENGTH1 and LENGTH2, respectively.
25395b7b453SJohn Marino
25495b7b453SJohn Marino re_match() matches the compiled pattern in BUFP against the string,
25595b7b453SJohn Marino starting at index START.
25695b7b453SJohn Marino
25795b7b453SJohn Marino re_search() first tries matching at index START, then it tries to match
25895b7b453SJohn Marino starting from index START + 1, and so on. The last start position tried
25995b7b453SJohn Marino is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
26095b7b453SJohn Marino way as re_match().)
26195b7b453SJohn Marino
26295b7b453SJohn Marino The parameter STOP of re_{match,search}_2 specifies that no match exceeding
26395b7b453SJohn Marino the first STOP characters of the concatenation of the strings should be
26495b7b453SJohn Marino concerned.
26595b7b453SJohn Marino
26695b7b453SJohn Marino If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
26795b7b453SJohn Marino and all groups is stored in REGS. (For the "_2" variants, the offsets are
26895b7b453SJohn Marino computed relative to the concatenation, not relative to the individual
26995b7b453SJohn Marino strings.)
27095b7b453SJohn Marino
27195b7b453SJohn Marino On success, re_match* functions return the length of the match, re_search*
27295b7b453SJohn Marino return the position of the start of the match. Return value -1 means no
27395b7b453SJohn Marino match was found and -2 indicates an internal error. */
27495b7b453SJohn Marino
27595b7b453SJohn Marino regoff_t
re_match(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,struct re_registers * regs)276*09d4459fSDaniel Fojt re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
277*09d4459fSDaniel Fojt Idx start, struct re_registers *regs)
27895b7b453SJohn Marino {
27995b7b453SJohn Marino return re_search_stub (bufp, string, length, start, 0, length, regs, true);
28095b7b453SJohn Marino }
28195b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_match,re_match)28295b7b453SJohn Marino weak_alias (__re_match, re_match)
28395b7b453SJohn Marino #endif
28495b7b453SJohn Marino
28595b7b453SJohn Marino regoff_t
286*09d4459fSDaniel Fojt re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
287*09d4459fSDaniel Fojt Idx start, regoff_t range, struct re_registers *regs)
28895b7b453SJohn Marino {
28995b7b453SJohn Marino return re_search_stub (bufp, string, length, start, range, length, regs,
29095b7b453SJohn Marino false);
29195b7b453SJohn Marino }
29295b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_search,re_search)29395b7b453SJohn Marino weak_alias (__re_search, re_search)
29495b7b453SJohn Marino #endif
29595b7b453SJohn Marino
29695b7b453SJohn Marino regoff_t
297*09d4459fSDaniel Fojt re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
298*09d4459fSDaniel Fojt const char *string2, Idx length2, Idx start,
299*09d4459fSDaniel Fojt struct re_registers *regs, Idx stop)
30095b7b453SJohn Marino {
30195b7b453SJohn Marino return re_search_2_stub (bufp, string1, length1, string2, length2,
30295b7b453SJohn Marino start, 0, regs, stop, true);
30395b7b453SJohn Marino }
30495b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_match_2,re_match_2)30595b7b453SJohn Marino weak_alias (__re_match_2, re_match_2)
30695b7b453SJohn Marino #endif
30795b7b453SJohn Marino
30895b7b453SJohn Marino regoff_t
309*09d4459fSDaniel Fojt re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
310*09d4459fSDaniel Fojt const char *string2, Idx length2, Idx start, regoff_t range,
311*09d4459fSDaniel Fojt struct re_registers *regs, Idx stop)
31295b7b453SJohn Marino {
31395b7b453SJohn Marino return re_search_2_stub (bufp, string1, length1, string2, length2,
31495b7b453SJohn Marino start, range, regs, stop, false);
31595b7b453SJohn Marino }
31695b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_search_2,re_search_2)31795b7b453SJohn Marino weak_alias (__re_search_2, re_search_2)
31895b7b453SJohn Marino #endif
31995b7b453SJohn Marino
32095b7b453SJohn Marino static regoff_t
321*09d4459fSDaniel Fojt re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
322*09d4459fSDaniel Fojt Idx length1, const char *string2, Idx length2, Idx start,
323*09d4459fSDaniel Fojt regoff_t range, struct re_registers *regs,
32495b7b453SJohn Marino Idx stop, bool ret_len)
32595b7b453SJohn Marino {
32695b7b453SJohn Marino const char *str;
32795b7b453SJohn Marino regoff_t rval;
328*09d4459fSDaniel Fojt Idx len;
32995b7b453SJohn Marino char *s = NULL;
33095b7b453SJohn Marino
331*09d4459fSDaniel Fojt if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
332*09d4459fSDaniel Fojt || INT_ADD_WRAPV (length1, length2, &len))))
33395b7b453SJohn Marino return -2;
33495b7b453SJohn Marino
33595b7b453SJohn Marino /* Concatenate the strings. */
33695b7b453SJohn Marino if (length2 > 0)
33795b7b453SJohn Marino if (length1 > 0)
33895b7b453SJohn Marino {
33995b7b453SJohn Marino s = re_malloc (char, len);
34095b7b453SJohn Marino
341*09d4459fSDaniel Fojt if (__glibc_unlikely (s == NULL))
34295b7b453SJohn Marino return -2;
34395b7b453SJohn Marino #ifdef _LIBC
34495b7b453SJohn Marino memcpy (__mempcpy (s, string1, length1), string2, length2);
34595b7b453SJohn Marino #else
34695b7b453SJohn Marino memcpy (s, string1, length1);
34795b7b453SJohn Marino memcpy (s + length1, string2, length2);
34895b7b453SJohn Marino #endif
34995b7b453SJohn Marino str = s;
35095b7b453SJohn Marino }
35195b7b453SJohn Marino else
35295b7b453SJohn Marino str = string2;
35395b7b453SJohn Marino else
35495b7b453SJohn Marino str = string1;
35595b7b453SJohn Marino
35695b7b453SJohn Marino rval = re_search_stub (bufp, str, len, start, range, stop, regs,
35795b7b453SJohn Marino ret_len);
35895b7b453SJohn Marino re_free (s);
35995b7b453SJohn Marino return rval;
36095b7b453SJohn Marino }
36195b7b453SJohn Marino
36295b7b453SJohn Marino /* The parameters have the same meaning as those of re_search.
36395b7b453SJohn Marino Additional parameters:
36495b7b453SJohn Marino If RET_LEN is true the length of the match is returned (re_match style);
36595b7b453SJohn Marino otherwise the position of the match is returned. */
36695b7b453SJohn Marino
36795b7b453SJohn Marino static regoff_t
re_search_stub(struct re_pattern_buffer * bufp,const char * string,Idx length,Idx start,regoff_t range,Idx stop,struct re_registers * regs,bool ret_len)368*09d4459fSDaniel Fojt re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
36995b7b453SJohn Marino Idx start, regoff_t range, Idx stop, struct re_registers *regs,
37095b7b453SJohn Marino bool ret_len)
37195b7b453SJohn Marino {
37295b7b453SJohn Marino reg_errcode_t result;
37395b7b453SJohn Marino regmatch_t *pmatch;
37495b7b453SJohn Marino Idx nregs;
37595b7b453SJohn Marino regoff_t rval;
37695b7b453SJohn Marino int eflags = 0;
377cf28ed85SJohn Marino re_dfa_t *dfa = bufp->buffer;
37895b7b453SJohn Marino Idx last_start = start + range;
37995b7b453SJohn Marino
38095b7b453SJohn Marino /* Check for out-of-range. */
381*09d4459fSDaniel Fojt if (__glibc_unlikely (start < 0 || start > length))
38295b7b453SJohn Marino return -1;
383*09d4459fSDaniel Fojt if (__glibc_unlikely (length < last_start
384*09d4459fSDaniel Fojt || (0 <= range && last_start < start)))
38595b7b453SJohn Marino last_start = length;
386*09d4459fSDaniel Fojt else if (__glibc_unlikely (last_start < 0
387*09d4459fSDaniel Fojt || (range < 0 && start <= last_start)))
38895b7b453SJohn Marino last_start = 0;
38995b7b453SJohn Marino
390680a9cb8SJohn Marino lock_lock (dfa->lock);
39195b7b453SJohn Marino
39295b7b453SJohn Marino eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
39395b7b453SJohn Marino eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
39495b7b453SJohn Marino
39595b7b453SJohn Marino /* Compile fastmap if we haven't yet. */
39695b7b453SJohn Marino if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
39795b7b453SJohn Marino re_compile_fastmap (bufp);
39895b7b453SJohn Marino
399*09d4459fSDaniel Fojt if (__glibc_unlikely (bufp->no_sub))
40095b7b453SJohn Marino regs = NULL;
40195b7b453SJohn Marino
40295b7b453SJohn Marino /* We need at least 1 register. */
40395b7b453SJohn Marino if (regs == NULL)
40495b7b453SJohn Marino nregs = 1;
405*09d4459fSDaniel Fojt else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406*09d4459fSDaniel Fojt && regs->num_regs <= bufp->re_nsub))
40795b7b453SJohn Marino {
40895b7b453SJohn Marino nregs = regs->num_regs;
409*09d4459fSDaniel Fojt if (__glibc_unlikely (nregs < 1))
41095b7b453SJohn Marino {
41195b7b453SJohn Marino /* Nothing can be copied to regs. */
41295b7b453SJohn Marino regs = NULL;
41395b7b453SJohn Marino nregs = 1;
41495b7b453SJohn Marino }
41595b7b453SJohn Marino }
41695b7b453SJohn Marino else
41795b7b453SJohn Marino nregs = bufp->re_nsub + 1;
41895b7b453SJohn Marino pmatch = re_malloc (regmatch_t, nregs);
419*09d4459fSDaniel Fojt if (__glibc_unlikely (pmatch == NULL))
42095b7b453SJohn Marino {
42195b7b453SJohn Marino rval = -2;
42295b7b453SJohn Marino goto out;
42395b7b453SJohn Marino }
42495b7b453SJohn Marino
42595b7b453SJohn Marino result = re_search_internal (bufp, string, length, start, last_start, stop,
42695b7b453SJohn Marino nregs, pmatch, eflags);
42795b7b453SJohn Marino
42895b7b453SJohn Marino rval = 0;
42995b7b453SJohn Marino
430a8597f6cSJohn Marino /* I hope we needn't fill their regs with -1's when no match was found. */
43195b7b453SJohn Marino if (result != REG_NOERROR)
432cf28ed85SJohn Marino rval = result == REG_NOMATCH ? -1 : -2;
43395b7b453SJohn Marino else if (regs != NULL)
43495b7b453SJohn Marino {
43595b7b453SJohn Marino /* If caller wants register contents data back, copy them. */
43695b7b453SJohn Marino bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
43795b7b453SJohn Marino bufp->regs_allocated);
438*09d4459fSDaniel Fojt if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
43995b7b453SJohn Marino rval = -2;
44095b7b453SJohn Marino }
44195b7b453SJohn Marino
442*09d4459fSDaniel Fojt if (__glibc_likely (rval == 0))
44395b7b453SJohn Marino {
44495b7b453SJohn Marino if (ret_len)
44595b7b453SJohn Marino {
446*09d4459fSDaniel Fojt DEBUG_ASSERT (pmatch[0].rm_so == start);
44795b7b453SJohn Marino rval = pmatch[0].rm_eo - start;
44895b7b453SJohn Marino }
44995b7b453SJohn Marino else
45095b7b453SJohn Marino rval = pmatch[0].rm_so;
45195b7b453SJohn Marino }
45295b7b453SJohn Marino re_free (pmatch);
45395b7b453SJohn Marino out:
454680a9cb8SJohn Marino lock_unlock (dfa->lock);
45595b7b453SJohn Marino return rval;
45695b7b453SJohn Marino }
45795b7b453SJohn Marino
458cf28ed85SJohn Marino static unsigned
re_copy_regs(struct re_registers * regs,regmatch_t * pmatch,Idx nregs,int regs_allocated)45995b7b453SJohn Marino re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
46095b7b453SJohn Marino int regs_allocated)
46195b7b453SJohn Marino {
46295b7b453SJohn Marino int rval = REGS_REALLOCATE;
46395b7b453SJohn Marino Idx i;
46495b7b453SJohn Marino Idx need_regs = nregs + 1;
465cf28ed85SJohn Marino /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
46695b7b453SJohn Marino uses. */
46795b7b453SJohn Marino
46895b7b453SJohn Marino /* Have the register data arrays been allocated? */
46995b7b453SJohn Marino if (regs_allocated == REGS_UNALLOCATED)
47095b7b453SJohn Marino { /* No. So allocate them with malloc. */
47195b7b453SJohn Marino regs->start = re_malloc (regoff_t, need_regs);
472*09d4459fSDaniel Fojt if (__glibc_unlikely (regs->start == NULL))
47395b7b453SJohn Marino return REGS_UNALLOCATED;
47495b7b453SJohn Marino regs->end = re_malloc (regoff_t, need_regs);
475*09d4459fSDaniel Fojt if (__glibc_unlikely (regs->end == NULL))
47695b7b453SJohn Marino {
47795b7b453SJohn Marino re_free (regs->start);
47895b7b453SJohn Marino return REGS_UNALLOCATED;
47995b7b453SJohn Marino }
48095b7b453SJohn Marino regs->num_regs = need_regs;
48195b7b453SJohn Marino }
48295b7b453SJohn Marino else if (regs_allocated == REGS_REALLOCATE)
48395b7b453SJohn Marino { /* Yes. If we need more elements than were already
48495b7b453SJohn Marino allocated, reallocate them. If we need fewer, just
48595b7b453SJohn Marino leave it alone. */
486*09d4459fSDaniel Fojt if (__glibc_unlikely (need_regs > regs->num_regs))
48795b7b453SJohn Marino {
48895b7b453SJohn Marino regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
48995b7b453SJohn Marino regoff_t *new_end;
490*09d4459fSDaniel Fojt if (__glibc_unlikely (new_start == NULL))
49195b7b453SJohn Marino return REGS_UNALLOCATED;
49295b7b453SJohn Marino new_end = re_realloc (regs->end, regoff_t, need_regs);
493*09d4459fSDaniel Fojt if (__glibc_unlikely (new_end == NULL))
49495b7b453SJohn Marino {
49595b7b453SJohn Marino re_free (new_start);
49695b7b453SJohn Marino return REGS_UNALLOCATED;
49795b7b453SJohn Marino }
49895b7b453SJohn Marino regs->start = new_start;
49995b7b453SJohn Marino regs->end = new_end;
50095b7b453SJohn Marino regs->num_regs = need_regs;
50195b7b453SJohn Marino }
50295b7b453SJohn Marino }
50395b7b453SJohn Marino else
50495b7b453SJohn Marino {
505*09d4459fSDaniel Fojt DEBUG_ASSERT (regs_allocated == REGS_FIXED);
50695b7b453SJohn Marino /* This function may not be called with REGS_FIXED and nregs too big. */
507*09d4459fSDaniel Fojt DEBUG_ASSERT (nregs <= regs->num_regs);
50895b7b453SJohn Marino rval = REGS_FIXED;
50995b7b453SJohn Marino }
51095b7b453SJohn Marino
51195b7b453SJohn Marino /* Copy the regs. */
51295b7b453SJohn Marino for (i = 0; i < nregs; ++i)
51395b7b453SJohn Marino {
51495b7b453SJohn Marino regs->start[i] = pmatch[i].rm_so;
51595b7b453SJohn Marino regs->end[i] = pmatch[i].rm_eo;
51695b7b453SJohn Marino }
51795b7b453SJohn Marino for ( ; i < regs->num_regs; ++i)
51895b7b453SJohn Marino regs->start[i] = regs->end[i] = -1;
51995b7b453SJohn Marino
52095b7b453SJohn Marino return rval;
52195b7b453SJohn Marino }
52295b7b453SJohn Marino
52395b7b453SJohn Marino /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
52495b7b453SJohn Marino ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
52595b7b453SJohn Marino this memory for recording register information. STARTS and ENDS
52695b7b453SJohn Marino must be allocated using the malloc library routine, and must each
52795b7b453SJohn Marino be at least NUM_REGS * sizeof (regoff_t) bytes long.
52895b7b453SJohn Marino
52995b7b453SJohn Marino If NUM_REGS == 0, then subsequent matches should allocate their own
53095b7b453SJohn Marino register data.
53195b7b453SJohn Marino
53295b7b453SJohn Marino Unless this function is called, the first search or match using
53395b7b453SJohn Marino PATTERN_BUFFER will allocate its own register data, without
53495b7b453SJohn Marino freeing the old data. */
53595b7b453SJohn Marino
53695b7b453SJohn Marino void
re_set_registers(struct re_pattern_buffer * bufp,struct re_registers * regs,__re_size_t num_regs,regoff_t * starts,regoff_t * ends)537*09d4459fSDaniel Fojt re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
538*09d4459fSDaniel Fojt __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
53995b7b453SJohn Marino {
54095b7b453SJohn Marino if (num_regs)
54195b7b453SJohn Marino {
54295b7b453SJohn Marino bufp->regs_allocated = REGS_REALLOCATE;
54395b7b453SJohn Marino regs->num_regs = num_regs;
54495b7b453SJohn Marino regs->start = starts;
54595b7b453SJohn Marino regs->end = ends;
54695b7b453SJohn Marino }
54795b7b453SJohn Marino else
54895b7b453SJohn Marino {
54995b7b453SJohn Marino bufp->regs_allocated = REGS_UNALLOCATED;
55095b7b453SJohn Marino regs->num_regs = 0;
55195b7b453SJohn Marino regs->start = regs->end = NULL;
55295b7b453SJohn Marino }
55395b7b453SJohn Marino }
55495b7b453SJohn Marino #ifdef _LIBC
weak_alias(__re_set_registers,re_set_registers)55595b7b453SJohn Marino weak_alias (__re_set_registers, re_set_registers)
55695b7b453SJohn Marino #endif
55795b7b453SJohn Marino
55895b7b453SJohn Marino /* Entry points compatible with 4.2 BSD regex library. We don't define
55995b7b453SJohn Marino them unless specifically requested. */
56095b7b453SJohn Marino
56195b7b453SJohn Marino #if defined _REGEX_RE_COMP || defined _LIBC
56295b7b453SJohn Marino int
56395b7b453SJohn Marino # ifdef _LIBC
56495b7b453SJohn Marino weak_function
56595b7b453SJohn Marino # endif
566*09d4459fSDaniel Fojt re_exec (const char *s)
56795b7b453SJohn Marino {
56895b7b453SJohn Marino return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
56995b7b453SJohn Marino }
57095b7b453SJohn Marino #endif /* _REGEX_RE_COMP */
57195b7b453SJohn Marino
57295b7b453SJohn Marino /* Internal entry point. */
57395b7b453SJohn Marino
57495b7b453SJohn Marino /* Searches for a compiled pattern PREG in the string STRING, whose
57595b7b453SJohn Marino length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
57695b7b453SJohn Marino meaning as with regexec. LAST_START is START + RANGE, where
57795b7b453SJohn Marino START and RANGE have the same meaning as with re_search.
57895b7b453SJohn Marino Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
57995b7b453SJohn Marino otherwise return the error code.
58095b7b453SJohn Marino Note: We assume front end functions already check ranges.
58195b7b453SJohn Marino (0 <= LAST_START && LAST_START <= LENGTH) */
58295b7b453SJohn Marino
58395b7b453SJohn Marino static reg_errcode_t
584cf28ed85SJohn Marino __attribute_warn_unused_result__
re_search_internal(const regex_t * preg,const char * string,Idx length,Idx start,Idx last_start,Idx stop,size_t nmatch,regmatch_t pmatch[],int eflags)585*09d4459fSDaniel Fojt re_search_internal (const regex_t *preg, const char *string, Idx length,
586*09d4459fSDaniel Fojt Idx start, Idx last_start, Idx stop, size_t nmatch,
587*09d4459fSDaniel Fojt regmatch_t pmatch[], int eflags)
58895b7b453SJohn Marino {
58995b7b453SJohn Marino reg_errcode_t err;
590cf28ed85SJohn Marino const re_dfa_t *dfa = preg->buffer;
59195b7b453SJohn Marino Idx left_lim, right_lim;
59295b7b453SJohn Marino int incr;
59395b7b453SJohn Marino bool fl_longest_match;
59495b7b453SJohn Marino int match_kind;
59595b7b453SJohn Marino Idx match_first;
596*09d4459fSDaniel Fojt Idx match_last = -1;
59795b7b453SJohn Marino Idx extra_nmatch;
59895b7b453SJohn Marino bool sb;
59995b7b453SJohn Marino int ch;
60095b7b453SJohn Marino re_match_context_t mctx = { .dfa = dfa };
60195b7b453SJohn Marino char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
60295b7b453SJohn Marino && start != last_start && !preg->can_be_null)
60395b7b453SJohn Marino ? preg->fastmap : NULL);
60495b7b453SJohn Marino RE_TRANSLATE_TYPE t = preg->translate;
60595b7b453SJohn Marino
60695b7b453SJohn Marino extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
60795b7b453SJohn Marino nmatch -= extra_nmatch;
60895b7b453SJohn Marino
60995b7b453SJohn Marino /* Check if the DFA haven't been compiled. */
610*09d4459fSDaniel Fojt if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
611*09d4459fSDaniel Fojt || dfa->init_state_word == NULL
612*09d4459fSDaniel Fojt || dfa->init_state_nl == NULL
613*09d4459fSDaniel Fojt || dfa->init_state_begbuf == NULL))
61495b7b453SJohn Marino return REG_NOMATCH;
61595b7b453SJohn Marino
61695b7b453SJohn Marino /* We assume front-end functions already check them. */
617*09d4459fSDaniel Fojt DEBUG_ASSERT (0 <= last_start && last_start <= length);
61895b7b453SJohn Marino
61995b7b453SJohn Marino /* If initial states with non-begbuf contexts have no elements,
62095b7b453SJohn Marino the regex must be anchored. If preg->newline_anchor is set,
62195b7b453SJohn Marino we'll never use init_state_nl, so do not check it. */
62295b7b453SJohn Marino if (dfa->init_state->nodes.nelem == 0
62395b7b453SJohn Marino && dfa->init_state_word->nodes.nelem == 0
62495b7b453SJohn Marino && (dfa->init_state_nl->nodes.nelem == 0
62595b7b453SJohn Marino || !preg->newline_anchor))
62695b7b453SJohn Marino {
62795b7b453SJohn Marino if (start != 0 && last_start != 0)
62895b7b453SJohn Marino return REG_NOMATCH;
62995b7b453SJohn Marino start = last_start = 0;
63095b7b453SJohn Marino }
63195b7b453SJohn Marino
63295b7b453SJohn Marino /* We must check the longest matching, if nmatch > 0. */
63395b7b453SJohn Marino fl_longest_match = (nmatch != 0 || dfa->nbackref);
63495b7b453SJohn Marino
63595b7b453SJohn Marino err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
63695b7b453SJohn Marino preg->translate, (preg->syntax & RE_ICASE) != 0,
63795b7b453SJohn Marino dfa);
638*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
63995b7b453SJohn Marino goto free_return;
64095b7b453SJohn Marino mctx.input.stop = stop;
64195b7b453SJohn Marino mctx.input.raw_stop = stop;
64295b7b453SJohn Marino mctx.input.newline_anchor = preg->newline_anchor;
64395b7b453SJohn Marino
64495b7b453SJohn Marino err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
645*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
64695b7b453SJohn Marino goto free_return;
64795b7b453SJohn Marino
64895b7b453SJohn Marino /* We will log all the DFA states through which the dfa pass,
64995b7b453SJohn Marino if nmatch > 1, or this dfa has "multibyte node", which is a
65095b7b453SJohn Marino back-reference or a node which can accept multibyte character or
65195b7b453SJohn Marino multi character collating element. */
65295b7b453SJohn Marino if (nmatch > 1 || dfa->has_mb_node)
65395b7b453SJohn Marino {
65495b7b453SJohn Marino /* Avoid overflow. */
655*09d4459fSDaniel Fojt if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
656*09d4459fSDaniel Fojt <= mctx.input.bufs_len)))
65795b7b453SJohn Marino {
65895b7b453SJohn Marino err = REG_ESPACE;
65995b7b453SJohn Marino goto free_return;
66095b7b453SJohn Marino }
66195b7b453SJohn Marino
66295b7b453SJohn Marino mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
663*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx.state_log == NULL))
66495b7b453SJohn Marino {
66595b7b453SJohn Marino err = REG_ESPACE;
66695b7b453SJohn Marino goto free_return;
66795b7b453SJohn Marino }
66895b7b453SJohn Marino }
66995b7b453SJohn Marino
67095b7b453SJohn Marino match_first = start;
67195b7b453SJohn Marino mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
67295b7b453SJohn Marino : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
67395b7b453SJohn Marino
674680a9cb8SJohn Marino /* Check incrementally whether the input string matches. */
67595b7b453SJohn Marino incr = (last_start < start) ? -1 : 1;
67695b7b453SJohn Marino left_lim = (last_start < start) ? last_start : start;
67795b7b453SJohn Marino right_lim = (last_start < start) ? start : last_start;
67895b7b453SJohn Marino sb = dfa->mb_cur_max == 1;
67995b7b453SJohn Marino match_kind =
68095b7b453SJohn Marino (fastmap
68195b7b453SJohn Marino ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
68295b7b453SJohn Marino | (start <= last_start ? 2 : 0)
68395b7b453SJohn Marino | (t != NULL ? 1 : 0))
68495b7b453SJohn Marino : 8);
68595b7b453SJohn Marino
68695b7b453SJohn Marino for (;; match_first += incr)
68795b7b453SJohn Marino {
68895b7b453SJohn Marino err = REG_NOMATCH;
68995b7b453SJohn Marino if (match_first < left_lim || right_lim < match_first)
69095b7b453SJohn Marino goto free_return;
69195b7b453SJohn Marino
69295b7b453SJohn Marino /* Advance as rapidly as possible through the string, until we
69395b7b453SJohn Marino find a plausible place to start matching. This may be done
69495b7b453SJohn Marino with varying efficiency, so there are various possibilities:
69595b7b453SJohn Marino only the most common of them are specialized, in order to
69695b7b453SJohn Marino save on code size. We use a switch statement for speed. */
69795b7b453SJohn Marino switch (match_kind)
69895b7b453SJohn Marino {
69995b7b453SJohn Marino case 8:
70095b7b453SJohn Marino /* No fastmap. */
70195b7b453SJohn Marino break;
70295b7b453SJohn Marino
70395b7b453SJohn Marino case 7:
70495b7b453SJohn Marino /* Fastmap with single-byte translation, match forward. */
705*09d4459fSDaniel Fojt while (__glibc_likely (match_first < right_lim)
70695b7b453SJohn Marino && !fastmap[t[(unsigned char) string[match_first]]])
70795b7b453SJohn Marino ++match_first;
70895b7b453SJohn Marino goto forward_match_found_start_or_reached_end;
70995b7b453SJohn Marino
71095b7b453SJohn Marino case 6:
71195b7b453SJohn Marino /* Fastmap without translation, match forward. */
712*09d4459fSDaniel Fojt while (__glibc_likely (match_first < right_lim)
71395b7b453SJohn Marino && !fastmap[(unsigned char) string[match_first]])
71495b7b453SJohn Marino ++match_first;
71595b7b453SJohn Marino
71695b7b453SJohn Marino forward_match_found_start_or_reached_end:
717*09d4459fSDaniel Fojt if (__glibc_unlikely (match_first == right_lim))
71895b7b453SJohn Marino {
71995b7b453SJohn Marino ch = match_first >= length
72095b7b453SJohn Marino ? 0 : (unsigned char) string[match_first];
72195b7b453SJohn Marino if (!fastmap[t ? t[ch] : ch])
72295b7b453SJohn Marino goto free_return;
72395b7b453SJohn Marino }
72495b7b453SJohn Marino break;
72595b7b453SJohn Marino
72695b7b453SJohn Marino case 4:
72795b7b453SJohn Marino case 5:
72895b7b453SJohn Marino /* Fastmap without multi-byte translation, match backwards. */
72995b7b453SJohn Marino while (match_first >= left_lim)
73095b7b453SJohn Marino {
73195b7b453SJohn Marino ch = match_first >= length
73295b7b453SJohn Marino ? 0 : (unsigned char) string[match_first];
73395b7b453SJohn Marino if (fastmap[t ? t[ch] : ch])
73495b7b453SJohn Marino break;
73595b7b453SJohn Marino --match_first;
73695b7b453SJohn Marino }
73795b7b453SJohn Marino if (match_first < left_lim)
73895b7b453SJohn Marino goto free_return;
73995b7b453SJohn Marino break;
74095b7b453SJohn Marino
74195b7b453SJohn Marino default:
74295b7b453SJohn Marino /* In this case, we can't determine easily the current byte,
74395b7b453SJohn Marino since it might be a component byte of a multibyte
74495b7b453SJohn Marino character. Then we use the constructed buffer instead. */
74595b7b453SJohn Marino for (;;)
74695b7b453SJohn Marino {
74795b7b453SJohn Marino /* If MATCH_FIRST is out of the valid range, reconstruct the
74895b7b453SJohn Marino buffers. */
74995b7b453SJohn Marino __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
750*09d4459fSDaniel Fojt if (__glibc_unlikely (offset
751*09d4459fSDaniel Fojt >= (__re_size_t) mctx.input.valid_raw_len))
75295b7b453SJohn Marino {
75395b7b453SJohn Marino err = re_string_reconstruct (&mctx.input, match_first,
75495b7b453SJohn Marino eflags);
755*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
75695b7b453SJohn Marino goto free_return;
75795b7b453SJohn Marino
75895b7b453SJohn Marino offset = match_first - mctx.input.raw_mbs_idx;
75995b7b453SJohn Marino }
76095b7b453SJohn Marino /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
76195b7b453SJohn Marino Note that MATCH_FIRST must not be smaller than 0. */
76295b7b453SJohn Marino ch = (match_first >= length
76395b7b453SJohn Marino ? 0 : re_string_byte_at (&mctx.input, offset));
76495b7b453SJohn Marino if (fastmap[ch])
76595b7b453SJohn Marino break;
76695b7b453SJohn Marino match_first += incr;
76795b7b453SJohn Marino if (match_first < left_lim || match_first > right_lim)
76895b7b453SJohn Marino {
76995b7b453SJohn Marino err = REG_NOMATCH;
77095b7b453SJohn Marino goto free_return;
77195b7b453SJohn Marino }
77295b7b453SJohn Marino }
77395b7b453SJohn Marino break;
77495b7b453SJohn Marino }
77595b7b453SJohn Marino
77695b7b453SJohn Marino /* Reconstruct the buffers so that the matcher can assume that
77795b7b453SJohn Marino the matching starts from the beginning of the buffer. */
77895b7b453SJohn Marino err = re_string_reconstruct (&mctx.input, match_first, eflags);
779*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
78095b7b453SJohn Marino goto free_return;
78195b7b453SJohn Marino
78295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
78395b7b453SJohn Marino /* Don't consider this char as a possible match start if it part,
78495b7b453SJohn Marino yet isn't the head, of a multibyte character. */
78595b7b453SJohn Marino if (!sb && !re_string_first_byte (&mctx.input, 0))
78695b7b453SJohn Marino continue;
78795b7b453SJohn Marino #endif
78895b7b453SJohn Marino
78995b7b453SJohn Marino /* It seems to be appropriate one, then use the matcher. */
79095b7b453SJohn Marino /* We assume that the matching starts from 0. */
79195b7b453SJohn Marino mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
79295b7b453SJohn Marino match_last = check_matching (&mctx, fl_longest_match,
79395b7b453SJohn Marino start <= last_start ? &match_first : NULL);
794*09d4459fSDaniel Fojt if (match_last != -1)
79595b7b453SJohn Marino {
796*09d4459fSDaniel Fojt if (__glibc_unlikely (match_last == -2))
79795b7b453SJohn Marino {
79895b7b453SJohn Marino err = REG_ESPACE;
79995b7b453SJohn Marino goto free_return;
80095b7b453SJohn Marino }
80195b7b453SJohn Marino else
80295b7b453SJohn Marino {
80395b7b453SJohn Marino mctx.match_last = match_last;
80495b7b453SJohn Marino if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
80595b7b453SJohn Marino {
80695b7b453SJohn Marino re_dfastate_t *pstate = mctx.state_log[match_last];
80795b7b453SJohn Marino mctx.last_node = check_halt_state_context (&mctx, pstate,
80895b7b453SJohn Marino match_last);
80995b7b453SJohn Marino }
81095b7b453SJohn Marino if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
81195b7b453SJohn Marino || dfa->nbackref)
81295b7b453SJohn Marino {
81395b7b453SJohn Marino err = prune_impossible_nodes (&mctx);
81495b7b453SJohn Marino if (err == REG_NOERROR)
81595b7b453SJohn Marino break;
816*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOMATCH))
81795b7b453SJohn Marino goto free_return;
818*09d4459fSDaniel Fojt match_last = -1;
81995b7b453SJohn Marino }
82095b7b453SJohn Marino else
82195b7b453SJohn Marino break; /* We found a match. */
82295b7b453SJohn Marino }
82395b7b453SJohn Marino }
82495b7b453SJohn Marino
82595b7b453SJohn Marino match_ctx_clean (&mctx);
82695b7b453SJohn Marino }
82795b7b453SJohn Marino
828*09d4459fSDaniel Fojt DEBUG_ASSERT (match_last != -1);
829*09d4459fSDaniel Fojt DEBUG_ASSERT (err == REG_NOERROR);
83095b7b453SJohn Marino
83195b7b453SJohn Marino /* Set pmatch[] if we need. */
83295b7b453SJohn Marino if (nmatch > 0)
83395b7b453SJohn Marino {
83495b7b453SJohn Marino Idx reg_idx;
83595b7b453SJohn Marino
83695b7b453SJohn Marino /* Initialize registers. */
83795b7b453SJohn Marino for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
83895b7b453SJohn Marino pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
83995b7b453SJohn Marino
84095b7b453SJohn Marino /* Set the points where matching start/end. */
84195b7b453SJohn Marino pmatch[0].rm_so = 0;
84295b7b453SJohn Marino pmatch[0].rm_eo = mctx.match_last;
84395b7b453SJohn Marino /* FIXME: This function should fail if mctx.match_last exceeds
84495b7b453SJohn Marino the maximum possible regoff_t value. We need a new error
84595b7b453SJohn Marino code REG_OVERFLOW. */
84695b7b453SJohn Marino
84795b7b453SJohn Marino if (!preg->no_sub && nmatch > 1)
84895b7b453SJohn Marino {
84995b7b453SJohn Marino err = set_regs (preg, &mctx, nmatch, pmatch,
85095b7b453SJohn Marino dfa->has_plural_match && dfa->nbackref > 0);
851*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
85295b7b453SJohn Marino goto free_return;
85395b7b453SJohn Marino }
85495b7b453SJohn Marino
855cf28ed85SJohn Marino /* At last, add the offset to each register, since we slid
85695b7b453SJohn Marino the buffers so that we could assume that the matching starts
85795b7b453SJohn Marino from 0. */
85895b7b453SJohn Marino for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
85995b7b453SJohn Marino if (pmatch[reg_idx].rm_so != -1)
86095b7b453SJohn Marino {
86195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
862*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx.input.offsets_needed != 0))
86395b7b453SJohn Marino {
86495b7b453SJohn Marino pmatch[reg_idx].rm_so =
86595b7b453SJohn Marino (pmatch[reg_idx].rm_so == mctx.input.valid_len
86695b7b453SJohn Marino ? mctx.input.valid_raw_len
86795b7b453SJohn Marino : mctx.input.offsets[pmatch[reg_idx].rm_so]);
86895b7b453SJohn Marino pmatch[reg_idx].rm_eo =
86995b7b453SJohn Marino (pmatch[reg_idx].rm_eo == mctx.input.valid_len
87095b7b453SJohn Marino ? mctx.input.valid_raw_len
87195b7b453SJohn Marino : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
87295b7b453SJohn Marino }
87395b7b453SJohn Marino #else
874*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx.input.offsets_needed == 0);
87595b7b453SJohn Marino #endif
87695b7b453SJohn Marino pmatch[reg_idx].rm_so += match_first;
87795b7b453SJohn Marino pmatch[reg_idx].rm_eo += match_first;
87895b7b453SJohn Marino }
87995b7b453SJohn Marino for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
88095b7b453SJohn Marino {
88195b7b453SJohn Marino pmatch[nmatch + reg_idx].rm_so = -1;
88295b7b453SJohn Marino pmatch[nmatch + reg_idx].rm_eo = -1;
88395b7b453SJohn Marino }
88495b7b453SJohn Marino
88595b7b453SJohn Marino if (dfa->subexp_map)
88695b7b453SJohn Marino for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
88795b7b453SJohn Marino if (dfa->subexp_map[reg_idx] != reg_idx)
88895b7b453SJohn Marino {
88995b7b453SJohn Marino pmatch[reg_idx + 1].rm_so
89095b7b453SJohn Marino = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
89195b7b453SJohn Marino pmatch[reg_idx + 1].rm_eo
89295b7b453SJohn Marino = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
89395b7b453SJohn Marino }
89495b7b453SJohn Marino }
89595b7b453SJohn Marino
89695b7b453SJohn Marino free_return:
89795b7b453SJohn Marino re_free (mctx.state_log);
89895b7b453SJohn Marino if (dfa->nbackref)
89995b7b453SJohn Marino match_ctx_free (&mctx);
90095b7b453SJohn Marino re_string_destruct (&mctx.input);
90195b7b453SJohn Marino return err;
90295b7b453SJohn Marino }
90395b7b453SJohn Marino
90495b7b453SJohn Marino static reg_errcode_t
905cf28ed85SJohn Marino __attribute_warn_unused_result__
prune_impossible_nodes(re_match_context_t * mctx)90695b7b453SJohn Marino prune_impossible_nodes (re_match_context_t *mctx)
90795b7b453SJohn Marino {
90895b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
90995b7b453SJohn Marino Idx halt_node, match_last;
91095b7b453SJohn Marino reg_errcode_t ret;
91195b7b453SJohn Marino re_dfastate_t **sifted_states;
91295b7b453SJohn Marino re_dfastate_t **lim_states = NULL;
91395b7b453SJohn Marino re_sift_context_t sctx;
914*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx->state_log != NULL);
91595b7b453SJohn Marino match_last = mctx->match_last;
91695b7b453SJohn Marino halt_node = mctx->last_node;
91795b7b453SJohn Marino
91895b7b453SJohn Marino /* Avoid overflow. */
919*09d4459fSDaniel Fojt if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
920*09d4459fSDaniel Fojt <= match_last))
92195b7b453SJohn Marino return REG_ESPACE;
92295b7b453SJohn Marino
92395b7b453SJohn Marino sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
924*09d4459fSDaniel Fojt if (__glibc_unlikely (sifted_states == NULL))
92595b7b453SJohn Marino {
92695b7b453SJohn Marino ret = REG_ESPACE;
92795b7b453SJohn Marino goto free_return;
92895b7b453SJohn Marino }
92995b7b453SJohn Marino if (dfa->nbackref)
93095b7b453SJohn Marino {
93195b7b453SJohn Marino lim_states = re_malloc (re_dfastate_t *, match_last + 1);
932*09d4459fSDaniel Fojt if (__glibc_unlikely (lim_states == NULL))
93395b7b453SJohn Marino {
93495b7b453SJohn Marino ret = REG_ESPACE;
93595b7b453SJohn Marino goto free_return;
93695b7b453SJohn Marino }
93795b7b453SJohn Marino while (1)
93895b7b453SJohn Marino {
93995b7b453SJohn Marino memset (lim_states, '\0',
94095b7b453SJohn Marino sizeof (re_dfastate_t *) * (match_last + 1));
94195b7b453SJohn Marino sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
94295b7b453SJohn Marino match_last);
94395b7b453SJohn Marino ret = sift_states_backward (mctx, &sctx);
94495b7b453SJohn Marino re_node_set_free (&sctx.limits);
945*09d4459fSDaniel Fojt if (__glibc_unlikely (ret != REG_NOERROR))
94695b7b453SJohn Marino goto free_return;
94795b7b453SJohn Marino if (sifted_states[0] != NULL || lim_states[0] != NULL)
94895b7b453SJohn Marino break;
94995b7b453SJohn Marino do
95095b7b453SJohn Marino {
95195b7b453SJohn Marino --match_last;
952*09d4459fSDaniel Fojt if (match_last < 0)
95395b7b453SJohn Marino {
95495b7b453SJohn Marino ret = REG_NOMATCH;
95595b7b453SJohn Marino goto free_return;
95695b7b453SJohn Marino }
95795b7b453SJohn Marino } while (mctx->state_log[match_last] == NULL
95895b7b453SJohn Marino || !mctx->state_log[match_last]->halt);
95995b7b453SJohn Marino halt_node = check_halt_state_context (mctx,
96095b7b453SJohn Marino mctx->state_log[match_last],
96195b7b453SJohn Marino match_last);
96295b7b453SJohn Marino }
96395b7b453SJohn Marino ret = merge_state_array (dfa, sifted_states, lim_states,
96495b7b453SJohn Marino match_last + 1);
96595b7b453SJohn Marino re_free (lim_states);
96695b7b453SJohn Marino lim_states = NULL;
967*09d4459fSDaniel Fojt if (__glibc_unlikely (ret != REG_NOERROR))
96895b7b453SJohn Marino goto free_return;
96995b7b453SJohn Marino }
97095b7b453SJohn Marino else
97195b7b453SJohn Marino {
97295b7b453SJohn Marino sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
97395b7b453SJohn Marino ret = sift_states_backward (mctx, &sctx);
97495b7b453SJohn Marino re_node_set_free (&sctx.limits);
975*09d4459fSDaniel Fojt if (__glibc_unlikely (ret != REG_NOERROR))
97695b7b453SJohn Marino goto free_return;
97795b7b453SJohn Marino if (sifted_states[0] == NULL)
97895b7b453SJohn Marino {
97995b7b453SJohn Marino ret = REG_NOMATCH;
98095b7b453SJohn Marino goto free_return;
98195b7b453SJohn Marino }
98295b7b453SJohn Marino }
98395b7b453SJohn Marino re_free (mctx->state_log);
98495b7b453SJohn Marino mctx->state_log = sifted_states;
98595b7b453SJohn Marino sifted_states = NULL;
98695b7b453SJohn Marino mctx->last_node = halt_node;
98795b7b453SJohn Marino mctx->match_last = match_last;
98895b7b453SJohn Marino ret = REG_NOERROR;
98995b7b453SJohn Marino free_return:
99095b7b453SJohn Marino re_free (sifted_states);
99195b7b453SJohn Marino re_free (lim_states);
99295b7b453SJohn Marino return ret;
99395b7b453SJohn Marino }
99495b7b453SJohn Marino
99595b7b453SJohn Marino /* Acquire an initial state and return it.
99695b7b453SJohn Marino We must select appropriate initial state depending on the context,
99795b7b453SJohn Marino since initial states may have constraints like "\<", "^", etc.. */
99895b7b453SJohn Marino
99995b7b453SJohn Marino static inline re_dfastate_t *
1000*09d4459fSDaniel Fojt __attribute__ ((always_inline))
acquire_init_state_context(reg_errcode_t * err,const re_match_context_t * mctx,Idx idx)100195b7b453SJohn Marino acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
100295b7b453SJohn Marino Idx idx)
100395b7b453SJohn Marino {
100495b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
100595b7b453SJohn Marino if (dfa->init_state->has_constraint)
100695b7b453SJohn Marino {
100795b7b453SJohn Marino unsigned int context;
100895b7b453SJohn Marino context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
100995b7b453SJohn Marino if (IS_WORD_CONTEXT (context))
101095b7b453SJohn Marino return dfa->init_state_word;
101195b7b453SJohn Marino else if (IS_ORDINARY_CONTEXT (context))
101295b7b453SJohn Marino return dfa->init_state;
101395b7b453SJohn Marino else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
101495b7b453SJohn Marino return dfa->init_state_begbuf;
101595b7b453SJohn Marino else if (IS_NEWLINE_CONTEXT (context))
101695b7b453SJohn Marino return dfa->init_state_nl;
101795b7b453SJohn Marino else if (IS_BEGBUF_CONTEXT (context))
101895b7b453SJohn Marino {
101995b7b453SJohn Marino /* It is relatively rare case, then calculate on demand. */
102095b7b453SJohn Marino return re_acquire_state_context (err, dfa,
102195b7b453SJohn Marino dfa->init_state->entrance_nodes,
102295b7b453SJohn Marino context);
102395b7b453SJohn Marino }
102495b7b453SJohn Marino else
102595b7b453SJohn Marino /* Must not happen? */
102695b7b453SJohn Marino return dfa->init_state;
102795b7b453SJohn Marino }
102895b7b453SJohn Marino else
102995b7b453SJohn Marino return dfa->init_state;
103095b7b453SJohn Marino }
103195b7b453SJohn Marino
103295b7b453SJohn Marino /* Check whether the regular expression match input string INPUT or not,
1033*09d4459fSDaniel Fojt and return the index where the matching end. Return -1 if
1034*09d4459fSDaniel Fojt there is no match, and return -2 in case of an error.
103595b7b453SJohn Marino FL_LONGEST_MATCH means we want the POSIX longest matching.
103695b7b453SJohn Marino If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
103795b7b453SJohn Marino next place where we may want to try matching.
1038cf28ed85SJohn Marino Note that the matcher assumes that the matching starts from the current
103995b7b453SJohn Marino index of the buffer. */
104095b7b453SJohn Marino
104195b7b453SJohn Marino static Idx
1042*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_matching(re_match_context_t * mctx,bool fl_longest_match,Idx * p_match_first)104395b7b453SJohn Marino check_matching (re_match_context_t *mctx, bool fl_longest_match,
104495b7b453SJohn Marino Idx *p_match_first)
104595b7b453SJohn Marino {
104695b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
104795b7b453SJohn Marino reg_errcode_t err;
104895b7b453SJohn Marino Idx match = 0;
1049*09d4459fSDaniel Fojt Idx match_last = -1;
105095b7b453SJohn Marino Idx cur_str_idx = re_string_cur_idx (&mctx->input);
105195b7b453SJohn Marino re_dfastate_t *cur_state;
105295b7b453SJohn Marino bool at_init_state = p_match_first != NULL;
105395b7b453SJohn Marino Idx next_start_idx = cur_str_idx;
105495b7b453SJohn Marino
105595b7b453SJohn Marino err = REG_NOERROR;
105695b7b453SJohn Marino cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
105795b7b453SJohn Marino /* An initial state must not be NULL (invalid). */
1058*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_state == NULL))
105995b7b453SJohn Marino {
1060*09d4459fSDaniel Fojt DEBUG_ASSERT (err == REG_ESPACE);
1061*09d4459fSDaniel Fojt return -2;
106295b7b453SJohn Marino }
106395b7b453SJohn Marino
106495b7b453SJohn Marino if (mctx->state_log != NULL)
106595b7b453SJohn Marino {
106695b7b453SJohn Marino mctx->state_log[cur_str_idx] = cur_state;
106795b7b453SJohn Marino
106895b7b453SJohn Marino /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
106995b7b453SJohn Marino later. E.g. Processing back references. */
1070*09d4459fSDaniel Fojt if (__glibc_unlikely (dfa->nbackref))
107195b7b453SJohn Marino {
107295b7b453SJohn Marino at_init_state = false;
107395b7b453SJohn Marino err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1074*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
107595b7b453SJohn Marino return err;
107695b7b453SJohn Marino
107795b7b453SJohn Marino if (cur_state->has_backref)
107895b7b453SJohn Marino {
107995b7b453SJohn Marino err = transit_state_bkref (mctx, &cur_state->nodes);
1080*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
108195b7b453SJohn Marino return err;
108295b7b453SJohn Marino }
108395b7b453SJohn Marino }
108495b7b453SJohn Marino }
108595b7b453SJohn Marino
108695b7b453SJohn Marino /* If the RE accepts NULL string. */
1087*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_state->halt))
108895b7b453SJohn Marino {
108995b7b453SJohn Marino if (!cur_state->has_constraint
109095b7b453SJohn Marino || check_halt_state_context (mctx, cur_state, cur_str_idx))
109195b7b453SJohn Marino {
109295b7b453SJohn Marino if (!fl_longest_match)
109395b7b453SJohn Marino return cur_str_idx;
109495b7b453SJohn Marino else
109595b7b453SJohn Marino {
109695b7b453SJohn Marino match_last = cur_str_idx;
109795b7b453SJohn Marino match = 1;
109895b7b453SJohn Marino }
109995b7b453SJohn Marino }
110095b7b453SJohn Marino }
110195b7b453SJohn Marino
110295b7b453SJohn Marino while (!re_string_eoi (&mctx->input))
110395b7b453SJohn Marino {
110495b7b453SJohn Marino re_dfastate_t *old_state = cur_state;
110595b7b453SJohn Marino Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
110695b7b453SJohn Marino
1107*09d4459fSDaniel Fojt if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1108cf28ed85SJohn Marino && mctx->input.bufs_len < mctx->input.len)
1109*09d4459fSDaniel Fojt || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
111095b7b453SJohn Marino && mctx->input.valid_len < mctx->input.len))
111195b7b453SJohn Marino {
1112680a9cb8SJohn Marino err = extend_buffers (mctx, next_char_idx + 1);
1113*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
111495b7b453SJohn Marino {
1115*09d4459fSDaniel Fojt DEBUG_ASSERT (err == REG_ESPACE);
1116*09d4459fSDaniel Fojt return -2;
111795b7b453SJohn Marino }
111895b7b453SJohn Marino }
111995b7b453SJohn Marino
112095b7b453SJohn Marino cur_state = transit_state (&err, mctx, cur_state);
112195b7b453SJohn Marino if (mctx->state_log != NULL)
112295b7b453SJohn Marino cur_state = merge_state_with_log (&err, mctx, cur_state);
112395b7b453SJohn Marino
112495b7b453SJohn Marino if (cur_state == NULL)
112595b7b453SJohn Marino {
112695b7b453SJohn Marino /* Reached the invalid state or an error. Try to recover a valid
112795b7b453SJohn Marino state using the state log, if available and if we have not
112895b7b453SJohn Marino already found a valid (even if not the longest) match. */
1129*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
1130*09d4459fSDaniel Fojt return -2;
113195b7b453SJohn Marino
113295b7b453SJohn Marino if (mctx->state_log == NULL
113395b7b453SJohn Marino || (match && !fl_longest_match)
113495b7b453SJohn Marino || (cur_state = find_recover_state (&err, mctx)) == NULL)
113595b7b453SJohn Marino break;
113695b7b453SJohn Marino }
113795b7b453SJohn Marino
1138*09d4459fSDaniel Fojt if (__glibc_unlikely (at_init_state))
113995b7b453SJohn Marino {
114095b7b453SJohn Marino if (old_state == cur_state)
114195b7b453SJohn Marino next_start_idx = next_char_idx;
114295b7b453SJohn Marino else
114395b7b453SJohn Marino at_init_state = false;
114495b7b453SJohn Marino }
114595b7b453SJohn Marino
114695b7b453SJohn Marino if (cur_state->halt)
114795b7b453SJohn Marino {
114895b7b453SJohn Marino /* Reached a halt state.
114995b7b453SJohn Marino Check the halt state can satisfy the current context. */
115095b7b453SJohn Marino if (!cur_state->has_constraint
115195b7b453SJohn Marino || check_halt_state_context (mctx, cur_state,
115295b7b453SJohn Marino re_string_cur_idx (&mctx->input)))
115395b7b453SJohn Marino {
115495b7b453SJohn Marino /* We found an appropriate halt state. */
115595b7b453SJohn Marino match_last = re_string_cur_idx (&mctx->input);
115695b7b453SJohn Marino match = 1;
115795b7b453SJohn Marino
115895b7b453SJohn Marino /* We found a match, do not modify match_first below. */
115995b7b453SJohn Marino p_match_first = NULL;
116095b7b453SJohn Marino if (!fl_longest_match)
116195b7b453SJohn Marino break;
116295b7b453SJohn Marino }
116395b7b453SJohn Marino }
116495b7b453SJohn Marino }
116595b7b453SJohn Marino
116695b7b453SJohn Marino if (p_match_first)
116795b7b453SJohn Marino *p_match_first += next_start_idx;
116895b7b453SJohn Marino
116995b7b453SJohn Marino return match_last;
117095b7b453SJohn Marino }
117195b7b453SJohn Marino
117295b7b453SJohn Marino /* Check NODE match the current context. */
117395b7b453SJohn Marino
117495b7b453SJohn Marino static bool
check_halt_node_context(const re_dfa_t * dfa,Idx node,unsigned int context)117595b7b453SJohn Marino check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
117695b7b453SJohn Marino {
117795b7b453SJohn Marino re_token_type_t type = dfa->nodes[node].type;
117895b7b453SJohn Marino unsigned int constraint = dfa->nodes[node].constraint;
117995b7b453SJohn Marino if (type != END_OF_RE)
118095b7b453SJohn Marino return false;
118195b7b453SJohn Marino if (!constraint)
118295b7b453SJohn Marino return true;
118395b7b453SJohn Marino if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
118495b7b453SJohn Marino return false;
118595b7b453SJohn Marino return true;
118695b7b453SJohn Marino }
118795b7b453SJohn Marino
118895b7b453SJohn Marino /* Check the halt state STATE match the current context.
118995b7b453SJohn Marino Return 0 if not match, if the node, STATE has, is a halt node and
119095b7b453SJohn Marino match the context, return the node. */
119195b7b453SJohn Marino
119295b7b453SJohn Marino static Idx
check_halt_state_context(const re_match_context_t * mctx,const re_dfastate_t * state,Idx idx)119395b7b453SJohn Marino check_halt_state_context (const re_match_context_t *mctx,
119495b7b453SJohn Marino const re_dfastate_t *state, Idx idx)
119595b7b453SJohn Marino {
119695b7b453SJohn Marino Idx i;
119795b7b453SJohn Marino unsigned int context;
1198*09d4459fSDaniel Fojt DEBUG_ASSERT (state->halt);
119995b7b453SJohn Marino context = re_string_context_at (&mctx->input, idx, mctx->eflags);
120095b7b453SJohn Marino for (i = 0; i < state->nodes.nelem; ++i)
120195b7b453SJohn Marino if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
120295b7b453SJohn Marino return state->nodes.elems[i];
120395b7b453SJohn Marino return 0;
120495b7b453SJohn Marino }
120595b7b453SJohn Marino
120695b7b453SJohn Marino /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
120795b7b453SJohn Marino corresponding to the DFA).
120895b7b453SJohn Marino Return the destination node, and update EPS_VIA_NODES;
1209*09d4459fSDaniel Fojt return -1 in case of errors. */
121095b7b453SJohn Marino
121195b7b453SJohn Marino static Idx
proceed_next_node(const re_match_context_t * mctx,Idx nregs,regmatch_t * regs,Idx * pidx,Idx node,re_node_set * eps_via_nodes,struct re_fail_stack_t * fs)121295b7b453SJohn Marino proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
121395b7b453SJohn Marino Idx *pidx, Idx node, re_node_set *eps_via_nodes,
121495b7b453SJohn Marino struct re_fail_stack_t *fs)
121595b7b453SJohn Marino {
121695b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
121795b7b453SJohn Marino Idx i;
121895b7b453SJohn Marino bool ok;
121995b7b453SJohn Marino if (IS_EPSILON_NODE (dfa->nodes[node].type))
122095b7b453SJohn Marino {
122195b7b453SJohn Marino re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
122295b7b453SJohn Marino re_node_set *edests = &dfa->edests[node];
122395b7b453SJohn Marino Idx dest_node;
122495b7b453SJohn Marino ok = re_node_set_insert (eps_via_nodes, node);
1225*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
1226*09d4459fSDaniel Fojt return -2;
1227*09d4459fSDaniel Fojt /* Pick up a valid destination, or return -1 if none
122895b7b453SJohn Marino is found. */
1229*09d4459fSDaniel Fojt for (dest_node = -1, i = 0; i < edests->nelem; ++i)
123095b7b453SJohn Marino {
123195b7b453SJohn Marino Idx candidate = edests->elems[i];
123295b7b453SJohn Marino if (!re_node_set_contains (cur_nodes, candidate))
123395b7b453SJohn Marino continue;
1234*09d4459fSDaniel Fojt if (dest_node == -1)
123595b7b453SJohn Marino dest_node = candidate;
123695b7b453SJohn Marino
123795b7b453SJohn Marino else
123895b7b453SJohn Marino {
123995b7b453SJohn Marino /* In order to avoid infinite loop like "(a*)*", return the second
124095b7b453SJohn Marino epsilon-transition if the first was already considered. */
124195b7b453SJohn Marino if (re_node_set_contains (eps_via_nodes, dest_node))
124295b7b453SJohn Marino return candidate;
124395b7b453SJohn Marino
124495b7b453SJohn Marino /* Otherwise, push the second epsilon-transition on the fail stack. */
124595b7b453SJohn Marino else if (fs != NULL
124695b7b453SJohn Marino && push_fail_stack (fs, *pidx, candidate, nregs, regs,
124795b7b453SJohn Marino eps_via_nodes))
1248*09d4459fSDaniel Fojt return -2;
124995b7b453SJohn Marino
125095b7b453SJohn Marino /* We know we are going to exit. */
125195b7b453SJohn Marino break;
125295b7b453SJohn Marino }
125395b7b453SJohn Marino }
125495b7b453SJohn Marino return dest_node;
125595b7b453SJohn Marino }
125695b7b453SJohn Marino else
125795b7b453SJohn Marino {
125895b7b453SJohn Marino Idx naccepted = 0;
125995b7b453SJohn Marino re_token_type_t type = dfa->nodes[node].type;
126095b7b453SJohn Marino
126195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
126295b7b453SJohn Marino if (dfa->nodes[node].accept_mb)
126395b7b453SJohn Marino naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
126495b7b453SJohn Marino else
126595b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
126695b7b453SJohn Marino if (type == OP_BACK_REF)
126795b7b453SJohn Marino {
126895b7b453SJohn Marino Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1269*09d4459fSDaniel Fojt if (subexp_idx < nregs)
127095b7b453SJohn Marino naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
127195b7b453SJohn Marino if (fs != NULL)
127295b7b453SJohn Marino {
1273*09d4459fSDaniel Fojt if (subexp_idx >= nregs
1274*09d4459fSDaniel Fojt || regs[subexp_idx].rm_so == -1
1275*09d4459fSDaniel Fojt || regs[subexp_idx].rm_eo == -1)
1276*09d4459fSDaniel Fojt return -1;
127795b7b453SJohn Marino else if (naccepted)
127895b7b453SJohn Marino {
127995b7b453SJohn Marino char *buf = (char *) re_string_get_buffer (&mctx->input);
1280*09d4459fSDaniel Fojt if (mctx->input.valid_len - *pidx < naccepted
1281*09d4459fSDaniel Fojt || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1282*09d4459fSDaniel Fojt naccepted)
1283*09d4459fSDaniel Fojt != 0))
1284*09d4459fSDaniel Fojt return -1;
128595b7b453SJohn Marino }
128695b7b453SJohn Marino }
128795b7b453SJohn Marino
128895b7b453SJohn Marino if (naccepted == 0)
128995b7b453SJohn Marino {
129095b7b453SJohn Marino Idx dest_node;
129195b7b453SJohn Marino ok = re_node_set_insert (eps_via_nodes, node);
1292*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
1293*09d4459fSDaniel Fojt return -2;
129495b7b453SJohn Marino dest_node = dfa->edests[node].elems[0];
129595b7b453SJohn Marino if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
129695b7b453SJohn Marino dest_node))
129795b7b453SJohn Marino return dest_node;
129895b7b453SJohn Marino }
129995b7b453SJohn Marino }
130095b7b453SJohn Marino
130195b7b453SJohn Marino if (naccepted != 0
130295b7b453SJohn Marino || check_node_accept (mctx, dfa->nodes + node, *pidx))
130395b7b453SJohn Marino {
130495b7b453SJohn Marino Idx dest_node = dfa->nexts[node];
130595b7b453SJohn Marino *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
130695b7b453SJohn Marino if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
130795b7b453SJohn Marino || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
130895b7b453SJohn Marino dest_node)))
1309*09d4459fSDaniel Fojt return -1;
131095b7b453SJohn Marino re_node_set_empty (eps_via_nodes);
131195b7b453SJohn Marino return dest_node;
131295b7b453SJohn Marino }
131395b7b453SJohn Marino }
1314*09d4459fSDaniel Fojt return -1;
131595b7b453SJohn Marino }
131695b7b453SJohn Marino
131795b7b453SJohn Marino static reg_errcode_t
1318*09d4459fSDaniel Fojt __attribute_warn_unused_result__
push_fail_stack(struct re_fail_stack_t * fs,Idx str_idx,Idx dest_node,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)131995b7b453SJohn Marino push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
132095b7b453SJohn Marino Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
132195b7b453SJohn Marino {
132295b7b453SJohn Marino reg_errcode_t err;
132395b7b453SJohn Marino Idx num = fs->num++;
132495b7b453SJohn Marino if (fs->num == fs->alloc)
132595b7b453SJohn Marino {
132695b7b453SJohn Marino struct re_fail_stack_ent_t *new_array;
1327*09d4459fSDaniel Fojt new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1328*09d4459fSDaniel Fojt fs->alloc * 2);
132995b7b453SJohn Marino if (new_array == NULL)
133095b7b453SJohn Marino return REG_ESPACE;
133195b7b453SJohn Marino fs->alloc *= 2;
133295b7b453SJohn Marino fs->stack = new_array;
133395b7b453SJohn Marino }
133495b7b453SJohn Marino fs->stack[num].idx = str_idx;
133595b7b453SJohn Marino fs->stack[num].node = dest_node;
133695b7b453SJohn Marino fs->stack[num].regs = re_malloc (regmatch_t, nregs);
133795b7b453SJohn Marino if (fs->stack[num].regs == NULL)
133895b7b453SJohn Marino return REG_ESPACE;
133995b7b453SJohn Marino memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
134095b7b453SJohn Marino err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
134195b7b453SJohn Marino return err;
134295b7b453SJohn Marino }
134395b7b453SJohn Marino
134495b7b453SJohn Marino static Idx
pop_fail_stack(struct re_fail_stack_t * fs,Idx * pidx,Idx nregs,regmatch_t * regs,re_node_set * eps_via_nodes)134595b7b453SJohn Marino pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
134695b7b453SJohn Marino regmatch_t *regs, re_node_set *eps_via_nodes)
134795b7b453SJohn Marino {
134895b7b453SJohn Marino Idx num = --fs->num;
1349*09d4459fSDaniel Fojt DEBUG_ASSERT (num >= 0);
135095b7b453SJohn Marino *pidx = fs->stack[num].idx;
135195b7b453SJohn Marino memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
135295b7b453SJohn Marino re_node_set_free (eps_via_nodes);
135395b7b453SJohn Marino re_free (fs->stack[num].regs);
135495b7b453SJohn Marino *eps_via_nodes = fs->stack[num].eps_via_nodes;
135595b7b453SJohn Marino return fs->stack[num].node;
135695b7b453SJohn Marino }
135795b7b453SJohn Marino
135895b7b453SJohn Marino /* Set the positions where the subexpressions are starts/ends to registers
135995b7b453SJohn Marino PMATCH.
136095b7b453SJohn Marino Note: We assume that pmatch[0] is already set, and
136195b7b453SJohn Marino pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
136295b7b453SJohn Marino
136395b7b453SJohn Marino static reg_errcode_t
1364*09d4459fSDaniel Fojt __attribute_warn_unused_result__
set_regs(const regex_t * preg,const re_match_context_t * mctx,size_t nmatch,regmatch_t * pmatch,bool fl_backtrack)136595b7b453SJohn Marino set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
136695b7b453SJohn Marino regmatch_t *pmatch, bool fl_backtrack)
136795b7b453SJohn Marino {
1368cf28ed85SJohn Marino const re_dfa_t *dfa = preg->buffer;
136995b7b453SJohn Marino Idx idx, cur_node;
137095b7b453SJohn Marino re_node_set eps_via_nodes;
137195b7b453SJohn Marino struct re_fail_stack_t *fs;
137295b7b453SJohn Marino struct re_fail_stack_t fs_body = { 0, 2, NULL };
137395b7b453SJohn Marino regmatch_t *prev_idx_match;
137495b7b453SJohn Marino bool prev_idx_match_malloced = false;
137595b7b453SJohn Marino
1376*09d4459fSDaniel Fojt DEBUG_ASSERT (nmatch > 1);
1377*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx->state_log != NULL);
137895b7b453SJohn Marino if (fl_backtrack)
137995b7b453SJohn Marino {
138095b7b453SJohn Marino fs = &fs_body;
138195b7b453SJohn Marino fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
138295b7b453SJohn Marino if (fs->stack == NULL)
138395b7b453SJohn Marino return REG_ESPACE;
138495b7b453SJohn Marino }
138595b7b453SJohn Marino else
138695b7b453SJohn Marino fs = NULL;
138795b7b453SJohn Marino
138895b7b453SJohn Marino cur_node = dfa->init_node;
138995b7b453SJohn Marino re_node_set_init_empty (&eps_via_nodes);
139095b7b453SJohn Marino
139195b7b453SJohn Marino if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
139295b7b453SJohn Marino prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
139395b7b453SJohn Marino else
139495b7b453SJohn Marino {
139595b7b453SJohn Marino prev_idx_match = re_malloc (regmatch_t, nmatch);
139695b7b453SJohn Marino if (prev_idx_match == NULL)
139795b7b453SJohn Marino {
139895b7b453SJohn Marino free_fail_stack_return (fs);
139995b7b453SJohn Marino return REG_ESPACE;
140095b7b453SJohn Marino }
140195b7b453SJohn Marino prev_idx_match_malloced = true;
140295b7b453SJohn Marino }
140395b7b453SJohn Marino memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
140495b7b453SJohn Marino
140595b7b453SJohn Marino for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
140695b7b453SJohn Marino {
140795b7b453SJohn Marino update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
140895b7b453SJohn Marino
140995b7b453SJohn Marino if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
141095b7b453SJohn Marino {
141195b7b453SJohn Marino Idx reg_idx;
141295b7b453SJohn Marino if (fs)
141395b7b453SJohn Marino {
141495b7b453SJohn Marino for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
141595b7b453SJohn Marino if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
141695b7b453SJohn Marino break;
141795b7b453SJohn Marino if (reg_idx == nmatch)
141895b7b453SJohn Marino {
141995b7b453SJohn Marino re_node_set_free (&eps_via_nodes);
142095b7b453SJohn Marino if (prev_idx_match_malloced)
142195b7b453SJohn Marino re_free (prev_idx_match);
142295b7b453SJohn Marino return free_fail_stack_return (fs);
142395b7b453SJohn Marino }
142495b7b453SJohn Marino cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
142595b7b453SJohn Marino &eps_via_nodes);
142695b7b453SJohn Marino }
142795b7b453SJohn Marino else
142895b7b453SJohn Marino {
142995b7b453SJohn Marino re_node_set_free (&eps_via_nodes);
143095b7b453SJohn Marino if (prev_idx_match_malloced)
143195b7b453SJohn Marino re_free (prev_idx_match);
143295b7b453SJohn Marino return REG_NOERROR;
143395b7b453SJohn Marino }
143495b7b453SJohn Marino }
143595b7b453SJohn Marino
143695b7b453SJohn Marino /* Proceed to next node. */
143795b7b453SJohn Marino cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
143895b7b453SJohn Marino &eps_via_nodes, fs);
143995b7b453SJohn Marino
1440*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_node < 0))
144195b7b453SJohn Marino {
1442*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_node == -2))
144395b7b453SJohn Marino {
144495b7b453SJohn Marino re_node_set_free (&eps_via_nodes);
144595b7b453SJohn Marino if (prev_idx_match_malloced)
144695b7b453SJohn Marino re_free (prev_idx_match);
144795b7b453SJohn Marino free_fail_stack_return (fs);
144895b7b453SJohn Marino return REG_ESPACE;
144995b7b453SJohn Marino }
145095b7b453SJohn Marino if (fs)
145195b7b453SJohn Marino cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
145295b7b453SJohn Marino &eps_via_nodes);
145395b7b453SJohn Marino else
145495b7b453SJohn Marino {
145595b7b453SJohn Marino re_node_set_free (&eps_via_nodes);
145695b7b453SJohn Marino if (prev_idx_match_malloced)
145795b7b453SJohn Marino re_free (prev_idx_match);
145895b7b453SJohn Marino return REG_NOMATCH;
145995b7b453SJohn Marino }
146095b7b453SJohn Marino }
146195b7b453SJohn Marino }
146295b7b453SJohn Marino re_node_set_free (&eps_via_nodes);
146395b7b453SJohn Marino if (prev_idx_match_malloced)
146495b7b453SJohn Marino re_free (prev_idx_match);
146595b7b453SJohn Marino return free_fail_stack_return (fs);
146695b7b453SJohn Marino }
146795b7b453SJohn Marino
146895b7b453SJohn Marino static reg_errcode_t
free_fail_stack_return(struct re_fail_stack_t * fs)146995b7b453SJohn Marino free_fail_stack_return (struct re_fail_stack_t *fs)
147095b7b453SJohn Marino {
147195b7b453SJohn Marino if (fs)
147295b7b453SJohn Marino {
147395b7b453SJohn Marino Idx fs_idx;
147495b7b453SJohn Marino for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
147595b7b453SJohn Marino {
147695b7b453SJohn Marino re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
147795b7b453SJohn Marino re_free (fs->stack[fs_idx].regs);
147895b7b453SJohn Marino }
147995b7b453SJohn Marino re_free (fs->stack);
148095b7b453SJohn Marino }
148195b7b453SJohn Marino return REG_NOERROR;
148295b7b453SJohn Marino }
148395b7b453SJohn Marino
148495b7b453SJohn Marino static void
update_regs(const re_dfa_t * dfa,regmatch_t * pmatch,regmatch_t * prev_idx_match,Idx cur_node,Idx cur_idx,Idx nmatch)148595b7b453SJohn Marino update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
148695b7b453SJohn Marino regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
148795b7b453SJohn Marino {
148895b7b453SJohn Marino int type = dfa->nodes[cur_node].type;
148995b7b453SJohn Marino if (type == OP_OPEN_SUBEXP)
149095b7b453SJohn Marino {
149195b7b453SJohn Marino Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
149295b7b453SJohn Marino
149395b7b453SJohn Marino /* We are at the first node of this sub expression. */
149495b7b453SJohn Marino if (reg_num < nmatch)
149595b7b453SJohn Marino {
149695b7b453SJohn Marino pmatch[reg_num].rm_so = cur_idx;
149795b7b453SJohn Marino pmatch[reg_num].rm_eo = -1;
149895b7b453SJohn Marino }
149995b7b453SJohn Marino }
150095b7b453SJohn Marino else if (type == OP_CLOSE_SUBEXP)
150195b7b453SJohn Marino {
150295b7b453SJohn Marino Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
150395b7b453SJohn Marino if (reg_num < nmatch)
150495b7b453SJohn Marino {
150595b7b453SJohn Marino /* We are at the last node of this sub expression. */
150695b7b453SJohn Marino if (pmatch[reg_num].rm_so < cur_idx)
150795b7b453SJohn Marino {
150895b7b453SJohn Marino pmatch[reg_num].rm_eo = cur_idx;
150995b7b453SJohn Marino /* This is a non-empty match or we are not inside an optional
151095b7b453SJohn Marino subexpression. Accept this right away. */
151195b7b453SJohn Marino memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
151295b7b453SJohn Marino }
151395b7b453SJohn Marino else
151495b7b453SJohn Marino {
151595b7b453SJohn Marino if (dfa->nodes[cur_node].opt_subexp
151695b7b453SJohn Marino && prev_idx_match[reg_num].rm_so != -1)
151795b7b453SJohn Marino /* We transited through an empty match for an optional
151895b7b453SJohn Marino subexpression, like (a?)*, and this is not the subexp's
151995b7b453SJohn Marino first match. Copy back the old content of the registers
152095b7b453SJohn Marino so that matches of an inner subexpression are undone as
152195b7b453SJohn Marino well, like in ((a?))*. */
152295b7b453SJohn Marino memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
152395b7b453SJohn Marino else
152495b7b453SJohn Marino /* We completed a subexpression, but it may be part of
152595b7b453SJohn Marino an optional one, so do not update PREV_IDX_MATCH. */
152695b7b453SJohn Marino pmatch[reg_num].rm_eo = cur_idx;
152795b7b453SJohn Marino }
152895b7b453SJohn Marino }
152995b7b453SJohn Marino }
153095b7b453SJohn Marino }
153195b7b453SJohn Marino
153295b7b453SJohn Marino /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
153395b7b453SJohn Marino and sift the nodes in each states according to the following rules.
153495b7b453SJohn Marino Updated state_log will be wrote to STATE_LOG.
153595b7b453SJohn Marino
1536cf28ed85SJohn Marino Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
153795b7b453SJohn Marino 1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1538cf28ed85SJohn Marino If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1539cf28ed85SJohn Marino the LAST_NODE, we throw away the node 'a'.
1540cf28ed85SJohn Marino 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1541cf28ed85SJohn Marino string 's' and transit to 'b':
154295b7b453SJohn Marino i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1543cf28ed85SJohn Marino away the node 'a'.
154495b7b453SJohn Marino ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1545cf28ed85SJohn Marino thrown away, we throw away the node 'a'.
154695b7b453SJohn Marino 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
154795b7b453SJohn Marino i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1548cf28ed85SJohn Marino node 'a'.
154995b7b453SJohn Marino ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1550cf28ed85SJohn Marino we throw away the node 'a'. */
155195b7b453SJohn Marino
155295b7b453SJohn Marino #define STATE_NODE_CONTAINS(state,node) \
155395b7b453SJohn Marino ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
155495b7b453SJohn Marino
155595b7b453SJohn Marino static reg_errcode_t
sift_states_backward(const re_match_context_t * mctx,re_sift_context_t * sctx)155695b7b453SJohn Marino sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
155795b7b453SJohn Marino {
155895b7b453SJohn Marino reg_errcode_t err;
155995b7b453SJohn Marino int null_cnt = 0;
156095b7b453SJohn Marino Idx str_idx = sctx->last_str_idx;
156195b7b453SJohn Marino re_node_set cur_dest;
156295b7b453SJohn Marino
1563*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
156495b7b453SJohn Marino
156595b7b453SJohn Marino /* Build sifted state_log[str_idx]. It has the nodes which can epsilon
156695b7b453SJohn Marino transit to the last_node and the last_node itself. */
156795b7b453SJohn Marino err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1568*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
156995b7b453SJohn Marino return err;
157095b7b453SJohn Marino err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1571*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
157295b7b453SJohn Marino goto free_return;
157395b7b453SJohn Marino
157495b7b453SJohn Marino /* Then check each states in the state_log. */
157595b7b453SJohn Marino while (str_idx > 0)
157695b7b453SJohn Marino {
157795b7b453SJohn Marino /* Update counters. */
157895b7b453SJohn Marino null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
157995b7b453SJohn Marino if (null_cnt > mctx->max_mb_elem_len)
158095b7b453SJohn Marino {
158195b7b453SJohn Marino memset (sctx->sifted_states, '\0',
158295b7b453SJohn Marino sizeof (re_dfastate_t *) * str_idx);
158395b7b453SJohn Marino re_node_set_free (&cur_dest);
158495b7b453SJohn Marino return REG_NOERROR;
158595b7b453SJohn Marino }
158695b7b453SJohn Marino re_node_set_empty (&cur_dest);
158795b7b453SJohn Marino --str_idx;
158895b7b453SJohn Marino
158995b7b453SJohn Marino if (mctx->state_log[str_idx])
159095b7b453SJohn Marino {
159195b7b453SJohn Marino err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1592*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
159395b7b453SJohn Marino goto free_return;
159495b7b453SJohn Marino }
159595b7b453SJohn Marino
159695b7b453SJohn Marino /* Add all the nodes which satisfy the following conditions:
159795b7b453SJohn Marino - It can epsilon transit to a node in CUR_DEST.
159895b7b453SJohn Marino - It is in CUR_SRC.
159995b7b453SJohn Marino And update state_log. */
160095b7b453SJohn Marino err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
160295b7b453SJohn Marino goto free_return;
160395b7b453SJohn Marino }
160495b7b453SJohn Marino err = REG_NOERROR;
160595b7b453SJohn Marino free_return:
160695b7b453SJohn Marino re_node_set_free (&cur_dest);
160795b7b453SJohn Marino return err;
160895b7b453SJohn Marino }
160995b7b453SJohn Marino
161095b7b453SJohn Marino static reg_errcode_t
1611*09d4459fSDaniel Fojt __attribute_warn_unused_result__
build_sifted_states(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * cur_dest)161295b7b453SJohn Marino build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
161395b7b453SJohn Marino Idx str_idx, re_node_set *cur_dest)
161495b7b453SJohn Marino {
161595b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
161695b7b453SJohn Marino const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
161795b7b453SJohn Marino Idx i;
161895b7b453SJohn Marino
161995b7b453SJohn Marino /* Then build the next sifted state.
1620cf28ed85SJohn Marino We build the next sifted state on 'cur_dest', and update
1621cf28ed85SJohn Marino 'sifted_states[str_idx]' with 'cur_dest'.
162295b7b453SJohn Marino Note:
1623cf28ed85SJohn Marino 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1624cf28ed85SJohn Marino 'cur_src' points the node_set of the old 'state_log[str_idx]'
162595b7b453SJohn Marino (with the epsilon nodes pre-filtered out). */
162695b7b453SJohn Marino for (i = 0; i < cur_src->nelem; i++)
162795b7b453SJohn Marino {
162895b7b453SJohn Marino Idx prev_node = cur_src->elems[i];
162995b7b453SJohn Marino int naccepted = 0;
163095b7b453SJohn Marino bool ok;
1631*09d4459fSDaniel Fojt DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
163295b7b453SJohn Marino
163395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
1634cf28ed85SJohn Marino /* If the node may accept "multi byte". */
163595b7b453SJohn Marino if (dfa->nodes[prev_node].accept_mb)
163695b7b453SJohn Marino naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
163795b7b453SJohn Marino str_idx, sctx->last_str_idx);
163895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
163995b7b453SJohn Marino
164095b7b453SJohn Marino /* We don't check backreferences here.
164195b7b453SJohn Marino See update_cur_sifted_state(). */
164295b7b453SJohn Marino if (!naccepted
164395b7b453SJohn Marino && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
164495b7b453SJohn Marino && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
164595b7b453SJohn Marino dfa->nexts[prev_node]))
164695b7b453SJohn Marino naccepted = 1;
164795b7b453SJohn Marino
164895b7b453SJohn Marino if (naccepted == 0)
164995b7b453SJohn Marino continue;
165095b7b453SJohn Marino
165195b7b453SJohn Marino if (sctx->limits.nelem)
165295b7b453SJohn Marino {
165395b7b453SJohn Marino Idx to_idx = str_idx + naccepted;
165495b7b453SJohn Marino if (check_dst_limits (mctx, &sctx->limits,
165595b7b453SJohn Marino dfa->nexts[prev_node], to_idx,
165695b7b453SJohn Marino prev_node, str_idx))
165795b7b453SJohn Marino continue;
165895b7b453SJohn Marino }
165995b7b453SJohn Marino ok = re_node_set_insert (cur_dest, prev_node);
1660*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
166195b7b453SJohn Marino return REG_ESPACE;
166295b7b453SJohn Marino }
166395b7b453SJohn Marino
166495b7b453SJohn Marino return REG_NOERROR;
166595b7b453SJohn Marino }
166695b7b453SJohn Marino
166795b7b453SJohn Marino /* Helper functions. */
166895b7b453SJohn Marino
166995b7b453SJohn Marino static reg_errcode_t
clean_state_log_if_needed(re_match_context_t * mctx,Idx next_state_log_idx)167095b7b453SJohn Marino clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
167195b7b453SJohn Marino {
167295b7b453SJohn Marino Idx top = mctx->state_log_top;
167395b7b453SJohn Marino
1674cf28ed85SJohn Marino if ((next_state_log_idx >= mctx->input.bufs_len
1675cf28ed85SJohn Marino && mctx->input.bufs_len < mctx->input.len)
167695b7b453SJohn Marino || (next_state_log_idx >= mctx->input.valid_len
167795b7b453SJohn Marino && mctx->input.valid_len < mctx->input.len))
167895b7b453SJohn Marino {
167995b7b453SJohn Marino reg_errcode_t err;
1680680a9cb8SJohn Marino err = extend_buffers (mctx, next_state_log_idx + 1);
1681*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
168295b7b453SJohn Marino return err;
168395b7b453SJohn Marino }
168495b7b453SJohn Marino
168595b7b453SJohn Marino if (top < next_state_log_idx)
168695b7b453SJohn Marino {
168795b7b453SJohn Marino memset (mctx->state_log + top + 1, '\0',
168895b7b453SJohn Marino sizeof (re_dfastate_t *) * (next_state_log_idx - top));
168995b7b453SJohn Marino mctx->state_log_top = next_state_log_idx;
169095b7b453SJohn Marino }
169195b7b453SJohn Marino return REG_NOERROR;
169295b7b453SJohn Marino }
169395b7b453SJohn Marino
169495b7b453SJohn Marino static reg_errcode_t
merge_state_array(const re_dfa_t * dfa,re_dfastate_t ** dst,re_dfastate_t ** src,Idx num)169595b7b453SJohn Marino merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
169695b7b453SJohn Marino re_dfastate_t **src, Idx num)
169795b7b453SJohn Marino {
169895b7b453SJohn Marino Idx st_idx;
169995b7b453SJohn Marino reg_errcode_t err;
170095b7b453SJohn Marino for (st_idx = 0; st_idx < num; ++st_idx)
170195b7b453SJohn Marino {
170295b7b453SJohn Marino if (dst[st_idx] == NULL)
170395b7b453SJohn Marino dst[st_idx] = src[st_idx];
170495b7b453SJohn Marino else if (src[st_idx] != NULL)
170595b7b453SJohn Marino {
170695b7b453SJohn Marino re_node_set merged_set;
170795b7b453SJohn Marino err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
170895b7b453SJohn Marino &src[st_idx]->nodes);
1709*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
171095b7b453SJohn Marino return err;
171195b7b453SJohn Marino dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
171295b7b453SJohn Marino re_node_set_free (&merged_set);
1713*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
171495b7b453SJohn Marino return err;
171595b7b453SJohn Marino }
171695b7b453SJohn Marino }
171795b7b453SJohn Marino return REG_NOERROR;
171895b7b453SJohn Marino }
171995b7b453SJohn Marino
172095b7b453SJohn Marino static reg_errcode_t
update_cur_sifted_state(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,re_node_set * dest_nodes)172195b7b453SJohn Marino update_cur_sifted_state (const re_match_context_t *mctx,
172295b7b453SJohn Marino re_sift_context_t *sctx, Idx str_idx,
172395b7b453SJohn Marino re_node_set *dest_nodes)
172495b7b453SJohn Marino {
172595b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
172695b7b453SJohn Marino reg_errcode_t err = REG_NOERROR;
172795b7b453SJohn Marino const re_node_set *candidates;
172895b7b453SJohn Marino candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
172995b7b453SJohn Marino : &mctx->state_log[str_idx]->nodes);
173095b7b453SJohn Marino
173195b7b453SJohn Marino if (dest_nodes->nelem == 0)
173295b7b453SJohn Marino sctx->sifted_states[str_idx] = NULL;
173395b7b453SJohn Marino else
173495b7b453SJohn Marino {
173595b7b453SJohn Marino if (candidates)
173695b7b453SJohn Marino {
173795b7b453SJohn Marino /* At first, add the nodes which can epsilon transit to a node in
173895b7b453SJohn Marino DEST_NODE. */
173995b7b453SJohn Marino err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1740*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
174195b7b453SJohn Marino return err;
174295b7b453SJohn Marino
174395b7b453SJohn Marino /* Then, check the limitations in the current sift_context. */
174495b7b453SJohn Marino if (sctx->limits.nelem)
174595b7b453SJohn Marino {
174695b7b453SJohn Marino err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
174795b7b453SJohn Marino mctx->bkref_ents, str_idx);
1748*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
174995b7b453SJohn Marino return err;
175095b7b453SJohn Marino }
175195b7b453SJohn Marino }
175295b7b453SJohn Marino
175395b7b453SJohn Marino sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1754*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
175595b7b453SJohn Marino return err;
175695b7b453SJohn Marino }
175795b7b453SJohn Marino
175895b7b453SJohn Marino if (candidates && mctx->state_log[str_idx]->has_backref)
175995b7b453SJohn Marino {
176095b7b453SJohn Marino err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1761*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
176295b7b453SJohn Marino return err;
176395b7b453SJohn Marino }
176495b7b453SJohn Marino return REG_NOERROR;
176595b7b453SJohn Marino }
176695b7b453SJohn Marino
176795b7b453SJohn Marino static reg_errcode_t
1768*09d4459fSDaniel Fojt __attribute_warn_unused_result__
add_epsilon_src_nodes(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates)176995b7b453SJohn Marino add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
177095b7b453SJohn Marino const re_node_set *candidates)
177195b7b453SJohn Marino {
177295b7b453SJohn Marino reg_errcode_t err = REG_NOERROR;
177395b7b453SJohn Marino Idx i;
177495b7b453SJohn Marino
177595b7b453SJohn Marino re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1776*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
177795b7b453SJohn Marino return err;
177895b7b453SJohn Marino
177995b7b453SJohn Marino if (!state->inveclosure.alloc)
178095b7b453SJohn Marino {
178195b7b453SJohn Marino err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1782*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
178395b7b453SJohn Marino return REG_ESPACE;
178495b7b453SJohn Marino for (i = 0; i < dest_nodes->nelem; i++)
178595b7b453SJohn Marino {
178695b7b453SJohn Marino err = re_node_set_merge (&state->inveclosure,
178795b7b453SJohn Marino dfa->inveclosures + dest_nodes->elems[i]);
1788*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
178995b7b453SJohn Marino return REG_ESPACE;
179095b7b453SJohn Marino }
179195b7b453SJohn Marino }
179295b7b453SJohn Marino return re_node_set_add_intersect (dest_nodes, candidates,
179395b7b453SJohn Marino &state->inveclosure);
179495b7b453SJohn Marino }
179595b7b453SJohn Marino
179695b7b453SJohn Marino static reg_errcode_t
sub_epsilon_src_nodes(const re_dfa_t * dfa,Idx node,re_node_set * dest_nodes,const re_node_set * candidates)179795b7b453SJohn Marino sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
179895b7b453SJohn Marino const re_node_set *candidates)
179995b7b453SJohn Marino {
180095b7b453SJohn Marino Idx ecl_idx;
180195b7b453SJohn Marino reg_errcode_t err;
180295b7b453SJohn Marino re_node_set *inv_eclosure = dfa->inveclosures + node;
180395b7b453SJohn Marino re_node_set except_nodes;
180495b7b453SJohn Marino re_node_set_init_empty (&except_nodes);
180595b7b453SJohn Marino for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
180695b7b453SJohn Marino {
180795b7b453SJohn Marino Idx cur_node = inv_eclosure->elems[ecl_idx];
180895b7b453SJohn Marino if (cur_node == node)
180995b7b453SJohn Marino continue;
181095b7b453SJohn Marino if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
181195b7b453SJohn Marino {
181295b7b453SJohn Marino Idx edst1 = dfa->edests[cur_node].elems[0];
181395b7b453SJohn Marino Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1814*09d4459fSDaniel Fojt ? dfa->edests[cur_node].elems[1] : -1);
181595b7b453SJohn Marino if ((!re_node_set_contains (inv_eclosure, edst1)
181695b7b453SJohn Marino && re_node_set_contains (dest_nodes, edst1))
1817*09d4459fSDaniel Fojt || (edst2 > 0
181895b7b453SJohn Marino && !re_node_set_contains (inv_eclosure, edst2)
181995b7b453SJohn Marino && re_node_set_contains (dest_nodes, edst2)))
182095b7b453SJohn Marino {
182195b7b453SJohn Marino err = re_node_set_add_intersect (&except_nodes, candidates,
182295b7b453SJohn Marino dfa->inveclosures + cur_node);
1823*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
182495b7b453SJohn Marino {
182595b7b453SJohn Marino re_node_set_free (&except_nodes);
182695b7b453SJohn Marino return err;
182795b7b453SJohn Marino }
182895b7b453SJohn Marino }
182995b7b453SJohn Marino }
183095b7b453SJohn Marino }
183195b7b453SJohn Marino for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
183295b7b453SJohn Marino {
183395b7b453SJohn Marino Idx cur_node = inv_eclosure->elems[ecl_idx];
183495b7b453SJohn Marino if (!re_node_set_contains (&except_nodes, cur_node))
183595b7b453SJohn Marino {
183695b7b453SJohn Marino Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
183795b7b453SJohn Marino re_node_set_remove_at (dest_nodes, idx);
183895b7b453SJohn Marino }
183995b7b453SJohn Marino }
184095b7b453SJohn Marino re_node_set_free (&except_nodes);
184195b7b453SJohn Marino return REG_NOERROR;
184295b7b453SJohn Marino }
184395b7b453SJohn Marino
184495b7b453SJohn Marino static bool
check_dst_limits(const re_match_context_t * mctx,const re_node_set * limits,Idx dst_node,Idx dst_idx,Idx src_node,Idx src_idx)184595b7b453SJohn Marino check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
184695b7b453SJohn Marino Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
184795b7b453SJohn Marino {
184895b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
184995b7b453SJohn Marino Idx lim_idx, src_pos, dst_pos;
185095b7b453SJohn Marino
185195b7b453SJohn Marino Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
185295b7b453SJohn Marino Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
185395b7b453SJohn Marino for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
185495b7b453SJohn Marino {
185595b7b453SJohn Marino Idx subexp_idx;
185695b7b453SJohn Marino struct re_backref_cache_entry *ent;
185795b7b453SJohn Marino ent = mctx->bkref_ents + limits->elems[lim_idx];
185895b7b453SJohn Marino subexp_idx = dfa->nodes[ent->node].opr.idx;
185995b7b453SJohn Marino
186095b7b453SJohn Marino dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
186195b7b453SJohn Marino subexp_idx, dst_node, dst_idx,
186295b7b453SJohn Marino dst_bkref_idx);
186395b7b453SJohn Marino src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
186495b7b453SJohn Marino subexp_idx, src_node, src_idx,
186595b7b453SJohn Marino src_bkref_idx);
186695b7b453SJohn Marino
186795b7b453SJohn Marino /* In case of:
186895b7b453SJohn Marino <src> <dst> ( <subexp> )
186995b7b453SJohn Marino ( <subexp> ) <src> <dst>
187095b7b453SJohn Marino ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
187195b7b453SJohn Marino if (src_pos == dst_pos)
187295b7b453SJohn Marino continue; /* This is unrelated limitation. */
187395b7b453SJohn Marino else
187495b7b453SJohn Marino return true;
187595b7b453SJohn Marino }
187695b7b453SJohn Marino return false;
187795b7b453SJohn Marino }
187895b7b453SJohn Marino
187995b7b453SJohn Marino static int
check_dst_limits_calc_pos_1(const re_match_context_t * mctx,int boundaries,Idx subexp_idx,Idx from_node,Idx bkref_idx)188095b7b453SJohn Marino check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
188195b7b453SJohn Marino Idx subexp_idx, Idx from_node, Idx bkref_idx)
188295b7b453SJohn Marino {
188395b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
188495b7b453SJohn Marino const re_node_set *eclosures = dfa->eclosures + from_node;
188595b7b453SJohn Marino Idx node_idx;
188695b7b453SJohn Marino
188795b7b453SJohn Marino /* Else, we are on the boundary: examine the nodes on the epsilon
188895b7b453SJohn Marino closure. */
188995b7b453SJohn Marino for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
189095b7b453SJohn Marino {
189195b7b453SJohn Marino Idx node = eclosures->elems[node_idx];
189295b7b453SJohn Marino switch (dfa->nodes[node].type)
189395b7b453SJohn Marino {
189495b7b453SJohn Marino case OP_BACK_REF:
1895*09d4459fSDaniel Fojt if (bkref_idx != -1)
189695b7b453SJohn Marino {
189795b7b453SJohn Marino struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
189895b7b453SJohn Marino do
189995b7b453SJohn Marino {
190095b7b453SJohn Marino Idx dst;
190195b7b453SJohn Marino int cpos;
190295b7b453SJohn Marino
190395b7b453SJohn Marino if (ent->node != node)
190495b7b453SJohn Marino continue;
190595b7b453SJohn Marino
190695b7b453SJohn Marino if (subexp_idx < BITSET_WORD_BITS
190795b7b453SJohn Marino && !(ent->eps_reachable_subexps_map
190895b7b453SJohn Marino & ((bitset_word_t) 1 << subexp_idx)))
190995b7b453SJohn Marino continue;
191095b7b453SJohn Marino
191195b7b453SJohn Marino /* Recurse trying to reach the OP_OPEN_SUBEXP and
191295b7b453SJohn Marino OP_CLOSE_SUBEXP cases below. But, if the
191395b7b453SJohn Marino destination node is the same node as the source
191495b7b453SJohn Marino node, don't recurse because it would cause an
191595b7b453SJohn Marino infinite loop: a regex that exhibits this behavior
191695b7b453SJohn Marino is ()\1*\1* */
191795b7b453SJohn Marino dst = dfa->edests[node].elems[0];
191895b7b453SJohn Marino if (dst == from_node)
191995b7b453SJohn Marino {
192095b7b453SJohn Marino if (boundaries & 1)
192195b7b453SJohn Marino return -1;
192295b7b453SJohn Marino else /* if (boundaries & 2) */
192395b7b453SJohn Marino return 0;
192495b7b453SJohn Marino }
192595b7b453SJohn Marino
192695b7b453SJohn Marino cpos =
192795b7b453SJohn Marino check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
192895b7b453SJohn Marino dst, bkref_idx);
192995b7b453SJohn Marino if (cpos == -1 /* && (boundaries & 1) */)
193095b7b453SJohn Marino return -1;
193195b7b453SJohn Marino if (cpos == 0 && (boundaries & 2))
193295b7b453SJohn Marino return 0;
193395b7b453SJohn Marino
193495b7b453SJohn Marino if (subexp_idx < BITSET_WORD_BITS)
193595b7b453SJohn Marino ent->eps_reachable_subexps_map
193695b7b453SJohn Marino &= ~((bitset_word_t) 1 << subexp_idx);
193795b7b453SJohn Marino }
193895b7b453SJohn Marino while (ent++->more);
193995b7b453SJohn Marino }
194095b7b453SJohn Marino break;
194195b7b453SJohn Marino
194295b7b453SJohn Marino case OP_OPEN_SUBEXP:
194395b7b453SJohn Marino if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
194495b7b453SJohn Marino return -1;
194595b7b453SJohn Marino break;
194695b7b453SJohn Marino
194795b7b453SJohn Marino case OP_CLOSE_SUBEXP:
194895b7b453SJohn Marino if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
194995b7b453SJohn Marino return 0;
195095b7b453SJohn Marino break;
195195b7b453SJohn Marino
195295b7b453SJohn Marino default:
195395b7b453SJohn Marino break;
195495b7b453SJohn Marino }
195595b7b453SJohn Marino }
195695b7b453SJohn Marino
195795b7b453SJohn Marino return (boundaries & 2) ? 1 : 0;
195895b7b453SJohn Marino }
195995b7b453SJohn Marino
196095b7b453SJohn Marino static int
check_dst_limits_calc_pos(const re_match_context_t * mctx,Idx limit,Idx subexp_idx,Idx from_node,Idx str_idx,Idx bkref_idx)196195b7b453SJohn Marino check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
196295b7b453SJohn Marino Idx subexp_idx, Idx from_node, Idx str_idx,
196395b7b453SJohn Marino Idx bkref_idx)
196495b7b453SJohn Marino {
196595b7b453SJohn Marino struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
196695b7b453SJohn Marino int boundaries;
196795b7b453SJohn Marino
196895b7b453SJohn Marino /* If we are outside the range of the subexpression, return -1 or 1. */
196995b7b453SJohn Marino if (str_idx < lim->subexp_from)
197095b7b453SJohn Marino return -1;
197195b7b453SJohn Marino
197295b7b453SJohn Marino if (lim->subexp_to < str_idx)
197395b7b453SJohn Marino return 1;
197495b7b453SJohn Marino
197595b7b453SJohn Marino /* If we are within the subexpression, return 0. */
197695b7b453SJohn Marino boundaries = (str_idx == lim->subexp_from);
197795b7b453SJohn Marino boundaries |= (str_idx == lim->subexp_to) << 1;
197895b7b453SJohn Marino if (boundaries == 0)
197995b7b453SJohn Marino return 0;
198095b7b453SJohn Marino
198195b7b453SJohn Marino /* Else, examine epsilon closure. */
198295b7b453SJohn Marino return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
198395b7b453SJohn Marino from_node, bkref_idx);
198495b7b453SJohn Marino }
198595b7b453SJohn Marino
198695b7b453SJohn Marino /* Check the limitations of sub expressions LIMITS, and remove the nodes
198795b7b453SJohn Marino which are against limitations from DEST_NODES. */
198895b7b453SJohn Marino
198995b7b453SJohn Marino static reg_errcode_t
check_subexp_limits(const re_dfa_t * dfa,re_node_set * dest_nodes,const re_node_set * candidates,re_node_set * limits,struct re_backref_cache_entry * bkref_ents,Idx str_idx)199095b7b453SJohn Marino check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
199195b7b453SJohn Marino const re_node_set *candidates, re_node_set *limits,
199295b7b453SJohn Marino struct re_backref_cache_entry *bkref_ents, Idx str_idx)
199395b7b453SJohn Marino {
199495b7b453SJohn Marino reg_errcode_t err;
199595b7b453SJohn Marino Idx node_idx, lim_idx;
199695b7b453SJohn Marino
199795b7b453SJohn Marino for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
199895b7b453SJohn Marino {
199995b7b453SJohn Marino Idx subexp_idx;
200095b7b453SJohn Marino struct re_backref_cache_entry *ent;
200195b7b453SJohn Marino ent = bkref_ents + limits->elems[lim_idx];
200295b7b453SJohn Marino
200395b7b453SJohn Marino if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
200495b7b453SJohn Marino continue; /* This is unrelated limitation. */
200595b7b453SJohn Marino
200695b7b453SJohn Marino subexp_idx = dfa->nodes[ent->node].opr.idx;
200795b7b453SJohn Marino if (ent->subexp_to == str_idx)
200895b7b453SJohn Marino {
2009*09d4459fSDaniel Fojt Idx ops_node = -1;
2010*09d4459fSDaniel Fojt Idx cls_node = -1;
201195b7b453SJohn Marino for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
201295b7b453SJohn Marino {
201395b7b453SJohn Marino Idx node = dest_nodes->elems[node_idx];
201495b7b453SJohn Marino re_token_type_t type = dfa->nodes[node].type;
201595b7b453SJohn Marino if (type == OP_OPEN_SUBEXP
201695b7b453SJohn Marino && subexp_idx == dfa->nodes[node].opr.idx)
201795b7b453SJohn Marino ops_node = node;
201895b7b453SJohn Marino else if (type == OP_CLOSE_SUBEXP
201995b7b453SJohn Marino && subexp_idx == dfa->nodes[node].opr.idx)
202095b7b453SJohn Marino cls_node = node;
202195b7b453SJohn Marino }
202295b7b453SJohn Marino
202395b7b453SJohn Marino /* Check the limitation of the open subexpression. */
202495b7b453SJohn Marino /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2025*09d4459fSDaniel Fojt if (ops_node >= 0)
202695b7b453SJohn Marino {
202795b7b453SJohn Marino err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
202895b7b453SJohn Marino candidates);
2029*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
203095b7b453SJohn Marino return err;
203195b7b453SJohn Marino }
203295b7b453SJohn Marino
203395b7b453SJohn Marino /* Check the limitation of the close subexpression. */
2034*09d4459fSDaniel Fojt if (cls_node >= 0)
203595b7b453SJohn Marino for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
203695b7b453SJohn Marino {
203795b7b453SJohn Marino Idx node = dest_nodes->elems[node_idx];
203895b7b453SJohn Marino if (!re_node_set_contains (dfa->inveclosures + node,
203995b7b453SJohn Marino cls_node)
204095b7b453SJohn Marino && !re_node_set_contains (dfa->eclosures + node,
204195b7b453SJohn Marino cls_node))
204295b7b453SJohn Marino {
204395b7b453SJohn Marino /* It is against this limitation.
204495b7b453SJohn Marino Remove it form the current sifted state. */
204595b7b453SJohn Marino err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
204695b7b453SJohn Marino candidates);
2047*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
204895b7b453SJohn Marino return err;
204995b7b453SJohn Marino --node_idx;
205095b7b453SJohn Marino }
205195b7b453SJohn Marino }
205295b7b453SJohn Marino }
205395b7b453SJohn Marino else /* (ent->subexp_to != str_idx) */
205495b7b453SJohn Marino {
205595b7b453SJohn Marino for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
205695b7b453SJohn Marino {
205795b7b453SJohn Marino Idx node = dest_nodes->elems[node_idx];
205895b7b453SJohn Marino re_token_type_t type = dfa->nodes[node].type;
205995b7b453SJohn Marino if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
206095b7b453SJohn Marino {
206195b7b453SJohn Marino if (subexp_idx != dfa->nodes[node].opr.idx)
206295b7b453SJohn Marino continue;
206395b7b453SJohn Marino /* It is against this limitation.
206495b7b453SJohn Marino Remove it form the current sifted state. */
206595b7b453SJohn Marino err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
206695b7b453SJohn Marino candidates);
2067*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
206895b7b453SJohn Marino return err;
206995b7b453SJohn Marino }
207095b7b453SJohn Marino }
207195b7b453SJohn Marino }
207295b7b453SJohn Marino }
207395b7b453SJohn Marino return REG_NOERROR;
207495b7b453SJohn Marino }
207595b7b453SJohn Marino
207695b7b453SJohn Marino static reg_errcode_t
2077*09d4459fSDaniel Fojt __attribute_warn_unused_result__
sift_states_bkref(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx str_idx,const re_node_set * candidates)207895b7b453SJohn Marino sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
207995b7b453SJohn Marino Idx str_idx, const re_node_set *candidates)
208095b7b453SJohn Marino {
208195b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
208295b7b453SJohn Marino reg_errcode_t err;
208395b7b453SJohn Marino Idx node_idx, node;
208495b7b453SJohn Marino re_sift_context_t local_sctx;
208595b7b453SJohn Marino Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
208695b7b453SJohn Marino
2087*09d4459fSDaniel Fojt if (first_idx == -1)
208895b7b453SJohn Marino return REG_NOERROR;
208995b7b453SJohn Marino
209095b7b453SJohn Marino local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
209195b7b453SJohn Marino
209295b7b453SJohn Marino for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
209395b7b453SJohn Marino {
209495b7b453SJohn Marino Idx enabled_idx;
209595b7b453SJohn Marino re_token_type_t type;
209695b7b453SJohn Marino struct re_backref_cache_entry *entry;
209795b7b453SJohn Marino node = candidates->elems[node_idx];
209895b7b453SJohn Marino type = dfa->nodes[node].type;
209995b7b453SJohn Marino /* Avoid infinite loop for the REs like "()\1+". */
210095b7b453SJohn Marino if (node == sctx->last_node && str_idx == sctx->last_str_idx)
210195b7b453SJohn Marino continue;
210295b7b453SJohn Marino if (type != OP_BACK_REF)
210395b7b453SJohn Marino continue;
210495b7b453SJohn Marino
210595b7b453SJohn Marino entry = mctx->bkref_ents + first_idx;
210695b7b453SJohn Marino enabled_idx = first_idx;
210795b7b453SJohn Marino do
210895b7b453SJohn Marino {
210995b7b453SJohn Marino Idx subexp_len;
211095b7b453SJohn Marino Idx to_idx;
211195b7b453SJohn Marino Idx dst_node;
211295b7b453SJohn Marino bool ok;
211395b7b453SJohn Marino re_dfastate_t *cur_state;
211495b7b453SJohn Marino
211595b7b453SJohn Marino if (entry->node != node)
211695b7b453SJohn Marino continue;
211795b7b453SJohn Marino subexp_len = entry->subexp_to - entry->subexp_from;
211895b7b453SJohn Marino to_idx = str_idx + subexp_len;
211995b7b453SJohn Marino dst_node = (subexp_len ? dfa->nexts[node]
212095b7b453SJohn Marino : dfa->edests[node].elems[0]);
212195b7b453SJohn Marino
212295b7b453SJohn Marino if (to_idx > sctx->last_str_idx
212395b7b453SJohn Marino || sctx->sifted_states[to_idx] == NULL
212495b7b453SJohn Marino || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
212595b7b453SJohn Marino || check_dst_limits (mctx, &sctx->limits, node,
212695b7b453SJohn Marino str_idx, dst_node, to_idx))
212795b7b453SJohn Marino continue;
212895b7b453SJohn Marino
212995b7b453SJohn Marino if (local_sctx.sifted_states == NULL)
213095b7b453SJohn Marino {
213195b7b453SJohn Marino local_sctx = *sctx;
213295b7b453SJohn Marino err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2133*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
213495b7b453SJohn Marino goto free_return;
213595b7b453SJohn Marino }
213695b7b453SJohn Marino local_sctx.last_node = node;
213795b7b453SJohn Marino local_sctx.last_str_idx = str_idx;
213895b7b453SJohn Marino ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2139*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
214095b7b453SJohn Marino {
214195b7b453SJohn Marino err = REG_ESPACE;
214295b7b453SJohn Marino goto free_return;
214395b7b453SJohn Marino }
214495b7b453SJohn Marino cur_state = local_sctx.sifted_states[str_idx];
214595b7b453SJohn Marino err = sift_states_backward (mctx, &local_sctx);
2146*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
214795b7b453SJohn Marino goto free_return;
214895b7b453SJohn Marino if (sctx->limited_states != NULL)
214995b7b453SJohn Marino {
215095b7b453SJohn Marino err = merge_state_array (dfa, sctx->limited_states,
215195b7b453SJohn Marino local_sctx.sifted_states,
215295b7b453SJohn Marino str_idx + 1);
2153*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
215495b7b453SJohn Marino goto free_return;
215595b7b453SJohn Marino }
215695b7b453SJohn Marino local_sctx.sifted_states[str_idx] = cur_state;
215795b7b453SJohn Marino re_node_set_remove (&local_sctx.limits, enabled_idx);
215895b7b453SJohn Marino
215995b7b453SJohn Marino /* mctx->bkref_ents may have changed, reload the pointer. */
216095b7b453SJohn Marino entry = mctx->bkref_ents + enabled_idx;
216195b7b453SJohn Marino }
216295b7b453SJohn Marino while (enabled_idx++, entry++->more);
216395b7b453SJohn Marino }
216495b7b453SJohn Marino err = REG_NOERROR;
216595b7b453SJohn Marino free_return:
216695b7b453SJohn Marino if (local_sctx.sifted_states != NULL)
216795b7b453SJohn Marino {
216895b7b453SJohn Marino re_node_set_free (&local_sctx.limits);
216995b7b453SJohn Marino }
217095b7b453SJohn Marino
217195b7b453SJohn Marino return err;
217295b7b453SJohn Marino }
217395b7b453SJohn Marino
217495b7b453SJohn Marino
217595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
217695b7b453SJohn Marino static int
sift_states_iter_mb(const re_match_context_t * mctx,re_sift_context_t * sctx,Idx node_idx,Idx str_idx,Idx max_str_idx)217795b7b453SJohn Marino sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
217895b7b453SJohn Marino Idx node_idx, Idx str_idx, Idx max_str_idx)
217995b7b453SJohn Marino {
218095b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
218195b7b453SJohn Marino int naccepted;
2182cf28ed85SJohn Marino /* Check the node can accept "multi byte". */
218395b7b453SJohn Marino naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2184*09d4459fSDaniel Fojt if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2185*09d4459fSDaniel Fojt && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
218695b7b453SJohn Marino dfa->nexts[node_idx]))
2187cf28ed85SJohn Marino /* The node can't accept the "multi byte", or the
218895b7b453SJohn Marino destination was already thrown away, then the node
2189*09d4459fSDaniel Fojt couldn't accept the current input "multi byte". */
219095b7b453SJohn Marino naccepted = 0;
219195b7b453SJohn Marino /* Otherwise, it is sure that the node could accept
2192cf28ed85SJohn Marino 'naccepted' bytes input. */
219395b7b453SJohn Marino return naccepted;
219495b7b453SJohn Marino }
219595b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
219695b7b453SJohn Marino
219795b7b453SJohn Marino
219895b7b453SJohn Marino /* Functions for state transition. */
219995b7b453SJohn Marino
220095b7b453SJohn Marino /* Return the next state to which the current state STATE will transit by
220195b7b453SJohn Marino accepting the current input byte, and update STATE_LOG if necessary.
220295b7b453SJohn Marino If STATE can accept a multibyte char/collating element/back reference
220395b7b453SJohn Marino update the destination of STATE_LOG. */
220495b7b453SJohn Marino
220595b7b453SJohn Marino static re_dfastate_t *
2206*09d4459fSDaniel Fojt __attribute_warn_unused_result__
transit_state(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * state)220795b7b453SJohn Marino transit_state (reg_errcode_t *err, re_match_context_t *mctx,
220895b7b453SJohn Marino re_dfastate_t *state)
220995b7b453SJohn Marino {
221095b7b453SJohn Marino re_dfastate_t **trtable;
221195b7b453SJohn Marino unsigned char ch;
221295b7b453SJohn Marino
221395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
221495b7b453SJohn Marino /* If the current state can accept multibyte. */
2215*09d4459fSDaniel Fojt if (__glibc_unlikely (state->accept_mb))
221695b7b453SJohn Marino {
221795b7b453SJohn Marino *err = transit_state_mb (mctx, state);
2218*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
221995b7b453SJohn Marino return NULL;
222095b7b453SJohn Marino }
222195b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
222295b7b453SJohn Marino
222395b7b453SJohn Marino /* Then decide the next state with the single byte. */
222495b7b453SJohn Marino #if 0
222595b7b453SJohn Marino if (0)
222695b7b453SJohn Marino /* don't use transition table */
222795b7b453SJohn Marino return transit_state_sb (err, mctx, state);
222895b7b453SJohn Marino #endif
222995b7b453SJohn Marino
223095b7b453SJohn Marino /* Use transition table */
223195b7b453SJohn Marino ch = re_string_fetch_byte (&mctx->input);
223295b7b453SJohn Marino for (;;)
223395b7b453SJohn Marino {
223495b7b453SJohn Marino trtable = state->trtable;
2235*09d4459fSDaniel Fojt if (__glibc_likely (trtable != NULL))
223695b7b453SJohn Marino return trtable[ch];
223795b7b453SJohn Marino
223895b7b453SJohn Marino trtable = state->word_trtable;
2239*09d4459fSDaniel Fojt if (__glibc_likely (trtable != NULL))
224095b7b453SJohn Marino {
224195b7b453SJohn Marino unsigned int context;
224295b7b453SJohn Marino context
224395b7b453SJohn Marino = re_string_context_at (&mctx->input,
224495b7b453SJohn Marino re_string_cur_idx (&mctx->input) - 1,
224595b7b453SJohn Marino mctx->eflags);
224695b7b453SJohn Marino if (IS_WORD_CONTEXT (context))
224795b7b453SJohn Marino return trtable[ch + SBC_MAX];
224895b7b453SJohn Marino else
224995b7b453SJohn Marino return trtable[ch];
225095b7b453SJohn Marino }
225195b7b453SJohn Marino
225295b7b453SJohn Marino if (!build_trtable (mctx->dfa, state))
225395b7b453SJohn Marino {
225495b7b453SJohn Marino *err = REG_ESPACE;
225595b7b453SJohn Marino return NULL;
225695b7b453SJohn Marino }
225795b7b453SJohn Marino
225895b7b453SJohn Marino /* Retry, we now have a transition table. */
225995b7b453SJohn Marino }
226095b7b453SJohn Marino }
226195b7b453SJohn Marino
226295b7b453SJohn Marino /* Update the state_log if we need */
226395b7b453SJohn Marino static re_dfastate_t *
merge_state_with_log(reg_errcode_t * err,re_match_context_t * mctx,re_dfastate_t * next_state)226495b7b453SJohn Marino merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
226595b7b453SJohn Marino re_dfastate_t *next_state)
226695b7b453SJohn Marino {
226795b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
226895b7b453SJohn Marino Idx cur_idx = re_string_cur_idx (&mctx->input);
226995b7b453SJohn Marino
227095b7b453SJohn Marino if (cur_idx > mctx->state_log_top)
227195b7b453SJohn Marino {
227295b7b453SJohn Marino mctx->state_log[cur_idx] = next_state;
227395b7b453SJohn Marino mctx->state_log_top = cur_idx;
227495b7b453SJohn Marino }
227595b7b453SJohn Marino else if (mctx->state_log[cur_idx] == 0)
227695b7b453SJohn Marino {
227795b7b453SJohn Marino mctx->state_log[cur_idx] = next_state;
227895b7b453SJohn Marino }
227995b7b453SJohn Marino else
228095b7b453SJohn Marino {
228195b7b453SJohn Marino re_dfastate_t *pstate;
228295b7b453SJohn Marino unsigned int context;
228395b7b453SJohn Marino re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
228495b7b453SJohn Marino /* If (state_log[cur_idx] != 0), it implies that cur_idx is
228595b7b453SJohn Marino the destination of a multibyte char/collating element/
228695b7b453SJohn Marino back reference. Then the next state is the union set of
228795b7b453SJohn Marino these destinations and the results of the transition table. */
228895b7b453SJohn Marino pstate = mctx->state_log[cur_idx];
228995b7b453SJohn Marino log_nodes = pstate->entrance_nodes;
229095b7b453SJohn Marino if (next_state != NULL)
229195b7b453SJohn Marino {
229295b7b453SJohn Marino table_nodes = next_state->entrance_nodes;
229395b7b453SJohn Marino *err = re_node_set_init_union (&next_nodes, table_nodes,
229495b7b453SJohn Marino log_nodes);
2295*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
229695b7b453SJohn Marino return NULL;
229795b7b453SJohn Marino }
229895b7b453SJohn Marino else
229995b7b453SJohn Marino next_nodes = *log_nodes;
230095b7b453SJohn Marino /* Note: We already add the nodes of the initial state,
230195b7b453SJohn Marino then we don't need to add them here. */
230295b7b453SJohn Marino
230395b7b453SJohn Marino context = re_string_context_at (&mctx->input,
230495b7b453SJohn Marino re_string_cur_idx (&mctx->input) - 1,
230595b7b453SJohn Marino mctx->eflags);
230695b7b453SJohn Marino next_state = mctx->state_log[cur_idx]
230795b7b453SJohn Marino = re_acquire_state_context (err, dfa, &next_nodes, context);
230895b7b453SJohn Marino /* We don't need to check errors here, since the return value of
230995b7b453SJohn Marino this function is next_state and ERR is already set. */
231095b7b453SJohn Marino
231195b7b453SJohn Marino if (table_nodes != NULL)
231295b7b453SJohn Marino re_node_set_free (&next_nodes);
231395b7b453SJohn Marino }
231495b7b453SJohn Marino
2315*09d4459fSDaniel Fojt if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
231695b7b453SJohn Marino {
231795b7b453SJohn Marino /* Check OP_OPEN_SUBEXP in the current state in case that we use them
231895b7b453SJohn Marino later. We must check them here, since the back references in the
231995b7b453SJohn Marino next state might use them. */
232095b7b453SJohn Marino *err = check_subexp_matching_top (mctx, &next_state->nodes,
232195b7b453SJohn Marino cur_idx);
2322*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
232395b7b453SJohn Marino return NULL;
232495b7b453SJohn Marino
232595b7b453SJohn Marino /* If the next state has back references. */
232695b7b453SJohn Marino if (next_state->has_backref)
232795b7b453SJohn Marino {
232895b7b453SJohn Marino *err = transit_state_bkref (mctx, &next_state->nodes);
2329*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
233095b7b453SJohn Marino return NULL;
233195b7b453SJohn Marino next_state = mctx->state_log[cur_idx];
233295b7b453SJohn Marino }
233395b7b453SJohn Marino }
233495b7b453SJohn Marino
233595b7b453SJohn Marino return next_state;
233695b7b453SJohn Marino }
233795b7b453SJohn Marino
233895b7b453SJohn Marino /* Skip bytes in the input that correspond to part of a
233995b7b453SJohn Marino multi-byte match, then look in the log for a state
234095b7b453SJohn Marino from which to restart matching. */
234195b7b453SJohn Marino static re_dfastate_t *
find_recover_state(reg_errcode_t * err,re_match_context_t * mctx)234295b7b453SJohn Marino find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
234395b7b453SJohn Marino {
234495b7b453SJohn Marino re_dfastate_t *cur_state;
234595b7b453SJohn Marino do
234695b7b453SJohn Marino {
234795b7b453SJohn Marino Idx max = mctx->state_log_top;
234895b7b453SJohn Marino Idx cur_str_idx = re_string_cur_idx (&mctx->input);
234995b7b453SJohn Marino
235095b7b453SJohn Marino do
235195b7b453SJohn Marino {
235295b7b453SJohn Marino if (++cur_str_idx > max)
235395b7b453SJohn Marino return NULL;
235495b7b453SJohn Marino re_string_skip_bytes (&mctx->input, 1);
235595b7b453SJohn Marino }
235695b7b453SJohn Marino while (mctx->state_log[cur_str_idx] == NULL);
235795b7b453SJohn Marino
235895b7b453SJohn Marino cur_state = merge_state_with_log (err, mctx, NULL);
235995b7b453SJohn Marino }
236095b7b453SJohn Marino while (*err == REG_NOERROR && cur_state == NULL);
236195b7b453SJohn Marino return cur_state;
236295b7b453SJohn Marino }
236395b7b453SJohn Marino
236495b7b453SJohn Marino /* Helper functions for transit_state. */
236595b7b453SJohn Marino
236695b7b453SJohn Marino /* From the node set CUR_NODES, pick up the nodes whose types are
236795b7b453SJohn Marino OP_OPEN_SUBEXP and which have corresponding back references in the regular
236895b7b453SJohn Marino expression. And register them to use them later for evaluating the
2369cf28ed85SJohn Marino corresponding back references. */
237095b7b453SJohn Marino
237195b7b453SJohn Marino static reg_errcode_t
check_subexp_matching_top(re_match_context_t * mctx,re_node_set * cur_nodes,Idx str_idx)237295b7b453SJohn Marino check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
237395b7b453SJohn Marino Idx str_idx)
237495b7b453SJohn Marino {
237595b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
237695b7b453SJohn Marino Idx node_idx;
237795b7b453SJohn Marino reg_errcode_t err;
237895b7b453SJohn Marino
237995b7b453SJohn Marino /* TODO: This isn't efficient.
238095b7b453SJohn Marino Because there might be more than one nodes whose types are
238195b7b453SJohn Marino OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
238295b7b453SJohn Marino nodes.
238395b7b453SJohn Marino E.g. RE: (a){2} */
238495b7b453SJohn Marino for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
238595b7b453SJohn Marino {
238695b7b453SJohn Marino Idx node = cur_nodes->elems[node_idx];
238795b7b453SJohn Marino if (dfa->nodes[node].type == OP_OPEN_SUBEXP
238895b7b453SJohn Marino && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
238995b7b453SJohn Marino && (dfa->used_bkref_map
239095b7b453SJohn Marino & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
239195b7b453SJohn Marino {
239295b7b453SJohn Marino err = match_ctx_add_subtop (mctx, node, str_idx);
2393*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
239495b7b453SJohn Marino return err;
239595b7b453SJohn Marino }
239695b7b453SJohn Marino }
239795b7b453SJohn Marino return REG_NOERROR;
239895b7b453SJohn Marino }
239995b7b453SJohn Marino
240095b7b453SJohn Marino #if 0
240195b7b453SJohn Marino /* Return the next state to which the current state STATE will transit by
240295b7b453SJohn Marino accepting the current input byte. */
240395b7b453SJohn Marino
240495b7b453SJohn Marino static re_dfastate_t *
240595b7b453SJohn Marino transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
240695b7b453SJohn Marino re_dfastate_t *state)
240795b7b453SJohn Marino {
240895b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
240995b7b453SJohn Marino re_node_set next_nodes;
241095b7b453SJohn Marino re_dfastate_t *next_state;
241195b7b453SJohn Marino Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
241295b7b453SJohn Marino unsigned int context;
241395b7b453SJohn Marino
241495b7b453SJohn Marino *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2415*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
241695b7b453SJohn Marino return NULL;
241795b7b453SJohn Marino for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
241895b7b453SJohn Marino {
241995b7b453SJohn Marino Idx cur_node = state->nodes.elems[node_cnt];
242095b7b453SJohn Marino if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
242195b7b453SJohn Marino {
242295b7b453SJohn Marino *err = re_node_set_merge (&next_nodes,
242395b7b453SJohn Marino dfa->eclosures + dfa->nexts[cur_node]);
2424*09d4459fSDaniel Fojt if (__glibc_unlikely (*err != REG_NOERROR))
242595b7b453SJohn Marino {
242695b7b453SJohn Marino re_node_set_free (&next_nodes);
242795b7b453SJohn Marino return NULL;
242895b7b453SJohn Marino }
242995b7b453SJohn Marino }
243095b7b453SJohn Marino }
243195b7b453SJohn Marino context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
243295b7b453SJohn Marino next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
243395b7b453SJohn Marino /* We don't need to check errors here, since the return value of
243495b7b453SJohn Marino this function is next_state and ERR is already set. */
243595b7b453SJohn Marino
243695b7b453SJohn Marino re_node_set_free (&next_nodes);
243795b7b453SJohn Marino re_string_skip_bytes (&mctx->input, 1);
243895b7b453SJohn Marino return next_state;
243995b7b453SJohn Marino }
244095b7b453SJohn Marino #endif
244195b7b453SJohn Marino
244295b7b453SJohn Marino #ifdef RE_ENABLE_I18N
244395b7b453SJohn Marino static reg_errcode_t
transit_state_mb(re_match_context_t * mctx,re_dfastate_t * pstate)244495b7b453SJohn Marino transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
244595b7b453SJohn Marino {
244695b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
244795b7b453SJohn Marino reg_errcode_t err;
244895b7b453SJohn Marino Idx i;
244995b7b453SJohn Marino
245095b7b453SJohn Marino for (i = 0; i < pstate->nodes.nelem; ++i)
245195b7b453SJohn Marino {
245295b7b453SJohn Marino re_node_set dest_nodes, *new_nodes;
245395b7b453SJohn Marino Idx cur_node_idx = pstate->nodes.elems[i];
245495b7b453SJohn Marino int naccepted;
245595b7b453SJohn Marino Idx dest_idx;
245695b7b453SJohn Marino unsigned int context;
245795b7b453SJohn Marino re_dfastate_t *dest_state;
245895b7b453SJohn Marino
245995b7b453SJohn Marino if (!dfa->nodes[cur_node_idx].accept_mb)
246095b7b453SJohn Marino continue;
246195b7b453SJohn Marino
246295b7b453SJohn Marino if (dfa->nodes[cur_node_idx].constraint)
246395b7b453SJohn Marino {
246495b7b453SJohn Marino context = re_string_context_at (&mctx->input,
246595b7b453SJohn Marino re_string_cur_idx (&mctx->input),
246695b7b453SJohn Marino mctx->eflags);
246795b7b453SJohn Marino if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
246895b7b453SJohn Marino context))
246995b7b453SJohn Marino continue;
247095b7b453SJohn Marino }
247195b7b453SJohn Marino
247295b7b453SJohn Marino /* How many bytes the node can accept? */
247395b7b453SJohn Marino naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
247495b7b453SJohn Marino re_string_cur_idx (&mctx->input));
247595b7b453SJohn Marino if (naccepted == 0)
247695b7b453SJohn Marino continue;
247795b7b453SJohn Marino
2478cf28ed85SJohn Marino /* The node can accepts 'naccepted' bytes. */
247995b7b453SJohn Marino dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
248095b7b453SJohn Marino mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
248195b7b453SJohn Marino : mctx->max_mb_elem_len);
248295b7b453SJohn Marino err = clean_state_log_if_needed (mctx, dest_idx);
2483*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
248495b7b453SJohn Marino return err;
2485*09d4459fSDaniel Fojt DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
248695b7b453SJohn Marino new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
248795b7b453SJohn Marino
248895b7b453SJohn Marino dest_state = mctx->state_log[dest_idx];
248995b7b453SJohn Marino if (dest_state == NULL)
249095b7b453SJohn Marino dest_nodes = *new_nodes;
249195b7b453SJohn Marino else
249295b7b453SJohn Marino {
249395b7b453SJohn Marino err = re_node_set_init_union (&dest_nodes,
249495b7b453SJohn Marino dest_state->entrance_nodes, new_nodes);
2495*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
249695b7b453SJohn Marino return err;
249795b7b453SJohn Marino }
249895b7b453SJohn Marino context = re_string_context_at (&mctx->input, dest_idx - 1,
249995b7b453SJohn Marino mctx->eflags);
250095b7b453SJohn Marino mctx->state_log[dest_idx]
250195b7b453SJohn Marino = re_acquire_state_context (&err, dfa, &dest_nodes, context);
250295b7b453SJohn Marino if (dest_state != NULL)
250395b7b453SJohn Marino re_node_set_free (&dest_nodes);
2504*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2505*09d4459fSDaniel Fojt && err != REG_NOERROR))
250695b7b453SJohn Marino return err;
250795b7b453SJohn Marino }
250895b7b453SJohn Marino return REG_NOERROR;
250995b7b453SJohn Marino }
251095b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
251195b7b453SJohn Marino
251295b7b453SJohn Marino static reg_errcode_t
transit_state_bkref(re_match_context_t * mctx,const re_node_set * nodes)251395b7b453SJohn Marino transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
251495b7b453SJohn Marino {
251595b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
251695b7b453SJohn Marino reg_errcode_t err;
251795b7b453SJohn Marino Idx i;
251895b7b453SJohn Marino Idx cur_str_idx = re_string_cur_idx (&mctx->input);
251995b7b453SJohn Marino
252095b7b453SJohn Marino for (i = 0; i < nodes->nelem; ++i)
252195b7b453SJohn Marino {
252295b7b453SJohn Marino Idx dest_str_idx, prev_nelem, bkc_idx;
252395b7b453SJohn Marino Idx node_idx = nodes->elems[i];
252495b7b453SJohn Marino unsigned int context;
252595b7b453SJohn Marino const re_token_t *node = dfa->nodes + node_idx;
252695b7b453SJohn Marino re_node_set *new_dest_nodes;
252795b7b453SJohn Marino
2528cf28ed85SJohn Marino /* Check whether 'node' is a backreference or not. */
252995b7b453SJohn Marino if (node->type != OP_BACK_REF)
253095b7b453SJohn Marino continue;
253195b7b453SJohn Marino
253295b7b453SJohn Marino if (node->constraint)
253395b7b453SJohn Marino {
253495b7b453SJohn Marino context = re_string_context_at (&mctx->input, cur_str_idx,
253595b7b453SJohn Marino mctx->eflags);
253695b7b453SJohn Marino if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
253795b7b453SJohn Marino continue;
253895b7b453SJohn Marino }
253995b7b453SJohn Marino
2540cf28ed85SJohn Marino /* 'node' is a backreference.
254195b7b453SJohn Marino Check the substring which the substring matched. */
254295b7b453SJohn Marino bkc_idx = mctx->nbkref_ents;
254395b7b453SJohn Marino err = get_subexp (mctx, node_idx, cur_str_idx);
2544*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
254595b7b453SJohn Marino goto free_return;
254695b7b453SJohn Marino
2547cf28ed85SJohn Marino /* And add the epsilon closures (which is 'new_dest_nodes') of
254895b7b453SJohn Marino the backreference to appropriate state_log. */
2549*09d4459fSDaniel Fojt DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
255095b7b453SJohn Marino for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
255195b7b453SJohn Marino {
255295b7b453SJohn Marino Idx subexp_len;
255395b7b453SJohn Marino re_dfastate_t *dest_state;
255495b7b453SJohn Marino struct re_backref_cache_entry *bkref_ent;
255595b7b453SJohn Marino bkref_ent = mctx->bkref_ents + bkc_idx;
255695b7b453SJohn Marino if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
255795b7b453SJohn Marino continue;
255895b7b453SJohn Marino subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
255995b7b453SJohn Marino new_dest_nodes = (subexp_len == 0
256095b7b453SJohn Marino ? dfa->eclosures + dfa->edests[node_idx].elems[0]
256195b7b453SJohn Marino : dfa->eclosures + dfa->nexts[node_idx]);
256295b7b453SJohn Marino dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
256395b7b453SJohn Marino - bkref_ent->subexp_from);
256495b7b453SJohn Marino context = re_string_context_at (&mctx->input, dest_str_idx - 1,
256595b7b453SJohn Marino mctx->eflags);
256695b7b453SJohn Marino dest_state = mctx->state_log[dest_str_idx];
256795b7b453SJohn Marino prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
256895b7b453SJohn Marino : mctx->state_log[cur_str_idx]->nodes.nelem);
2569cf28ed85SJohn Marino /* Add 'new_dest_node' to state_log. */
257095b7b453SJohn Marino if (dest_state == NULL)
257195b7b453SJohn Marino {
257295b7b453SJohn Marino mctx->state_log[dest_str_idx]
257395b7b453SJohn Marino = re_acquire_state_context (&err, dfa, new_dest_nodes,
257495b7b453SJohn Marino context);
2575*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576*09d4459fSDaniel Fojt && err != REG_NOERROR))
257795b7b453SJohn Marino goto free_return;
257895b7b453SJohn Marino }
257995b7b453SJohn Marino else
258095b7b453SJohn Marino {
258195b7b453SJohn Marino re_node_set dest_nodes;
258295b7b453SJohn Marino err = re_node_set_init_union (&dest_nodes,
258395b7b453SJohn Marino dest_state->entrance_nodes,
258495b7b453SJohn Marino new_dest_nodes);
2585*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
258695b7b453SJohn Marino {
258795b7b453SJohn Marino re_node_set_free (&dest_nodes);
258895b7b453SJohn Marino goto free_return;
258995b7b453SJohn Marino }
259095b7b453SJohn Marino mctx->state_log[dest_str_idx]
259195b7b453SJohn Marino = re_acquire_state_context (&err, dfa, &dest_nodes, context);
259295b7b453SJohn Marino re_node_set_free (&dest_nodes);
2593*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2594*09d4459fSDaniel Fojt && err != REG_NOERROR))
259595b7b453SJohn Marino goto free_return;
259695b7b453SJohn Marino }
259795b7b453SJohn Marino /* We need to check recursively if the backreference can epsilon
259895b7b453SJohn Marino transit. */
259995b7b453SJohn Marino if (subexp_len == 0
260095b7b453SJohn Marino && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
260195b7b453SJohn Marino {
260295b7b453SJohn Marino err = check_subexp_matching_top (mctx, new_dest_nodes,
260395b7b453SJohn Marino cur_str_idx);
2604*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
260595b7b453SJohn Marino goto free_return;
260695b7b453SJohn Marino err = transit_state_bkref (mctx, new_dest_nodes);
2607*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
260895b7b453SJohn Marino goto free_return;
260995b7b453SJohn Marino }
261095b7b453SJohn Marino }
261195b7b453SJohn Marino }
261295b7b453SJohn Marino err = REG_NOERROR;
261395b7b453SJohn Marino free_return:
261495b7b453SJohn Marino return err;
261595b7b453SJohn Marino }
261695b7b453SJohn Marino
261795b7b453SJohn Marino /* Enumerate all the candidates which the backreference BKREF_NODE can match
261895b7b453SJohn Marino at BKREF_STR_IDX, and register them by match_ctx_add_entry().
261995b7b453SJohn Marino Note that we might collect inappropriate candidates here.
262095b7b453SJohn Marino However, the cost of checking them strictly here is too high, then we
262195b7b453SJohn Marino delay these checking for prune_impossible_nodes(). */
262295b7b453SJohn Marino
262395b7b453SJohn Marino static reg_errcode_t
2624*09d4459fSDaniel Fojt __attribute_warn_unused_result__
get_subexp(re_match_context_t * mctx,Idx bkref_node,Idx bkref_str_idx)262595b7b453SJohn Marino get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
262695b7b453SJohn Marino {
262795b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
262895b7b453SJohn Marino Idx subexp_num, sub_top_idx;
262995b7b453SJohn Marino const char *buf = (const char *) re_string_get_buffer (&mctx->input);
263095b7b453SJohn Marino /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
263195b7b453SJohn Marino Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2632*09d4459fSDaniel Fojt if (cache_idx != -1)
263395b7b453SJohn Marino {
263495b7b453SJohn Marino const struct re_backref_cache_entry *entry
263595b7b453SJohn Marino = mctx->bkref_ents + cache_idx;
263695b7b453SJohn Marino do
263795b7b453SJohn Marino if (entry->node == bkref_node)
263895b7b453SJohn Marino return REG_NOERROR; /* We already checked it. */
263995b7b453SJohn Marino while (entry++->more);
264095b7b453SJohn Marino }
264195b7b453SJohn Marino
264295b7b453SJohn Marino subexp_num = dfa->nodes[bkref_node].opr.idx;
264395b7b453SJohn Marino
264495b7b453SJohn Marino /* For each sub expression */
264595b7b453SJohn Marino for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
264695b7b453SJohn Marino {
264795b7b453SJohn Marino reg_errcode_t err;
264895b7b453SJohn Marino re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
264995b7b453SJohn Marino re_sub_match_last_t *sub_last;
265095b7b453SJohn Marino Idx sub_last_idx, sl_str, bkref_str_off;
265195b7b453SJohn Marino
265295b7b453SJohn Marino if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
265395b7b453SJohn Marino continue; /* It isn't related. */
265495b7b453SJohn Marino
265595b7b453SJohn Marino sl_str = sub_top->str_idx;
265695b7b453SJohn Marino bkref_str_off = bkref_str_idx;
265795b7b453SJohn Marino /* At first, check the last node of sub expressions we already
265895b7b453SJohn Marino evaluated. */
265995b7b453SJohn Marino for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
266095b7b453SJohn Marino {
266195b7b453SJohn Marino regoff_t sl_str_diff;
266295b7b453SJohn Marino sub_last = sub_top->lasts[sub_last_idx];
266395b7b453SJohn Marino sl_str_diff = sub_last->str_idx - sl_str;
266495b7b453SJohn Marino /* The matched string by the sub expression match with the substring
266595b7b453SJohn Marino at the back reference? */
266695b7b453SJohn Marino if (sl_str_diff > 0)
266795b7b453SJohn Marino {
2668*09d4459fSDaniel Fojt if (__glibc_unlikely (bkref_str_off + sl_str_diff
2669*09d4459fSDaniel Fojt > mctx->input.valid_len))
267095b7b453SJohn Marino {
267195b7b453SJohn Marino /* Not enough chars for a successful match. */
267295b7b453SJohn Marino if (bkref_str_off + sl_str_diff > mctx->input.len)
267395b7b453SJohn Marino break;
267495b7b453SJohn Marino
267595b7b453SJohn Marino err = clean_state_log_if_needed (mctx,
267695b7b453SJohn Marino bkref_str_off
267795b7b453SJohn Marino + sl_str_diff);
2678*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
267995b7b453SJohn Marino return err;
268095b7b453SJohn Marino buf = (const char *) re_string_get_buffer (&mctx->input);
268195b7b453SJohn Marino }
268295b7b453SJohn Marino if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
268395b7b453SJohn Marino /* We don't need to search this sub expression any more. */
268495b7b453SJohn Marino break;
268595b7b453SJohn Marino }
268695b7b453SJohn Marino bkref_str_off += sl_str_diff;
268795b7b453SJohn Marino sl_str += sl_str_diff;
268895b7b453SJohn Marino err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
268995b7b453SJohn Marino bkref_str_idx);
269095b7b453SJohn Marino
269195b7b453SJohn Marino /* Reload buf, since the preceding call might have reallocated
269295b7b453SJohn Marino the buffer. */
269395b7b453SJohn Marino buf = (const char *) re_string_get_buffer (&mctx->input);
269495b7b453SJohn Marino
269595b7b453SJohn Marino if (err == REG_NOMATCH)
269695b7b453SJohn Marino continue;
2697*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
269895b7b453SJohn Marino return err;
269995b7b453SJohn Marino }
270095b7b453SJohn Marino
270195b7b453SJohn Marino if (sub_last_idx < sub_top->nlasts)
270295b7b453SJohn Marino continue;
270395b7b453SJohn Marino if (sub_last_idx > 0)
270495b7b453SJohn Marino ++sl_str;
270595b7b453SJohn Marino /* Then, search for the other last nodes of the sub expression. */
270695b7b453SJohn Marino for (; sl_str <= bkref_str_idx; ++sl_str)
270795b7b453SJohn Marino {
270895b7b453SJohn Marino Idx cls_node;
270995b7b453SJohn Marino regoff_t sl_str_off;
271095b7b453SJohn Marino const re_node_set *nodes;
271195b7b453SJohn Marino sl_str_off = sl_str - sub_top->str_idx;
271295b7b453SJohn Marino /* The matched string by the sub expression match with the substring
271395b7b453SJohn Marino at the back reference? */
271495b7b453SJohn Marino if (sl_str_off > 0)
271595b7b453SJohn Marino {
2716*09d4459fSDaniel Fojt if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
271795b7b453SJohn Marino {
271895b7b453SJohn Marino /* If we are at the end of the input, we cannot match. */
271995b7b453SJohn Marino if (bkref_str_off >= mctx->input.len)
272095b7b453SJohn Marino break;
272195b7b453SJohn Marino
2722680a9cb8SJohn Marino err = extend_buffers (mctx, bkref_str_off + 1);
2723*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
272495b7b453SJohn Marino return err;
272595b7b453SJohn Marino
272695b7b453SJohn Marino buf = (const char *) re_string_get_buffer (&mctx->input);
272795b7b453SJohn Marino }
272895b7b453SJohn Marino if (buf [bkref_str_off++] != buf[sl_str - 1])
272995b7b453SJohn Marino break; /* We don't need to search this sub expression
273095b7b453SJohn Marino any more. */
273195b7b453SJohn Marino }
273295b7b453SJohn Marino if (mctx->state_log[sl_str] == NULL)
273395b7b453SJohn Marino continue;
273495b7b453SJohn Marino /* Does this state have a ')' of the sub expression? */
273595b7b453SJohn Marino nodes = &mctx->state_log[sl_str]->nodes;
273695b7b453SJohn Marino cls_node = find_subexp_node (dfa, nodes, subexp_num,
273795b7b453SJohn Marino OP_CLOSE_SUBEXP);
2738*09d4459fSDaniel Fojt if (cls_node == -1)
273995b7b453SJohn Marino continue; /* No. */
274095b7b453SJohn Marino if (sub_top->path == NULL)
274195b7b453SJohn Marino {
274295b7b453SJohn Marino sub_top->path = calloc (sizeof (state_array_t),
274395b7b453SJohn Marino sl_str - sub_top->str_idx + 1);
274495b7b453SJohn Marino if (sub_top->path == NULL)
274595b7b453SJohn Marino return REG_ESPACE;
274695b7b453SJohn Marino }
274795b7b453SJohn Marino /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
274895b7b453SJohn Marino in the current context? */
274995b7b453SJohn Marino err = check_arrival (mctx, sub_top->path, sub_top->node,
275095b7b453SJohn Marino sub_top->str_idx, cls_node, sl_str,
275195b7b453SJohn Marino OP_CLOSE_SUBEXP);
275295b7b453SJohn Marino if (err == REG_NOMATCH)
275395b7b453SJohn Marino continue;
2754*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
275595b7b453SJohn Marino return err;
275695b7b453SJohn Marino sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2757*09d4459fSDaniel Fojt if (__glibc_unlikely (sub_last == NULL))
275895b7b453SJohn Marino return REG_ESPACE;
275995b7b453SJohn Marino err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
276095b7b453SJohn Marino bkref_str_idx);
2761*09d4459fSDaniel Fojt buf = (const char *) re_string_get_buffer (&mctx->input);
276295b7b453SJohn Marino if (err == REG_NOMATCH)
276395b7b453SJohn Marino continue;
2764*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
2765*09d4459fSDaniel Fojt return err;
276695b7b453SJohn Marino }
276795b7b453SJohn Marino }
276895b7b453SJohn Marino return REG_NOERROR;
276995b7b453SJohn Marino }
277095b7b453SJohn Marino
277195b7b453SJohn Marino /* Helper functions for get_subexp(). */
277295b7b453SJohn Marino
277395b7b453SJohn Marino /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
277495b7b453SJohn Marino If it can arrive, register the sub expression expressed with SUB_TOP
277595b7b453SJohn Marino and SUB_LAST. */
277695b7b453SJohn Marino
277795b7b453SJohn Marino static reg_errcode_t
get_subexp_sub(re_match_context_t * mctx,const re_sub_match_top_t * sub_top,re_sub_match_last_t * sub_last,Idx bkref_node,Idx bkref_str)277895b7b453SJohn Marino get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
277995b7b453SJohn Marino re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
278095b7b453SJohn Marino {
278195b7b453SJohn Marino reg_errcode_t err;
278295b7b453SJohn Marino Idx to_idx;
278395b7b453SJohn Marino /* Can the subexpression arrive the back reference? */
278495b7b453SJohn Marino err = check_arrival (mctx, &sub_last->path, sub_last->node,
278595b7b453SJohn Marino sub_last->str_idx, bkref_node, bkref_str,
278695b7b453SJohn Marino OP_OPEN_SUBEXP);
278795b7b453SJohn Marino if (err != REG_NOERROR)
278895b7b453SJohn Marino return err;
278995b7b453SJohn Marino err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
279095b7b453SJohn Marino sub_last->str_idx);
2791*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
279295b7b453SJohn Marino return err;
279395b7b453SJohn Marino to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
279495b7b453SJohn Marino return clean_state_log_if_needed (mctx, to_idx);
279595b7b453SJohn Marino }
279695b7b453SJohn Marino
279795b7b453SJohn Marino /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
279895b7b453SJohn Marino Search '(' if FL_OPEN, or search ')' otherwise.
279995b7b453SJohn Marino TODO: This function isn't efficient...
280095b7b453SJohn Marino Because there might be more than one nodes whose types are
280195b7b453SJohn Marino OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
280295b7b453SJohn Marino nodes.
280395b7b453SJohn Marino E.g. RE: (a){2} */
280495b7b453SJohn Marino
280595b7b453SJohn Marino static Idx
find_subexp_node(const re_dfa_t * dfa,const re_node_set * nodes,Idx subexp_idx,int type)280695b7b453SJohn Marino find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
280795b7b453SJohn Marino Idx subexp_idx, int type)
280895b7b453SJohn Marino {
280995b7b453SJohn Marino Idx cls_idx;
281095b7b453SJohn Marino for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
281195b7b453SJohn Marino {
281295b7b453SJohn Marino Idx cls_node = nodes->elems[cls_idx];
281395b7b453SJohn Marino const re_token_t *node = dfa->nodes + cls_node;
281495b7b453SJohn Marino if (node->type == type
281595b7b453SJohn Marino && node->opr.idx == subexp_idx)
281695b7b453SJohn Marino return cls_node;
281795b7b453SJohn Marino }
2818*09d4459fSDaniel Fojt return -1;
281995b7b453SJohn Marino }
282095b7b453SJohn Marino
282195b7b453SJohn Marino /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
282295b7b453SJohn Marino LAST_NODE at LAST_STR. We record the path onto PATH since it will be
282395b7b453SJohn Marino heavily reused.
282495b7b453SJohn Marino Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
282595b7b453SJohn Marino
282695b7b453SJohn Marino static reg_errcode_t
2827*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival(re_match_context_t * mctx,state_array_t * path,Idx top_node,Idx top_str,Idx last_node,Idx last_str,int type)282895b7b453SJohn Marino check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
282995b7b453SJohn Marino Idx top_str, Idx last_node, Idx last_str, int type)
283095b7b453SJohn Marino {
283195b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
283295b7b453SJohn Marino reg_errcode_t err = REG_NOERROR;
283395b7b453SJohn Marino Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
283495b7b453SJohn Marino re_dfastate_t *cur_state = NULL;
283595b7b453SJohn Marino re_node_set *cur_nodes, next_nodes;
283695b7b453SJohn Marino re_dfastate_t **backup_state_log;
283795b7b453SJohn Marino unsigned int context;
283895b7b453SJohn Marino
283995b7b453SJohn Marino subexp_num = dfa->nodes[top_node].opr.idx;
284095b7b453SJohn Marino /* Extend the buffer if we need. */
2841*09d4459fSDaniel Fojt if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
284295b7b453SJohn Marino {
284395b7b453SJohn Marino re_dfastate_t **new_array;
284495b7b453SJohn Marino Idx old_alloc = path->alloc;
2845cf28ed85SJohn Marino Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2846cf28ed85SJohn Marino Idx new_alloc;
2847*09d4459fSDaniel Fojt if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2848cf28ed85SJohn Marino return REG_ESPACE;
2849cf28ed85SJohn Marino new_alloc = old_alloc + incr_alloc;
2850*09d4459fSDaniel Fojt if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
285195b7b453SJohn Marino return REG_ESPACE;
285295b7b453SJohn Marino new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2853*09d4459fSDaniel Fojt if (__glibc_unlikely (new_array == NULL))
285495b7b453SJohn Marino return REG_ESPACE;
285595b7b453SJohn Marino path->array = new_array;
285695b7b453SJohn Marino path->alloc = new_alloc;
285795b7b453SJohn Marino memset (new_array + old_alloc, '\0',
285895b7b453SJohn Marino sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
285995b7b453SJohn Marino }
286095b7b453SJohn Marino
286195b7b453SJohn Marino str_idx = path->next_idx ? path->next_idx : top_str;
286295b7b453SJohn Marino
286395b7b453SJohn Marino /* Temporary modify MCTX. */
286495b7b453SJohn Marino backup_state_log = mctx->state_log;
286595b7b453SJohn Marino backup_cur_idx = mctx->input.cur_idx;
286695b7b453SJohn Marino mctx->state_log = path->array;
286795b7b453SJohn Marino mctx->input.cur_idx = str_idx;
286895b7b453SJohn Marino
286995b7b453SJohn Marino /* Setup initial node set. */
287095b7b453SJohn Marino context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
287195b7b453SJohn Marino if (str_idx == top_str)
287295b7b453SJohn Marino {
287395b7b453SJohn Marino err = re_node_set_init_1 (&next_nodes, top_node);
2874*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
287595b7b453SJohn Marino return err;
287695b7b453SJohn Marino err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2877*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
287895b7b453SJohn Marino {
287995b7b453SJohn Marino re_node_set_free (&next_nodes);
288095b7b453SJohn Marino return err;
288195b7b453SJohn Marino }
288295b7b453SJohn Marino }
288395b7b453SJohn Marino else
288495b7b453SJohn Marino {
288595b7b453SJohn Marino cur_state = mctx->state_log[str_idx];
288695b7b453SJohn Marino if (cur_state && cur_state->has_backref)
288795b7b453SJohn Marino {
288895b7b453SJohn Marino err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2889*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
289095b7b453SJohn Marino return err;
289195b7b453SJohn Marino }
289295b7b453SJohn Marino else
289395b7b453SJohn Marino re_node_set_init_empty (&next_nodes);
289495b7b453SJohn Marino }
289595b7b453SJohn Marino if (str_idx == top_str || (cur_state && cur_state->has_backref))
289695b7b453SJohn Marino {
289795b7b453SJohn Marino if (next_nodes.nelem)
289895b7b453SJohn Marino {
289995b7b453SJohn Marino err = expand_bkref_cache (mctx, &next_nodes, str_idx,
290095b7b453SJohn Marino subexp_num, type);
2901*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
290295b7b453SJohn Marino {
290395b7b453SJohn Marino re_node_set_free (&next_nodes);
290495b7b453SJohn Marino return err;
290595b7b453SJohn Marino }
290695b7b453SJohn Marino }
290795b7b453SJohn Marino cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2908*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
290995b7b453SJohn Marino {
291095b7b453SJohn Marino re_node_set_free (&next_nodes);
291195b7b453SJohn Marino return err;
291295b7b453SJohn Marino }
291395b7b453SJohn Marino mctx->state_log[str_idx] = cur_state;
291495b7b453SJohn Marino }
291595b7b453SJohn Marino
291695b7b453SJohn Marino for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
291795b7b453SJohn Marino {
291895b7b453SJohn Marino re_node_set_empty (&next_nodes);
291995b7b453SJohn Marino if (mctx->state_log[str_idx + 1])
292095b7b453SJohn Marino {
292195b7b453SJohn Marino err = re_node_set_merge (&next_nodes,
292295b7b453SJohn Marino &mctx->state_log[str_idx + 1]->nodes);
2923*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
292495b7b453SJohn Marino {
292595b7b453SJohn Marino re_node_set_free (&next_nodes);
292695b7b453SJohn Marino return err;
292795b7b453SJohn Marino }
292895b7b453SJohn Marino }
292995b7b453SJohn Marino if (cur_state)
293095b7b453SJohn Marino {
293195b7b453SJohn Marino err = check_arrival_add_next_nodes (mctx, str_idx,
293295b7b453SJohn Marino &cur_state->non_eps_nodes,
293395b7b453SJohn Marino &next_nodes);
2934*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
293595b7b453SJohn Marino {
293695b7b453SJohn Marino re_node_set_free (&next_nodes);
293795b7b453SJohn Marino return err;
293895b7b453SJohn Marino }
293995b7b453SJohn Marino }
294095b7b453SJohn Marino ++str_idx;
294195b7b453SJohn Marino if (next_nodes.nelem)
294295b7b453SJohn Marino {
294395b7b453SJohn Marino err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2944*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
294595b7b453SJohn Marino {
294695b7b453SJohn Marino re_node_set_free (&next_nodes);
294795b7b453SJohn Marino return err;
294895b7b453SJohn Marino }
294995b7b453SJohn Marino err = expand_bkref_cache (mctx, &next_nodes, str_idx,
295095b7b453SJohn Marino subexp_num, type);
2951*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
295295b7b453SJohn Marino {
295395b7b453SJohn Marino re_node_set_free (&next_nodes);
295495b7b453SJohn Marino return err;
295595b7b453SJohn Marino }
295695b7b453SJohn Marino }
295795b7b453SJohn Marino context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
295895b7b453SJohn Marino cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2959*09d4459fSDaniel Fojt if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
296095b7b453SJohn Marino {
296195b7b453SJohn Marino re_node_set_free (&next_nodes);
296295b7b453SJohn Marino return err;
296395b7b453SJohn Marino }
296495b7b453SJohn Marino mctx->state_log[str_idx] = cur_state;
296595b7b453SJohn Marino null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
296695b7b453SJohn Marino }
296795b7b453SJohn Marino re_node_set_free (&next_nodes);
296895b7b453SJohn Marino cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
296995b7b453SJohn Marino : &mctx->state_log[last_str]->nodes);
297095b7b453SJohn Marino path->next_idx = str_idx;
297195b7b453SJohn Marino
297295b7b453SJohn Marino /* Fix MCTX. */
297395b7b453SJohn Marino mctx->state_log = backup_state_log;
297495b7b453SJohn Marino mctx->input.cur_idx = backup_cur_idx;
297595b7b453SJohn Marino
297695b7b453SJohn Marino /* Then check the current node set has the node LAST_NODE. */
297795b7b453SJohn Marino if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
297895b7b453SJohn Marino return REG_NOERROR;
297995b7b453SJohn Marino
298095b7b453SJohn Marino return REG_NOMATCH;
298195b7b453SJohn Marino }
298295b7b453SJohn Marino
298395b7b453SJohn Marino /* Helper functions for check_arrival. */
298495b7b453SJohn Marino
298595b7b453SJohn Marino /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
298695b7b453SJohn Marino to NEXT_NODES.
298795b7b453SJohn Marino TODO: This function is similar to the functions transit_state*(),
298895b7b453SJohn Marino however this function has many additional works.
298995b7b453SJohn Marino Can't we unify them? */
299095b7b453SJohn Marino
299195b7b453SJohn Marino static reg_errcode_t
2992*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival_add_next_nodes(re_match_context_t * mctx,Idx str_idx,re_node_set * cur_nodes,re_node_set * next_nodes)299395b7b453SJohn Marino check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
299495b7b453SJohn Marino re_node_set *cur_nodes, re_node_set *next_nodes)
299595b7b453SJohn Marino {
299695b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
299795b7b453SJohn Marino bool ok;
299895b7b453SJohn Marino Idx cur_idx;
299995b7b453SJohn Marino #ifdef RE_ENABLE_I18N
300095b7b453SJohn Marino reg_errcode_t err = REG_NOERROR;
300195b7b453SJohn Marino #endif
300295b7b453SJohn Marino re_node_set union_set;
300395b7b453SJohn Marino re_node_set_init_empty (&union_set);
300495b7b453SJohn Marino for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
300595b7b453SJohn Marino {
300695b7b453SJohn Marino int naccepted = 0;
300795b7b453SJohn Marino Idx cur_node = cur_nodes->elems[cur_idx];
3008*09d4459fSDaniel Fojt DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
3009*09d4459fSDaniel Fojt
301095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
3011cf28ed85SJohn Marino /* If the node may accept "multi byte". */
301295b7b453SJohn Marino if (dfa->nodes[cur_node].accept_mb)
301395b7b453SJohn Marino {
301495b7b453SJohn Marino naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
301595b7b453SJohn Marino str_idx);
301695b7b453SJohn Marino if (naccepted > 1)
301795b7b453SJohn Marino {
301895b7b453SJohn Marino re_dfastate_t *dest_state;
301995b7b453SJohn Marino Idx next_node = dfa->nexts[cur_node];
302095b7b453SJohn Marino Idx next_idx = str_idx + naccepted;
302195b7b453SJohn Marino dest_state = mctx->state_log[next_idx];
302295b7b453SJohn Marino re_node_set_empty (&union_set);
302395b7b453SJohn Marino if (dest_state)
302495b7b453SJohn Marino {
302595b7b453SJohn Marino err = re_node_set_merge (&union_set, &dest_state->nodes);
3026*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
302795b7b453SJohn Marino {
302895b7b453SJohn Marino re_node_set_free (&union_set);
302995b7b453SJohn Marino return err;
303095b7b453SJohn Marino }
303195b7b453SJohn Marino }
303295b7b453SJohn Marino ok = re_node_set_insert (&union_set, next_node);
3033*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
303495b7b453SJohn Marino {
303595b7b453SJohn Marino re_node_set_free (&union_set);
303695b7b453SJohn Marino return REG_ESPACE;
303795b7b453SJohn Marino }
303895b7b453SJohn Marino mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
303995b7b453SJohn Marino &union_set);
3040*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3041*09d4459fSDaniel Fojt && err != REG_NOERROR))
304295b7b453SJohn Marino {
304395b7b453SJohn Marino re_node_set_free (&union_set);
304495b7b453SJohn Marino return err;
304595b7b453SJohn Marino }
304695b7b453SJohn Marino }
304795b7b453SJohn Marino }
304895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
304995b7b453SJohn Marino if (naccepted
305095b7b453SJohn Marino || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
305195b7b453SJohn Marino {
305295b7b453SJohn Marino ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3053*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
305495b7b453SJohn Marino {
305595b7b453SJohn Marino re_node_set_free (&union_set);
305695b7b453SJohn Marino return REG_ESPACE;
305795b7b453SJohn Marino }
305895b7b453SJohn Marino }
305995b7b453SJohn Marino }
306095b7b453SJohn Marino re_node_set_free (&union_set);
306195b7b453SJohn Marino return REG_NOERROR;
306295b7b453SJohn Marino }
306395b7b453SJohn Marino
306495b7b453SJohn Marino /* For all the nodes in CUR_NODES, add the epsilon closures of them to
306595b7b453SJohn Marino CUR_NODES, however exclude the nodes which are:
306695b7b453SJohn Marino - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
306795b7b453SJohn Marino - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
306895b7b453SJohn Marino */
306995b7b453SJohn Marino
307095b7b453SJohn Marino static reg_errcode_t
check_arrival_expand_ecl(const re_dfa_t * dfa,re_node_set * cur_nodes,Idx ex_subexp,int type)307195b7b453SJohn Marino check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
307295b7b453SJohn Marino Idx ex_subexp, int type)
307395b7b453SJohn Marino {
307495b7b453SJohn Marino reg_errcode_t err;
307595b7b453SJohn Marino Idx idx, outside_node;
307695b7b453SJohn Marino re_node_set new_nodes;
3077*09d4459fSDaniel Fojt DEBUG_ASSERT (cur_nodes->nelem);
307895b7b453SJohn Marino err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3079*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
308095b7b453SJohn Marino return err;
308195b7b453SJohn Marino /* Create a new node set NEW_NODES with the nodes which are epsilon
308295b7b453SJohn Marino closures of the node in CUR_NODES. */
308395b7b453SJohn Marino
308495b7b453SJohn Marino for (idx = 0; idx < cur_nodes->nelem; ++idx)
308595b7b453SJohn Marino {
308695b7b453SJohn Marino Idx cur_node = cur_nodes->elems[idx];
308795b7b453SJohn Marino const re_node_set *eclosure = dfa->eclosures + cur_node;
308895b7b453SJohn Marino outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3089*09d4459fSDaniel Fojt if (outside_node == -1)
309095b7b453SJohn Marino {
309195b7b453SJohn Marino /* There are no problematic nodes, just merge them. */
309295b7b453SJohn Marino err = re_node_set_merge (&new_nodes, eclosure);
3093*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
309495b7b453SJohn Marino {
309595b7b453SJohn Marino re_node_set_free (&new_nodes);
309695b7b453SJohn Marino return err;
309795b7b453SJohn Marino }
309895b7b453SJohn Marino }
309995b7b453SJohn Marino else
310095b7b453SJohn Marino {
310195b7b453SJohn Marino /* There are problematic nodes, re-calculate incrementally. */
310295b7b453SJohn Marino err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
310395b7b453SJohn Marino ex_subexp, type);
3104*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
310595b7b453SJohn Marino {
310695b7b453SJohn Marino re_node_set_free (&new_nodes);
310795b7b453SJohn Marino return err;
310895b7b453SJohn Marino }
310995b7b453SJohn Marino }
311095b7b453SJohn Marino }
311195b7b453SJohn Marino re_node_set_free (cur_nodes);
311295b7b453SJohn Marino *cur_nodes = new_nodes;
311395b7b453SJohn Marino return REG_NOERROR;
311495b7b453SJohn Marino }
311595b7b453SJohn Marino
311695b7b453SJohn Marino /* Helper function for check_arrival_expand_ecl.
311795b7b453SJohn Marino Check incrementally the epsilon closure of TARGET, and if it isn't
311895b7b453SJohn Marino problematic append it to DST_NODES. */
311995b7b453SJohn Marino
312095b7b453SJohn Marino static reg_errcode_t
3121*09d4459fSDaniel Fojt __attribute_warn_unused_result__
check_arrival_expand_ecl_sub(const re_dfa_t * dfa,re_node_set * dst_nodes,Idx target,Idx ex_subexp,int type)312295b7b453SJohn Marino check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
312395b7b453SJohn Marino Idx target, Idx ex_subexp, int type)
312495b7b453SJohn Marino {
312595b7b453SJohn Marino Idx cur_node;
312695b7b453SJohn Marino for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
312795b7b453SJohn Marino {
312895b7b453SJohn Marino bool ok;
312995b7b453SJohn Marino
313095b7b453SJohn Marino if (dfa->nodes[cur_node].type == type
313195b7b453SJohn Marino && dfa->nodes[cur_node].opr.idx == ex_subexp)
313295b7b453SJohn Marino {
313395b7b453SJohn Marino if (type == OP_CLOSE_SUBEXP)
313495b7b453SJohn Marino {
313595b7b453SJohn Marino ok = re_node_set_insert (dst_nodes, cur_node);
3136*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
313795b7b453SJohn Marino return REG_ESPACE;
313895b7b453SJohn Marino }
313995b7b453SJohn Marino break;
314095b7b453SJohn Marino }
314195b7b453SJohn Marino ok = re_node_set_insert (dst_nodes, cur_node);
3142*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
314395b7b453SJohn Marino return REG_ESPACE;
314495b7b453SJohn Marino if (dfa->edests[cur_node].nelem == 0)
314595b7b453SJohn Marino break;
314695b7b453SJohn Marino if (dfa->edests[cur_node].nelem == 2)
314795b7b453SJohn Marino {
314895b7b453SJohn Marino reg_errcode_t err;
314995b7b453SJohn Marino err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
315095b7b453SJohn Marino dfa->edests[cur_node].elems[1],
315195b7b453SJohn Marino ex_subexp, type);
3152*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
315395b7b453SJohn Marino return err;
315495b7b453SJohn Marino }
315595b7b453SJohn Marino cur_node = dfa->edests[cur_node].elems[0];
315695b7b453SJohn Marino }
315795b7b453SJohn Marino return REG_NOERROR;
315895b7b453SJohn Marino }
315995b7b453SJohn Marino
316095b7b453SJohn Marino
316195b7b453SJohn Marino /* For all the back references in the current state, calculate the
316295b7b453SJohn Marino destination of the back references by the appropriate entry
316395b7b453SJohn Marino in MCTX->BKREF_ENTS. */
316495b7b453SJohn Marino
316595b7b453SJohn Marino static reg_errcode_t
3166*09d4459fSDaniel Fojt __attribute_warn_unused_result__
expand_bkref_cache(re_match_context_t * mctx,re_node_set * cur_nodes,Idx cur_str,Idx subexp_num,int type)316795b7b453SJohn Marino expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
316895b7b453SJohn Marino Idx cur_str, Idx subexp_num, int type)
316995b7b453SJohn Marino {
317095b7b453SJohn Marino const re_dfa_t *const dfa = mctx->dfa;
317195b7b453SJohn Marino reg_errcode_t err;
317295b7b453SJohn Marino Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
317395b7b453SJohn Marino struct re_backref_cache_entry *ent;
317495b7b453SJohn Marino
3175*09d4459fSDaniel Fojt if (cache_idx_start == -1)
317695b7b453SJohn Marino return REG_NOERROR;
317795b7b453SJohn Marino
317895b7b453SJohn Marino restart:
317995b7b453SJohn Marino ent = mctx->bkref_ents + cache_idx_start;
318095b7b453SJohn Marino do
318195b7b453SJohn Marino {
318295b7b453SJohn Marino Idx to_idx, next_node;
318395b7b453SJohn Marino
318495b7b453SJohn Marino /* Is this entry ENT is appropriate? */
318595b7b453SJohn Marino if (!re_node_set_contains (cur_nodes, ent->node))
318695b7b453SJohn Marino continue; /* No. */
318795b7b453SJohn Marino
318895b7b453SJohn Marino to_idx = cur_str + ent->subexp_to - ent->subexp_from;
318995b7b453SJohn Marino /* Calculate the destination of the back reference, and append it
319095b7b453SJohn Marino to MCTX->STATE_LOG. */
319195b7b453SJohn Marino if (to_idx == cur_str)
319295b7b453SJohn Marino {
319395b7b453SJohn Marino /* The backreference did epsilon transit, we must re-check all the
319495b7b453SJohn Marino node in the current state. */
319595b7b453SJohn Marino re_node_set new_dests;
319695b7b453SJohn Marino reg_errcode_t err2, err3;
319795b7b453SJohn Marino next_node = dfa->edests[ent->node].elems[0];
319895b7b453SJohn Marino if (re_node_set_contains (cur_nodes, next_node))
319995b7b453SJohn Marino continue;
320095b7b453SJohn Marino err = re_node_set_init_1 (&new_dests, next_node);
320195b7b453SJohn Marino err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
320295b7b453SJohn Marino err3 = re_node_set_merge (cur_nodes, &new_dests);
320395b7b453SJohn Marino re_node_set_free (&new_dests);
3204*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3205*09d4459fSDaniel Fojt || err3 != REG_NOERROR))
320695b7b453SJohn Marino {
320795b7b453SJohn Marino err = (err != REG_NOERROR ? err
320895b7b453SJohn Marino : (err2 != REG_NOERROR ? err2 : err3));
320995b7b453SJohn Marino return err;
321095b7b453SJohn Marino }
321195b7b453SJohn Marino /* TODO: It is still inefficient... */
321295b7b453SJohn Marino goto restart;
321395b7b453SJohn Marino }
321495b7b453SJohn Marino else
321595b7b453SJohn Marino {
321695b7b453SJohn Marino re_node_set union_set;
321795b7b453SJohn Marino next_node = dfa->nexts[ent->node];
321895b7b453SJohn Marino if (mctx->state_log[to_idx])
321995b7b453SJohn Marino {
322095b7b453SJohn Marino bool ok;
322195b7b453SJohn Marino if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
322295b7b453SJohn Marino next_node))
322395b7b453SJohn Marino continue;
322495b7b453SJohn Marino err = re_node_set_init_copy (&union_set,
322595b7b453SJohn Marino &mctx->state_log[to_idx]->nodes);
322695b7b453SJohn Marino ok = re_node_set_insert (&union_set, next_node);
3227*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR || ! ok))
322895b7b453SJohn Marino {
322995b7b453SJohn Marino re_node_set_free (&union_set);
323095b7b453SJohn Marino err = err != REG_NOERROR ? err : REG_ESPACE;
323195b7b453SJohn Marino return err;
323295b7b453SJohn Marino }
323395b7b453SJohn Marino }
323495b7b453SJohn Marino else
323595b7b453SJohn Marino {
323695b7b453SJohn Marino err = re_node_set_init_1 (&union_set, next_node);
3237*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
323895b7b453SJohn Marino return err;
323995b7b453SJohn Marino }
324095b7b453SJohn Marino mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
324195b7b453SJohn Marino re_node_set_free (&union_set);
3242*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3243*09d4459fSDaniel Fojt && err != REG_NOERROR))
324495b7b453SJohn Marino return err;
324595b7b453SJohn Marino }
324695b7b453SJohn Marino }
324795b7b453SJohn Marino while (ent++->more);
324895b7b453SJohn Marino return REG_NOERROR;
324995b7b453SJohn Marino }
325095b7b453SJohn Marino
325195b7b453SJohn Marino /* Build transition table for the state.
325295b7b453SJohn Marino Return true if successful. */
325395b7b453SJohn Marino
325495b7b453SJohn Marino static bool
build_trtable(const re_dfa_t * dfa,re_dfastate_t * state)325595b7b453SJohn Marino build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
325695b7b453SJohn Marino {
325795b7b453SJohn Marino reg_errcode_t err;
325895b7b453SJohn Marino Idx i, j;
325995b7b453SJohn Marino int ch;
326095b7b453SJohn Marino bool need_word_trtable = false;
326195b7b453SJohn Marino bitset_word_t elem, mask;
326295b7b453SJohn Marino bool dests_node_malloced = false;
326395b7b453SJohn Marino bool dest_states_malloced = false;
3264cf28ed85SJohn Marino Idx ndests; /* Number of the destination states from 'state'. */
326595b7b453SJohn Marino re_dfastate_t **trtable;
326695b7b453SJohn Marino re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
326795b7b453SJohn Marino re_node_set follows, *dests_node;
326895b7b453SJohn Marino bitset_t *dests_ch;
326995b7b453SJohn Marino bitset_t acceptable;
327095b7b453SJohn Marino
327195b7b453SJohn Marino struct dests_alloc
327295b7b453SJohn Marino {
327395b7b453SJohn Marino re_node_set dests_node[SBC_MAX];
327495b7b453SJohn Marino bitset_t dests_ch[SBC_MAX];
327595b7b453SJohn Marino } *dests_alloc;
327695b7b453SJohn Marino
327795b7b453SJohn Marino /* We build DFA states which corresponds to the destination nodes
3278cf28ed85SJohn Marino from 'state'. 'dests_node[i]' represents the nodes which i-th
3279cf28ed85SJohn Marino destination state contains, and 'dests_ch[i]' represents the
328095b7b453SJohn Marino characters which i-th destination state accepts. */
328195b7b453SJohn Marino if (__libc_use_alloca (sizeof (struct dests_alloc)))
328295b7b453SJohn Marino dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
328395b7b453SJohn Marino else
328495b7b453SJohn Marino {
328595b7b453SJohn Marino dests_alloc = re_malloc (struct dests_alloc, 1);
3286*09d4459fSDaniel Fojt if (__glibc_unlikely (dests_alloc == NULL))
328795b7b453SJohn Marino return false;
328895b7b453SJohn Marino dests_node_malloced = true;
328995b7b453SJohn Marino }
329095b7b453SJohn Marino dests_node = dests_alloc->dests_node;
329195b7b453SJohn Marino dests_ch = dests_alloc->dests_ch;
329295b7b453SJohn Marino
3293cf28ed85SJohn Marino /* Initialize transition table. */
329495b7b453SJohn Marino state->word_trtable = state->trtable = NULL;
329595b7b453SJohn Marino
3296cf28ed85SJohn Marino /* At first, group all nodes belonging to 'state' into several
329795b7b453SJohn Marino destinations. */
329895b7b453SJohn Marino ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3299*09d4459fSDaniel Fojt if (__glibc_unlikely (ndests <= 0))
330095b7b453SJohn Marino {
330195b7b453SJohn Marino if (dests_node_malloced)
3302*09d4459fSDaniel Fojt re_free (dests_alloc);
3303cf28ed85SJohn Marino /* Return false in case of an error, true otherwise. */
330495b7b453SJohn Marino if (ndests == 0)
330595b7b453SJohn Marino {
330695b7b453SJohn Marino state->trtable = (re_dfastate_t **)
330795b7b453SJohn Marino calloc (sizeof (re_dfastate_t *), SBC_MAX);
3308*09d4459fSDaniel Fojt if (__glibc_unlikely (state->trtable == NULL))
3309200fbe8dSJohn Marino return false;
331095b7b453SJohn Marino return true;
331195b7b453SJohn Marino }
331295b7b453SJohn Marino return false;
331395b7b453SJohn Marino }
331495b7b453SJohn Marino
331595b7b453SJohn Marino err = re_node_set_alloc (&follows, ndests + 1);
3316*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
331795b7b453SJohn Marino goto out_free;
331895b7b453SJohn Marino
331995b7b453SJohn Marino /* Avoid arithmetic overflow in size calculation. */
3320*09d4459fSDaniel Fojt size_t ndests_max
3321*09d4459fSDaniel Fojt = ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3322*09d4459fSDaniel Fojt / (3 * sizeof (re_dfastate_t *)));
3323*09d4459fSDaniel Fojt if (__glibc_unlikely (ndests_max < ndests))
332495b7b453SJohn Marino goto out_free;
332595b7b453SJohn Marino
332695b7b453SJohn Marino if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
332795b7b453SJohn Marino + ndests * 3 * sizeof (re_dfastate_t *)))
332895b7b453SJohn Marino dest_states = (re_dfastate_t **)
332995b7b453SJohn Marino alloca (ndests * 3 * sizeof (re_dfastate_t *));
333095b7b453SJohn Marino else
333195b7b453SJohn Marino {
3332*09d4459fSDaniel Fojt dest_states = re_malloc (re_dfastate_t *, ndests * 3);
3333*09d4459fSDaniel Fojt if (__glibc_unlikely (dest_states == NULL))
333495b7b453SJohn Marino {
333595b7b453SJohn Marino out_free:
333695b7b453SJohn Marino if (dest_states_malloced)
3337*09d4459fSDaniel Fojt re_free (dest_states);
333895b7b453SJohn Marino re_node_set_free (&follows);
333995b7b453SJohn Marino for (i = 0; i < ndests; ++i)
334095b7b453SJohn Marino re_node_set_free (dests_node + i);
334195b7b453SJohn Marino if (dests_node_malloced)
3342*09d4459fSDaniel Fojt re_free (dests_alloc);
334395b7b453SJohn Marino return false;
334495b7b453SJohn Marino }
334595b7b453SJohn Marino dest_states_malloced = true;
334695b7b453SJohn Marino }
334795b7b453SJohn Marino dest_states_word = dest_states + ndests;
334895b7b453SJohn Marino dest_states_nl = dest_states_word + ndests;
334995b7b453SJohn Marino bitset_empty (acceptable);
335095b7b453SJohn Marino
335195b7b453SJohn Marino /* Then build the states for all destinations. */
335295b7b453SJohn Marino for (i = 0; i < ndests; ++i)
335395b7b453SJohn Marino {
335495b7b453SJohn Marino Idx next_node;
335595b7b453SJohn Marino re_node_set_empty (&follows);
335695b7b453SJohn Marino /* Merge the follows of this destination states. */
335795b7b453SJohn Marino for (j = 0; j < dests_node[i].nelem; ++j)
335895b7b453SJohn Marino {
335995b7b453SJohn Marino next_node = dfa->nexts[dests_node[i].elems[j]];
3360*09d4459fSDaniel Fojt if (next_node != -1)
336195b7b453SJohn Marino {
336295b7b453SJohn Marino err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3363*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
336495b7b453SJohn Marino goto out_free;
336595b7b453SJohn Marino }
336695b7b453SJohn Marino }
336795b7b453SJohn Marino dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3368*09d4459fSDaniel Fojt if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
336995b7b453SJohn Marino goto out_free;
337095b7b453SJohn Marino /* If the new state has context constraint,
337195b7b453SJohn Marino build appropriate states for these contexts. */
337295b7b453SJohn Marino if (dest_states[i]->has_constraint)
337395b7b453SJohn Marino {
337495b7b453SJohn Marino dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
337595b7b453SJohn Marino CONTEXT_WORD);
3376*09d4459fSDaniel Fojt if (__glibc_unlikely (dest_states_word[i] == NULL
3377*09d4459fSDaniel Fojt && err != REG_NOERROR))
337895b7b453SJohn Marino goto out_free;
337995b7b453SJohn Marino
338095b7b453SJohn Marino if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
338195b7b453SJohn Marino need_word_trtable = true;
338295b7b453SJohn Marino
338395b7b453SJohn Marino dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
338495b7b453SJohn Marino CONTEXT_NEWLINE);
3385*09d4459fSDaniel Fojt if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
338695b7b453SJohn Marino goto out_free;
338795b7b453SJohn Marino }
338895b7b453SJohn Marino else
338995b7b453SJohn Marino {
339095b7b453SJohn Marino dest_states_word[i] = dest_states[i];
339195b7b453SJohn Marino dest_states_nl[i] = dest_states[i];
339295b7b453SJohn Marino }
339395b7b453SJohn Marino bitset_merge (acceptable, dests_ch[i]);
339495b7b453SJohn Marino }
339595b7b453SJohn Marino
3396*09d4459fSDaniel Fojt if (!__glibc_unlikely (need_word_trtable))
339795b7b453SJohn Marino {
339895b7b453SJohn Marino /* We don't care about whether the following character is a word
339995b7b453SJohn Marino character, or we are in a single-byte character set so we can
340095b7b453SJohn Marino discern by looking at the character code: allocate a
340195b7b453SJohn Marino 256-entry transition table. */
340295b7b453SJohn Marino trtable = state->trtable =
340395b7b453SJohn Marino (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3404*09d4459fSDaniel Fojt if (__glibc_unlikely (trtable == NULL))
340595b7b453SJohn Marino goto out_free;
340695b7b453SJohn Marino
340795b7b453SJohn Marino /* For all characters ch...: */
340895b7b453SJohn Marino for (i = 0; i < BITSET_WORDS; ++i)
340995b7b453SJohn Marino for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
341095b7b453SJohn Marino elem;
341195b7b453SJohn Marino mask <<= 1, elem >>= 1, ++ch)
3412*09d4459fSDaniel Fojt if (__glibc_unlikely (elem & 1))
341395b7b453SJohn Marino {
341495b7b453SJohn Marino /* There must be exactly one destination which accepts
341595b7b453SJohn Marino character ch. See group_nodes_into_DFAstates. */
341695b7b453SJohn Marino for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
341795b7b453SJohn Marino ;
341895b7b453SJohn Marino
341995b7b453SJohn Marino /* j-th destination accepts the word character ch. */
342095b7b453SJohn Marino if (dfa->word_char[i] & mask)
342195b7b453SJohn Marino trtable[ch] = dest_states_word[j];
342295b7b453SJohn Marino else
342395b7b453SJohn Marino trtable[ch] = dest_states[j];
342495b7b453SJohn Marino }
342595b7b453SJohn Marino }
342695b7b453SJohn Marino else
342795b7b453SJohn Marino {
342895b7b453SJohn Marino /* We care about whether the following character is a word
342995b7b453SJohn Marino character, and we are in a multi-byte character set: discern
343095b7b453SJohn Marino by looking at the character code: build two 256-entry
343195b7b453SJohn Marino transition tables, one starting at trtable[0] and one
343295b7b453SJohn Marino starting at trtable[SBC_MAX]. */
343395b7b453SJohn Marino trtable = state->word_trtable =
343495b7b453SJohn Marino (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3435*09d4459fSDaniel Fojt if (__glibc_unlikely (trtable == NULL))
343695b7b453SJohn Marino goto out_free;
343795b7b453SJohn Marino
343895b7b453SJohn Marino /* For all characters ch...: */
343995b7b453SJohn Marino for (i = 0; i < BITSET_WORDS; ++i)
344095b7b453SJohn Marino for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
344195b7b453SJohn Marino elem;
344295b7b453SJohn Marino mask <<= 1, elem >>= 1, ++ch)
3443*09d4459fSDaniel Fojt if (__glibc_unlikely (elem & 1))
344495b7b453SJohn Marino {
344595b7b453SJohn Marino /* There must be exactly one destination which accepts
344695b7b453SJohn Marino character ch. See group_nodes_into_DFAstates. */
344795b7b453SJohn Marino for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
344895b7b453SJohn Marino ;
344995b7b453SJohn Marino
345095b7b453SJohn Marino /* j-th destination accepts the word character ch. */
345195b7b453SJohn Marino trtable[ch] = dest_states[j];
345295b7b453SJohn Marino trtable[ch + SBC_MAX] = dest_states_word[j];
345395b7b453SJohn Marino }
345495b7b453SJohn Marino }
345595b7b453SJohn Marino
345695b7b453SJohn Marino /* new line */
345795b7b453SJohn Marino if (bitset_contain (acceptable, NEWLINE_CHAR))
345895b7b453SJohn Marino {
345995b7b453SJohn Marino /* The current state accepts newline character. */
346095b7b453SJohn Marino for (j = 0; j < ndests; ++j)
346195b7b453SJohn Marino if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
346295b7b453SJohn Marino {
346395b7b453SJohn Marino /* k-th destination accepts newline character. */
346495b7b453SJohn Marino trtable[NEWLINE_CHAR] = dest_states_nl[j];
346595b7b453SJohn Marino if (need_word_trtable)
346695b7b453SJohn Marino trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
346795b7b453SJohn Marino /* There must be only one destination which accepts
346895b7b453SJohn Marino newline. See group_nodes_into_DFAstates. */
346995b7b453SJohn Marino break;
347095b7b453SJohn Marino }
347195b7b453SJohn Marino }
347295b7b453SJohn Marino
347395b7b453SJohn Marino if (dest_states_malloced)
3474*09d4459fSDaniel Fojt re_free (dest_states);
347595b7b453SJohn Marino
347695b7b453SJohn Marino re_node_set_free (&follows);
347795b7b453SJohn Marino for (i = 0; i < ndests; ++i)
347895b7b453SJohn Marino re_node_set_free (dests_node + i);
347995b7b453SJohn Marino
348095b7b453SJohn Marino if (dests_node_malloced)
3481*09d4459fSDaniel Fojt re_free (dests_alloc);
348295b7b453SJohn Marino
348395b7b453SJohn Marino return true;
348495b7b453SJohn Marino }
348595b7b453SJohn Marino
348695b7b453SJohn Marino /* Group all nodes belonging to STATE into several destinations.
348795b7b453SJohn Marino Then for all destinations, set the nodes belonging to the destination
348895b7b453SJohn Marino to DESTS_NODE[i] and set the characters accepted by the destination
348995b7b453SJohn Marino to DEST_CH[i]. This function return the number of destinations. */
349095b7b453SJohn Marino
349195b7b453SJohn Marino static Idx
group_nodes_into_DFAstates(const re_dfa_t * dfa,const re_dfastate_t * state,re_node_set * dests_node,bitset_t * dests_ch)349295b7b453SJohn Marino group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
349395b7b453SJohn Marino re_node_set *dests_node, bitset_t *dests_ch)
349495b7b453SJohn Marino {
349595b7b453SJohn Marino reg_errcode_t err;
349695b7b453SJohn Marino bool ok;
349795b7b453SJohn Marino Idx i, j, k;
3498cf28ed85SJohn Marino Idx ndests; /* Number of the destinations from 'state'. */
349995b7b453SJohn Marino bitset_t accepts; /* Characters a node can accept. */
350095b7b453SJohn Marino const re_node_set *cur_nodes = &state->nodes;
350195b7b453SJohn Marino bitset_empty (accepts);
350295b7b453SJohn Marino ndests = 0;
350395b7b453SJohn Marino
3504cf28ed85SJohn Marino /* For all the nodes belonging to 'state', */
350595b7b453SJohn Marino for (i = 0; i < cur_nodes->nelem; ++i)
350695b7b453SJohn Marino {
350795b7b453SJohn Marino re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
350895b7b453SJohn Marino re_token_type_t type = node->type;
350995b7b453SJohn Marino unsigned int constraint = node->constraint;
351095b7b453SJohn Marino
351195b7b453SJohn Marino /* Enumerate all single byte character this node can accept. */
351295b7b453SJohn Marino if (type == CHARACTER)
351395b7b453SJohn Marino bitset_set (accepts, node->opr.c);
351495b7b453SJohn Marino else if (type == SIMPLE_BRACKET)
351595b7b453SJohn Marino {
351695b7b453SJohn Marino bitset_merge (accepts, node->opr.sbcset);
351795b7b453SJohn Marino }
351895b7b453SJohn Marino else if (type == OP_PERIOD)
351995b7b453SJohn Marino {
352095b7b453SJohn Marino #ifdef RE_ENABLE_I18N
352195b7b453SJohn Marino if (dfa->mb_cur_max > 1)
352295b7b453SJohn Marino bitset_merge (accepts, dfa->sb_char);
352395b7b453SJohn Marino else
352495b7b453SJohn Marino #endif
352595b7b453SJohn Marino bitset_set_all (accepts);
352695b7b453SJohn Marino if (!(dfa->syntax & RE_DOT_NEWLINE))
352795b7b453SJohn Marino bitset_clear (accepts, '\n');
352895b7b453SJohn Marino if (dfa->syntax & RE_DOT_NOT_NULL)
352995b7b453SJohn Marino bitset_clear (accepts, '\0');
353095b7b453SJohn Marino }
353195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
353295b7b453SJohn Marino else if (type == OP_UTF8_PERIOD)
353395b7b453SJohn Marino {
353495b7b453SJohn Marino if (ASCII_CHARS % BITSET_WORD_BITS == 0)
353595b7b453SJohn Marino memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
353695b7b453SJohn Marino else
353795b7b453SJohn Marino bitset_merge (accepts, utf8_sb_map);
353895b7b453SJohn Marino if (!(dfa->syntax & RE_DOT_NEWLINE))
353995b7b453SJohn Marino bitset_clear (accepts, '\n');
354095b7b453SJohn Marino if (dfa->syntax & RE_DOT_NOT_NULL)
354195b7b453SJohn Marino bitset_clear (accepts, '\0');
354295b7b453SJohn Marino }
354395b7b453SJohn Marino #endif
354495b7b453SJohn Marino else
354595b7b453SJohn Marino continue;
354695b7b453SJohn Marino
3547cf28ed85SJohn Marino /* Check the 'accepts' and sift the characters which are not
354895b7b453SJohn Marino match it the context. */
354995b7b453SJohn Marino if (constraint)
355095b7b453SJohn Marino {
355195b7b453SJohn Marino if (constraint & NEXT_NEWLINE_CONSTRAINT)
355295b7b453SJohn Marino {
355395b7b453SJohn Marino bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
355495b7b453SJohn Marino bitset_empty (accepts);
355595b7b453SJohn Marino if (accepts_newline)
355695b7b453SJohn Marino bitset_set (accepts, NEWLINE_CHAR);
355795b7b453SJohn Marino else
355895b7b453SJohn Marino continue;
355995b7b453SJohn Marino }
356095b7b453SJohn Marino if (constraint & NEXT_ENDBUF_CONSTRAINT)
356195b7b453SJohn Marino {
356295b7b453SJohn Marino bitset_empty (accepts);
356395b7b453SJohn Marino continue;
356495b7b453SJohn Marino }
356595b7b453SJohn Marino
356695b7b453SJohn Marino if (constraint & NEXT_WORD_CONSTRAINT)
356795b7b453SJohn Marino {
356895b7b453SJohn Marino bitset_word_t any_set = 0;
356995b7b453SJohn Marino if (type == CHARACTER && !node->word_char)
357095b7b453SJohn Marino {
357195b7b453SJohn Marino bitset_empty (accepts);
357295b7b453SJohn Marino continue;
357395b7b453SJohn Marino }
357495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
357595b7b453SJohn Marino if (dfa->mb_cur_max > 1)
357695b7b453SJohn Marino for (j = 0; j < BITSET_WORDS; ++j)
357795b7b453SJohn Marino any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
357895b7b453SJohn Marino else
357995b7b453SJohn Marino #endif
358095b7b453SJohn Marino for (j = 0; j < BITSET_WORDS; ++j)
358195b7b453SJohn Marino any_set |= (accepts[j] &= dfa->word_char[j]);
358295b7b453SJohn Marino if (!any_set)
358395b7b453SJohn Marino continue;
358495b7b453SJohn Marino }
358595b7b453SJohn Marino if (constraint & NEXT_NOTWORD_CONSTRAINT)
358695b7b453SJohn Marino {
358795b7b453SJohn Marino bitset_word_t any_set = 0;
358895b7b453SJohn Marino if (type == CHARACTER && node->word_char)
358995b7b453SJohn Marino {
359095b7b453SJohn Marino bitset_empty (accepts);
359195b7b453SJohn Marino continue;
359295b7b453SJohn Marino }
359395b7b453SJohn Marino #ifdef RE_ENABLE_I18N
359495b7b453SJohn Marino if (dfa->mb_cur_max > 1)
359595b7b453SJohn Marino for (j = 0; j < BITSET_WORDS; ++j)
359695b7b453SJohn Marino any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
359795b7b453SJohn Marino else
359895b7b453SJohn Marino #endif
359995b7b453SJohn Marino for (j = 0; j < BITSET_WORDS; ++j)
360095b7b453SJohn Marino any_set |= (accepts[j] &= ~dfa->word_char[j]);
360195b7b453SJohn Marino if (!any_set)
360295b7b453SJohn Marino continue;
360395b7b453SJohn Marino }
360495b7b453SJohn Marino }
360595b7b453SJohn Marino
3606cf28ed85SJohn Marino /* Then divide 'accepts' into DFA states, or create a new
360795b7b453SJohn Marino state. Above, we make sure that accepts is not empty. */
360895b7b453SJohn Marino for (j = 0; j < ndests; ++j)
360995b7b453SJohn Marino {
361095b7b453SJohn Marino bitset_t intersec; /* Intersection sets, see below. */
361195b7b453SJohn Marino bitset_t remains;
361295b7b453SJohn Marino /* Flags, see below. */
361395b7b453SJohn Marino bitset_word_t has_intersec, not_subset, not_consumed;
361495b7b453SJohn Marino
361595b7b453SJohn Marino /* Optimization, skip if this state doesn't accept the character. */
361695b7b453SJohn Marino if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
361795b7b453SJohn Marino continue;
361895b7b453SJohn Marino
3619cf28ed85SJohn Marino /* Enumerate the intersection set of this state and 'accepts'. */
362095b7b453SJohn Marino has_intersec = 0;
362195b7b453SJohn Marino for (k = 0; k < BITSET_WORDS; ++k)
362295b7b453SJohn Marino has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
362395b7b453SJohn Marino /* And skip if the intersection set is empty. */
362495b7b453SJohn Marino if (!has_intersec)
362595b7b453SJohn Marino continue;
362695b7b453SJohn Marino
3627cf28ed85SJohn Marino /* Then check if this state is a subset of 'accepts'. */
362895b7b453SJohn Marino not_subset = not_consumed = 0;
362995b7b453SJohn Marino for (k = 0; k < BITSET_WORDS; ++k)
363095b7b453SJohn Marino {
363195b7b453SJohn Marino not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
363295b7b453SJohn Marino not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
363395b7b453SJohn Marino }
363495b7b453SJohn Marino
3635cf28ed85SJohn Marino /* If this state isn't a subset of 'accepts', create a
3636cf28ed85SJohn Marino new group state, which has the 'remains'. */
363795b7b453SJohn Marino if (not_subset)
363895b7b453SJohn Marino {
363995b7b453SJohn Marino bitset_copy (dests_ch[ndests], remains);
364095b7b453SJohn Marino bitset_copy (dests_ch[j], intersec);
364195b7b453SJohn Marino err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3642*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
364395b7b453SJohn Marino goto error_return;
364495b7b453SJohn Marino ++ndests;
364595b7b453SJohn Marino }
364695b7b453SJohn Marino
364795b7b453SJohn Marino /* Put the position in the current group. */
364895b7b453SJohn Marino ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3649*09d4459fSDaniel Fojt if (__glibc_unlikely (! ok))
365095b7b453SJohn Marino goto error_return;
365195b7b453SJohn Marino
365295b7b453SJohn Marino /* If all characters are consumed, go to next node. */
365395b7b453SJohn Marino if (!not_consumed)
365495b7b453SJohn Marino break;
365595b7b453SJohn Marino }
365695b7b453SJohn Marino /* Some characters remain, create a new group. */
365795b7b453SJohn Marino if (j == ndests)
365895b7b453SJohn Marino {
365995b7b453SJohn Marino bitset_copy (dests_ch[ndests], accepts);
366095b7b453SJohn Marino err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3661*09d4459fSDaniel Fojt if (__glibc_unlikely (err != REG_NOERROR))
366295b7b453SJohn Marino goto error_return;
366395b7b453SJohn Marino ++ndests;
366495b7b453SJohn Marino bitset_empty (accepts);
366595b7b453SJohn Marino }
366695b7b453SJohn Marino }
3667*09d4459fSDaniel Fojt assume (ndests <= SBC_MAX);
366895b7b453SJohn Marino return ndests;
366995b7b453SJohn Marino error_return:
367095b7b453SJohn Marino for (j = 0; j < ndests; ++j)
367195b7b453SJohn Marino re_node_set_free (dests_node + j);
3672*09d4459fSDaniel Fojt return -1;
367395b7b453SJohn Marino }
367495b7b453SJohn Marino
367595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
3676cf28ed85SJohn Marino /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
367795b7b453SJohn Marino Return the number of the bytes the node accepts.
367895b7b453SJohn Marino STR_IDX is the current index of the input string.
367995b7b453SJohn Marino
368095b7b453SJohn Marino This function handles the nodes which can accept one character, or
368195b7b453SJohn Marino one collating element like '.', '[a-z]', opposite to the other nodes
368295b7b453SJohn Marino can only accept one byte. */
368395b7b453SJohn Marino
3684dc7c36e4SJohn Marino # ifdef _LIBC
3685dc7c36e4SJohn Marino # include <locale/weight.h>
3686dc7c36e4SJohn Marino # endif
3687dc7c36e4SJohn Marino
368895b7b453SJohn Marino static int
check_node_accept_bytes(const re_dfa_t * dfa,Idx node_idx,const re_string_t * input,Idx str_idx)368995b7b453SJohn Marino check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
369095b7b453SJohn Marino const re_string_t *input, Idx str_idx)
369195b7b453SJohn Marino {
369295b7b453SJohn Marino const re_token_t *node = dfa->nodes + node_idx;
369395b7b453SJohn Marino int char_len, elem_len;
369495b7b453SJohn Marino Idx i;
369595b7b453SJohn Marino
3696*09d4459fSDaniel Fojt if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
369795b7b453SJohn Marino {
369895b7b453SJohn Marino unsigned char c = re_string_byte_at (input, str_idx), d;
3699*09d4459fSDaniel Fojt if (__glibc_likely (c < 0xc2))
370095b7b453SJohn Marino return 0;
370195b7b453SJohn Marino
370295b7b453SJohn Marino if (str_idx + 2 > input->len)
370395b7b453SJohn Marino return 0;
370495b7b453SJohn Marino
370595b7b453SJohn Marino d = re_string_byte_at (input, str_idx + 1);
370695b7b453SJohn Marino if (c < 0xe0)
370795b7b453SJohn Marino return (d < 0x80 || d > 0xbf) ? 0 : 2;
370895b7b453SJohn Marino else if (c < 0xf0)
370995b7b453SJohn Marino {
371095b7b453SJohn Marino char_len = 3;
371195b7b453SJohn Marino if (c == 0xe0 && d < 0xa0)
371295b7b453SJohn Marino return 0;
371395b7b453SJohn Marino }
371495b7b453SJohn Marino else if (c < 0xf8)
371595b7b453SJohn Marino {
371695b7b453SJohn Marino char_len = 4;
371795b7b453SJohn Marino if (c == 0xf0 && d < 0x90)
371895b7b453SJohn Marino return 0;
371995b7b453SJohn Marino }
372095b7b453SJohn Marino else if (c < 0xfc)
372195b7b453SJohn Marino {
372295b7b453SJohn Marino char_len = 5;
372395b7b453SJohn Marino if (c == 0xf8 && d < 0x88)
372495b7b453SJohn Marino return 0;
372595b7b453SJohn Marino }
372695b7b453SJohn Marino else if (c < 0xfe)
372795b7b453SJohn Marino {
372895b7b453SJohn Marino char_len = 6;
372995b7b453SJohn Marino if (c == 0xfc && d < 0x84)
373095b7b453SJohn Marino return 0;
373195b7b453SJohn Marino }
373295b7b453SJohn Marino else
373395b7b453SJohn Marino return 0;
373495b7b453SJohn Marino
373595b7b453SJohn Marino if (str_idx + char_len > input->len)
373695b7b453SJohn Marino return 0;
373795b7b453SJohn Marino
373895b7b453SJohn Marino for (i = 1; i < char_len; ++i)
373995b7b453SJohn Marino {
374095b7b453SJohn Marino d = re_string_byte_at (input, str_idx + i);
374195b7b453SJohn Marino if (d < 0x80 || d > 0xbf)
374295b7b453SJohn Marino return 0;
374395b7b453SJohn Marino }
374495b7b453SJohn Marino return char_len;
374595b7b453SJohn Marino }
374695b7b453SJohn Marino
374795b7b453SJohn Marino char_len = re_string_char_size_at (input, str_idx);
374895b7b453SJohn Marino if (node->type == OP_PERIOD)
374995b7b453SJohn Marino {
375095b7b453SJohn Marino if (char_len <= 1)
375195b7b453SJohn Marino return 0;
375295b7b453SJohn Marino /* FIXME: I don't think this if is needed, as both '\n'
375395b7b453SJohn Marino and '\0' are char_len == 1. */
375495b7b453SJohn Marino /* '.' accepts any one character except the following two cases. */
3755*09d4459fSDaniel Fojt if ((!(dfa->syntax & RE_DOT_NEWLINE)
3756*09d4459fSDaniel Fojt && re_string_byte_at (input, str_idx) == '\n')
3757*09d4459fSDaniel Fojt || ((dfa->syntax & RE_DOT_NOT_NULL)
3758*09d4459fSDaniel Fojt && re_string_byte_at (input, str_idx) == '\0'))
375995b7b453SJohn Marino return 0;
376095b7b453SJohn Marino return char_len;
376195b7b453SJohn Marino }
376295b7b453SJohn Marino
376395b7b453SJohn Marino elem_len = re_string_elem_size_at (input, str_idx);
376495b7b453SJohn Marino if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
376595b7b453SJohn Marino return 0;
376695b7b453SJohn Marino
376795b7b453SJohn Marino if (node->type == COMPLEX_BRACKET)
376895b7b453SJohn Marino {
376995b7b453SJohn Marino const re_charset_t *cset = node->opr.mbcset;
377095b7b453SJohn Marino # ifdef _LIBC
377195b7b453SJohn Marino const unsigned char *pin
377295b7b453SJohn Marino = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
377395b7b453SJohn Marino Idx j;
377495b7b453SJohn Marino uint32_t nrules;
377595b7b453SJohn Marino # endif /* _LIBC */
377695b7b453SJohn Marino int match_len = 0;
377795b7b453SJohn Marino wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
377895b7b453SJohn Marino ? re_string_wchar_at (input, str_idx) : 0);
377995b7b453SJohn Marino
378095b7b453SJohn Marino /* match with multibyte character? */
378195b7b453SJohn Marino for (i = 0; i < cset->nmbchars; ++i)
378295b7b453SJohn Marino if (wc == cset->mbchars[i])
378395b7b453SJohn Marino {
378495b7b453SJohn Marino match_len = char_len;
378595b7b453SJohn Marino goto check_node_accept_bytes_match;
378695b7b453SJohn Marino }
378795b7b453SJohn Marino /* match with character_class? */
378895b7b453SJohn Marino for (i = 0; i < cset->nchar_classes; ++i)
378995b7b453SJohn Marino {
379095b7b453SJohn Marino wctype_t wt = cset->char_classes[i];
379195b7b453SJohn Marino if (__iswctype (wc, wt))
379295b7b453SJohn Marino {
379395b7b453SJohn Marino match_len = char_len;
379495b7b453SJohn Marino goto check_node_accept_bytes_match;
379595b7b453SJohn Marino }
379695b7b453SJohn Marino }
379795b7b453SJohn Marino
379895b7b453SJohn Marino # ifdef _LIBC
379995b7b453SJohn Marino nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
380095b7b453SJohn Marino if (nrules != 0)
380195b7b453SJohn Marino {
380295b7b453SJohn Marino unsigned int in_collseq = 0;
380395b7b453SJohn Marino const int32_t *table, *indirect;
380495b7b453SJohn Marino const unsigned char *weights, *extra;
380595b7b453SJohn Marino const char *collseqwc;
380695b7b453SJohn Marino
380795b7b453SJohn Marino /* match with collating_symbol? */
380895b7b453SJohn Marino if (cset->ncoll_syms)
380995b7b453SJohn Marino extra = (const unsigned char *)
381095b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
381195b7b453SJohn Marino for (i = 0; i < cset->ncoll_syms; ++i)
381295b7b453SJohn Marino {
381395b7b453SJohn Marino const unsigned char *coll_sym = extra + cset->coll_syms[i];
381495b7b453SJohn Marino /* Compare the length of input collating element and
381595b7b453SJohn Marino the length of current collating element. */
381695b7b453SJohn Marino if (*coll_sym != elem_len)
381795b7b453SJohn Marino continue;
381895b7b453SJohn Marino /* Compare each bytes. */
381995b7b453SJohn Marino for (j = 0; j < *coll_sym; j++)
382095b7b453SJohn Marino if (pin[j] != coll_sym[1 + j])
382195b7b453SJohn Marino break;
382295b7b453SJohn Marino if (j == *coll_sym)
382395b7b453SJohn Marino {
382495b7b453SJohn Marino /* Match if every bytes is equal. */
382595b7b453SJohn Marino match_len = j;
382695b7b453SJohn Marino goto check_node_accept_bytes_match;
382795b7b453SJohn Marino }
382895b7b453SJohn Marino }
382995b7b453SJohn Marino
383095b7b453SJohn Marino if (cset->nranges)
383195b7b453SJohn Marino {
383295b7b453SJohn Marino if (elem_len <= char_len)
383395b7b453SJohn Marino {
383495b7b453SJohn Marino collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
383595b7b453SJohn Marino in_collseq = __collseq_table_lookup (collseqwc, wc);
383695b7b453SJohn Marino }
383795b7b453SJohn Marino else
383895b7b453SJohn Marino in_collseq = find_collation_sequence_value (pin, elem_len);
383995b7b453SJohn Marino }
384095b7b453SJohn Marino /* match with range expression? */
3841680a9cb8SJohn Marino /* FIXME: Implement rational ranges here, too. */
384295b7b453SJohn Marino for (i = 0; i < cset->nranges; ++i)
384395b7b453SJohn Marino if (cset->range_starts[i] <= in_collseq
384495b7b453SJohn Marino && in_collseq <= cset->range_ends[i])
384595b7b453SJohn Marino {
384695b7b453SJohn Marino match_len = elem_len;
384795b7b453SJohn Marino goto check_node_accept_bytes_match;
384895b7b453SJohn Marino }
384995b7b453SJohn Marino
385095b7b453SJohn Marino /* match with equivalence_class? */
385195b7b453SJohn Marino if (cset->nequiv_classes)
385295b7b453SJohn Marino {
385395b7b453SJohn Marino const unsigned char *cp = pin;
385495b7b453SJohn Marino table = (const int32_t *)
385595b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
385695b7b453SJohn Marino weights = (const unsigned char *)
385795b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
385895b7b453SJohn Marino extra = (const unsigned char *)
385995b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
386095b7b453SJohn Marino indirect = (const int32_t *)
386195b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3862dc7c36e4SJohn Marino int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3863*09d4459fSDaniel Fojt int32_t rule = idx >> 24;
3864*09d4459fSDaniel Fojt idx &= 0xffffff;
386595b7b453SJohn Marino if (idx > 0)
3866*09d4459fSDaniel Fojt {
3867*09d4459fSDaniel Fojt size_t weight_len = weights[idx];
386895b7b453SJohn Marino for (i = 0; i < cset->nequiv_classes; ++i)
386995b7b453SJohn Marino {
387095b7b453SJohn Marino int32_t equiv_class_idx = cset->equiv_classes[i];
3871*09d4459fSDaniel Fojt int32_t equiv_class_rule = equiv_class_idx >> 24;
387295b7b453SJohn Marino equiv_class_idx &= 0xffffff;
3873*09d4459fSDaniel Fojt if (weights[equiv_class_idx] == weight_len
3874*09d4459fSDaniel Fojt && equiv_class_rule == rule
3875*09d4459fSDaniel Fojt && memcmp (weights + idx + 1,
3876*09d4459fSDaniel Fojt weights + equiv_class_idx + 1,
3877*09d4459fSDaniel Fojt weight_len) == 0)
387895b7b453SJohn Marino {
387995b7b453SJohn Marino match_len = elem_len;
388095b7b453SJohn Marino goto check_node_accept_bytes_match;
388195b7b453SJohn Marino }
388295b7b453SJohn Marino }
388395b7b453SJohn Marino }
388495b7b453SJohn Marino }
388595b7b453SJohn Marino }
388695b7b453SJohn Marino else
388795b7b453SJohn Marino # endif /* _LIBC */
388895b7b453SJohn Marino {
388995b7b453SJohn Marino /* match with range expression? */
389095b7b453SJohn Marino for (i = 0; i < cset->nranges; ++i)
389195b7b453SJohn Marino {
3892680a9cb8SJohn Marino if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
389395b7b453SJohn Marino {
389495b7b453SJohn Marino match_len = char_len;
389595b7b453SJohn Marino goto check_node_accept_bytes_match;
389695b7b453SJohn Marino }
389795b7b453SJohn Marino }
389895b7b453SJohn Marino }
389995b7b453SJohn Marino check_node_accept_bytes_match:
390095b7b453SJohn Marino if (!cset->non_match)
390195b7b453SJohn Marino return match_len;
390295b7b453SJohn Marino else
390395b7b453SJohn Marino {
390495b7b453SJohn Marino if (match_len > 0)
390595b7b453SJohn Marino return 0;
390695b7b453SJohn Marino else
390795b7b453SJohn Marino return (elem_len > char_len) ? elem_len : char_len;
390895b7b453SJohn Marino }
390995b7b453SJohn Marino }
391095b7b453SJohn Marino return 0;
391195b7b453SJohn Marino }
391295b7b453SJohn Marino
391395b7b453SJohn Marino # ifdef _LIBC
391495b7b453SJohn Marino static unsigned int
find_collation_sequence_value(const unsigned char * mbs,size_t mbs_len)391595b7b453SJohn Marino find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
391695b7b453SJohn Marino {
391795b7b453SJohn Marino uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
391895b7b453SJohn Marino if (nrules == 0)
391995b7b453SJohn Marino {
392095b7b453SJohn Marino if (mbs_len == 1)
392195b7b453SJohn Marino {
392295b7b453SJohn Marino /* No valid character. Match it as a single byte character. */
392395b7b453SJohn Marino const unsigned char *collseq = (const unsigned char *)
392495b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
392595b7b453SJohn Marino return collseq[mbs[0]];
392695b7b453SJohn Marino }
392795b7b453SJohn Marino return UINT_MAX;
392895b7b453SJohn Marino }
392995b7b453SJohn Marino else
393095b7b453SJohn Marino {
393195b7b453SJohn Marino int32_t idx;
393295b7b453SJohn Marino const unsigned char *extra = (const unsigned char *)
393395b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
393495b7b453SJohn Marino int32_t extrasize = (const unsigned char *)
393595b7b453SJohn Marino _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
393695b7b453SJohn Marino
393795b7b453SJohn Marino for (idx = 0; idx < extrasize;)
393895b7b453SJohn Marino {
393995b7b453SJohn Marino int mbs_cnt;
394095b7b453SJohn Marino bool found = false;
394195b7b453SJohn Marino int32_t elem_mbs_len;
394295b7b453SJohn Marino /* Skip the name of collating element name. */
394395b7b453SJohn Marino idx = idx + extra[idx] + 1;
394495b7b453SJohn Marino elem_mbs_len = extra[idx++];
394595b7b453SJohn Marino if (mbs_len == elem_mbs_len)
394695b7b453SJohn Marino {
394795b7b453SJohn Marino for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
394895b7b453SJohn Marino if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
394995b7b453SJohn Marino break;
395095b7b453SJohn Marino if (mbs_cnt == elem_mbs_len)
395195b7b453SJohn Marino /* Found the entry. */
395295b7b453SJohn Marino found = true;
395395b7b453SJohn Marino }
395495b7b453SJohn Marino /* Skip the byte sequence of the collating element. */
395595b7b453SJohn Marino idx += elem_mbs_len;
395695b7b453SJohn Marino /* Adjust for the alignment. */
395795b7b453SJohn Marino idx = (idx + 3) & ~3;
395895b7b453SJohn Marino /* Skip the collation sequence value. */
395995b7b453SJohn Marino idx += sizeof (uint32_t);
396095b7b453SJohn Marino /* Skip the wide char sequence of the collating element. */
3961cf28ed85SJohn Marino idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
396295b7b453SJohn Marino /* If we found the entry, return the sequence value. */
396395b7b453SJohn Marino if (found)
396495b7b453SJohn Marino return *(uint32_t *) (extra + idx);
396595b7b453SJohn Marino /* Skip the collation sequence value. */
396695b7b453SJohn Marino idx += sizeof (uint32_t);
396795b7b453SJohn Marino }
396895b7b453SJohn Marino return UINT_MAX;
396995b7b453SJohn Marino }
397095b7b453SJohn Marino }
397195b7b453SJohn Marino # endif /* _LIBC */
397295b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
397395b7b453SJohn Marino
397495b7b453SJohn Marino /* Check whether the node accepts the byte which is IDX-th
397595b7b453SJohn Marino byte of the INPUT. */
397695b7b453SJohn Marino
397795b7b453SJohn Marino static bool
check_node_accept(const re_match_context_t * mctx,const re_token_t * node,Idx idx)397895b7b453SJohn Marino check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
397995b7b453SJohn Marino Idx idx)
398095b7b453SJohn Marino {
398195b7b453SJohn Marino unsigned char ch;
398295b7b453SJohn Marino ch = re_string_byte_at (&mctx->input, idx);
398395b7b453SJohn Marino switch (node->type)
398495b7b453SJohn Marino {
398595b7b453SJohn Marino case CHARACTER:
398695b7b453SJohn Marino if (node->opr.c != ch)
398795b7b453SJohn Marino return false;
398895b7b453SJohn Marino break;
398995b7b453SJohn Marino
399095b7b453SJohn Marino case SIMPLE_BRACKET:
399195b7b453SJohn Marino if (!bitset_contain (node->opr.sbcset, ch))
399295b7b453SJohn Marino return false;
399395b7b453SJohn Marino break;
399495b7b453SJohn Marino
399595b7b453SJohn Marino #ifdef RE_ENABLE_I18N
399695b7b453SJohn Marino case OP_UTF8_PERIOD:
399795b7b453SJohn Marino if (ch >= ASCII_CHARS)
399895b7b453SJohn Marino return false;
3999*09d4459fSDaniel Fojt FALLTHROUGH;
400095b7b453SJohn Marino #endif
400195b7b453SJohn Marino case OP_PERIOD:
400295b7b453SJohn Marino if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
400395b7b453SJohn Marino || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
400495b7b453SJohn Marino return false;
400595b7b453SJohn Marino break;
400695b7b453SJohn Marino
400795b7b453SJohn Marino default:
400895b7b453SJohn Marino return false;
400995b7b453SJohn Marino }
401095b7b453SJohn Marino
401195b7b453SJohn Marino if (node->constraint)
401295b7b453SJohn Marino {
401395b7b453SJohn Marino /* The node has constraints. Check whether the current context
401495b7b453SJohn Marino satisfies the constraints. */
401595b7b453SJohn Marino unsigned int context = re_string_context_at (&mctx->input, idx,
401695b7b453SJohn Marino mctx->eflags);
401795b7b453SJohn Marino if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
401895b7b453SJohn Marino return false;
401995b7b453SJohn Marino }
402095b7b453SJohn Marino
402195b7b453SJohn Marino return true;
402295b7b453SJohn Marino }
402395b7b453SJohn Marino
402495b7b453SJohn Marino /* Extend the buffers, if the buffers have run out. */
402595b7b453SJohn Marino
402695b7b453SJohn Marino static reg_errcode_t
4027*09d4459fSDaniel Fojt __attribute_warn_unused_result__
extend_buffers(re_match_context_t * mctx,int min_len)4028680a9cb8SJohn Marino extend_buffers (re_match_context_t *mctx, int min_len)
402995b7b453SJohn Marino {
403095b7b453SJohn Marino reg_errcode_t ret;
403195b7b453SJohn Marino re_string_t *pstr = &mctx->input;
403295b7b453SJohn Marino
403395b7b453SJohn Marino /* Avoid overflow. */
4034*09d4459fSDaniel Fojt if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4035*09d4459fSDaniel Fojt <= pstr->bufs_len))
403695b7b453SJohn Marino return REG_ESPACE;
403795b7b453SJohn Marino
4038680a9cb8SJohn Marino /* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4039680a9cb8SJohn Marino ret = re_string_realloc_buffers (pstr,
4040680a9cb8SJohn Marino MAX (min_len,
4041680a9cb8SJohn Marino MIN (pstr->len, pstr->bufs_len * 2)));
4042*09d4459fSDaniel Fojt if (__glibc_unlikely (ret != REG_NOERROR))
404395b7b453SJohn Marino return ret;
404495b7b453SJohn Marino
404595b7b453SJohn Marino if (mctx->state_log != NULL)
404695b7b453SJohn Marino {
404795b7b453SJohn Marino /* And double the length of state_log. */
404895b7b453SJohn Marino /* XXX We have no indication of the size of this buffer. If this
404995b7b453SJohn Marino allocation fail we have no indication that the state_log array
405095b7b453SJohn Marino does not have the right size. */
405195b7b453SJohn Marino re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
405295b7b453SJohn Marino pstr->bufs_len + 1);
4053*09d4459fSDaniel Fojt if (__glibc_unlikely (new_array == NULL))
405495b7b453SJohn Marino return REG_ESPACE;
405595b7b453SJohn Marino mctx->state_log = new_array;
405695b7b453SJohn Marino }
405795b7b453SJohn Marino
405895b7b453SJohn Marino /* Then reconstruct the buffers. */
405995b7b453SJohn Marino if (pstr->icase)
406095b7b453SJohn Marino {
406195b7b453SJohn Marino #ifdef RE_ENABLE_I18N
406295b7b453SJohn Marino if (pstr->mb_cur_max > 1)
406395b7b453SJohn Marino {
406495b7b453SJohn Marino ret = build_wcs_upper_buffer (pstr);
4065*09d4459fSDaniel Fojt if (__glibc_unlikely (ret != REG_NOERROR))
406695b7b453SJohn Marino return ret;
406795b7b453SJohn Marino }
406895b7b453SJohn Marino else
406995b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
407095b7b453SJohn Marino build_upper_buffer (pstr);
407195b7b453SJohn Marino }
407295b7b453SJohn Marino else
407395b7b453SJohn Marino {
407495b7b453SJohn Marino #ifdef RE_ENABLE_I18N
407595b7b453SJohn Marino if (pstr->mb_cur_max > 1)
407695b7b453SJohn Marino build_wcs_buffer (pstr);
407795b7b453SJohn Marino else
407895b7b453SJohn Marino #endif /* RE_ENABLE_I18N */
407995b7b453SJohn Marino {
408095b7b453SJohn Marino if (pstr->trans != NULL)
408195b7b453SJohn Marino re_string_translate_buffer (pstr);
408295b7b453SJohn Marino }
408395b7b453SJohn Marino }
408495b7b453SJohn Marino return REG_NOERROR;
408595b7b453SJohn Marino }
408695b7b453SJohn Marino
408795b7b453SJohn Marino
408895b7b453SJohn Marino /* Functions for matching context. */
408995b7b453SJohn Marino
409095b7b453SJohn Marino /* Initialize MCTX. */
409195b7b453SJohn Marino
409295b7b453SJohn Marino static reg_errcode_t
4093*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_init(re_match_context_t * mctx,int eflags,Idx n)409495b7b453SJohn Marino match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
409595b7b453SJohn Marino {
409695b7b453SJohn Marino mctx->eflags = eflags;
4097*09d4459fSDaniel Fojt mctx->match_last = -1;
409895b7b453SJohn Marino if (n > 0)
409995b7b453SJohn Marino {
410095b7b453SJohn Marino /* Avoid overflow. */
410195b7b453SJohn Marino size_t max_object_size =
410295b7b453SJohn Marino MAX (sizeof (struct re_backref_cache_entry),
410395b7b453SJohn Marino sizeof (re_sub_match_top_t *));
4104*09d4459fSDaniel Fojt if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
410595b7b453SJohn Marino return REG_ESPACE;
410695b7b453SJohn Marino
410795b7b453SJohn Marino mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
410895b7b453SJohn Marino mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4109*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
411095b7b453SJohn Marino return REG_ESPACE;
411195b7b453SJohn Marino }
411295b7b453SJohn Marino /* Already zero-ed by the caller.
411395b7b453SJohn Marino else
411495b7b453SJohn Marino mctx->bkref_ents = NULL;
411595b7b453SJohn Marino mctx->nbkref_ents = 0;
411695b7b453SJohn Marino mctx->nsub_tops = 0; */
411795b7b453SJohn Marino mctx->abkref_ents = n;
411895b7b453SJohn Marino mctx->max_mb_elem_len = 1;
411995b7b453SJohn Marino mctx->asub_tops = n;
412095b7b453SJohn Marino return REG_NOERROR;
412195b7b453SJohn Marino }
412295b7b453SJohn Marino
412395b7b453SJohn Marino /* Clean the entries which depend on the current input in MCTX.
412495b7b453SJohn Marino This function must be invoked when the matcher changes the start index
412595b7b453SJohn Marino of the input, or changes the input string. */
412695b7b453SJohn Marino
412795b7b453SJohn Marino static void
match_ctx_clean(re_match_context_t * mctx)412895b7b453SJohn Marino match_ctx_clean (re_match_context_t *mctx)
412995b7b453SJohn Marino {
413095b7b453SJohn Marino Idx st_idx;
413195b7b453SJohn Marino for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
413295b7b453SJohn Marino {
413395b7b453SJohn Marino Idx sl_idx;
413495b7b453SJohn Marino re_sub_match_top_t *top = mctx->sub_tops[st_idx];
413595b7b453SJohn Marino for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
413695b7b453SJohn Marino {
413795b7b453SJohn Marino re_sub_match_last_t *last = top->lasts[sl_idx];
413895b7b453SJohn Marino re_free (last->path.array);
413995b7b453SJohn Marino re_free (last);
414095b7b453SJohn Marino }
414195b7b453SJohn Marino re_free (top->lasts);
414295b7b453SJohn Marino if (top->path)
414395b7b453SJohn Marino {
414495b7b453SJohn Marino re_free (top->path->array);
414595b7b453SJohn Marino re_free (top->path);
414695b7b453SJohn Marino }
4147*09d4459fSDaniel Fojt re_free (top);
414895b7b453SJohn Marino }
414995b7b453SJohn Marino
415095b7b453SJohn Marino mctx->nsub_tops = 0;
415195b7b453SJohn Marino mctx->nbkref_ents = 0;
415295b7b453SJohn Marino }
415395b7b453SJohn Marino
415495b7b453SJohn Marino /* Free all the memory associated with MCTX. */
415595b7b453SJohn Marino
415695b7b453SJohn Marino static void
match_ctx_free(re_match_context_t * mctx)415795b7b453SJohn Marino match_ctx_free (re_match_context_t *mctx)
415895b7b453SJohn Marino {
415995b7b453SJohn Marino /* First, free all the memory associated with MCTX->SUB_TOPS. */
416095b7b453SJohn Marino match_ctx_clean (mctx);
416195b7b453SJohn Marino re_free (mctx->sub_tops);
416295b7b453SJohn Marino re_free (mctx->bkref_ents);
416395b7b453SJohn Marino }
416495b7b453SJohn Marino
416595b7b453SJohn Marino /* Add a new backreference entry to MCTX.
416695b7b453SJohn Marino Note that we assume that caller never call this function with duplicate
416795b7b453SJohn Marino entry, and call with STR_IDX which isn't smaller than any existing entry.
416895b7b453SJohn Marino */
416995b7b453SJohn Marino
417095b7b453SJohn Marino static reg_errcode_t
4171*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_entry(re_match_context_t * mctx,Idx node,Idx str_idx,Idx from,Idx to)417295b7b453SJohn Marino match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
417395b7b453SJohn Marino Idx to)
417495b7b453SJohn Marino {
417595b7b453SJohn Marino if (mctx->nbkref_ents >= mctx->abkref_ents)
417695b7b453SJohn Marino {
417795b7b453SJohn Marino struct re_backref_cache_entry* new_entry;
417895b7b453SJohn Marino new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
417995b7b453SJohn Marino mctx->abkref_ents * 2);
4180*09d4459fSDaniel Fojt if (__glibc_unlikely (new_entry == NULL))
418195b7b453SJohn Marino {
418295b7b453SJohn Marino re_free (mctx->bkref_ents);
418395b7b453SJohn Marino return REG_ESPACE;
418495b7b453SJohn Marino }
418595b7b453SJohn Marino mctx->bkref_ents = new_entry;
418695b7b453SJohn Marino memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
418795b7b453SJohn Marino sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
418895b7b453SJohn Marino mctx->abkref_ents *= 2;
418995b7b453SJohn Marino }
419095b7b453SJohn Marino if (mctx->nbkref_ents > 0
419195b7b453SJohn Marino && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
419295b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
419395b7b453SJohn Marino
419495b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents].node = node;
419595b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
419695b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
419795b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
419895b7b453SJohn Marino
419995b7b453SJohn Marino /* This is a cache that saves negative results of check_dst_limits_calc_pos.
420095b7b453SJohn Marino If bit N is clear, means that this entry won't epsilon-transition to
420195b7b453SJohn Marino an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
420295b7b453SJohn Marino it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
420395b7b453SJohn Marino such node.
420495b7b453SJohn Marino
420595b7b453SJohn Marino A backreference does not epsilon-transition unless it is empty, so set
420695b7b453SJohn Marino to all zeros if FROM != TO. */
420795b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
420895b7b453SJohn Marino = (from == to ? -1 : 0);
420995b7b453SJohn Marino
421095b7b453SJohn Marino mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
421195b7b453SJohn Marino if (mctx->max_mb_elem_len < to - from)
421295b7b453SJohn Marino mctx->max_mb_elem_len = to - from;
421395b7b453SJohn Marino return REG_NOERROR;
421495b7b453SJohn Marino }
421595b7b453SJohn Marino
4216*09d4459fSDaniel Fojt /* Return the first entry with the same str_idx, or -1 if none is
421795b7b453SJohn Marino found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
421895b7b453SJohn Marino
421995b7b453SJohn Marino static Idx
search_cur_bkref_entry(const re_match_context_t * mctx,Idx str_idx)422095b7b453SJohn Marino search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
422195b7b453SJohn Marino {
422295b7b453SJohn Marino Idx left, right, mid, last;
422395b7b453SJohn Marino last = right = mctx->nbkref_ents;
422495b7b453SJohn Marino for (left = 0; left < right;)
422595b7b453SJohn Marino {
422695b7b453SJohn Marino mid = (left + right) / 2;
422795b7b453SJohn Marino if (mctx->bkref_ents[mid].str_idx < str_idx)
422895b7b453SJohn Marino left = mid + 1;
422995b7b453SJohn Marino else
423095b7b453SJohn Marino right = mid;
423195b7b453SJohn Marino }
423295b7b453SJohn Marino if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
423395b7b453SJohn Marino return left;
423495b7b453SJohn Marino else
4235*09d4459fSDaniel Fojt return -1;
423695b7b453SJohn Marino }
423795b7b453SJohn Marino
423895b7b453SJohn Marino /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
423995b7b453SJohn Marino at STR_IDX. */
424095b7b453SJohn Marino
424195b7b453SJohn Marino static reg_errcode_t
4242*09d4459fSDaniel Fojt __attribute_warn_unused_result__
match_ctx_add_subtop(re_match_context_t * mctx,Idx node,Idx str_idx)424395b7b453SJohn Marino match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
424495b7b453SJohn Marino {
4245*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx->sub_tops != NULL);
4246*09d4459fSDaniel Fojt DEBUG_ASSERT (mctx->asub_tops > 0);
4247*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
424895b7b453SJohn Marino {
424995b7b453SJohn Marino Idx new_asub_tops = mctx->asub_tops * 2;
425095b7b453SJohn Marino re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
425195b7b453SJohn Marino re_sub_match_top_t *,
425295b7b453SJohn Marino new_asub_tops);
4253*09d4459fSDaniel Fojt if (__glibc_unlikely (new_array == NULL))
425495b7b453SJohn Marino return REG_ESPACE;
425595b7b453SJohn Marino mctx->sub_tops = new_array;
425695b7b453SJohn Marino mctx->asub_tops = new_asub_tops;
425795b7b453SJohn Marino }
425895b7b453SJohn Marino mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4259*09d4459fSDaniel Fojt if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
426095b7b453SJohn Marino return REG_ESPACE;
426195b7b453SJohn Marino mctx->sub_tops[mctx->nsub_tops]->node = node;
426295b7b453SJohn Marino mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
426395b7b453SJohn Marino return REG_NOERROR;
426495b7b453SJohn Marino }
426595b7b453SJohn Marino
426695b7b453SJohn Marino /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
426795b7b453SJohn Marino at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
426895b7b453SJohn Marino
426995b7b453SJohn Marino static re_sub_match_last_t *
match_ctx_add_sublast(re_sub_match_top_t * subtop,Idx node,Idx str_idx)427095b7b453SJohn Marino match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
427195b7b453SJohn Marino {
427295b7b453SJohn Marino re_sub_match_last_t *new_entry;
4273*09d4459fSDaniel Fojt if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
427495b7b453SJohn Marino {
427595b7b453SJohn Marino Idx new_alasts = 2 * subtop->alasts + 1;
427695b7b453SJohn Marino re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
427795b7b453SJohn Marino re_sub_match_last_t *,
427895b7b453SJohn Marino new_alasts);
4279*09d4459fSDaniel Fojt if (__glibc_unlikely (new_array == NULL))
428095b7b453SJohn Marino return NULL;
428195b7b453SJohn Marino subtop->lasts = new_array;
428295b7b453SJohn Marino subtop->alasts = new_alasts;
428395b7b453SJohn Marino }
428495b7b453SJohn Marino new_entry = calloc (1, sizeof (re_sub_match_last_t));
4285*09d4459fSDaniel Fojt if (__glibc_likely (new_entry != NULL))
428695b7b453SJohn Marino {
428795b7b453SJohn Marino subtop->lasts[subtop->nlasts] = new_entry;
428895b7b453SJohn Marino new_entry->node = node;
428995b7b453SJohn Marino new_entry->str_idx = str_idx;
429095b7b453SJohn Marino ++subtop->nlasts;
429195b7b453SJohn Marino }
429295b7b453SJohn Marino return new_entry;
429395b7b453SJohn Marino }
429495b7b453SJohn Marino
429595b7b453SJohn Marino static void
sift_ctx_init(re_sift_context_t * sctx,re_dfastate_t ** sifted_sts,re_dfastate_t ** limited_sts,Idx last_node,Idx last_str_idx)429695b7b453SJohn Marino sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
429795b7b453SJohn Marino re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
429895b7b453SJohn Marino {
429995b7b453SJohn Marino sctx->sifted_states = sifted_sts;
430095b7b453SJohn Marino sctx->limited_states = limited_sts;
430195b7b453SJohn Marino sctx->last_node = last_node;
430295b7b453SJohn Marino sctx->last_str_idx = last_str_idx;
430395b7b453SJohn Marino re_node_set_init_empty (&sctx->limits);
430495b7b453SJohn Marino }
4305