1 /* 2 tre_regexec.c - TRE POSIX compatible matching functions (and more). 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 #ifdef TRE_USE_ALLOCA 14 /* AIX requires this to be the first thing in the file. */ 15 #ifndef __GNUC__ 16 # if HAVE_ALLOCA_H 17 # include <alloca.h> 18 # else 19 # ifdef _AIX 20 #pragma alloca 21 # else 22 # ifndef alloca /* predefined by HP cc +Olibcalls */ 23 char *alloca (); 24 # endif 25 # endif 26 # endif 27 #endif 28 #endif /* TRE_USE_ALLOCA */ 29 30 #include <assert.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #ifdef HAVE_WCHAR_H 34 #include <wchar.h> 35 #endif /* HAVE_WCHAR_H */ 36 #ifdef HAVE_WCTYPE_H 37 #include <wctype.h> 38 #endif /* HAVE_WCTYPE_H */ 39 #ifndef TRE_WCHAR 40 #include <ctype.h> 41 #endif /* !TRE_WCHAR */ 42 #ifdef HAVE_MALLOC_H 43 #include <malloc.h> 44 #endif /* HAVE_MALLOC_H */ 45 #include <limits.h> 46 47 #include "tre-internal.h" 48 #include "tre.h" 49 #include "xmalloc.h" 50 51 52 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match 53 endpoint values. */ 54 void 55 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 56 const tre_tnfa_t *tnfa, int *tags, int match_eo) 57 { 58 tre_submatch_data_t *submatch_data; 59 unsigned int i, j; 60 int *parents; 61 62 i = 0; 63 if (match_eo >= 0 && !(cflags & REG_NOSUB)) 64 { 65 /* Construct submatch offsets from the tags. */ 66 DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo)); 67 submatch_data = tnfa->submatch_data; 68 while (i < tnfa->num_submatches && i < nmatch) 69 { 70 if (submatch_data[i].so_tag == tnfa->end_tag) 71 pmatch[i].rm_so = match_eo; 72 else 73 pmatch[i].rm_so = tags[submatch_data[i].so_tag]; 74 75 if (submatch_data[i].eo_tag == tnfa->end_tag) 76 pmatch[i].rm_eo = match_eo; 77 else 78 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; 79 80 /* If either of the endpoints were not used, this submatch 81 was not part of the match. */ 82 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) 83 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 84 85 DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i, 86 submatch_data[i].so_tag, pmatch[i].rm_so, 87 submatch_data[i].eo_tag, pmatch[i].rm_eo)); 88 i++; 89 } 90 /* Reset all submatches that are not within all of their parent 91 submatches. */ 92 i = 0; 93 while (i < tnfa->num_submatches && i < nmatch) 94 { 95 if (pmatch[i].rm_eo == -1) 96 assert(pmatch[i].rm_so == -1); 97 assert(pmatch[i].rm_so <= pmatch[i].rm_eo); 98 99 parents = submatch_data[i].parents; 100 if (parents != NULL) 101 for (j = 0; parents[j] >= 0; j++) 102 { 103 DPRINT(("pmatch[%d] parent %d\n", i, parents[j])); 104 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so 105 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) 106 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 107 } 108 i++; 109 } 110 } 111 112 while (i < nmatch) 113 { 114 pmatch[i].rm_so = -1; 115 pmatch[i].rm_eo = -1; 116 i++; 117 } 118 } 119 120 121 /* 122 Wrapper functions for POSIX compatible regexp matching. 123 */ 124 125 int 126 tre_have_backrefs(const regex_t *preg) 127 { 128 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 129 return tnfa->have_backrefs; 130 } 131 132 int 133 tre_have_approx(const regex_t *preg) 134 { 135 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 136 return tnfa->have_approx; 137 } 138 139 static int 140 tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len, 141 tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], 142 int eflags) 143 { 144 reg_errcode_t status; 145 int *tags = NULL, eo; 146 if (tnfa->num_tags > 0 && nmatch > 0) 147 { 148 #ifdef TRE_USE_ALLOCA 149 tags = alloca(sizeof(*tags) * tnfa->num_tags); 150 #else /* !TRE_USE_ALLOCA */ 151 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 152 #endif /* !TRE_USE_ALLOCA */ 153 if (tags == NULL) 154 return REG_ESPACE; 155 } 156 157 /* Dispatch to the appropriate matcher. */ 158 if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER) 159 { 160 /* The regex has back references, use the backtracking matcher. */ 161 if (type == STR_USER) 162 { 163 const tre_str_source *source = string; 164 if (source->rewind == NULL || source->compare == NULL) 165 /* The backtracking matcher requires rewind and compare 166 capabilities from the input stream. */ 167 return REG_BADPAT; 168 } 169 status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type, 170 tags, eflags, &eo); 171 } 172 #ifdef TRE_APPROX 173 else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER) 174 { 175 /* The regex uses approximate matching, use the approximate matcher. */ 176 regamatch_t match; 177 regaparams_t params; 178 tre_regaparams_default(¶ms); 179 params.max_err = 0; 180 params.max_cost = 0; 181 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 182 &match, params, eflags, &eo); 183 } 184 #endif /* TRE_APPROX */ 185 else 186 { 187 /* Exact matching, no back references, use the parallel matcher. */ 188 status = tre_tnfa_run_parallel(tnfa, string, (int)len, type, 189 tags, eflags, &eo); 190 } 191 192 if (status == REG_OK) 193 /* A match was found, so fill the submatch registers. */ 194 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); 195 #ifndef TRE_USE_ALLOCA 196 if (tags) 197 xfree(tags); 198 #endif /* !TRE_USE_ALLOCA */ 199 return status; 200 } 201 202 int 203 tre_regnexec(const regex_t *preg, const char *str, size_t len, 204 size_t nmatch, regmatch_t pmatch[], int eflags) 205 { 206 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 207 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 208 209 return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags); 210 } 211 212 int 213 tre_regexec(const regex_t *preg, const char *str, 214 size_t nmatch, regmatch_t pmatch[], int eflags) 215 { 216 return tre_regnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags); 217 } 218 219 220 #ifdef TRE_WCHAR 221 222 int 223 tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len, 224 size_t nmatch, regmatch_t pmatch[], int eflags) 225 { 226 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 227 return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags); 228 } 229 230 int 231 tre_regwexec(const regex_t *preg, const wchar_t *str, 232 size_t nmatch, regmatch_t pmatch[], int eflags) 233 { 234 return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags); 235 } 236 237 #endif /* TRE_WCHAR */ 238 239 int 240 tre_reguexec(const regex_t *preg, const tre_str_source *str, 241 size_t nmatch, regmatch_t pmatch[], int eflags) 242 { 243 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 244 return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags); 245 } 246 247 248 #ifdef TRE_APPROX 249 250 /* 251 Wrapper functions for approximate regexp matching. 252 */ 253 254 static int 255 tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len, 256 tre_str_type_t type, regamatch_t *match, regaparams_t params, 257 int eflags) 258 { 259 reg_errcode_t status; 260 int *tags = NULL, eo; 261 262 /* If the regexp does not use approximate matching features, the 263 maximum cost is zero, and the approximate matcher isn't forced, 264 use the exact matcher instead. */ 265 if (params.max_cost == 0 && !tnfa->have_approx 266 && !(eflags & REG_APPROX_MATCHER)) 267 return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch, 268 eflags); 269 270 /* Back references are not supported by the approximate matcher. */ 271 if (tnfa->have_backrefs) 272 return REG_BADPAT; 273 274 if (tnfa->num_tags > 0 && match->nmatch > 0) 275 { 276 #if TRE_USE_ALLOCA 277 tags = alloca(sizeof(*tags) * tnfa->num_tags); 278 #else /* !TRE_USE_ALLOCA */ 279 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 280 #endif /* !TRE_USE_ALLOCA */ 281 if (tags == NULL) 282 return REG_ESPACE; 283 } 284 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 285 match, params, eflags, &eo); 286 if (status == REG_OK) 287 tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo); 288 #ifndef TRE_USE_ALLOCA 289 if (tags) 290 xfree(tags); 291 #endif /* !TRE_USE_ALLOCA */ 292 return status; 293 } 294 295 int 296 tre_reganexec(const regex_t *preg, const char *str, size_t len, 297 regamatch_t *match, regaparams_t params, int eflags) 298 { 299 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 300 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 301 302 return tre_match_approx(tnfa, str, len, type, match, params, eflags); 303 } 304 305 int 306 tre_regaexec(const regex_t *preg, const char *str, 307 regamatch_t *match, regaparams_t params, int eflags) 308 { 309 return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags); 310 } 311 312 #ifdef TRE_WCHAR 313 314 int 315 tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len, 316 regamatch_t *match, regaparams_t params, int eflags) 317 { 318 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 319 return tre_match_approx(tnfa, str, len, STR_WIDE, 320 match, params, eflags); 321 } 322 323 int 324 tre_regawexec(const regex_t *preg, const wchar_t *str, 325 regamatch_t *match, regaparams_t params, int eflags) 326 { 327 return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags); 328 } 329 330 #endif /* TRE_WCHAR */ 331 332 void 333 tre_regaparams_default(regaparams_t *params) 334 { 335 memset(params, 0, sizeof(*params)); 336 params->cost_ins = 1; 337 params->cost_del = 1; 338 params->cost_subst = 1; 339 params->max_cost = INT_MAX; 340 params->max_ins = INT_MAX; 341 params->max_del = INT_MAX; 342 params->max_subst = INT_MAX; 343 params->max_err = INT_MAX; 344 } 345 346 #endif /* TRE_APPROX */ 347 348 /* EOF */ 349