1 /* 2 tre_regexec.c - TRE POSIX compatible matching functions (and more). 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 #ifdef TRE_USE_ALLOCA 14 /* AIX requires this to be the first thing in the file. */ 15 #ifndef __GNUC__ 16 # if HAVE_ALLOCA_H 17 # include <alloca.h> 18 # else 19 # ifdef _AIX 20 #pragma alloca 21 # else 22 # ifndef alloca /* predefined by HP cc +Olibcalls */ 23 char *alloca (); 24 # endif 25 # endif 26 # endif 27 #endif 28 #endif /* TRE_USE_ALLOCA */ 29 30 #include <assert.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #ifdef HAVE_WCHAR_H 34 #include <wchar.h> 35 #endif /* HAVE_WCHAR_H */ 36 #ifdef HAVE_WCTYPE_H 37 #include <wctype.h> 38 #endif /* HAVE_WCTYPE_H */ 39 #ifndef TRE_WCHAR 40 #include <ctype.h> 41 #endif /* !TRE_WCHAR */ 42 #ifdef HAVE_MALLOC_H 43 #include <malloc.h> 44 #endif /* HAVE_MALLOC_H */ 45 #include <limits.h> 46 47 #include "tre-internal.h" 48 #include "tre.h" 49 #include "xmalloc.h" 50 51 #ifdef __weak_alias 52 __weak_alias(regexec,_regexec) 53 #endif 54 55 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match 56 endpoint values. */ 57 void 58 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 59 const tre_tnfa_t *tnfa, int *tags, int match_eo) 60 { 61 tre_submatch_data_t *submatch_data; 62 unsigned int i, j; 63 int *parents; 64 65 i = 0; 66 if (match_eo >= 0 && !(cflags & REG_NOSUB)) 67 { 68 /* Construct submatch offsets from the tags. */ 69 DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo)); 70 submatch_data = tnfa->submatch_data; 71 while (i < tnfa->num_submatches && i < nmatch) 72 { 73 if (submatch_data[i].so_tag == tnfa->end_tag) 74 pmatch[i].rm_so = match_eo; 75 else 76 pmatch[i].rm_so = tags[submatch_data[i].so_tag]; 77 78 if (submatch_data[i].eo_tag == tnfa->end_tag) 79 pmatch[i].rm_eo = match_eo; 80 else 81 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; 82 83 /* If either of the endpoints were not used, this submatch 84 was not part of the match. */ 85 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) 86 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 87 88 DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i, 89 submatch_data[i].so_tag, pmatch[i].rm_so, 90 submatch_data[i].eo_tag, pmatch[i].rm_eo)); 91 i++; 92 } 93 /* Reset all submatches that are not within all of their parent 94 submatches. */ 95 i = 0; 96 while (i < tnfa->num_submatches && i < nmatch) 97 { 98 if (pmatch[i].rm_eo == -1) 99 assert(pmatch[i].rm_so == -1); 100 assert(pmatch[i].rm_so <= pmatch[i].rm_eo); 101 102 parents = submatch_data[i].parents; 103 if (parents != NULL) 104 for (j = 0; parents[j] >= 0; j++) 105 { 106 DPRINT(("pmatch[%d] parent %d\n", i, parents[j])); 107 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so 108 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) 109 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 110 } 111 i++; 112 } 113 } 114 115 while (i < nmatch) 116 { 117 pmatch[i].rm_so = -1; 118 pmatch[i].rm_eo = -1; 119 i++; 120 } 121 } 122 123 124 /* 125 Wrapper functions for POSIX compatible regexp matching. 126 */ 127 128 int 129 tre_have_backrefs(const regex_t *preg) 130 { 131 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 132 return tnfa->have_backrefs; 133 } 134 135 int 136 tre_have_approx(const regex_t *preg) 137 { 138 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 139 return tnfa->have_approx; 140 } 141 142 static int 143 tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len, 144 tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], 145 int eflags) 146 { 147 reg_errcode_t status; 148 int *tags = NULL, eo; 149 if (tnfa->num_tags > 0 && nmatch > 0) 150 { 151 #ifdef TRE_USE_ALLOCA 152 tags = alloca(sizeof(*tags) * tnfa->num_tags); 153 #else /* !TRE_USE_ALLOCA */ 154 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 155 #endif /* !TRE_USE_ALLOCA */ 156 if (tags == NULL) 157 return REG_ESPACE; 158 } 159 160 /* Dispatch to the appropriate matcher. */ 161 if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER) 162 { 163 /* The regex has back references, use the backtracking matcher. */ 164 if (type == STR_USER) 165 { 166 const tre_str_source *source = string; 167 if (source->rewind == NULL || source->compare == NULL) 168 /* The backtracking matcher requires rewind and compare 169 capabilities from the input stream. */ 170 return REG_BADPAT; 171 } 172 status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type, 173 tags, eflags, &eo); 174 } 175 #ifdef TRE_APPROX 176 else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER) 177 { 178 /* The regex uses approximate matching, use the approximate matcher. */ 179 regamatch_t match; 180 regaparams_t params; 181 tre_regaparams_default(¶ms); 182 params.max_err = 0; 183 params.max_cost = 0; 184 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 185 &match, params, eflags, &eo); 186 } 187 #endif /* TRE_APPROX */ 188 else 189 { 190 /* Exact matching, no back references, use the parallel matcher. */ 191 status = tre_tnfa_run_parallel(tnfa, string, (int)len, type, 192 tags, eflags, &eo); 193 } 194 195 if (status == REG_OK) 196 /* A match was found, so fill the submatch registers. */ 197 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); 198 #ifndef TRE_USE_ALLOCA 199 if (tags) 200 xfree(tags); 201 #endif /* !TRE_USE_ALLOCA */ 202 return status; 203 } 204 205 int 206 tre_regnexec(const regex_t *preg, const char *str, size_t len, 207 size_t nmatch, regmatch_t pmatch[], int eflags) 208 { 209 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 210 /* CONSTCOND */ 211 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 212 213 return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags); 214 } 215 216 int 217 tre_regexec(const regex_t *preg, const char *str, 218 size_t nmatch, regmatch_t pmatch[], int eflags) 219 { 220 const char *newstr; 221 unsigned newflags; 222 size_t newlen; 223 224 if (eflags & REG_STARTEND) { 225 /* LINTED */ 226 newstr = &str[pmatch[0].rm_so]; 227 /* LINTED */ 228 newlen = pmatch[0].rm_eo; 229 newflags = (unsigned)(eflags & ~REG_STARTEND); 230 } else { 231 newstr = str; 232 newlen = (size_t)-1; 233 newflags = (unsigned)eflags; 234 } 235 return tre_regnexec(preg, newstr, newlen, nmatch, pmatch, (int)newflags); 236 } 237 238 239 #ifdef TRE_WCHAR 240 241 int 242 tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len, 243 size_t nmatch, regmatch_t pmatch[], int eflags) 244 { 245 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 246 return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags); 247 } 248 249 int 250 tre_regwexec(const regex_t *preg, const wchar_t *str, 251 size_t nmatch, regmatch_t pmatch[], int eflags) 252 { 253 return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags); 254 } 255 256 #endif /* TRE_WCHAR */ 257 258 int 259 tre_reguexec(const regex_t *preg, const tre_str_source *str, 260 size_t nmatch, regmatch_t pmatch[], int eflags) 261 { 262 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 263 return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags); 264 } 265 266 267 #ifdef TRE_APPROX 268 269 /* 270 Wrapper functions for approximate regexp matching. 271 */ 272 273 static int 274 tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len, 275 tre_str_type_t type, regamatch_t *match, regaparams_t params, 276 int eflags) 277 { 278 reg_errcode_t status; 279 int *tags = NULL, eo; 280 281 /* If the regexp does not use approximate matching features, the 282 maximum cost is zero, and the approximate matcher isn't forced, 283 use the exact matcher instead. */ 284 if (params.max_cost == 0 && !tnfa->have_approx 285 && !(eflags & REG_APPROX_MATCHER)) 286 return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch, 287 eflags); 288 289 /* Back references are not supported by the approximate matcher. */ 290 if (tnfa->have_backrefs) 291 return REG_BADPAT; 292 293 if (tnfa->num_tags > 0 && match->nmatch > 0) 294 { 295 #if TRE_USE_ALLOCA 296 tags = alloca(sizeof(*tags) * tnfa->num_tags); 297 #else /* !TRE_USE_ALLOCA */ 298 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 299 #endif /* !TRE_USE_ALLOCA */ 300 if (tags == NULL) 301 return REG_ESPACE; 302 } 303 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 304 match, params, eflags, &eo); 305 if (status == REG_OK) 306 tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo); 307 #ifndef TRE_USE_ALLOCA 308 if (tags) 309 xfree(tags); 310 #endif /* !TRE_USE_ALLOCA */ 311 return status; 312 } 313 314 int 315 tre_reganexec(const regex_t *preg, const char *str, size_t len, 316 regamatch_t *match, regaparams_t params, int eflags) 317 { 318 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 319 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 320 321 return tre_match_approx(tnfa, str, len, type, match, params, eflags); 322 } 323 324 int 325 tre_regaexec(const regex_t *preg, const char *str, 326 regamatch_t *match, regaparams_t params, int eflags) 327 { 328 return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags); 329 } 330 331 #ifdef TRE_WCHAR 332 333 int 334 tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len, 335 regamatch_t *match, regaparams_t params, int eflags) 336 { 337 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 338 return tre_match_approx(tnfa, str, len, STR_WIDE, 339 match, params, eflags); 340 } 341 342 int 343 tre_regawexec(const regex_t *preg, const wchar_t *str, 344 regamatch_t *match, regaparams_t params, int eflags) 345 { 346 return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags); 347 } 348 349 #endif /* TRE_WCHAR */ 350 351 void 352 tre_regaparams_default(regaparams_t *params) 353 { 354 memset(params, 0, sizeof(*params)); 355 params->cost_ins = 1; 356 params->cost_del = 1; 357 params->cost_subst = 1; 358 params->max_cost = INT_MAX; 359 params->max_ins = INT_MAX; 360 params->max_del = INT_MAX; 361 params->max_subst = INT_MAX; 362 params->max_err = INT_MAX; 363 } 364 365 #endif /* TRE_APPROX */ 366 367 /* EOF */ 368