1 /* 2 tre_regexec.c - TRE POSIX compatible matching functions (and more). 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 #ifdef HAVE_CONFIG_H 10 #include <config.h> 11 #endif /* HAVE_CONFIG_H */ 12 13 #ifdef TRE_USE_ALLOCA 14 /* AIX requires this to be the first thing in the file. */ 15 #ifndef __GNUC__ 16 # if HAVE_ALLOCA_H 17 # include <alloca.h> 18 # else 19 # ifdef _AIX 20 #pragma alloca 21 # else 22 # ifndef alloca /* predefined by HP cc +Olibcalls */ 23 char *alloca (); 24 # endif 25 # endif 26 # endif 27 #endif 28 #endif /* TRE_USE_ALLOCA */ 29 30 #include <assert.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #ifdef HAVE_WCHAR_H 34 #include <wchar.h> 35 #endif /* HAVE_WCHAR_H */ 36 #ifdef HAVE_WCTYPE_H 37 #include <wctype.h> 38 #endif /* HAVE_WCTYPE_H */ 39 #ifndef TRE_WCHAR 40 #include <ctype.h> 41 #endif /* !TRE_WCHAR */ 42 #ifdef HAVE_MALLOC_H 43 #include <malloc.h> 44 #endif /* HAVE_MALLOC_H */ 45 #include <limits.h> 46 47 #include "tre-internal.h" 48 #include "tre.h" 49 #include "xmalloc.h" 50 51 52 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match 53 endpoint values. */ 54 void 55 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, 56 const tre_tnfa_t *tnfa, int *tags, int match_eo) 57 { 58 tre_submatch_data_t *submatch_data; 59 unsigned int i, j; 60 int *parents; 61 62 i = 0; 63 if (match_eo >= 0 && !(cflags & REG_NOSUB)) 64 { 65 /* Construct submatch offsets from the tags. */ 66 DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo)); 67 submatch_data = tnfa->submatch_data; 68 while (i < tnfa->num_submatches && i < nmatch) 69 { 70 if (submatch_data[i].so_tag == tnfa->end_tag) 71 pmatch[i].rm_so = match_eo; 72 else 73 pmatch[i].rm_so = tags[submatch_data[i].so_tag]; 74 75 if (submatch_data[i].eo_tag == tnfa->end_tag) 76 pmatch[i].rm_eo = match_eo; 77 else 78 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; 79 80 /* If either of the endpoints were not used, this submatch 81 was not part of the match. */ 82 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) 83 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 84 85 DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i, 86 submatch_data[i].so_tag, pmatch[i].rm_so, 87 submatch_data[i].eo_tag, pmatch[i].rm_eo)); 88 i++; 89 } 90 /* Reset all submatches that are not within all of their parent 91 submatches. */ 92 i = 0; 93 while (i < tnfa->num_submatches && i < nmatch) 94 { 95 if (pmatch[i].rm_eo == -1) 96 assert(pmatch[i].rm_so == -1); 97 assert(pmatch[i].rm_so <= pmatch[i].rm_eo); 98 99 parents = submatch_data[i].parents; 100 if (parents != NULL) 101 for (j = 0; parents[j] >= 0; j++) 102 { 103 DPRINT(("pmatch[%d] parent %d\n", i, parents[j])); 104 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so 105 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) 106 pmatch[i].rm_so = pmatch[i].rm_eo = -1; 107 } 108 i++; 109 } 110 } 111 112 while (i < nmatch) 113 { 114 pmatch[i].rm_so = -1; 115 pmatch[i].rm_eo = -1; 116 i++; 117 } 118 } 119 120 121 /* 122 Wrapper functions for POSIX compatible regexp matching. 123 */ 124 125 int 126 tre_have_backrefs(const regex_t *preg) 127 { 128 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 129 return tnfa->have_backrefs; 130 } 131 132 int 133 tre_have_approx(const regex_t *preg) 134 { 135 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 136 return tnfa->have_approx; 137 } 138 139 static int 140 tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len, 141 tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], 142 int eflags) 143 { 144 reg_errcode_t status; 145 int *tags = NULL, eo; 146 if (tnfa->num_tags > 0 && nmatch > 0) 147 { 148 #ifdef TRE_USE_ALLOCA 149 tags = alloca(sizeof(*tags) * tnfa->num_tags); 150 #else /* !TRE_USE_ALLOCA */ 151 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 152 #endif /* !TRE_USE_ALLOCA */ 153 if (tags == NULL) 154 return REG_ESPACE; 155 } 156 157 /* Dispatch to the appropriate matcher. */ 158 if (tnfa->have_backrefs || eflags & REG_BACKTRACKING_MATCHER) 159 { 160 /* The regex has back references, use the backtracking matcher. */ 161 if (type == STR_USER) 162 { 163 const tre_str_source *source = string; 164 if (source->rewind == NULL || source->compare == NULL) 165 /* The backtracking matcher requires rewind and compare 166 capabilities from the input stream. */ 167 return REG_BADPAT; 168 } 169 status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type, 170 tags, eflags, &eo); 171 } 172 #ifdef TRE_APPROX 173 else if (tnfa->have_approx || eflags & REG_APPROX_MATCHER) 174 { 175 /* The regex uses approximate matching, use the approximate matcher. */ 176 regamatch_t match; 177 regaparams_t params; 178 tre_regaparams_default(¶ms); 179 params.max_err = 0; 180 params.max_cost = 0; 181 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 182 &match, params, eflags, &eo); 183 } 184 #endif /* TRE_APPROX */ 185 else 186 { 187 /* Exact matching, no back references, use the parallel matcher. */ 188 status = tre_tnfa_run_parallel(tnfa, string, (int)len, type, 189 tags, eflags, &eo); 190 } 191 192 if (status == REG_OK) 193 /* A match was found, so fill the submatch registers. */ 194 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); 195 #ifndef TRE_USE_ALLOCA 196 if (tags) 197 xfree(tags); 198 #endif /* !TRE_USE_ALLOCA */ 199 return status; 200 } 201 202 int 203 tre_regnexec(const regex_t *preg, const char *str, size_t len, 204 size_t nmatch, regmatch_t pmatch[], int eflags) 205 { 206 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 207 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 208 209 return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags); 210 } 211 212 int 213 tre_regexec(const regex_t *preg, const char *str, 214 size_t nmatch, regmatch_t pmatch[], int eflags) 215 { 216 const char *newstr; 217 unsigned newflags; 218 size_t newlen; 219 220 if (eflags & REG_STARTEND) { 221 newstr = &str[pmatch[0].rm_so]; 222 newlen = pmatch[0].rm_eo; 223 newflags = (unsigned)(eflags & ~REG_STARTEND); 224 } else { 225 newstr = str; 226 newlen = (size_t)-1; 227 newflags = (unsigned)eflags; 228 } 229 return tre_regnexec(preg, newstr, newlen, nmatch, pmatch, (int)newflags); 230 } 231 232 233 #ifdef TRE_WCHAR 234 235 int 236 tre_regwnexec(const regex_t *preg, const wchar_t *str, size_t len, 237 size_t nmatch, regmatch_t pmatch[], int eflags) 238 { 239 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 240 return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags); 241 } 242 243 int 244 tre_regwexec(const regex_t *preg, const wchar_t *str, 245 size_t nmatch, regmatch_t pmatch[], int eflags) 246 { 247 return tre_regwnexec(preg, str, (unsigned)-1, nmatch, pmatch, eflags); 248 } 249 250 #endif /* TRE_WCHAR */ 251 252 int 253 tre_reguexec(const regex_t *preg, const tre_str_source *str, 254 size_t nmatch, regmatch_t pmatch[], int eflags) 255 { 256 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 257 return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags); 258 } 259 260 261 #ifdef TRE_APPROX 262 263 /* 264 Wrapper functions for approximate regexp matching. 265 */ 266 267 static int 268 tre_match_approx(const tre_tnfa_t *tnfa, const void *string, size_t len, 269 tre_str_type_t type, regamatch_t *match, regaparams_t params, 270 int eflags) 271 { 272 reg_errcode_t status; 273 int *tags = NULL, eo; 274 275 /* If the regexp does not use approximate matching features, the 276 maximum cost is zero, and the approximate matcher isn't forced, 277 use the exact matcher instead. */ 278 if (params.max_cost == 0 && !tnfa->have_approx 279 && !(eflags & REG_APPROX_MATCHER)) 280 return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch, 281 eflags); 282 283 /* Back references are not supported by the approximate matcher. */ 284 if (tnfa->have_backrefs) 285 return REG_BADPAT; 286 287 if (tnfa->num_tags > 0 && match->nmatch > 0) 288 { 289 #if TRE_USE_ALLOCA 290 tags = alloca(sizeof(*tags) * tnfa->num_tags); 291 #else /* !TRE_USE_ALLOCA */ 292 tags = xmalloc(sizeof(*tags) * tnfa->num_tags); 293 #endif /* !TRE_USE_ALLOCA */ 294 if (tags == NULL) 295 return REG_ESPACE; 296 } 297 status = tre_tnfa_run_approx(tnfa, string, (int)len, type, tags, 298 match, params, eflags, &eo); 299 if (status == REG_OK) 300 tre_fill_pmatch(match->nmatch, match->pmatch, tnfa->cflags, tnfa, tags, eo); 301 #ifndef TRE_USE_ALLOCA 302 if (tags) 303 xfree(tags); 304 #endif /* !TRE_USE_ALLOCA */ 305 return status; 306 } 307 308 int 309 tre_reganexec(const regex_t *preg, const char *str, size_t len, 310 regamatch_t *match, regaparams_t params, int eflags) 311 { 312 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 313 tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; 314 315 return tre_match_approx(tnfa, str, len, type, match, params, eflags); 316 } 317 318 int 319 tre_regaexec(const regex_t *preg, const char *str, 320 regamatch_t *match, regaparams_t params, int eflags) 321 { 322 return tre_reganexec(preg, str, (unsigned)-1, match, params, eflags); 323 } 324 325 #ifdef TRE_WCHAR 326 327 int 328 tre_regawnexec(const regex_t *preg, const wchar_t *str, size_t len, 329 regamatch_t *match, regaparams_t params, int eflags) 330 { 331 tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD; 332 return tre_match_approx(tnfa, str, len, STR_WIDE, 333 match, params, eflags); 334 } 335 336 int 337 tre_regawexec(const regex_t *preg, const wchar_t *str, 338 regamatch_t *match, regaparams_t params, int eflags) 339 { 340 return tre_regawnexec(preg, str, (unsigned)-1, match, params, eflags); 341 } 342 343 #endif /* TRE_WCHAR */ 344 345 void 346 tre_regaparams_default(regaparams_t *params) 347 { 348 memset(params, 0, sizeof(*params)); 349 params->cost_ins = 1; 350 params->cost_del = 1; 351 params->cost_subst = 1; 352 params->max_cost = INT_MAX; 353 params->max_ins = INT_MAX; 354 params->max_del = INT_MAX; 355 params->max_subst = INT_MAX; 356 params->max_err = INT_MAX; 357 } 358 359 #endif /* TRE_APPROX */ 360 361 /* EOF */ 362