1 /* 2 retest.c - TRE regression test program 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 /* 10 This is just a simple test application containing various hands-written 11 tests for regression testing TRE. I've tried to surround TRE specific 12 tests inside ifdefs, so this can be used to test any POSIX compatible 13 regexp implementation. 14 */ 15 16 #ifdef HAVE_CONFIG_H 17 #include <config.h> 18 #endif /* HAVE_CONFIG_H */ 19 20 #include <stdlib.h> 21 #include <stdio.h> 22 #include <stdarg.h> 23 #include <string.h> 24 #include <sys/types.h> 25 #include <locale.h> 26 #ifdef HAVE_MALLOC_H 27 #include <malloc.h> 28 #endif /* HAVE_MALLOC_H */ 29 #include <regex.h> 30 31 #ifdef TRE_VERSION 32 #define HAVE_REGNEXEC 1 33 #define HAVE_REGNCOMP 1 34 #include "xmalloc.h" 35 #else /* !TRE_VERSION */ 36 #define xmalloc malloc 37 #define xfree free 38 #endif /* !TRE_VERSION */ 39 #include "tre-internal.h" 40 41 #ifdef WRETEST 42 #include <wchar.h> 43 #define CHAR_T wchar_t 44 #define L(x) (L ## x) 45 46 #define MAXSTRSIZE 1024 47 static wchar_t wstr[MAXSTRSIZE]; 48 static wchar_t wregex[MAXSTRSIZE]; 49 static int woffs[MAXSTRSIZE]; 50 51 #define tre_regexec tre_regwexec 52 #define tre_regnexec tre_regwnexec 53 #define tre_regcomp tre_regwcomp 54 #define tre_regncomp tre_regwncomp 55 56 /* Iterate mbrtowc over the multi-byte sequence STR of length LEN, 57 store the result in BUF and memoize the successive byte offsets 58 in OFF. */ 59 60 static int 61 mbntowc (wchar_t *buf, const char *str, size_t len, int *off) 62 { 63 int n, wlen; 64 #ifdef HAVE_MBSTATE_T 65 mbstate_t cst; 66 memset(&cst, 0, sizeof(cst)); 67 #endif 68 69 if (len >= MAXSTRSIZE) 70 { 71 fprintf(stderr, "Increase MAXSTRSIZE to %ld or more and recompile!\n", 72 (long)len + 1); 73 exit(EXIT_FAILURE); 74 } 75 76 if (off) 77 { 78 memset(off + 1, -1, len * sizeof(int)); 79 *off = 0; 80 } 81 82 wlen = 0; 83 while (len > 0) 84 { 85 n = tre_mbrtowc(buf ? buf++ : NULL, str, len, &cst); 86 if (n < 0) 87 return n; 88 if (n == 0) 89 n = 1; 90 str += n; 91 len -= n; 92 wlen += 1; 93 if (off) 94 *(off += n) = wlen; 95 } 96 97 return(wlen); 98 } 99 100 #else /* !WRETEST */ 101 #define CHAR_T char 102 #define L(x) (x) 103 #endif /* !WRETEST */ 104 105 static int valid_reobj = 0; 106 static regex_t reobj; 107 static regmatch_t pmatch_global[32]; 108 static const CHAR_T *regex_pattern; 109 static int cflags_global; 110 static int use_regnexec = 0; 111 static int use_regncomp = 0; 112 static int avoid_eflags = 0; 113 114 static int comp_tests = 0; 115 static int exec_tests = 0; 116 static int comp_errors = 0; 117 static int exec_errors = 0; 118 119 #ifndef REG_OK 120 #define REG_OK 0 121 #endif /* REG_OK */ 122 123 #define END -2 124 125 static void 126 test_status(char c) 127 { 128 static int k = 0; 129 printf("%c", c); 130 if (++k % 79 == 0) 131 printf("\n"); 132 fflush(stdout); 133 } 134 135 136 static int 137 wrap_regexec(const CHAR_T *data, size_t len, 138 size_t pmatch_len, regmatch_t *pmatch, int eflags) 139 { 140 CHAR_T *buf = NULL; 141 int result; 142 143 if (len == 0 && use_regnexec) 144 { 145 /* Zero length string and using tre_regnexec(), the pointer we give 146 should not be dereferenced at all. */ 147 buf = NULL; 148 } 149 else 150 { 151 /* Copy the data to a separate buffer to make a better test for 152 tre_regexec() and tre_regnexec(). */ 153 buf = xmalloc((len + !use_regnexec) * sizeof(CHAR_T)); 154 if (!buf) 155 return REG_ESPACE; 156 memcpy(buf, data, len * sizeof(CHAR_T)); 157 test_status('#'); 158 } 159 160 #ifdef HAVE_REGNEXEC 161 if (use_regnexec) 162 { 163 if (len == 0) 164 result = tre_regnexec(&reobj, NULL, len, pmatch_len, pmatch, eflags); 165 else 166 result = tre_regnexec(&reobj, buf, len, pmatch_len, pmatch, eflags); 167 } 168 else 169 #endif /* HAVE_REGNEXEC */ 170 { 171 buf[len] = L('\0'); 172 result = tre_regexec(&reobj, buf, pmatch_len, pmatch, eflags); 173 } 174 175 xfree(buf); 176 return result; 177 } 178 179 static int 180 wrap_regcomp(regex_t *preg, const CHAR_T *data, size_t len, int cflags) 181 { 182 #ifdef HAVE_REGNCOMP 183 if (use_regncomp) 184 return tre_regncomp(preg, data, len, cflags); 185 else 186 return tre_regcomp(preg, data, cflags); 187 #else /* !HAVE_REGNCOMP */ 188 fprintf(stderr, "%s\n", data); 189 return tre_regcomp(preg, data, cflags); 190 #endif /* !HAVE_REGNCOMP */ 191 } 192 193 static int 194 execute(const CHAR_T *data, int len, size_t pmatch_len, regmatch_t *pmatch, 195 int eflags) 196 { 197 #ifdef MALLOC_DEBUGGING 198 int i = 0; 199 int ret; 200 201 while (1) 202 { 203 xmalloc_configure(i); 204 comp_tests++; 205 ret = wrap_regexec(data, len, pmatch_len, pmatch, eflags); 206 if (ret != REG_ESPACE) 207 { 208 break; 209 } 210 #ifdef REGEX_DEBUG 211 xmalloc_dump_leaks(); 212 #endif /* REGEX_DEBUG */ 213 i++; 214 } 215 return ret; 216 #else /* !MALLOC_DEBUGGING */ 217 return wrap_regexec(data, len, pmatch_len, pmatch, eflags); 218 #endif /* !MALLOC_DEBUGGING */ 219 } 220 221 static int 222 check(va_list ap, int ret, const CHAR_T *str, 223 size_t pmatch_len, regmatch_t *pmatch, int eflags) 224 { 225 int fail = 0; 226 227 if (ret != va_arg(ap, int)) 228 { 229 #ifndef WRETEST 230 printf("Exec error, regex: \"%s\", cflags %d, " 231 "string: \"%s\", eflags %d\n", regex_pattern, cflags_global, 232 str, eflags); 233 #else /* WRETEST */ 234 printf("Exec error, regex: \"%ls\", cflags %d, " 235 "string: \"%ls\", eflags %d\n", regex_pattern, cflags_global, 236 str, eflags); 237 #endif /* WRETEST */ 238 printf(" got %smatch (tre_regexec returned %d)\n", ret ? "no " : "", ret); 239 return 1; 240 } 241 242 if (ret == 0) 243 { 244 unsigned int i; 245 246 for (i = 0; i < pmatch_len; i++) 247 { 248 int rm_so, rm_eo; 249 rm_so = va_arg(ap, int); 250 if (rm_so == END) 251 break; 252 rm_eo = va_arg(ap, int); 253 #ifdef WRETEST 254 if (rm_so >= 0) 255 { 256 int n = rm_so; 257 258 if ((rm_so = woffs[rm_so]) < 0 || 259 (n = rm_eo, rm_eo = woffs[rm_eo]) < 0) 260 { 261 printf("Invalid or incomplete multi-byte sequence " 262 "in string %ls before byte offset %d\n", str, n); 263 return 1; 264 } 265 } 266 #endif /* WRETEST */ 267 if (pmatch[i].rm_so != rm_so 268 || pmatch[i].rm_eo != rm_eo) 269 { 270 #ifndef WRETEST 271 printf("Exec error, regex: \"%s\", string: \"%s\"\n", 272 regex_pattern, str); 273 printf(" group %d: expected (%d, %d) \"%.*s\", " 274 "got (%d, %d) \"%.*s\"\n", 275 #else /* WRETEST */ 276 printf("Exec error, regex: \"%ls\", string: \"%ls\"\n", 277 regex_pattern, str); 278 printf(" group %d: expected (%d, %d) \"%.*ls\", " 279 "got (%d, %d) \"%.*ls\"\n", 280 #endif /* WRETEST */ 281 i, rm_so, rm_eo, rm_eo - rm_so, str + rm_so, 282 (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo, 283 (int)(pmatch[i].rm_eo - pmatch[i].rm_so), 284 str + pmatch[i].rm_so); 285 fail = 1; 286 } 287 } 288 289 if (!(cflags_global & REG_NOSUB) && reobj.re_nsub != i - 1 290 && reobj.re_nsub <= pmatch_len && pmatch) 291 { 292 #ifndef WRETEST 293 printf("Comp error, regex: \"%s\"\n", regex_pattern); 294 #else /* WRETEST */ 295 printf("Comp error, regex: \"%ls\"\n", regex_pattern); 296 #endif /* WRETEST */ 297 printf(" re_nsub is %d, should be %d\n", (int)reobj.re_nsub, i - 1); 298 fail = 1; 299 } 300 301 302 for (; i < pmatch_len; i++) 303 if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1) 304 { 305 if (!fail) 306 #ifndef WRETEST 307 printf("Exec error, regex: \"%s\", string: \"%s\"\n", 308 regex_pattern, str); 309 #else /* WRETEST */ 310 printf("Exec error, regex: \"%ls\", string: \"%ls\"\n", 311 regex_pattern, str); 312 #endif /* WRETEST */ 313 printf(" group %d: expected (-1, -1), got (%d, %d)\n", 314 i, (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo); 315 fail = 1; 316 } 317 } 318 319 return fail; 320 } 321 322 323 static void 324 test_nexec(const char *data, size_t len, int eflags, ...) 325 { 326 int m; 327 int fail = 0; 328 int extra_flags[] = {0, REG_BACKTRACKING_MATCHER, REG_APPROX_MATCHER}; 329 size_t i; 330 va_list ap; 331 332 if (!valid_reobj) 333 { 334 exec_errors++; 335 return; 336 } 337 338 #ifdef WRETEST 339 { 340 int wlen = mbntowc(wstr, data, len, woffs); 341 if (wlen < 0) 342 { 343 exec_errors++; 344 printf("Invalid or incomplete multi-byte sequence in %s\n", data); 345 return; 346 } 347 wstr[wlen] = L'\0'; 348 len = wlen; 349 } 350 #define data wstr 351 #endif /* WRETEST */ 352 353 use_regnexec = 1; 354 355 for (i = 0; i < elementsof(extra_flags); i++) 356 { 357 int final_flags = eflags | extra_flags[i]; 358 359 if ((final_flags & REG_BACKTRACKING_MATCHER 360 && tre_have_approx(&reobj)) 361 || (final_flags & REG_APPROX_MATCHER 362 && tre_have_backrefs(&reobj)) 363 || (final_flags & avoid_eflags)) 364 continue; 365 366 /* Test with a pmatch array. */ 367 exec_tests++; 368 m = execute(data, len, elementsof(pmatch_global), pmatch_global, 369 final_flags); 370 va_start(ap, eflags); 371 fail |= check(ap, m, data, elementsof(pmatch_global), pmatch_global, 372 final_flags); 373 va_end(ap); 374 375 /* Same test with a NULL pmatch. */ 376 exec_tests++; 377 m = execute(data, len, 0, NULL, final_flags); 378 va_start(ap, eflags); 379 fail |= check(ap, m, data, 0, NULL, final_flags); 380 va_end(ap); 381 } 382 383 #ifdef WRETEST 384 #undef data 385 #endif /* WRETEST */ 386 387 if (fail) 388 exec_errors++; 389 } 390 391 392 393 static void 394 test_exec(const char *str, int eflags, ...) 395 { 396 int m; 397 int fail = 0; 398 size_t len = strlen(str); 399 int extra_flags[] = {0, 400 REG_BACKTRACKING_MATCHER, 401 REG_APPROX_MATCHER, 402 REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER}; 403 size_t i; 404 va_list ap; 405 406 if (!valid_reobj) 407 { 408 exec_errors++; 409 return; 410 } 411 412 #ifdef WRETEST 413 { 414 int wlen = mbntowc(wstr, str, len, woffs); 415 if (wlen < 0) 416 { 417 exec_errors++; 418 printf("Invalid or incomplete multi-byte sequence in %s\n", str); 419 return; 420 } 421 wstr[wlen] = L'\0'; 422 len = wlen; 423 } 424 #define str wstr 425 #endif /* WRETEST */ 426 427 for (use_regnexec = 0; use_regnexec < 2; use_regnexec++) 428 { 429 for (i = 0; i < elementsof(extra_flags); i++) 430 { 431 int final_flags = eflags | extra_flags[i]; 432 433 if ((final_flags & REG_BACKTRACKING_MATCHER 434 && tre_have_approx(&reobj)) 435 || (final_flags & REG_APPROX_MATCHER 436 && tre_have_backrefs(&reobj)) 437 || (final_flags & avoid_eflags)) 438 continue; 439 440 /* Test with a pmatch array. */ 441 exec_tests++; 442 m = execute(str, len, elementsof(pmatch_global), pmatch_global, 443 final_flags); 444 va_start(ap, eflags); 445 fail |= check(ap, m, str, elementsof(pmatch_global), pmatch_global, 446 final_flags); 447 va_end(ap); 448 449 /* Same test with a NULL pmatch. */ 450 exec_tests++; 451 m = execute(str, len, 0, NULL, final_flags); 452 va_start(ap, eflags); 453 fail |= check(ap, m, str, 0, NULL, final_flags); 454 va_end(ap); 455 } 456 } 457 458 #ifdef WRETEST 459 #undef str 460 #endif /* WRETEST */ 461 462 if (fail) 463 exec_errors++; 464 } 465 466 467 static void 468 test_comp(const char *re, int flags, int ret) 469 { 470 int errcode = 0; 471 int len = strlen(re); 472 473 if (valid_reobj) 474 { 475 tre_regfree(&reobj); 476 valid_reobj = 0; 477 } 478 479 comp_tests++; 480 481 #ifdef WRETEST 482 { 483 int wlen = mbntowc(wregex, re, len, NULL); 484 485 if (wlen < 0) 486 { 487 comp_errors++; 488 printf("Invalid or incomplete multi-byte sequence in %s\n", re); 489 return; 490 } 491 wregex[wlen] = L'\0'; 492 len = wlen; 493 } 494 #define re wregex 495 #endif /* WRETEST */ 496 regex_pattern = re; 497 cflags_global = flags; 498 499 #ifdef MALLOC_DEBUGGING 500 { 501 static int j = 0; 502 int i = 0; 503 while (1) 504 { 505 xmalloc_configure(i); 506 comp_tests++; 507 if (j++ % 20 == 0) 508 test_status('.'); 509 errcode = wrap_regcomp(&reobj, re, len, flags); 510 if (errcode != REG_ESPACE) 511 { 512 test_status('*'); 513 break; 514 } 515 #ifdef REGEX_DEBUG 516 xmalloc_dump_leaks(); 517 #endif /* REGEX_DEBUG */ 518 i++; 519 } 520 } 521 #else /* !MALLOC_DEBUGGING */ 522 errcode = wrap_regcomp(&reobj, re, len, flags); 523 #endif /* !MALLOC_DEBUGGING */ 524 525 #ifdef WRETEST 526 #undef re 527 #endif /* WRETEST */ 528 529 if (errcode != ret) 530 { 531 #ifndef WRETEST 532 printf("Comp error, regex: \"%s\"\n", regex_pattern); 533 #else /* WRETEST */ 534 printf("Comp error, regex: \"%ls\"\n", regex_pattern); 535 #endif /* WRETEST */ 536 printf(" expected return code %d, got %d.\n", 537 ret, errcode); 538 comp_errors++; 539 } 540 541 if (errcode == 0) 542 valid_reobj = 1; 543 } 544 545 546 547 /* To enable tests for known bugs, set this to 1. */ 548 #define KNOWN_BUG 0 549 550 int 551 main(int argc, char **argv) 552 { 553 554 #ifdef WRETEST 555 /* Need an 8-bit locale. Or move the two tests with non-ascii 556 characters to the localized internationalization tests. */ 557 if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") == NULL) 558 fprintf(stderr, "Could not set locale en_US.ISO-8859-1. Expect some\n" 559 "`Invalid or incomplete multi-byte sequence' errors.\n"); 560 #endif /* WRETEST */ 561 /* Large number of macros in one regexp. */ 562 test_comp("[A-Z]\\d\\s?\\d[A-Z]{2}|[A-Z]\\d{2}\\s?\\d[A-Z]{2}|[A-Z]{2}\\d" 563 "\\s?\\d[A-Z]{2}|[A-Z]{2}\\d{2}\\s?\\d[A-Z]{2}|[A-Z]\\d[A-Z]\\s?" 564 "\\d[A-Z]{2}|[A-Z]{2}\\d[A-Z]\\s?\\d[A-Z]{2}|[A-Z]{3}\\s?\\d[A-Z]" 565 "{2}", REG_EXTENDED, 0); 566 567 test_comp("a{11}(b{2}c){2}", REG_EXTENDED, 0); 568 test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); 569 test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", 570 REG_EXTENDED, 0); 571 test_comp("^!pfast [0-9]{1,15} ([0-9]{1,3}\\.){3}[0-9]{1,3}[0-9]{1,5}$", 572 REG_EXTENDED, 0); 573 574 #if KNOWN_BUG 575 /* Should these match or not? */ 576 test_comp("(a)*-\\1b", REG_EXTENDED, 0); 577 test_exec("aaa-b", 0, REG_NOMATCH); 578 test_comp("((.*)\\1)+", REG_EXTENDED, 0); 579 test_exec("xxxxxx", 0, REG_NOMATCH); 580 #endif 581 582 #ifdef TRE_APPROX 583 /* 584 * Approximate matching tests. 585 * 586 * The approximate matcher always searches for the best match, and returns 587 * the leftmost and longest one if there are several best matches. 588 */ 589 590 test_comp("(fou){# ~1}", REG_EXTENDED, 0); 591 test_comp("(fuu){#}", REG_EXTENDED, 0); 592 test_comp("(fuu){# ~}", REG_EXTENDED, 0); 593 test_comp("(anaconda){ 1i + 1d < 1, #1}", REG_EXTENDED, 0); 594 test_comp("(anaconda){ 1i + 1d < 1 #1 ~10 }", REG_EXTENDED, 0); 595 test_comp("(anaconda){ #1, ~1, 1i + 1d < 1 }", REG_EXTENDED, 0); 596 597 test_comp("(znacnda){ #1 ~3 1i + 1d < 1 }", REG_EXTENDED, 0); 598 test_exec("molasses anaconda foo bar baz smith anderson ", 599 0, REG_NOMATCH); 600 test_comp("(znacnda){ #1 ~3 1i + 1d < 2 }", REG_EXTENDED, 0); 601 test_exec("molasses anaconda foo bar baz smith anderson ", 602 0, REG_OK, 9, 17, 9, 17, END); 603 test_comp("(ananda){ 1i + 1d < 2 }", REG_EXTENDED, 0); 604 test_exec("molasses anaconda foo bar baz smith anderson ", 605 0, REG_NOMATCH); 606 607 test_comp("(fuu){ +3 -3 ~5}", REG_EXTENDED, 0); 608 test_exec("anaconda foo bar baz smith anderson", 609 0, REG_OK, 9, 10, 9, 10, END); 610 test_comp("(fuu){ +2 -2 ~5}", REG_EXTENDED, 0); 611 test_exec("anaconda foo bar baz smith anderson", 612 0, REG_OK, 9, 10, 9, 10, END); 613 test_comp("(fuu){ +3 -3 ~}", REG_EXTENDED, 0); 614 test_exec("anaconda foo bar baz smith anderson", 615 0, REG_OK, 9, 10, 9, 10, END); 616 617 test_comp("(laurikari){ #3, 1i + 1d < 3 }", REG_EXTENDED, 0); 618 619 /* No cost limit. */ 620 test_comp("(foobar){~}", REG_EXTENDED, 0); 621 test_exec("xirefoabralfobarxie", 0, REG_OK, 11, 16, 11, 16, END); 622 623 /* At most two errors. */ 624 test_comp("(foobar){~2}", REG_EXTENDED, 0); 625 test_exec("xirefoabrzlfd", 0, REG_OK, 4, 9, 4, 9, END); 626 test_exec("xirefoabzlfd", 0, REG_NOMATCH); 627 628 /* At most two inserts or substitutions and max two errors total. */ 629 test_comp("(foobar){+2#2~2}", REG_EXTENDED, 0); 630 test_exec("oobargoobaploowap", 0, REG_OK, 5, 11, 5, 11, END); 631 632 /* Find best whole word match for "foobar". */ 633 test_comp("\\<(foobar){~}\\>", REG_EXTENDED, 0); 634 test_exec("zfoobarz", 0, REG_OK, 0, 8, 0, 8, END); 635 test_exec("boing zfoobarz goobar woop", 0, REG_OK, 15, 21, 15, 21, END); 636 637 /* Match whole string, allow only 1 error. */ 638 test_comp("^(foobar){~1}$", REG_EXTENDED, 0); 639 test_exec("foobar", 0, REG_OK, 0, 6, 0, 6, END); 640 test_exec("xfoobar", 0, REG_OK, 0, 7, 0, 7, END); 641 /* 642 This currently fails. 643 test_exec("foobarx", 0, REG_OK, 0, 7, 0, 7, END); 644 */ 645 test_exec("fooxbar", 0, REG_OK, 0, 7, 0, 7, END); 646 test_exec("foxbar", 0, REG_OK, 0, 6, 0, 6, END); 647 test_exec("xoobar", 0, REG_OK, 0, 6, 0, 6, END); 648 test_exec("foobax", 0, REG_OK, 0, 6, 0, 6, END); 649 test_exec("oobar", 0, REG_OK, 0, 5, 0, 5, END); 650 test_exec("fobar", 0, REG_OK, 0, 5, 0, 5, END); 651 test_exec("fooba", 0, REG_OK, 0, 5, 0, 5, END); 652 test_exec("xfoobarx", 0, REG_NOMATCH); 653 test_exec("foobarxx", 0, REG_NOMATCH); 654 test_exec("xxfoobar", 0, REG_NOMATCH); 655 test_exec("xfoxbar", 0, REG_NOMATCH); 656 test_exec("foxbarx", 0, REG_NOMATCH); 657 658 /* At most one insert, two deletes, and three substitutions. 659 Additionally, deletes cost two and substitutes one, and total 660 cost must be less than 4. */ 661 test_comp("(foobar){+1 -2 #3, 2d + 1s < 4}", REG_EXTENDED, 0); 662 test_exec("3oifaowefbaoraofuiebofasebfaobfaorfeoaro", 663 0, REG_OK, 26, 33, 26, 33, END); 664 665 /* Partially approximate matches. */ 666 test_comp("foo(bar){~1}zap", REG_EXTENDED, 0); 667 test_exec("foobarzap", 0, REG_OK, 0, 9, 3, 6, END); 668 test_exec("fobarzap", 0, REG_NOMATCH); 669 test_exec("foobrzap", 0, REG_OK, 0, 8, 3, 5, END); 670 test_comp("^.*(dot.org){~}.*$", REG_EXTENDED, 0); 671 test_exec("www.cnn.com 64.236.16.20\n" 672 "www.slashdot.org 66.35.250.150\n" 673 "For useful information, use www.slashdot.org\n" 674 "this is demo data!\n", 675 0, REG_OK, 0, 120, 93, 100, END); 676 677 /* Approximate matching and back referencing cannot be used together. */ 678 test_comp("(foo{~})\\1", REG_EXTENDED, REG_BADPAT); 679 680 #endif /* TRE_APPROX */ 681 682 /* 683 * Basic tests with pure regular expressions 684 */ 685 686 /* Basic string matching. */ 687 test_comp("foobar", REG_EXTENDED, 0); 688 test_exec("foobar", 0, REG_OK, 0, 6, END); 689 test_exec("xxxfoobarzapzot", 0, REG_OK, 3, 9, END); 690 test_comp("foobar", REG_EXTENDED | REG_NOSUB, 0); 691 test_exec("foobar", 0, REG_OK, END); 692 test_comp("aaaa", REG_EXTENDED, 0); 693 test_exec("xxaaaaaaaaaaaaaaaaa", 0, REG_OK, 2, 6, END); 694 695 /* Test zero length matches. */ 696 test_comp("(a*)", REG_EXTENDED, 0); 697 test_exec("", 0, REG_OK, 0, 0, 0, 0, END); 698 699 test_comp("(a*)*", REG_EXTENDED, 0); 700 test_exec("", 0, REG_OK, 0, 0, 0, 0, END); 701 702 test_comp("((a*)*)*", REG_EXTENDED, 0); 703 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); 704 test_comp("(a*bcd)*", REG_EXTENDED, 0); 705 test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabcx", 0, REG_OK, 0, 0, -1, -1, END); 706 test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); 707 test_exec("aaaaaaaaaaaabcxbcdbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END); 708 test_exec("aaaaaaaaaaaabcdbcdbcxaabcxaabc", 0, REG_OK, 0, 18, 15, 18, END); 709 710 test_comp("(a*)+", REG_EXTENDED, 0); 711 test_exec("-", 0, REG_OK, 0, 0, 0, 0, END); 712 713 /* This test blows up the backtracking matcher. */ 714 avoid_eflags = REG_BACKTRACKING_MATCHER; 715 test_comp("((a*)*b)*b", REG_EXTENDED, 0); 716 test_exec("aaaaaaaaaaaaaaaaaaaaaaaaab", 0, REG_OK, 717 25, 26, -1, -1, -1, -1, END); 718 avoid_eflags = 0; 719 720 test_comp("", 0, 0); 721 test_exec("", 0, REG_OK, 0, 0, END); 722 test_exec("foo", 0, REG_OK, 0, 0, END); 723 724 /* Test for submatch addressing which requires arbitrary lookahead. */ 725 test_comp("(a*)aaaaaa", REG_EXTENDED, 0); 726 test_exec("aaaaaaaaaaaaaaax", 0, REG_OK, 0, 15, 0, 9, END); 727 728 /* Test leftmost and longest matching and some tricky submatches. */ 729 test_comp("(a*)(a*)", REG_EXTENDED, 0); 730 test_exec("aaaa", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); 731 test_comp("(abcd|abc)(d?)", REG_EXTENDED, 0); 732 test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); 733 test_comp("(abc|abcd)(d?)", REG_EXTENDED, 0); 734 test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END); 735 test_comp("(abc|abcd)(d?)e", REG_EXTENDED, 0); 736 test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); 737 test_comp("(abcd|abc)(d?)e", REG_EXTENDED, 0); 738 test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END); 739 test_comp("a(bc|bcd)(d?)", REG_EXTENDED, 0); 740 test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); 741 test_comp("a(bcd|bc)(d?)", REG_EXTENDED, 0); 742 test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END); 743 test_comp("a*(a?bc|bcd)(d?)", REG_EXTENDED, 0); 744 test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); 745 test_comp("a*(bcd|a?bc)(d?)", REG_EXTENDED, 0); 746 test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END); 747 test_comp("(a|(a*b*))*", REG_EXTENDED, 0); 748 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); 749 test_exec("a", 0, REG_OK, 0, 1, 0, 1, -1, -1, END); 750 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); 751 test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); 752 test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); 753 test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); 754 test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); 755 test_comp("((a*b*)|a)*", REG_EXTENDED, 0); 756 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); 757 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); 758 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END); 759 test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); 760 test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END); 761 test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END); 762 test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END); 763 test_comp("a.*(.*b.*(.*c.*).*d.*).*e.*(.*f.*).*g", REG_EXTENDED, 0); 764 test_exec("aabbccddeeffgg", 0, REG_OK, 0, 14, 3, 9, 5, 7, 11, 13, END); 765 test_comp("(wee|week)(night|knights)s*", REG_EXTENDED, 0); 766 test_exec("weeknights", 0, REG_OK, 0, 10, 0, 3, 3, 10, END); 767 test_exec("weeknightss", 0, REG_OK, 0, 11, 0, 3, 3, 10, END); 768 test_comp("a*", REG_EXTENDED, 0); 769 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); 770 test_comp("aa*", REG_EXTENDED, 0); 771 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); 772 test_comp("aaa*", REG_EXTENDED, 0); 773 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); 774 test_comp("aaaa*", REG_EXTENDED, 0); 775 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END); 776 777 /* Test clearing old submatch data with nesting parentheses 778 and iteration. */ 779 test_comp("((a)|(b))*c", REG_EXTENDED, 0); 780 test_exec("aaabc", 0, REG_OK, 0, 5, 3, 4, -1, -1, 3, 4, END); 781 test_exec("aaaac", 0, REG_OK, 0, 5, 3, 4, 3, 4, -1, -1, END); 782 test_comp("foo((bar)*)*zot", REG_EXTENDED, 0); 783 test_exec("foozot", 0, REG_OK, 0, 6, 3, 3, -1, -1, END); 784 test_exec("foobarzot", 0, REG_OK, 0, 9, 3, 6, 3, 6, END); 785 test_exec("foobarbarzot", 0, REG_OK, 0, 12, 3, 9, 6, 9, END); 786 787 test_comp("foo((zup)*|(bar)*|(zap)*)*zot", REG_EXTENDED, 0); 788 test_exec("foobarzapzot", 0, REG_OK, 789 0, 12, 6, 9, -1, -1, -1, -1, 6, 9, END); 790 test_exec("foobarbarzapzot", 0, REG_OK, 791 0, 15, 9, 12, -1, -1, -1, -1, 9, 12, END); 792 test_exec("foozupzot", 0, REG_OK, 793 0, 9, 3, 6, 3, 6, -1, -1, -1, -1, END); 794 test_exec("foobarzot", 0, REG_OK, 795 0, 9, 3, 6, -1, -1, 3, 6, -1, -1, END); 796 test_exec("foozapzot", 0, REG_OK, 797 0, 9, 3, 6, -1, -1, -1, -1, 3, 6, END); 798 test_exec("foozot", 0, REG_OK, 799 0, 6, 3, 3, -1, -1, -1, -1, -1, -1, END); 800 801 802 /* Test case where, e.g., Perl and Python regexp functions, and many 803 other backtracking matchers, fail to produce the longest match. 804 It is not exactly a bug since Perl does not claim to find the 805 longest match, but a confusing feature and, in my opinion, a bad 806 design choice because the union operator is traditionally defined 807 to be commutative (with respect to the language denoted by the RE). */ 808 test_comp("(a|ab)(blip)?", REG_EXTENDED, 0); 809 test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); 810 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); 811 test_comp("(ab|a)(blip)?", REG_EXTENDED, 0); 812 test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END); 813 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END); 814 815 /* Test more submatch addressing. */ 816 test_comp("((a|b)*)a(a|b)*", REG_EXTENDED, 0); 817 test_exec("aaaaabaaaba", 0, REG_OK, 0, 11, 0, 10, 9, 10, -1, -1, END); 818 test_exec("aaaaabaaab", 0, REG_OK, 0, 10, 0, 8, 7, 8, 9, 10, END); 819 test_exec("caa", 0, REG_OK, 1, 3, 1, 2, 1, 2, -1, -1, END); 820 test_comp("((a|aba)*)(ababbaba)((a|b)*)", REG_EXTENDED, 0); 821 test_exec("aabaababbabaaababbab", 0, REG_OK, 822 0, 20, 0, 4, 1, 4, 4, 12, 12, 20, 19, 20, END); 823 test_exec("aaaaababbaba", 0, REG_OK, 824 0, 12, 0, 4, 3, 4, 4, 12, 12, 12, -1, -1, END); 825 test_comp("((a|aba|abb|bba|bab)*)(ababbababbabbbabbbbbbabbaba)((a|b)*)", 826 REG_EXTENDED, 0); 827 test_exec("aabaabbbbabababaababbababbabbbabbbbbbabbabababbababababbabababa", 828 0, REG_OK, 0, 63, 0, 16, 13, 16, 16, 43, 43, 63, 62, 63, END); 829 830 /* Test for empty subexpressions. */ 831 test_comp("", 0, 0); 832 test_exec("", 0, REG_OK, 0, 0, END); 833 test_exec("foo", 0, REG_OK, 0, 0, END); 834 test_comp("(a|)", REG_EXTENDED, 0); 835 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); 836 test_exec("b", 0, REG_OK, 0, 0, 0, 0, END); 837 test_exec("", 0, REG_OK, 0, 0, 0, 0, END); 838 test_comp("a|", REG_EXTENDED, 0); 839 test_exec("a", 0, REG_OK, 0, 1, END); 840 test_exec("b", 0, REG_OK, 0, 0, END); 841 test_exec("", 0, REG_OK, 0, 0, END); 842 test_comp("|a", REG_EXTENDED, 0); 843 test_exec("a", 0, REG_OK, 0, 1, END); 844 test_exec("b", 0, REG_OK, 0, 0, END); 845 test_exec("", 0, REG_OK, 0, 0, END); 846 847 /* Miscellaneous tests. */ 848 test_comp("(a*)b(c*)", REG_EXTENDED, 0); 849 test_exec("abc", 0, REG_OK, 0, 3, 0, 1, 2, 3, END); 850 test_exec("***abc***", 0, REG_OK, 3, 6, 3, 4, 5, 6, END); 851 test_comp("(a)", REG_EXTENDED, 0); 852 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); 853 test_comp("((a))", REG_EXTENDED, 0); 854 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END); 855 test_comp("(((a)))", REG_EXTENDED, 0); 856 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, END); 857 test_comp("((((((((((((((((((((a))))))))))))))))))))", REG_EXTENDED, 0); 858 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 859 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 860 0, 1, 0, 1, 0, 1, END); 861 862 test_comp("ksntoeaiksntoeaikstneoaiksnteoaiksntoeaiskntoeaiskntoekainstoei" 863 "askntoeakisntoeksaitnokesantiksoentaikosentaiksoentaiksnoeaiskn" 864 "teoaksintoekasitnoeksaitkosetniaksoetnaisknoetakistoeksintokesa" 865 "nitksoentaisknoetaisknoetiaksotneaikstoekasitoeskatioksentaikso" 866 "enatiksoetnaiksonateiksoteaeskanotisknetaiskntoeasknitoskenatis" 867 "konetaisknoteai", 0, 0); 868 869 test_comp("((aab)|(aac)|(aa*))c", REG_EXTENDED, 0); 870 test_exec("aabc", 0, REG_OK, 0, 4, 0, 3, 0, 3, -1, -1, -1, -1, END); 871 test_exec("aacc", 0, REG_OK, 0, 4, 0, 3, -1, -1, 0, 3, -1, -1, END); 872 test_exec("aaac", 0, REG_OK, 0, 4, 0, 3, -1, -1, -1, -1, 0, 3, END); 873 874 test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", 875 REG_EXTENDED, 0); 876 test_exec("foo!bar!bas", 0, REG_OK, 877 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); 878 test_comp("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", 879 REG_EXTENDED, 0); 880 test_exec("foo!bar!bas", 0, REG_OK, 881 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); 882 test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", 883 REG_EXTENDED, 0); 884 test_exec("foo!bar!bas", 0, REG_OK, 885 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END); 886 887 test_comp("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", 888 REG_EXTENDED, 0); 889 test_exec("Muammar Quathafi", 0, REG_OK, 0, 16, -1, -1, 11, 13, END); 890 891 test_comp("(Ab|cD)*", REG_EXTENDED | REG_ICASE, 0); 892 test_exec("aBcD", 0, REG_OK, 0, 4, 2, 4, END); 893 894 test_comp("a**", REG_EXTENDED, REG_BADRPT); 895 test_comp("a*+", REG_EXTENDED, REG_BADRPT); 896 test_comp("a+*", REG_EXTENDED, REG_BADRPT); 897 test_comp("a++", REG_EXTENDED, REG_BADRPT); 898 test_comp("a?+", REG_EXTENDED, REG_BADRPT); 899 test_comp("a?*", REG_EXTENDED, REG_BADRPT); 900 test_comp("a{1,2}*", REG_EXTENDED, REG_BADRPT); 901 test_comp("a{1,2}+", REG_EXTENDED, REG_BADRPT); 902 903 /* 904 * Many of the following tests were mostly inspired by (or copied from) the 905 * libhackerlab posix test suite by Tom Lord. 906 */ 907 908 test_comp("a", 0, 0); 909 test_exec("a", 0, REG_OK, 0, 1, END); 910 test_comp("\\.", 0, 0); 911 test_exec(".", 0, REG_OK, 0, 1, END); 912 test_comp("\\[", 0, 0); 913 test_exec("[", 0, REG_OK, 0, 1, END); 914 test_comp("\\\\", 0, 0); 915 test_exec("\\", 0, REG_OK, 0, 1, END); 916 test_comp("\\*", 0, 0); 917 test_exec("*", 0, REG_OK, 0, 1, END); 918 test_comp("\\^", 0, 0); 919 test_exec("^", 0, REG_OK, 0, 1, END); 920 test_comp("\\$", 0, 0); 921 test_exec("$", 0, REG_OK, 0, 1, END); 922 923 test_comp("\\", 0, REG_EESCAPE); 924 925 test_comp("x\\.", 0, 0); 926 test_exec("x.", 0, REG_OK, 0, 2, END); 927 test_comp("x\\[", 0, 0); 928 test_exec("x[", 0, REG_OK, 0, 2, END); 929 test_comp("x\\\\", 0, 0); 930 test_exec("x\\", 0, REG_OK, 0, 2, END); 931 test_comp("x\\*", 0, 0); 932 test_exec("x*", 0, REG_OK, 0, 2, END); 933 test_comp("x\\^", 0, 0); 934 test_exec("x^", 0, REG_OK, 0, 2, END); 935 test_comp("x\\$", 0, 0); 936 test_exec("x$", 0, REG_OK, 0, 2, END); 937 938 test_comp("x\\", 0, REG_EESCAPE); 939 940 test_comp(".", 0, 0); 941 test_exec("a", 0, REG_OK, 0, 1, END); 942 test_exec("\n", 0, REG_OK, 0, 1, END); 943 944 test_comp("(+|?)", 0, 0); 945 test_exec("(+|?)", 0, REG_OK, 0, 5, END); 946 test_exec("+|?", 0, REG_NOMATCH); 947 test_exec("(+)", 0, REG_NOMATCH); 948 test_exec("+", 0, REG_NOMATCH); 949 950 951 /* 952 * Test bracket expressions. 953 */ 954 955 test_comp("[", 0, REG_EBRACK); 956 test_comp("[]", 0, REG_EBRACK); 957 test_comp("[^]", 0, REG_EBRACK); 958 959 test_comp("[]x]", 0, 0); 960 test_exec("]", 0, REG_OK, 0, 1, END); 961 test_exec("x", 0, REG_OK, 0, 1, END); 962 963 test_comp("[.]", 0, 0); 964 test_exec(".", 0, REG_OK, 0, 1, END); 965 test_exec("a", 0, REG_NOMATCH); 966 967 test_comp("[*]", 0, 0); 968 test_exec("*", 0, REG_OK, 0, 1, END); 969 970 test_comp("[[]", 0, 0); 971 test_exec("[", 0, REG_OK, 0, 1, END); 972 973 test_comp("[\\]", 0, 0); 974 test_exec("\\", 0, REG_OK, 0, 1, END); 975 976 test_comp("[-x]", 0, 0); 977 test_exec("-", 0, REG_OK, 0, 1, END); 978 test_exec("x", 0, REG_OK, 0, 1, END); 979 test_comp("[x-]", 0, 0); 980 test_exec("-", 0, REG_OK, 0, 1, END); 981 test_exec("x", 0, REG_OK, 0, 1, END); 982 test_comp("[-]", 0, 0); 983 test_exec("-", 0, REG_OK, 0, 1, END); 984 985 test_comp("[abc]", 0, 0); 986 test_exec("a", 0, REG_OK, 0, 1, END); 987 test_exec("b", 0, REG_OK, 0, 1, END); 988 test_exec("c", 0, REG_OK, 0, 1, END); 989 test_exec("d", 0, REG_NOMATCH); 990 test_exec("xa", 0, REG_OK, 1, 2, END); 991 test_exec("xb", 0, REG_OK, 1, 2, END); 992 test_exec("xc", 0, REG_OK, 1, 2, END); 993 test_exec("xd", 0, REG_NOMATCH); 994 test_comp("x[abc]", 0, 0); 995 test_exec("xa", 0, REG_OK, 0, 2, END); 996 test_exec("xb", 0, REG_OK, 0, 2, END); 997 test_exec("xc", 0, REG_OK, 0, 2, END); 998 test_exec("xd", 0, REG_NOMATCH); 999 test_comp("[^abc]", 0, 0); 1000 test_exec("a", 0, REG_NOMATCH); 1001 test_exec("b", 0, REG_NOMATCH); 1002 test_exec("c", 0, REG_NOMATCH); 1003 test_exec("d", 0, REG_OK, 0, 1, END); 1004 test_exec("xa", 0, REG_OK, 0, 1, END); 1005 test_exec("xb", 0, REG_OK, 0, 1, END); 1006 test_exec("xc", 0, REG_OK, 0, 1, END); 1007 test_exec("xd", 0, REG_OK, 0, 1, END); 1008 test_comp("x[^abc]", 0, 0); 1009 test_exec("xa", 0, REG_NOMATCH); 1010 test_exec("xb", 0, REG_NOMATCH); 1011 test_exec("xc", 0, REG_NOMATCH); 1012 test_exec("xd", 0, REG_OK, 0, 2, END); 1013 1014 test_comp("[()+?*\\]+", REG_EXTENDED, 0); 1015 test_exec("x\\*?+()x", 0, REG_OK, 1, 7, END); 1016 1017 /* Standard character classes. */ 1018 test_comp("[[:alnum:]]+", REG_EXTENDED, 0); 1019 test_exec("%abc123890XYZ=", 0, REG_OK, 1, 13, END); 1020 test_comp("[[:cntrl:]]+", REG_EXTENDED, 0); 1021 test_exec("%\n\t\015\f ", 0, REG_OK, 1, 5, END); 1022 test_comp("[[:lower:]]+", REG_EXTENDED, 0); 1023 test_exec("AbcdE", 0, REG_OK, 1, 4, END); 1024 test_comp("[[:lower:]]+", REG_EXTENDED | REG_ICASE, 0); 1025 test_exec("AbcdE", 0, REG_OK, 0, 5, END); 1026 test_comp("[[:space:]]+", REG_EXTENDED, 0); 1027 test_exec("x \t\f\nx", 0, REG_OK, 1, 5, END); 1028 test_comp("[[:alpha:]]+", REG_EXTENDED, 0); 1029 test_exec("%abC123890xyz=", 0, REG_OK, 1, 4, END); 1030 test_comp("[[:digit:]]+", REG_EXTENDED, 0); 1031 test_exec("%abC123890xyz=", 0, REG_OK, 4, 10, END); 1032 test_comp("[^[:digit:]]+", REG_EXTENDED, 0); 1033 test_exec("%abC123890xyz=", 0, REG_OK, 0, 4, END); 1034 test_comp("[[:print:]]+", REG_EXTENDED, 0); 1035 test_exec("\n %abC12\f", 0, REG_OK, 1, 8, END); 1036 test_comp("[[:upper:]]+", REG_EXTENDED, 0); 1037 test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 3, 27, END); 1038 test_comp("[[:upper:]]+", REG_EXTENDED | REG_ICASE, 0); 1039 test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 2, 28, END); 1040 #ifdef HAVE_ISWBLANK 1041 #ifdef HAVE_ISBLANK 1042 test_comp("[[:blank:]]+", REG_EXTENDED, 0); 1043 test_exec("\na \t b", 0, REG_OK, 2, 5, END); 1044 #endif /* HAVE_ISBLANK */ 1045 #endif /* HAVE_ISWBLANK */ 1046 test_comp("[[:graph:]]+", REG_EXTENDED, 0); 1047 test_exec("\n %abC12\f", 0, REG_OK, 2, 8, END); 1048 test_comp("[[:punct:]]+", REG_EXTENDED, 0); 1049 test_exec("a~!@#$%^&*()_+=-`[]{};':\"|\\,./?>< ", 1050 0, REG_OK, 1, 33, END); 1051 test_comp("[[:xdigit:]]+", REG_EXTENDED, 0); 1052 test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END); 1053 test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE); 1054 1055 1056 /* Range expressions (assuming that the C locale is being used). */ 1057 test_comp("[a-z]+", REG_EXTENDED, 0); 1058 test_exec("ABCabcxyzABC", 0, REG_OK, 3, 9, END); 1059 test_comp("[z-a]+", REG_EXTENDED, REG_ERANGE); 1060 test_comp("[a-b-c]", 0, REG_ERANGE); 1061 test_comp("[a-a]+", REG_EXTENDED, 0); 1062 test_exec("zaaaaab", 0, REG_OK, 1, 6, END); 1063 test_comp("[--Z]+", REG_EXTENDED, 0); 1064 test_exec("!ABC-./XYZ~", 0, REG_OK, 1, 10, END); 1065 test_comp("[*--]", 0, 0); 1066 test_exec("-", 0, REG_OK, 0, 1, END); 1067 test_exec("*", 0, REG_OK, 0, 1, END); 1068 test_comp("[*--Z]+", REG_EXTENDED, 0); 1069 test_exec("!+*,---ABC", 0, REG_OK, 1, 7, END); 1070 test_comp("[a-]+", REG_EXTENDED, 0); 1071 test_exec("xa-a--a-ay", 0, REG_OK, 1, 9, END); 1072 1073 /* REG_ICASE and character sets. */ 1074 test_comp("[a-c]*", REG_ICASE | REG_EXTENDED, 0); 1075 test_exec("cABbage", 0, REG_OK, 0, 5, END); 1076 test_comp("[^a-c]*", REG_ICASE | REG_EXTENDED, 0); 1077 test_exec("tObAcCo", 0, REG_OK, 0, 2, END); 1078 test_comp("[A-C]*", REG_ICASE | REG_EXTENDED, 0); 1079 test_exec("cABbage", 0, REG_OK, 0, 5, END); 1080 test_comp("[^A-C]*", REG_ICASE | REG_EXTENDED, 0); 1081 test_exec("tObAcCo", 0, REG_OK, 0, 2, END); 1082 1083 /* Complex character sets. */ 1084 test_comp("[[:digit:]a-z#$%]+", REG_EXTENDED, 0); 1085 test_exec("__abc#lmn012$x%yz789*", 0, REG_OK, 2, 20, END); 1086 test_comp("[[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); 1087 test_exec("__abcLMN012x%#$yz789*", 0, REG_OK, 2, 20, END); 1088 test_comp("[^[:digit:]a-z#$%]+", REG_EXTENDED, 0); 1089 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); 1090 test_comp("[^[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0); 1091 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END); 1092 test_comp("[^[:digit:]#$%[:xdigit:]]+", REG_ICASE | REG_EXTENDED, 0); 1093 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 4, 7, END); 1094 test_comp("[^-]+", REG_EXTENDED, 0); 1095 test_exec("---afd*(&,ml---", 0, REG_OK, 3, 12, END); 1096 test_comp("[^--Z]+", REG_EXTENDED, 0); 1097 test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 12, END); 1098 test_comp("[^--Z]+", REG_ICASE | REG_EXTENDED, 0); 1099 test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 10, END); 1100 1101 /* Unsupported things (equivalence classes and multicharacter collating 1102 elements) */ 1103 test_comp("[[.foo.]]", 0, REG_ECOLLATE); 1104 test_comp("[[=foo=]]", 0, REG_ECOLLATE); 1105 test_comp("[[..]]", 0, REG_ECOLLATE); 1106 test_comp("[[==]]", 0, REG_ECOLLATE); 1107 test_comp("[[.]]", 0, REG_ECOLLATE); 1108 test_comp("[[=]]", 0, REG_ECOLLATE); 1109 test_comp("[[.]", 0, REG_ECOLLATE); 1110 test_comp("[[=]", 0, REG_ECOLLATE); 1111 test_comp("[[.", 0, REG_ECOLLATE); 1112 test_comp("[[=", 0, REG_ECOLLATE); 1113 1114 1115 1116 /* Miscellaneous tests. */ 1117 test_comp("abc\\(\\(de\\)\\(fg\\)\\)hi", 0, 0); 1118 test_exec("xabcdefghiy", 0, REG_OK, 1, 10, 4, 8, 4, 6, 6, 8, END); 1119 1120 test_comp("abc*def", 0, 0); 1121 test_exec("xabdefy", 0, REG_OK, 1, 6, END); 1122 test_exec("xabcdefy", 0, REG_OK, 1, 7, END); 1123 test_exec("xabcccccccdefy", 0, REG_OK, 1, 13, END); 1124 1125 test_comp("abc\\(def\\)*ghi", 0, 0); 1126 test_exec("xabcghiy", 0, REG_OK, 1, 7, -1, -1, END); 1127 test_exec("xabcdefghi", 0, REG_OK, 1, 10, 4, 7, END); 1128 test_exec("xabcdefdefdefghi", 0, REG_OK, 1, 16, 10, 13, END); 1129 1130 test_comp("a?", REG_EXTENDED, REG_OK); 1131 test_exec("aaaaa", 0, REG_OK, 0, 1, END); 1132 test_exec("xaaaaa", 0, REG_OK, 0, 0, END); 1133 test_comp("a+", REG_EXTENDED, REG_OK); 1134 test_exec("aaaaa", 0, REG_OK, 0, 5, END); 1135 test_exec("xaaaaa", 0, REG_OK, 1, 6, END); 1136 1137 1138 /* 1139 * Test anchors and their behaviour with the REG_NEWLINE compilation 1140 * flag and the REG_NOTBOL, REG_NOTEOL execution flags. 1141 */ 1142 1143 /* Normally, `^' matches the empty string at beginning of input. 1144 If REG_NOTBOL is used, `^' won't match the zero length string. */ 1145 test_comp("^abc", 0, 0); 1146 test_exec("abcdef", 0, REG_OK, 0, 3, END); 1147 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); 1148 test_exec("xyzabcdef", 0, REG_NOMATCH); 1149 test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); 1150 test_exec("\nabcdef", 0, REG_NOMATCH); 1151 test_exec("\nabcdef", REG_NOTBOL, REG_NOMATCH); 1152 1153 /* Normally, `$' matches the empty string at end of input. 1154 If REG_NOTEOL is used, `$' won't match the zero length string. */ 1155 test_comp("abc$", 0, 0); 1156 test_exec("defabc", 0, REG_OK, 3, 6, END); 1157 test_exec("defabc", REG_NOTEOL, REG_NOMATCH); 1158 test_exec("defabcxyz", 0, REG_NOMATCH); 1159 test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); 1160 test_exec("defabc\n", 0, REG_NOMATCH); 1161 test_exec("defabc\n", REG_NOTEOL, REG_NOMATCH); 1162 1163 test_comp("^abc$", 0, 0); 1164 test_exec("abc", 0, REG_OK, 0, 3, END); 1165 test_exec("abc", REG_NOTBOL, REG_NOMATCH); 1166 test_exec("abc", REG_NOTEOL, REG_NOMATCH); 1167 test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); 1168 test_exec("\nabc\n", 0, REG_NOMATCH); 1169 test_exec("defabc\n", 0, REG_NOMATCH); 1170 test_exec("\nabcdef", 0, REG_NOMATCH); 1171 test_exec("abcdef", 0, REG_NOMATCH); 1172 test_exec("defabc", 0, REG_NOMATCH); 1173 test_exec("abc\ndef", 0, REG_NOMATCH); 1174 test_exec("def\nabc", 0, REG_NOMATCH); 1175 1176 /* If REG_NEWLINE is used, `^' matches the empty string immediately after 1177 a newline, regardless of whether execution flags contain REG_NOTBOL. 1178 Similarly, if REG_NEWLINE is used, `$' matches the empty string 1179 immediately before a newline, regardless of execution flags. */ 1180 test_comp("^abc", REG_NEWLINE, 0); 1181 test_exec("abcdef", 0, REG_OK, 0, 3, END); 1182 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); 1183 test_exec("xyzabcdef", 0, REG_NOMATCH); 1184 test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH); 1185 test_exec("\nabcdef", 0, REG_OK, 1, 4, END); 1186 test_exec("\nabcdef", REG_NOTBOL, 0, 1, 4, END); 1187 test_comp("abc$", REG_NEWLINE, 0); 1188 test_exec("defabc", 0, REG_OK, 3, 6, END); 1189 test_exec("defabc", REG_NOTEOL, REG_NOMATCH); 1190 test_exec("defabcxyz", 0, REG_NOMATCH); 1191 test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH); 1192 test_exec("defabc\n", 0, REG_OK, 3, 6, END); 1193 test_exec("defabc\n", REG_NOTEOL, 0, 3, 6, END); 1194 test_comp("^abc$", REG_NEWLINE, 0); 1195 test_exec("abc", 0, REG_OK, 0, 3, END); 1196 test_exec("abc", REG_NOTBOL, REG_NOMATCH); 1197 test_exec("abc", REG_NOTEOL, REG_NOMATCH); 1198 test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); 1199 test_exec("\nabc\n", 0, REG_OK, 1, 4, END); 1200 test_exec("defabc\n", 0, REG_NOMATCH); 1201 test_exec("\nabcdef", 0, REG_NOMATCH); 1202 test_exec("abcdef", 0, REG_NOMATCH); 1203 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH); 1204 test_exec("defabc", 0, REG_NOMATCH); 1205 test_exec("defabc", REG_NOTEOL, REG_NOMATCH); 1206 test_exec("abc\ndef", 0, REG_OK, 0, 3, END); 1207 test_exec("abc\ndef", REG_NOTBOL, REG_NOMATCH); 1208 test_exec("abc\ndef", REG_NOTEOL, 0, 0, 3, END); 1209 test_exec("abc\ndef", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); 1210 test_exec("def\nabc", 0, REG_OK, 4, 7, END); 1211 test_exec("def\nabc", REG_NOTBOL, 0, 4, 7, END); 1212 test_exec("def\nabc", REG_NOTEOL, REG_NOMATCH); 1213 test_exec("def\nabc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH); 1214 1215 /* With BRE syntax, `^' has a special meaning only at the beginning of the 1216 RE or the beginning of a parenthesized subexpression. */ 1217 test_comp("a\\{0,1\\}^bc", 0, 0); 1218 test_exec("bc", 0, REG_NOMATCH); 1219 test_exec("^bc", 0, REG_OK, 0, 3, END); 1220 test_exec("abc", 0, REG_NOMATCH); 1221 test_exec("a^bc", 0, REG_OK, 0, 4, END); 1222 test_comp("a\\{0,1\\}\\(^bc\\)", 0, 0); 1223 test_exec("bc", 0, REG_OK, 0, 2, 0, 2, END); 1224 test_exec("^bc", 0, REG_NOMATCH); 1225 test_exec("abc", 0, REG_NOMATCH); 1226 test_exec("a^bc", 0, REG_NOMATCH); 1227 test_comp("(^a", 0, 0); 1228 test_exec("(^a", 0, REG_OK, 0, 3, END); 1229 1230 /* With BRE syntax, `$' has a special meaning only at the end of the 1231 RE or the end of a parenthesized subexpression. */ 1232 test_comp("ab$c\\{0,1\\}", 0, 0); 1233 test_exec("ab", 0, REG_NOMATCH); 1234 test_exec("ab$", 0, REG_OK, 0, 3, END); 1235 test_exec("abc", 0, REG_NOMATCH); 1236 test_exec("ab$c", 0, REG_OK, 0, 4, END); 1237 test_comp("\\(ab$\\)c\\{0,1\\}", 0, 0); 1238 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, END); 1239 test_exec("ab$", 0, REG_NOMATCH); 1240 test_exec("abc", 0, REG_NOMATCH); 1241 test_exec("ab$c", 0, REG_NOMATCH); 1242 test_comp("a$)", 0, 0); 1243 test_exec("a$)", 0, REG_OK, 0, 3, END); 1244 1245 /* Miscellaneous tests for `^' and `$'. */ 1246 test_comp("foo^$", REG_EXTENDED, 0); 1247 test_exec("foo", 0, REG_NOMATCH); 1248 test_comp("x$\n^y", REG_EXTENDED | REG_NEWLINE, 0); 1249 test_exec("foo\nybarx\nyes\n", 0, REG_OK, 8, 11, END); 1250 test_comp("^$", 0, 0); 1251 test_exec("x", 0, REG_NOMATCH); 1252 test_exec("", 0, REG_OK, 0, 0, END); 1253 test_exec("\n", 0, REG_NOMATCH); 1254 test_comp("^$", REG_NEWLINE, 0); 1255 test_exec("x", 0, REG_NOMATCH); 1256 test_exec("", 0, REG_OK, 0, 0, END); 1257 test_exec("\n", 0, REG_OK, 0, 0, END); 1258 1259 /* REG_NEWLINE causes `.' not to match newlines. */ 1260 test_comp(".*", 0, 0); 1261 test_exec("ab\ncd", 0, REG_OK, 0, 5, END); 1262 test_comp(".*", REG_NEWLINE, 0); 1263 test_exec("ab\ncd", 0, REG_OK, 0, 2, END); 1264 1265 /* 1266 * Tests for nonstandard syntax extensions. 1267 */ 1268 1269 /* Zero width assertions. */ 1270 test_comp("\\<x", REG_EXTENDED, 0); 1271 test_exec("aax xaa", 0, REG_OK, 4, 5, END); 1272 test_exec("xaa", 0, REG_OK, 0, 1, END); 1273 test_comp("x\\>", REG_EXTENDED, 0); 1274 test_exec("axx xaa", 0, REG_OK, 2, 3, END); 1275 test_exec("aax", 0, REG_OK, 2, 3, END); 1276 test_comp("\\bx", REG_EXTENDED, 0); 1277 test_exec("axx xaa", 0, REG_OK, 4, 5, END); 1278 test_exec("aax", 0, REG_NOMATCH); 1279 test_exec("xax", 0, REG_OK, 0, 1, END); 1280 test_comp("x\\b", REG_EXTENDED, 0); 1281 test_exec("axx xaa", 0, REG_OK, 2, 3, END); 1282 test_exec("aax", 0, REG_OK, 2, 3, END); 1283 test_exec("xaa", 0, REG_NOMATCH); 1284 test_comp("\\Bx", REG_EXTENDED, 0); 1285 test_exec("aax xxa", 0, REG_OK, 2, 3, END); 1286 test_comp("\\Bx\\b", REG_EXTENDED, 0); 1287 test_exec("aax xxx", 0, REG_OK, 2, 3, END); 1288 test_comp("\\<.", REG_EXTENDED, 0); 1289 test_exec(";xaa", 0, REG_OK, 1, 2, END); 1290 1291 /* Shorthands for character classes. */ 1292 test_comp("\\w+", REG_EXTENDED, 0); 1293 test_exec(",.(a23_Nt-�o)", 0, REG_OK, 3, 9, END); 1294 test_comp("\\d+", REG_EXTENDED, 0); 1295 test_exec("uR120_4=v4", 0, REG_OK, 2, 5, END); 1296 test_comp("\\D+", REG_EXTENDED, 0); 1297 test_exec("120d_=vA4s", 0, REG_OK, 3, 8, END); 1298 1299 /* Quoted special characters. */ 1300 test_comp("\\t", REG_EXTENDED, 0); 1301 test_comp("\\e", REG_EXTENDED, 0); 1302 1303 /* Test the \x1B and \x{263a} extensions for specifying 8 bit and wide 1304 characters in hexadecimal. */ 1305 test_comp("\\x41", REG_EXTENDED, 0); 1306 test_exec("ABC", 0, REG_OK, 0, 1, END); 1307 test_comp("\\x5", REG_EXTENDED, 0); 1308 test_exec("\005", 0, REG_OK, 0, 1, END); 1309 test_comp("\\x5r", REG_EXTENDED, 0); 1310 test_exec("\005r", 0, REG_OK, 0, 2, END); 1311 test_comp("\\x", REG_EXTENDED, 0); 1312 test_nexec("\000", 1, 0, REG_OK, 0, 1, END); 1313 test_comp("\\xr", REG_EXTENDED, 0); 1314 test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); 1315 test_comp("\\x{41}", REG_EXTENDED, 0); 1316 test_exec("ABC", 0, REG_OK, 0, 1, END); 1317 test_comp("\\x{5}", REG_EXTENDED, 0); 1318 test_exec("\005", 0, REG_OK, 0, 1, END); 1319 test_comp("\\x{5}r", REG_EXTENDED, 0); 1320 test_exec("\005r", 0, REG_OK, 0, 2, END); 1321 test_comp("\\x{}", REG_EXTENDED, 0); 1322 test_nexec("\000", 1, 0, REG_OK, 0, 1, END); 1323 test_comp("\\x{}r", REG_EXTENDED, 0); 1324 test_nexec("\000r", 2, 0, REG_OK, 0, 2, END); 1325 1326 /* Tests for (?inrU-inrU) and (?inrU-inrU:) */ 1327 test_comp("foo(?i)bar", REG_EXTENDED, 0); 1328 test_exec("fooBaR", 0, REG_OK, 0, 6, END); 1329 test_comp("foo(?i)bar|zap", REG_EXTENDED, 0); 1330 test_exec("fooBaR", 0, REG_OK, 0, 6, END); 1331 test_exec("foozap", 0, REG_OK, 0, 6, END); 1332 test_exec("foozAp", 0, REG_OK, 0, 6, END); 1333 test_exec("zap", 0, REG_NOMATCH); 1334 test_comp("foo(?-i:zap)zot", REG_EXTENDED | REG_ICASE, 0); 1335 test_exec("FoOzapZOt", 0, REG_OK, 0, 9, END); 1336 test_exec("FoOzApZOt", 0, REG_NOMATCH); 1337 test_comp("foo(?i:bar|zap)", REG_EXTENDED, 0); 1338 test_exec("foozap", 0, REG_OK, 0, 6, END); 1339 test_exec("foobar", 0, REG_OK, 0, 6, END); 1340 test_exec("foobAr", 0, REG_OK, 0, 6, END); 1341 test_exec("fooZaP", 0, REG_OK, 0, 6, END); 1342 test_comp("foo(?U:o*)(o*)", REG_EXTENDED, 0); 1343 test_exec("foooo", 0, REG_OK, 0, 5, 3, 5, END); 1344 1345 /* Test comment syntax. */ 1346 test_comp("foo(?# This here is a comment. )bar", REG_EXTENDED, 0); 1347 test_exec("foobar", 0, REG_OK, 0, 6, END); 1348 1349 /* Tests for \Q and \E. */ 1350 test_comp("\\((\\Q)?:\\<[^$\\E)", REG_EXTENDED, 0); 1351 test_exec("()?:\\<[^$", 0, REG_OK, 0, 9, 1, 9, END); 1352 test_comp("\\Qabc\\E.*", REG_EXTENDED, 0); 1353 test_exec("abcdef", 0, REG_OK, 0, 6, END); 1354 test_comp("\\Qabc\\E.*|foo", REG_EXTENDED, 0); 1355 test_exec("parabc123wxyz", 0, REG_OK, 3, 13, END); 1356 test_exec("fooabc123wxyz", 0, REG_OK, 0, 3, END); 1357 1358 /* 1359 * Test bounded repetitions. 1360 */ 1361 1362 test_comp("a{0,0}", REG_EXTENDED, REG_OK); 1363 test_exec("aaa", 0, REG_OK, 0, 0, END); 1364 test_comp("a{0,1}", REG_EXTENDED, REG_OK); 1365 test_exec("aaa", 0, REG_OK, 0, 1, END); 1366 test_comp("a{1,1}", REG_EXTENDED, REG_OK); 1367 test_exec("aaa", 0, REG_OK, 0, 1, END); 1368 test_comp("a{1,3}", REG_EXTENDED, REG_OK); 1369 test_exec("xaaaaa", 0, REG_OK, 1, 4, END); 1370 test_comp("a{0,3}", REG_EXTENDED, REG_OK); 1371 test_exec("aaaaa", 0, REG_OK, 0, 3, END); 1372 test_comp("a{0,}", REG_EXTENDED, REG_OK); 1373 test_exec("", 0, REG_OK, 0, 0, END); 1374 test_exec("a", 0, REG_OK, 0, 1, END); 1375 test_exec("aa", 0, REG_OK, 0, 2, END); 1376 test_exec("aaa", 0, REG_OK, 0, 3, END); 1377 test_comp("a{1,}", REG_EXTENDED, REG_OK); 1378 test_exec("", 0, REG_NOMATCH); 1379 test_exec("a", 0, REG_OK, 0, 1, END); 1380 test_exec("aa", 0, REG_OK, 0, 2, END); 1381 test_exec("aaa", 0, REG_OK, 0, 3, END); 1382 test_comp("a{2,}", REG_EXTENDED, REG_OK); 1383 test_exec("", 0, REG_NOMATCH); 1384 test_exec("a", 0, REG_NOMATCH); 1385 test_exec("aa", 0, REG_OK, 0, 2, END); 1386 test_exec("aaa", 0, REG_OK, 0, 3, END); 1387 test_comp("a{3,}", REG_EXTENDED, REG_OK); 1388 test_exec("", 0, REG_NOMATCH); 1389 test_exec("a", 0, REG_NOMATCH); 1390 test_exec("aa", 0, REG_NOMATCH); 1391 test_exec("aaa", 0, REG_OK, 0, 3, END); 1392 test_exec("aaaa", 0, REG_OK, 0, 4, END); 1393 test_exec("aaaaa", 0, REG_OK, 0, 5, END); 1394 test_exec("aaaaaa", 0, REG_OK, 0, 6, END); 1395 test_exec("aaaaaaa", 0, REG_OK, 0, 7, END); 1396 1397 test_comp("a{5,10}", REG_EXTENDED, REG_OK); 1398 test_comp("a{6,6}", REG_EXTENDED, REG_OK); 1399 test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); 1400 test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); 1401 test_exec("xxaaaaa", 0, REG_NOMATCH); 1402 test_comp("a{5,6}", REG_EXTENDED, REG_OK); 1403 test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END); 1404 test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END); 1405 test_exec("xxaaaaa", 0, REG_OK, 2, 7, END); 1406 test_exec("xxaaaa", 0, REG_NOMATCH); 1407 1408 /* Trickier ones... */ 1409 test_comp("([ab]{5,10})*b", REG_EXTENDED, REG_OK); 1410 test_exec("bbbbbabaaaaab", 0, REG_OK, 0, 13, 5, 12, END); 1411 test_exec("bbbbbbaaaaab", 0, REG_OK, 0, 12, 5, 11, END); 1412 test_exec("bbbbbbaaaab", 0, REG_OK, 0, 11, 0, 10, END); 1413 test_exec("bbbbbbaaab", 0, REG_OK, 0, 10, 0, 9, END); 1414 test_exec("bbbbbbaab", 0, REG_OK, 0, 9, 0, 8, END); 1415 test_exec("bbbbbbab", 0, REG_OK, 0, 8, 0, 7, END); 1416 1417 test_comp("([ab]*)(ab[ab]{5,10})ba", REG_EXTENDED, REG_OK); 1418 test_exec("abbabbbabaabbbbbbbbbbbbbabaaaabab", 0, REG_OK, 1419 0, 10, 0, 0, 0, 8, END); 1420 test_exec("abbabbbabaabbbbbbbbbbbbabaaaaabab", 0, REG_OK, 1421 0, 32, 0, 23, 23, 30, END); 1422 test_exec("abbabbbabaabbbbbbbbbbbbabaaaabab", 0, REG_OK, 1423 0, 24, 0, 10, 10, 22, END); 1424 test_exec("abbabbbabaabbbbbbbbbbbba", 0, REG_OK, 1425 0, 24, 0, 10, 10, 22, END); 1426 1427 /* Test repeating something that has submatches inside. */ 1428 test_comp("(a){0,5}", REG_EXTENDED, 0); 1429 test_exec("", 0, REG_OK, 0, 0, -1, -1, END); 1430 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END); 1431 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); 1432 test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); 1433 test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); 1434 test_exec("aaaaa", 0, REG_OK, 0, 5, 4, 5, END); 1435 test_exec("aaaaaa", 0, REG_OK, 0, 5, 4, 5, END); 1436 1437 test_comp("(a){2,3}", REG_EXTENDED, 0); 1438 test_exec("", 0, REG_NOMATCH); 1439 test_exec("a", 0, REG_NOMATCH); 1440 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END); 1441 test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END); 1442 test_exec("aaaa", 0, REG_OK, 0, 3, 2, 3, END); 1443 1444 test_comp("\\(a\\)\\{4\\}", 0, 0); 1445 test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END); 1446 1447 test_comp("\\(a*\\)\\{2\\}", 0, 0); 1448 test_exec("a", 0, REG_OK, 0, 1, 1, 1, END); 1449 1450 test_comp("((..)|(.)){2}", REG_EXTENDED, 0); 1451 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, -1, -1, 1, 2, END); 1452 1453 /* Nested repeats. */ 1454 test_comp("(.){2}{3}", REG_EXTENDED, 0); 1455 test_exec("xxxxx", 0, REG_NOMATCH); 1456 test_exec("xxxxxx", 0, REG_OK, 0, 6, 5, 6, END); 1457 test_comp("(..){2}{3}", REG_EXTENDED, 0); 1458 test_exec("xxxxxxxxxxx", 0, REG_NOMATCH); 1459 test_exec("xxxxxxxxxxxx", 0, REG_OK, 0, 12, 10, 12, END); 1460 test_comp("((..){2}.){3}", REG_EXTENDED, 0); 1461 test_exec("xxxxxxxxxxxxxx", 0, REG_NOMATCH); 1462 test_exec("xxxxxxxxxxxxxxx", 0, REG_OK, 0, 15, 10, 15, 12, 14, END); 1463 test_comp("((..){1,2}.){3}", REG_EXTENDED, 0); 1464 test_exec("xxxxxxxx", 0, REG_NOMATCH); 1465 test_exec("xxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); 1466 test_exec("xxxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END); 1467 test_exec("xxxxxxxxxxx", 0, REG_OK, 0, 11, 8, 11, 8, 10, END); 1468 test_comp("a{2}{2}x", REG_EXTENDED, 0); 1469 test_exec("", 0, REG_NOMATCH); 1470 test_exec("x", 0, REG_NOMATCH); 1471 test_exec("ax", 0, REG_NOMATCH); 1472 test_exec("aax", 0, REG_NOMATCH); 1473 test_exec("aaax", 0, REG_NOMATCH); 1474 test_exec("aaaax", 0, REG_OK, 0, 5, END); 1475 test_exec("aaaaax", 0, REG_OK, 1, 6, END); 1476 test_exec("aaaaaax", 0, REG_OK, 2, 7, END); 1477 test_exec("aaaaaaax", 0, REG_OK, 3, 8, END); 1478 test_exec("aaaaaaaax", 0, REG_OK, 4, 9, END); 1479 1480 /* Repeats with iterations inside. */ 1481 test_comp("([a-z]+){2,5}", REG_EXTENDED, 0); 1482 test_exec("a\n", 0, REG_NOMATCH); 1483 test_exec("aa\n", 0, REG_OK, 0, 2, 1, 2, END); 1484 1485 /* Multiple repeats in one regexp. */ 1486 test_comp("a{3}b{3}", REG_EXTENDED, 0); 1487 test_exec("aaabbb", 0, REG_OK, 0, 6, END); 1488 test_exec("aaabbbb", 0, REG_OK, 0, 6, END); 1489 test_exec("aaaabbb", 0, REG_OK, 1, 7, END); 1490 test_exec("aabbb", 0, REG_NOMATCH); 1491 test_exec("aaabb", 0, REG_NOMATCH); 1492 1493 /* Test that different types of repetitions work correctly when used 1494 in the same regexp. */ 1495 test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0); 1496 test_exec("aaaaxbxcxdx", 0, REG_OK, 0, 11, END); 1497 test_exec("aaaxbxcxdx", 0, REG_NOMATCH); 1498 test_exec("aabxcxdx", 0, REG_NOMATCH); 1499 test_exec("aaaacxdx", 0, REG_NOMATCH); 1500 test_exec("aaaaxbdx", 0, REG_NOMATCH); 1501 test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+", 1502 REG_EXTENDED, 0); 1503 test_exec("!packet 10.0.2.4 12765 ei voittoa", 0, REG_OK, 0, 22, END); 1504 1505 /* 1506 * Back referencing tests. 1507 */ 1508 test_comp("([a-z]*) \\1", REG_EXTENDED, 0); 1509 test_exec("foobar foobar", 0, REG_OK, 0, 13, 0, 6, END); 1510 1511 /* Searching for a leftmost longest square (repeated string) */ 1512 test_comp("(.*)\\1", REG_EXTENDED, 0); 1513 test_exec("foobarfoobar", 0, REG_OK, 0, 12, 0, 6, END); 1514 1515 test_comp("a(b)*c\\1", REG_EXTENDED, 0); 1516 test_exec("acb", 0, REG_OK, 0, 2, -1, -1, END); 1517 test_exec("abbcbbb", 0, REG_OK, 0, 5, 2, 3, END); 1518 test_exec("abbdbd", 0, REG_NOMATCH); 1519 1520 test_comp("([a-c]*)\\1", REG_EXTENDED, 0); 1521 test_exec("abcacdef", 0, REG_OK, 0, 0, 0, 0, END); 1522 test_exec("abcabcabcd", 0, REG_OK, 0, 6, 0, 3, END); 1523 1524 test_comp("\\(a*\\)*\\(x\\)\\(\\1\\)", 0, 0); 1525 test_exec("x", 0, REG_OK, 0, 1, 0, 0, 0, 1, 1, 1, END); 1526 #if KNOWN_BUG 1527 test_exec("ax", 0, REG_OK, 0, 2, 1, 1, 1, 2, 2, 2, END); 1528 #endif 1529 1530 test_comp("(a)\\1{1,2}", REG_EXTENDED, 0); 1531 test_exec("aabc", 0, REG_OK, 0, 2, 0, 1, END); 1532 1533 test_comp("((.*)\\1)+", REG_EXTENDED, 0); 1534 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 1, END); 1535 1536 #if KNOWN_BUG 1537 test_comp("()(\\1\\1)*", REG_EXTENDED, 0); 1538 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END); 1539 #endif 1540 1541 /* Check that back references work with REG_NOSUB. */ 1542 test_comp("(o)\\1", REG_EXTENDED | REG_NOSUB, 0); 1543 test_exec("foobar", 0, REG_OK, END); 1544 test_comp("(o)\\1", REG_EXTENDED, 0); 1545 test_exec("foobar", 0, REG_OK, 1, 3, 1, 2, END); 1546 test_comp("(o)\\1", REG_EXTENDED, 0); 1547 test_exec("fobar", 0, REG_NOMATCH); 1548 1549 test_comp("\\1foo", REG_EXTENDED, REG_ESUBREG); 1550 test_comp("\\1foo(bar)", REG_EXTENDED, 0); 1551 1552 /* Back reference with zero-width assertion. */ 1553 test_comp("(.)\\1$", REG_EXTENDED, 0); 1554 test_exec("foox", 0, REG_NOMATCH); 1555 test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END); 1556 1557 /* Back references together with {}. */ 1558 test_comp("([0-9]{5})\\1", REG_EXTENDED, 0); 1559 test_exec("12345", 0, REG_NOMATCH); 1560 test_exec("1234512345", 0, REG_OK, 0, 10, 0, 5, END); 1561 test_comp("([0-9]{4})\\1", REG_EXTENDED, 0); 1562 test_exec("1234", 0, REG_NOMATCH); 1563 test_exec("12341234", 0, REG_OK, 0, 8, 0, 4, END); 1564 1565 /* 1566 * Test minimal repetitions (non-greedy repetitions) 1567 */ 1568 avoid_eflags = REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER; 1569 1570 /* Basic .*/ 1571 test_comp(".*?", REG_EXTENDED, 0); 1572 test_exec("abcd", 0, REG_OK, 0, 0, END); 1573 test_comp(".+?", REG_EXTENDED, 0); 1574 test_exec("abcd", 0, REG_OK, 0, 1, END); 1575 test_comp(".??", REG_EXTENDED, 0); 1576 test_exec("abcd", 0, REG_OK, 0, 0, END); 1577 test_comp(".{2,5}?", REG_EXTENDED, 0); 1578 test_exec("abcd", 0, REG_OK, 0, 2, END); 1579 1580 /* More complicated. */ 1581 test_comp("<b>(.*?)</b>", REG_EXTENDED, 0); 1582 test_exec("<b>text1</b><b>text2</b>", 0, REG_OK, 0, 12, 3, 8, END); 1583 test_comp("a(.*?)(foo|bar|zap)", REG_EXTENDED, 0); 1584 test_exec("hubba wooga-booga zabar gafoo wazap", 0, REG_OK, 1585 4, 23, 5, 20, 20, 23, END); 1586 1587 /* Test REG_UNGREEDY. */ 1588 test_comp(".*", REG_EXTENDED | REG_UNGREEDY, 0); 1589 test_exec("abcd", 0, REG_OK, 0, 0, END); 1590 test_comp(".*?", REG_EXTENDED | REG_UNGREEDY, 0); 1591 test_exec("abcd", 0, REG_OK, 0, 4, END); 1592 1593 avoid_eflags = 0; 1594 1595 1596 /* 1597 * Error reporting tests. 1598 */ 1599 1600 test_comp("\\", REG_EXTENDED, REG_EESCAPE); 1601 test_comp("\\\\", REG_EXTENDED, REG_OK); 1602 test_exec("\\", 0, REG_OK, 0, 1, END); 1603 test_comp("(", REG_EXTENDED, REG_EPAREN); 1604 test_comp("(aaa", REG_EXTENDED, REG_EPAREN); 1605 test_comp(")", REG_EXTENDED, REG_OK); 1606 test_exec(")", 0, REG_OK, 0, 1, END); 1607 test_comp("a{1", REG_EXTENDED, REG_EBRACE); 1608 test_comp("a{1,x}", REG_EXTENDED, REG_BADBR); 1609 test_comp("a{1x}", REG_EXTENDED, REG_BADBR); 1610 test_comp("a{1,0}", REG_EXTENDED, REG_BADBR); 1611 test_comp("a{x}", REG_EXTENDED, REG_BADBR); 1612 test_comp("a{}", REG_EXTENDED, REG_BADBR); 1613 1614 1615 test_comp("\\", 0, REG_EESCAPE); 1616 test_comp("\\(", 0, REG_EPAREN); 1617 test_comp("\\)", 0, REG_EPAREN); 1618 test_comp("a\\{1", 0, REG_EBRACE); 1619 test_comp("a\\{1,x\\}", 0, REG_BADBR); 1620 test_comp("a\\{1x\\}", 0, REG_BADBR); 1621 test_comp("a\\{1,0\\}", 0, REG_BADBR); 1622 test_comp("a\\{x\\}", 0, REG_BADBR); 1623 test_comp("a\\{\\}", 0, REG_BADBR); 1624 1625 1626 1627 1628 /* 1629 * Internationalization tests. 1630 */ 1631 1632 /* This same test with the correct locale is below. */ 1633 test_comp("��+", REG_EXTENDED, 0); 1634 test_exec("���ξޤϡ�����������������", 0, REG_OK, 10, 13, END); 1635 1636 #if !defined(WIN32) && !defined(__OpenBSD__) 1637 if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") != NULL) 1638 { 1639 printf("\nTesting LC_CTYPE en_US.ISO-8859-1\n"); 1640 test_comp("aBCdeFghiJKlmnoPQRstuvWXyZ���", REG_ICASE, 0); 1641 test_exec("abCDefGhiJKlmNoPqRStuVwXyz���", 0, REG_OK, 0, 29, END); 1642 } 1643 1644 #ifdef TRE_MULTIBYTE 1645 if (setlocale(LC_CTYPE, "ja_JP.eucjp") != NULL) 1646 { 1647 printf("\nTesting LC_CTYPE ja_JP.eucjp\n"); 1648 /* I tried to make a test where implementations not aware of multibyte 1649 character sets will fail. I have no idea what the japanese text here 1650 means, I took it from http://www.ipsec.co.jp/. */ 1651 test_comp("��+", REG_EXTENDED, 0); 1652 test_exec("���ξޤϡ�����������������", 0, REG_OK, 10, 12, END); 1653 1654 test_comp("a", REG_EXTENDED, 0); 1655 test_nexec("foo\000bar", 7, 0, REG_OK, 5, 6, END); 1656 test_comp("c$", REG_EXTENDED, 0); 1657 test_exec("abc", 0, REG_OK, 2, 3, END); 1658 } 1659 #endif /* TRE_MULTIBYTE */ 1660 #endif 1661 1662 tre_regfree(&reobj); 1663 1664 printf("\n"); 1665 if (comp_errors || exec_errors) 1666 printf("%d (%d + %d) out of %d tests FAILED!\n", 1667 comp_errors + exec_errors, comp_errors, exec_errors, 1668 comp_tests + exec_tests); 1669 else 1670 printf("All %d tests passed.\n", comp_tests + exec_tests); 1671 1672 1673 #ifdef MALLOC_DEBUGGING 1674 if (xmalloc_dump_leaks()) 1675 return 1; 1676 #endif /* MALLOC_DEBUGGING */ 1677 1678 return comp_errors || exec_errors; 1679 } 1680 1681 /* EOF */ 1682