1 /*
2 retest.c - TRE regression test program
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
/*
  This is just a simple test application containing various hand-written
  tests for regression testing TRE.  I've tried to wrap TRE-specific
  tests in ifdefs, so that this program can be used to test any
  POSIX-compatible regexp implementation.
*/
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <string.h>
24 #include <sys/types.h>
25 #include <locale.h>
26 #ifdef HAVE_MALLOC_H
27 #include <malloc.h>
28 #endif /* HAVE_MALLOC_H */
29 #include <regex.h>
30
31 #ifdef TRE_VERSION
32 #define HAVE_REGNEXEC 1
33 #define HAVE_REGNCOMP 1
34 #include "xmalloc.h"
35 #else /* !TRE_VERSION */
36 #define xmalloc malloc
37 #define xfree free
38 #endif /* !TRE_VERSION */
39 #include "tre-internal.h"
40
41 #ifdef WRETEST
42 #include <wchar.h>
43 #define CHAR_T wchar_t
44 #define L(x) (L ## x)
45
46 #define MAXSTRSIZE 1024
47 static wchar_t wstr[MAXSTRSIZE];
48 static wchar_t wregex[MAXSTRSIZE];
49 static int woffs[MAXSTRSIZE];
50
51 #define tre_regexec tre_regwexec
52 #define tre_regnexec tre_regwnexec
53 #define tre_regcomp tre_regwcomp
54 #define tre_regncomp tre_regwncomp
55
56 /* Iterate mbrtowc over the multi-byte sequence STR of length LEN,
57 store the result in BUF and memoize the successive byte offsets
58 in OFF. */
59
60 static int
mbntowc(wchar_t * buf,const char * str,size_t len,int * off)61 mbntowc (wchar_t *buf, const char *str, size_t len, int *off)
62 {
63 int n, wlen;
64 #ifdef HAVE_MBSTATE_T
65 mbstate_t cst;
66 memset(&cst, 0, sizeof(cst));
67 #endif
68
69 if (len >= MAXSTRSIZE)
70 {
71 fprintf(stderr, "Increase MAXSTRSIZE to %ld or more and recompile!\n",
72 (long)len + 1);
73 exit(EXIT_FAILURE);
74 }
75
76 if (off)
77 {
78 memset(off + 1, -1, len * sizeof(int));
79 *off = 0;
80 }
81
82 wlen = 0;
83 while (len > 0)
84 {
85 n = tre_mbrtowc(buf ? buf++ : NULL, str, len, &cst);
86 if (n < 0)
87 return n;
88 if (n == 0)
89 n = 1;
90 str += n;
91 len -= n;
92 wlen += 1;
93 if (off)
94 *(off += n) = wlen;
95 }
96
97 return(wlen);
98 }
99
100 #else /* !WRETEST */
101 #define CHAR_T char
102 #define L(x) (x)
103 #endif /* !WRETEST */
104
105 static int valid_reobj = 0;
106 static regex_t reobj;
107 static regmatch_t pmatch_global[32];
108 static const CHAR_T *regex_pattern;
109 static int cflags_global;
110 static int use_regnexec = 0;
111 static int use_regncomp = 0;
112 static int avoid_eflags = 0;
113
114 static int comp_tests = 0;
115 static int exec_tests = 0;
116 static int comp_errors = 0;
117 static int exec_errors = 0;
118
119 #ifndef REG_OK
120 #define REG_OK 0
121 #endif /* REG_OK */
122
123 #define END -2
124
/* Print the progress character C to stdout, breaking the line after every
   79 characters emitted through this function, and flush so progress is
   visible immediately. */
static void
test_status(char c)
{
  static int emitted = 0;

  putchar(c);
  emitted++;
  if (emitted % 79 == 0)
    putchar('\n');
  fflush(stdout);
}
134
135
136 static int
wrap_regexec(const CHAR_T * data,size_t len,size_t pmatch_len,regmatch_t * pmatch,int eflags)137 wrap_regexec(const CHAR_T *data, size_t len,
138 size_t pmatch_len, regmatch_t *pmatch, int eflags)
139 {
140 CHAR_T *buf = NULL;
141 int result;
142
143 if (len == 0 && use_regnexec)
144 {
145 /* Zero length string and using tre_regnexec(), the pointer we give
146 should not be dereferenced at all. */
147 buf = NULL;
148 }
149 else
150 {
151 /* Copy the data to a separate buffer to make a better test for
152 tre_regexec() and tre_regnexec(). */
153 buf = xmalloc((len + !use_regnexec) * sizeof(CHAR_T));
154 if (!buf)
155 return REG_ESPACE;
156 memcpy(buf, data, len * sizeof(CHAR_T));
157 test_status('#');
158 }
159
160 #ifdef HAVE_REGNEXEC
161 if (use_regnexec)
162 {
163 if (len == 0)
164 result = tre_regnexec(&reobj, NULL, len, pmatch_len, pmatch, eflags);
165 else
166 result = tre_regnexec(&reobj, buf, len, pmatch_len, pmatch, eflags);
167 }
168 else
169 #endif /* HAVE_REGNEXEC */
170 {
171 buf[len] = L('\0');
172 result = tre_regexec(&reobj, buf, pmatch_len, pmatch, eflags);
173 }
174
175 xfree(buf);
176 return result;
177 }
178
179 static int
wrap_regcomp(regex_t * preg,const CHAR_T * data,size_t len,int cflags)180 wrap_regcomp(regex_t *preg, const CHAR_T *data, size_t len, int cflags)
181 {
182 #ifdef HAVE_REGNCOMP
183 if (use_regncomp)
184 return tre_regncomp(preg, data, len, cflags);
185 else
186 return tre_regcomp(preg, data, cflags);
187 #else /* !HAVE_REGNCOMP */
188 fprintf(stderr, "%s\n", data);
189 return tre_regcomp(preg, data, cflags);
190 #endif /* !HAVE_REGNCOMP */
191 }
192
193 static int
execute(const CHAR_T * data,int len,size_t pmatch_len,regmatch_t * pmatch,int eflags)194 execute(const CHAR_T *data, int len, size_t pmatch_len, regmatch_t *pmatch,
195 int eflags)
196 {
197 #ifdef MALLOC_DEBUGGING
198 int i = 0;
199 int ret;
200
201 while (1)
202 {
203 xmalloc_configure(i);
204 comp_tests++;
205 ret = wrap_regexec(data, len, pmatch_len, pmatch, eflags);
206 if (ret != REG_ESPACE)
207 {
208 break;
209 }
210 #ifdef REGEX_DEBUG
211 xmalloc_dump_leaks();
212 #endif /* REGEX_DEBUG */
213 i++;
214 }
215 return ret;
216 #else /* !MALLOC_DEBUGGING */
217 return wrap_regexec(data, len, pmatch_len, pmatch, eflags);
218 #endif /* !MALLOC_DEBUGGING */
219 }
220
221 static int
check(va_list ap,int ret,const CHAR_T * str,size_t pmatch_len,regmatch_t * pmatch,int eflags)222 check(va_list ap, int ret, const CHAR_T *str,
223 size_t pmatch_len, regmatch_t *pmatch, int eflags)
224 {
225 int fail = 0;
226
227 if (ret != va_arg(ap, int))
228 {
229 #ifndef WRETEST
230 printf("Exec error, regex: \"%s\", cflags %d, "
231 "string: \"%s\", eflags %d\n", regex_pattern, cflags_global,
232 str, eflags);
233 #else /* WRETEST */
234 printf("Exec error, regex: \"%ls\", cflags %d, "
235 "string: \"%ls\", eflags %d\n", regex_pattern, cflags_global,
236 str, eflags);
237 #endif /* WRETEST */
238 printf(" got %smatch (tre_regexec returned %d)\n", ret ? "no " : "", ret);
239 return 1;
240 }
241
242 if (ret == 0)
243 {
244 unsigned int i;
245
246 for (i = 0; i < pmatch_len; i++)
247 {
248 int rm_so, rm_eo;
249 rm_so = va_arg(ap, int);
250 if (rm_so == END)
251 break;
252 rm_eo = va_arg(ap, int);
253 #ifdef WRETEST
254 if (rm_so >= 0)
255 {
256 int n = rm_so;
257
258 if ((rm_so = woffs[rm_so]) < 0 ||
259 (n = rm_eo, rm_eo = woffs[rm_eo]) < 0)
260 {
261 printf("Invalid or incomplete multi-byte sequence "
262 "in string %ls before byte offset %d\n", str, n);
263 return 1;
264 }
265 }
266 #endif /* WRETEST */
267 if (pmatch[i].rm_so != rm_so
268 || pmatch[i].rm_eo != rm_eo)
269 {
270 #ifndef WRETEST
271 printf("Exec error, regex: \"%s\", string: \"%s\"\n",
272 regex_pattern, str);
273 printf(" group %d: expected (%d, %d) \"%.*s\", "
274 "got (%d, %d) \"%.*s\"\n",
275 #else /* WRETEST */
276 printf("Exec error, regex: \"%ls\", string: \"%ls\"\n",
277 regex_pattern, str);
278 printf(" group %d: expected (%d, %d) \"%.*ls\", "
279 "got (%d, %d) \"%.*ls\"\n",
280 #endif /* WRETEST */
281 i, rm_so, rm_eo, rm_eo - rm_so, str + rm_so,
282 (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo,
283 (int)(pmatch[i].rm_eo - pmatch[i].rm_so),
284 str + pmatch[i].rm_so);
285 fail = 1;
286 }
287 }
288
289 if (!(cflags_global & REG_NOSUB) && reobj.re_nsub != i - 1
290 && reobj.re_nsub <= pmatch_len && pmatch)
291 {
292 #ifndef WRETEST
293 printf("Comp error, regex: \"%s\"\n", regex_pattern);
294 #else /* WRETEST */
295 printf("Comp error, regex: \"%ls\"\n", regex_pattern);
296 #endif /* WRETEST */
297 printf(" re_nsub is %d, should be %d\n", (int)reobj.re_nsub, i - 1);
298 fail = 1;
299 }
300
301
302 for (; i < pmatch_len; i++)
303 if (pmatch[i].rm_so != -1 || pmatch[i].rm_eo != -1)
304 {
305 if (!fail)
306 #ifndef WRETEST
307 printf("Exec error, regex: \"%s\", string: \"%s\"\n",
308 regex_pattern, str);
309 #else /* WRETEST */
310 printf("Exec error, regex: \"%ls\", string: \"%ls\"\n",
311 regex_pattern, str);
312 #endif /* WRETEST */
313 printf(" group %d: expected (-1, -1), got (%d, %d)\n",
314 i, (int)pmatch[i].rm_so, (int)pmatch[i].rm_eo);
315 fail = 1;
316 }
317 }
318
319 return fail;
320 }
321
322
323 static void
test_nexec(const char * data,size_t len,int eflags,...)324 test_nexec(const char *data, size_t len, int eflags, ...)
325 {
326 int m;
327 int fail = 0;
328 int extra_flags[] = {0, REG_BACKTRACKING_MATCHER, REG_APPROX_MATCHER};
329 size_t i;
330 va_list ap;
331
332 if (!valid_reobj)
333 {
334 exec_errors++;
335 return;
336 }
337
338 #ifdef WRETEST
339 {
340 int wlen = mbntowc(wstr, data, len, woffs);
341 if (wlen < 0)
342 {
343 exec_errors++;
344 printf("Invalid or incomplete multi-byte sequence in %s\n", data);
345 return;
346 }
347 wstr[wlen] = L'\0';
348 len = wlen;
349 }
350 #define data wstr
351 #endif /* WRETEST */
352
353 use_regnexec = 1;
354
355 for (i = 0; i < elementsof(extra_flags); i++)
356 {
357 int final_flags = eflags | extra_flags[i];
358
359 if ((final_flags & REG_BACKTRACKING_MATCHER
360 && tre_have_approx(&reobj))
361 || (final_flags & REG_APPROX_MATCHER
362 && tre_have_backrefs(&reobj))
363 || (final_flags & avoid_eflags))
364 continue;
365
366 /* Test with a pmatch array. */
367 exec_tests++;
368 m = execute(data, len, elementsof(pmatch_global), pmatch_global,
369 final_flags);
370 va_start(ap, eflags);
371 fail |= check(ap, m, data, elementsof(pmatch_global), pmatch_global,
372 final_flags);
373 va_end(ap);
374
375 /* Same test with a NULL pmatch. */
376 exec_tests++;
377 m = execute(data, len, 0, NULL, final_flags);
378 va_start(ap, eflags);
379 fail |= check(ap, m, data, 0, NULL, final_flags);
380 va_end(ap);
381 }
382
383 #ifdef WRETEST
384 #undef data
385 #endif /* WRETEST */
386
387 if (fail)
388 exec_errors++;
389 }
390
391
392
393 static void
test_exec(const char * str,int eflags,...)394 test_exec(const char *str, int eflags, ...)
395 {
396 int m;
397 int fail = 0;
398 size_t len = strlen(str);
399 int extra_flags[] = {0,
400 REG_BACKTRACKING_MATCHER,
401 REG_APPROX_MATCHER,
402 REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER};
403 size_t i;
404 va_list ap;
405
406 if (!valid_reobj)
407 {
408 exec_errors++;
409 return;
410 }
411
412 #ifdef WRETEST
413 {
414 int wlen = mbntowc(wstr, str, len, woffs);
415 if (wlen < 0)
416 {
417 exec_errors++;
418 printf("Invalid or incomplete multi-byte sequence in %s\n", str);
419 return;
420 }
421 wstr[wlen] = L'\0';
422 len = wlen;
423 }
424 #define str wstr
425 #endif /* WRETEST */
426
427 for (use_regnexec = 0; use_regnexec < 2; use_regnexec++)
428 {
429 for (i = 0; i < elementsof(extra_flags); i++)
430 {
431 int final_flags = eflags | extra_flags[i];
432
433 if ((final_flags & REG_BACKTRACKING_MATCHER
434 && tre_have_approx(&reobj))
435 || (final_flags & REG_APPROX_MATCHER
436 && tre_have_backrefs(&reobj))
437 || (final_flags & avoid_eflags))
438 continue;
439
440 /* Test with a pmatch array. */
441 exec_tests++;
442 m = execute(str, len, elementsof(pmatch_global), pmatch_global,
443 final_flags);
444 va_start(ap, eflags);
445 fail |= check(ap, m, str, elementsof(pmatch_global), pmatch_global,
446 final_flags);
447 va_end(ap);
448
449 /* Same test with a NULL pmatch. */
450 exec_tests++;
451 m = execute(str, len, 0, NULL, final_flags);
452 va_start(ap, eflags);
453 fail |= check(ap, m, str, 0, NULL, final_flags);
454 va_end(ap);
455 }
456 }
457
458 #ifdef WRETEST
459 #undef str
460 #endif /* WRETEST */
461
462 if (fail)
463 exec_errors++;
464 }
465
466
467 static void
test_comp(const char * re,int flags,int ret)468 test_comp(const char *re, int flags, int ret)
469 {
470 int errcode = 0;
471 int len = strlen(re);
472
473 if (valid_reobj)
474 {
475 tre_regfree(&reobj);
476 valid_reobj = 0;
477 }
478
479 comp_tests++;
480
481 #ifdef WRETEST
482 {
483 int wlen = mbntowc(wregex, re, len, NULL);
484
485 if (wlen < 0)
486 {
487 comp_errors++;
488 printf("Invalid or incomplete multi-byte sequence in %s\n", re);
489 return;
490 }
491 wregex[wlen] = L'\0';
492 len = wlen;
493 }
494 #define re wregex
495 #endif /* WRETEST */
496 regex_pattern = re;
497 cflags_global = flags;
498
499 #ifdef MALLOC_DEBUGGING
500 {
501 static int j = 0;
502 int i = 0;
503 while (1)
504 {
505 xmalloc_configure(i);
506 comp_tests++;
507 if (j++ % 20 == 0)
508 test_status('.');
509 errcode = wrap_regcomp(&reobj, re, len, flags);
510 if (errcode != REG_ESPACE)
511 {
512 test_status('*');
513 break;
514 }
515 #ifdef REGEX_DEBUG
516 xmalloc_dump_leaks();
517 #endif /* REGEX_DEBUG */
518 i++;
519 }
520 }
521 #else /* !MALLOC_DEBUGGING */
522 errcode = wrap_regcomp(&reobj, re, len, flags);
523 #endif /* !MALLOC_DEBUGGING */
524
525 #ifdef WRETEST
526 #undef re
527 #endif /* WRETEST */
528
529 if (errcode != ret)
530 {
531 #ifndef WRETEST
532 printf("Comp error, regex: \"%s\"\n", regex_pattern);
533 #else /* WRETEST */
534 printf("Comp error, regex: \"%ls\"\n", regex_pattern);
535 #endif /* WRETEST */
536 printf(" expected return code %d, got %d.\n",
537 ret, errcode);
538 comp_errors++;
539 }
540
541 if (errcode == 0)
542 valid_reobj = 1;
543 }
544
545
546
547 /* To enable tests for known bugs, set this to 1. */
548 #define KNOWN_BUG 0
549
550 int
main(int argc,char ** argv)551 main(int argc, char **argv)
552 {
553
554 #ifdef WRETEST
555 /* Need an 8-bit locale. Or move the two tests with non-ascii
556 characters to the localized internationalization tests. */
557 if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") == NULL)
558 fprintf(stderr, "Could not set locale en_US.ISO-8859-1. Expect some\n"
559 "`Invalid or incomplete multi-byte sequence' errors.\n");
560 #endif /* WRETEST */
561 /* Large number of macros in one regexp. */
562 test_comp("[A-Z]\\d\\s?\\d[A-Z]{2}|[A-Z]\\d{2}\\s?\\d[A-Z]{2}|[A-Z]{2}\\d"
563 "\\s?\\d[A-Z]{2}|[A-Z]{2}\\d{2}\\s?\\d[A-Z]{2}|[A-Z]\\d[A-Z]\\s?"
564 "\\d[A-Z]{2}|[A-Z]{2}\\d[A-Z]\\s?\\d[A-Z]{2}|[A-Z]{3}\\s?\\d[A-Z]"
565 "{2}", REG_EXTENDED, 0);
566
567 test_comp("a{11}(b{2}c){2}", REG_EXTENDED, 0);
568 test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0);
569 test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+",
570 REG_EXTENDED, 0);
571 test_comp("^!pfast [0-9]{1,15} ([0-9]{1,3}\\.){3}[0-9]{1,3}[0-9]{1,5}$",
572 REG_EXTENDED, 0);
573
574 #if KNOWN_BUG
575 /* Should these match or not? */
576 test_comp("(a)*-\\1b", REG_EXTENDED, 0);
577 test_exec("aaa-b", 0, REG_NOMATCH);
578 test_comp("((.*)\\1)+", REG_EXTENDED, 0);
579 test_exec("xxxxxx", 0, REG_NOMATCH);
580 #endif
581
582 #ifdef TRE_APPROX
583 /*
584 * Approximate matching tests.
585 *
586 * The approximate matcher always searches for the best match, and returns
587 * the leftmost and longest one if there are several best matches.
588 */
589
590 test_comp("(fou){# ~1}", REG_EXTENDED, 0);
591 test_comp("(fuu){#}", REG_EXTENDED, 0);
592 test_comp("(fuu){# ~}", REG_EXTENDED, 0);
593 test_comp("(anaconda){ 1i + 1d < 1, #1}", REG_EXTENDED, 0);
594 test_comp("(anaconda){ 1i + 1d < 1 #1 ~10 }", REG_EXTENDED, 0);
595 test_comp("(anaconda){ #1, ~1, 1i + 1d < 1 }", REG_EXTENDED, 0);
596
597 test_comp("(znacnda){ #1 ~3 1i + 1d < 1 }", REG_EXTENDED, 0);
598 test_exec("molasses anaconda foo bar baz smith anderson ",
599 0, REG_NOMATCH);
600 test_comp("(znacnda){ #1 ~3 1i + 1d < 2 }", REG_EXTENDED, 0);
601 test_exec("molasses anaconda foo bar baz smith anderson ",
602 0, REG_OK, 9, 17, 9, 17, END);
603 test_comp("(ananda){ 1i + 1d < 2 }", REG_EXTENDED, 0);
604 test_exec("molasses anaconda foo bar baz smith anderson ",
605 0, REG_NOMATCH);
606
607 test_comp("(fuu){ +3 -3 ~5}", REG_EXTENDED, 0);
608 test_exec("anaconda foo bar baz smith anderson",
609 0, REG_OK, 9, 10, 9, 10, END);
610 test_comp("(fuu){ +2 -2 ~5}", REG_EXTENDED, 0);
611 test_exec("anaconda foo bar baz smith anderson",
612 0, REG_OK, 9, 10, 9, 10, END);
613 test_comp("(fuu){ +3 -3 ~}", REG_EXTENDED, 0);
614 test_exec("anaconda foo bar baz smith anderson",
615 0, REG_OK, 9, 10, 9, 10, END);
616
617 test_comp("(laurikari){ #3, 1i + 1d < 3 }", REG_EXTENDED, 0);
618
619 /* No cost limit. */
620 test_comp("(foobar){~}", REG_EXTENDED, 0);
621 test_exec("xirefoabralfobarxie", 0, REG_OK, 11, 16, 11, 16, END);
622
623 /* At most two errors. */
624 test_comp("(foobar){~2}", REG_EXTENDED, 0);
625 test_exec("xirefoabrzlfd", 0, REG_OK, 4, 9, 4, 9, END);
626 test_exec("xirefoabzlfd", 0, REG_NOMATCH);
627
628 /* At most two inserts or substitutions and max two errors total. */
629 test_comp("(foobar){+2#2~2}", REG_EXTENDED, 0);
630 test_exec("oobargoobaploowap", 0, REG_OK, 5, 11, 5, 11, END);
631
632 /* Find best whole word match for "foobar". */
633 test_comp("\\<(foobar){~}\\>", REG_EXTENDED, 0);
634 test_exec("zfoobarz", 0, REG_OK, 0, 8, 0, 8, END);
635 test_exec("boing zfoobarz goobar woop", 0, REG_OK, 15, 21, 15, 21, END);
636
637 /* Match whole string, allow only 1 error. */
638 test_comp("^(foobar){~1}$", REG_EXTENDED, 0);
639 test_exec("foobar", 0, REG_OK, 0, 6, 0, 6, END);
640 test_exec("xfoobar", 0, REG_OK, 0, 7, 0, 7, END);
641 /*
642 This currently fails.
643 test_exec("foobarx", 0, REG_OK, 0, 7, 0, 7, END);
644 */
645 test_exec("fooxbar", 0, REG_OK, 0, 7, 0, 7, END);
646 test_exec("foxbar", 0, REG_OK, 0, 6, 0, 6, END);
647 test_exec("xoobar", 0, REG_OK, 0, 6, 0, 6, END);
648 test_exec("foobax", 0, REG_OK, 0, 6, 0, 6, END);
649 test_exec("oobar", 0, REG_OK, 0, 5, 0, 5, END);
650 test_exec("fobar", 0, REG_OK, 0, 5, 0, 5, END);
651 test_exec("fooba", 0, REG_OK, 0, 5, 0, 5, END);
652 test_exec("xfoobarx", 0, REG_NOMATCH);
653 test_exec("foobarxx", 0, REG_NOMATCH);
654 test_exec("xxfoobar", 0, REG_NOMATCH);
655 test_exec("xfoxbar", 0, REG_NOMATCH);
656 test_exec("foxbarx", 0, REG_NOMATCH);
657
658 /* At most one insert, two deletes, and three substitutions.
659 Additionally, deletes cost two and substitutes one, and total
660 cost must be less than 4. */
661 test_comp("(foobar){+1 -2 #3, 2d + 1s < 4}", REG_EXTENDED, 0);
662 test_exec("3oifaowefbaoraofuiebofasebfaobfaorfeoaro",
663 0, REG_OK, 26, 33, 26, 33, END);
664
665 /* Partially approximate matches. */
666 test_comp("foo(bar){~1}zap", REG_EXTENDED, 0);
667 test_exec("foobarzap", 0, REG_OK, 0, 9, 3, 6, END);
668 test_exec("fobarzap", 0, REG_NOMATCH);
669 test_exec("foobrzap", 0, REG_OK, 0, 8, 3, 5, END);
670 test_comp("^.*(dot.org){~}.*$", REG_EXTENDED, 0);
671 test_exec("www.cnn.com 64.236.16.20\n"
672 "www.slashdot.org 66.35.250.150\n"
673 "For useful information, use www.slashdot.org\n"
674 "this is demo data!\n",
675 0, REG_OK, 0, 120, 93, 100, END);
676
677 /* Approximate matching and back referencing cannot be used together. */
678 test_comp("(foo{~})\\1", REG_EXTENDED, REG_BADPAT);
679
680 #endif /* TRE_APPROX */
681
682 /*
683 * Basic tests with pure regular expressions
684 */
685
686 /* Basic string matching. */
687 test_comp("foobar", REG_EXTENDED, 0);
688 test_exec("foobar", 0, REG_OK, 0, 6, END);
689 test_exec("xxxfoobarzapzot", 0, REG_OK, 3, 9, END);
690 test_comp("foobar", REG_EXTENDED | REG_NOSUB, 0);
691 test_exec("foobar", 0, REG_OK, END);
692 test_comp("aaaa", REG_EXTENDED, 0);
693 test_exec("xxaaaaaaaaaaaaaaaaa", 0, REG_OK, 2, 6, END);
694
695 /* Test zero length matches. */
696 test_comp("(a*)", REG_EXTENDED, 0);
697 test_exec("", 0, REG_OK, 0, 0, 0, 0, END);
698
699 test_comp("(a*)*", REG_EXTENDED, 0);
700 test_exec("", 0, REG_OK, 0, 0, 0, 0, END);
701
702 test_comp("((a*)*)*", REG_EXTENDED, 0);
703 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END);
704 test_comp("(a*bcd)*", REG_EXTENDED, 0);
705 test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabcx", 0, REG_OK, 0, 0, -1, -1, END);
706 test_exec("aaaaaaaaaaaabcxbcxbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END);
707 test_exec("aaaaaaaaaaaabcxbcdbcxaabcxaabc", 0, REG_OK, 0, 0, -1, -1, END);
708 test_exec("aaaaaaaaaaaabcdbcdbcxaabcxaabc", 0, REG_OK, 0, 18, 15, 18, END);
709
710 test_comp("(a*)+", REG_EXTENDED, 0);
711 test_exec("-", 0, REG_OK, 0, 0, 0, 0, END);
712
713 /* This test blows up the backtracking matcher. */
714 avoid_eflags = REG_BACKTRACKING_MATCHER;
715 test_comp("((a*)*b)*b", REG_EXTENDED, 0);
716 test_exec("aaaaaaaaaaaaaaaaaaaaaaaaab", 0, REG_OK,
717 25, 26, -1, -1, -1, -1, END);
718 avoid_eflags = 0;
719
720 test_comp("", 0, 0);
721 test_exec("", 0, REG_OK, 0, 0, END);
722 test_exec("foo", 0, REG_OK, 0, 0, END);
723
724 /* Test for submatch addressing which requires arbitrary lookahead. */
725 test_comp("(a*)aaaaaa", REG_EXTENDED, 0);
726 test_exec("aaaaaaaaaaaaaaax", 0, REG_OK, 0, 15, 0, 9, END);
727
728 /* Test leftmost and longest matching and some tricky submatches. */
729 test_comp("(a*)(a*)", REG_EXTENDED, 0);
730 test_exec("aaaa", 0, REG_OK, 0, 4, 0, 4, 4, 4, END);
731 test_comp("(abcd|abc)(d?)", REG_EXTENDED, 0);
732 test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END);
733 test_comp("(abc|abcd)(d?)", REG_EXTENDED, 0);
734 test_exec("abcd", 0, REG_OK, 0, 4, 0, 4, 4, 4, END);
735 test_comp("(abc|abcd)(d?)e", REG_EXTENDED, 0);
736 test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END);
737 test_comp("(abcd|abc)(d?)e", REG_EXTENDED, 0);
738 test_exec("abcde", 0, REG_OK, 0, 5, 0, 4, 4, 4, END);
739 test_comp("a(bc|bcd)(d?)", REG_EXTENDED, 0);
740 test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END);
741 test_comp("a(bcd|bc)(d?)", REG_EXTENDED, 0);
742 test_exec("abcd", 0, REG_OK, 0, 4, 1, 4, 4, 4, END);
743 test_comp("a*(a?bc|bcd)(d?)", REG_EXTENDED, 0);
744 test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END);
745 test_comp("a*(bcd|a?bc)(d?)", REG_EXTENDED, 0);
746 test_exec("aaabcd", 0, REG_OK, 0, 6, 3, 6, 6, 6, END);
747 test_comp("(a|(a*b*))*", REG_EXTENDED, 0);
748 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END);
749 test_exec("a", 0, REG_OK, 0, 1, 0, 1, -1, -1, END);
750 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END);
751 test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END);
752 test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END);
753 test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END);
754 test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END);
755 test_comp("((a*b*)|a)*", REG_EXTENDED, 0);
756 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END);
757 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END);
758 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 2, END);
759 test_exec("aaa", 0, REG_OK, 0, 3, 0, 3, 0, 3, END);
760 test_exec("bbb", 0, REG_OK, 0, 3, 0, 3, 0, 3, END);
761 test_exec("aaabbb", 0, REG_OK, 0, 6, 0, 6, 0, 6, END);
762 test_exec("bbbaaa", 0, REG_OK, 0, 6, 3, 6, 3, 6, END);
763 test_comp("a.*(.*b.*(.*c.*).*d.*).*e.*(.*f.*).*g", REG_EXTENDED, 0);
764 test_exec("aabbccddeeffgg", 0, REG_OK, 0, 14, 3, 9, 5, 7, 11, 13, END);
765 test_comp("(wee|week)(night|knights)s*", REG_EXTENDED, 0);
766 test_exec("weeknights", 0, REG_OK, 0, 10, 0, 3, 3, 10, END);
767 test_exec("weeknightss", 0, REG_OK, 0, 11, 0, 3, 3, 10, END);
768 test_comp("a*", REG_EXTENDED, 0);
769 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END);
770 test_comp("aa*", REG_EXTENDED, 0);
771 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END);
772 test_comp("aaa*", REG_EXTENDED, 0);
773 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END);
774 test_comp("aaaa*", REG_EXTENDED, 0);
775 test_exec("aaaaaaaaaa", 0, REG_OK, 0, 10, END);
776
777 /* Test clearing old submatch data with nesting parentheses
778 and iteration. */
779 test_comp("((a)|(b))*c", REG_EXTENDED, 0);
780 test_exec("aaabc", 0, REG_OK, 0, 5, 3, 4, -1, -1, 3, 4, END);
781 test_exec("aaaac", 0, REG_OK, 0, 5, 3, 4, 3, 4, -1, -1, END);
782 test_comp("foo((bar)*)*zot", REG_EXTENDED, 0);
783 test_exec("foozot", 0, REG_OK, 0, 6, 3, 3, -1, -1, END);
784 test_exec("foobarzot", 0, REG_OK, 0, 9, 3, 6, 3, 6, END);
785 test_exec("foobarbarzot", 0, REG_OK, 0, 12, 3, 9, 6, 9, END);
786
787 test_comp("foo((zup)*|(bar)*|(zap)*)*zot", REG_EXTENDED, 0);
788 test_exec("foobarzapzot", 0, REG_OK,
789 0, 12, 6, 9, -1, -1, -1, -1, 6, 9, END);
790 test_exec("foobarbarzapzot", 0, REG_OK,
791 0, 15, 9, 12, -1, -1, -1, -1, 9, 12, END);
792 test_exec("foozupzot", 0, REG_OK,
793 0, 9, 3, 6, 3, 6, -1, -1, -1, -1, END);
794 test_exec("foobarzot", 0, REG_OK,
795 0, 9, 3, 6, -1, -1, 3, 6, -1, -1, END);
796 test_exec("foozapzot", 0, REG_OK,
797 0, 9, 3, 6, -1, -1, -1, -1, 3, 6, END);
798 test_exec("foozot", 0, REG_OK,
799 0, 6, 3, 3, -1, -1, -1, -1, -1, -1, END);
800
801
802 /* Test case where, e.g., Perl and Python regexp functions, and many
803 other backtracking matchers, fail to produce the longest match.
804 It is not exactly a bug since Perl does not claim to find the
805 longest match, but a confusing feature and, in my opinion, a bad
806 design choice because the union operator is traditionally defined
807 to be commutative (with respect to the language denoted by the RE). */
808 test_comp("(a|ab)(blip)?", REG_EXTENDED, 0);
809 test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END);
810 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END);
811 test_comp("(ab|a)(blip)?", REG_EXTENDED, 0);
812 test_exec("ablip", 0, REG_OK, 0, 5, 0, 1, 1, 5, END);
813 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, -1, -1, END);
814
815 /* Test more submatch addressing. */
816 test_comp("((a|b)*)a(a|b)*", REG_EXTENDED, 0);
817 test_exec("aaaaabaaaba", 0, REG_OK, 0, 11, 0, 10, 9, 10, -1, -1, END);
818 test_exec("aaaaabaaab", 0, REG_OK, 0, 10, 0, 8, 7, 8, 9, 10, END);
819 test_exec("caa", 0, REG_OK, 1, 3, 1, 2, 1, 2, -1, -1, END);
820 test_comp("((a|aba)*)(ababbaba)((a|b)*)", REG_EXTENDED, 0);
821 test_exec("aabaababbabaaababbab", 0, REG_OK,
822 0, 20, 0, 4, 1, 4, 4, 12, 12, 20, 19, 20, END);
823 test_exec("aaaaababbaba", 0, REG_OK,
824 0, 12, 0, 4, 3, 4, 4, 12, 12, 12, -1, -1, END);
825 test_comp("((a|aba|abb|bba|bab)*)(ababbababbabbbabbbbbbabbaba)((a|b)*)",
826 REG_EXTENDED, 0);
827 test_exec("aabaabbbbabababaababbababbabbbabbbbbbabbabababbababababbabababa",
828 0, REG_OK, 0, 63, 0, 16, 13, 16, 16, 43, 43, 63, 62, 63, END);
829
830 /* Test for empty subexpressions. */
831 test_comp("", 0, 0);
832 test_exec("", 0, REG_OK, 0, 0, END);
833 test_exec("foo", 0, REG_OK, 0, 0, END);
834 test_comp("(a|)", REG_EXTENDED, 0);
835 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END);
836 test_exec("b", 0, REG_OK, 0, 0, 0, 0, END);
837 test_exec("", 0, REG_OK, 0, 0, 0, 0, END);
838 test_comp("a|", REG_EXTENDED, 0);
839 test_exec("a", 0, REG_OK, 0, 1, END);
840 test_exec("b", 0, REG_OK, 0, 0, END);
841 test_exec("", 0, REG_OK, 0, 0, END);
842 test_comp("|a", REG_EXTENDED, 0);
843 test_exec("a", 0, REG_OK, 0, 1, END);
844 test_exec("b", 0, REG_OK, 0, 0, END);
845 test_exec("", 0, REG_OK, 0, 0, END);
846
847 /* Miscellaneous tests. */
848 test_comp("(a*)b(c*)", REG_EXTENDED, 0);
849 test_exec("abc", 0, REG_OK, 0, 3, 0, 1, 2, 3, END);
850 test_exec("***abc***", 0, REG_OK, 3, 6, 3, 4, 5, 6, END);
851 test_comp("(a)", REG_EXTENDED, 0);
852 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END);
853 test_comp("((a))", REG_EXTENDED, 0);
854 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, END);
855 test_comp("(((a)))", REG_EXTENDED, 0);
856 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, END);
857 test_comp("((((((((((((((((((((a))))))))))))))))))))", REG_EXTENDED, 0);
858 test_exec("a", 0, REG_OK, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
859 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
860 0, 1, 0, 1, 0, 1, END);
861
862 test_comp("ksntoeaiksntoeaikstneoaiksnteoaiksntoeaiskntoeaiskntoekainstoei"
863 "askntoeakisntoeksaitnokesantiksoentaikosentaiksoentaiksnoeaiskn"
864 "teoaksintoekasitnoeksaitkosetniaksoetnaisknoetakistoeksintokesa"
865 "nitksoentaisknoetaisknoetiaksotneaikstoekasitoeskatioksentaikso"
866 "enatiksoetnaiksonateiksoteaeskanotisknetaiskntoeasknitoskenatis"
867 "konetaisknoteai", 0, 0);
868
869 test_comp("((aab)|(aac)|(aa*))c", REG_EXTENDED, 0);
870 test_exec("aabc", 0, REG_OK, 0, 4, 0, 3, 0, 3, -1, -1, -1, -1, END);
871 test_exec("aacc", 0, REG_OK, 0, 4, 0, 3, -1, -1, 0, 3, -1, -1, END);
872 test_exec("aaac", 0, REG_OK, 0, 4, 0, 3, -1, -1, -1, -1, 0, 3, END);
873
874 test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
875 REG_EXTENDED, 0);
876 test_exec("foo!bar!bas", 0, REG_OK,
877 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END);
878 test_comp("^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$",
879 REG_EXTENDED, 0);
880 test_exec("foo!bar!bas", 0, REG_OK,
881 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END);
882 test_comp("^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$",
883 REG_EXTENDED, 0);
884 test_exec("foo!bar!bas", 0, REG_OK,
885 0, 11, 0, 11, -1, -1, -1, -1, 4, 8, 8, 11, END);
886
887 test_comp("M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
888 REG_EXTENDED, 0);
889 test_exec("Muammar Quathafi", 0, REG_OK, 0, 16, -1, -1, 11, 13, END);
890
891 test_comp("(Ab|cD)*", REG_EXTENDED | REG_ICASE, 0);
892 test_exec("aBcD", 0, REG_OK, 0, 4, 2, 4, END);
893
894 test_comp("a**", REG_EXTENDED, REG_BADRPT);
895 test_comp("a*+", REG_EXTENDED, REG_BADRPT);
896 test_comp("a+*", REG_EXTENDED, REG_BADRPT);
897 test_comp("a++", REG_EXTENDED, REG_BADRPT);
898 test_comp("a?+", REG_EXTENDED, REG_BADRPT);
899 test_comp("a?*", REG_EXTENDED, REG_BADRPT);
900 test_comp("a{1,2}*", REG_EXTENDED, REG_BADRPT);
901 test_comp("a{1,2}+", REG_EXTENDED, REG_BADRPT);
902
903 /*
904 * Many of the following tests were mostly inspired by (or copied from) the
905 * libhackerlab posix test suite by Tom Lord.
906 */
907
908 test_comp("a", 0, 0);
909 test_exec("a", 0, REG_OK, 0, 1, END);
910 test_comp("\\.", 0, 0);
911 test_exec(".", 0, REG_OK, 0, 1, END);
912 test_comp("\\[", 0, 0);
913 test_exec("[", 0, REG_OK, 0, 1, END);
914 test_comp("\\\\", 0, 0);
915 test_exec("\\", 0, REG_OK, 0, 1, END);
916 test_comp("\\*", 0, 0);
917 test_exec("*", 0, REG_OK, 0, 1, END);
918 test_comp("\\^", 0, 0);
919 test_exec("^", 0, REG_OK, 0, 1, END);
920 test_comp("\\$", 0, 0);
921 test_exec("$", 0, REG_OK, 0, 1, END);
922
923 test_comp("\\", 0, REG_EESCAPE);
924
925 test_comp("x\\.", 0, 0);
926 test_exec("x.", 0, REG_OK, 0, 2, END);
927 test_comp("x\\[", 0, 0);
928 test_exec("x[", 0, REG_OK, 0, 2, END);
929 test_comp("x\\\\", 0, 0);
930 test_exec("x\\", 0, REG_OK, 0, 2, END);
931 test_comp("x\\*", 0, 0);
932 test_exec("x*", 0, REG_OK, 0, 2, END);
933 test_comp("x\\^", 0, 0);
934 test_exec("x^", 0, REG_OK, 0, 2, END);
935 test_comp("x\\$", 0, 0);
936 test_exec("x$", 0, REG_OK, 0, 2, END);
937
938 test_comp("x\\", 0, REG_EESCAPE);
939
940 test_comp(".", 0, 0);
941 test_exec("a", 0, REG_OK, 0, 1, END);
942 test_exec("\n", 0, REG_OK, 0, 1, END);
943
944 test_comp("(+|?)", 0, 0);
945 test_exec("(+|?)", 0, REG_OK, 0, 5, END);
946 test_exec("+|?", 0, REG_NOMATCH);
947 test_exec("(+)", 0, REG_NOMATCH);
948 test_exec("+", 0, REG_NOMATCH);
949
950
951 /*
952 * Test bracket expressions.
953 */
954
955 test_comp("[", 0, REG_EBRACK);
956 test_comp("[]", 0, REG_EBRACK);
957 test_comp("[^]", 0, REG_EBRACK);
958
959 test_comp("[]x]", 0, 0);
960 test_exec("]", 0, REG_OK, 0, 1, END);
961 test_exec("x", 0, REG_OK, 0, 1, END);
962
963 test_comp("[.]", 0, 0);
964 test_exec(".", 0, REG_OK, 0, 1, END);
965 test_exec("a", 0, REG_NOMATCH);
966
967 test_comp("[*]", 0, 0);
968 test_exec("*", 0, REG_OK, 0, 1, END);
969
970 test_comp("[[]", 0, 0);
971 test_exec("[", 0, REG_OK, 0, 1, END);
972
973 test_comp("[\\]", 0, 0);
974 test_exec("\\", 0, REG_OK, 0, 1, END);
975
976 test_comp("[-x]", 0, 0);
977 test_exec("-", 0, REG_OK, 0, 1, END);
978 test_exec("x", 0, REG_OK, 0, 1, END);
979 test_comp("[x-]", 0, 0);
980 test_exec("-", 0, REG_OK, 0, 1, END);
981 test_exec("x", 0, REG_OK, 0, 1, END);
982 test_comp("[-]", 0, 0);
983 test_exec("-", 0, REG_OK, 0, 1, END);
984
985 test_comp("[abc]", 0, 0);
986 test_exec("a", 0, REG_OK, 0, 1, END);
987 test_exec("b", 0, REG_OK, 0, 1, END);
988 test_exec("c", 0, REG_OK, 0, 1, END);
989 test_exec("d", 0, REG_NOMATCH);
990 test_exec("xa", 0, REG_OK, 1, 2, END);
991 test_exec("xb", 0, REG_OK, 1, 2, END);
992 test_exec("xc", 0, REG_OK, 1, 2, END);
993 test_exec("xd", 0, REG_NOMATCH);
994 test_comp("x[abc]", 0, 0);
995 test_exec("xa", 0, REG_OK, 0, 2, END);
996 test_exec("xb", 0, REG_OK, 0, 2, END);
997 test_exec("xc", 0, REG_OK, 0, 2, END);
998 test_exec("xd", 0, REG_NOMATCH);
999 test_comp("[^abc]", 0, 0);
1000 test_exec("a", 0, REG_NOMATCH);
1001 test_exec("b", 0, REG_NOMATCH);
1002 test_exec("c", 0, REG_NOMATCH);
1003 test_exec("d", 0, REG_OK, 0, 1, END);
1004 test_exec("xa", 0, REG_OK, 0, 1, END);
1005 test_exec("xb", 0, REG_OK, 0, 1, END);
1006 test_exec("xc", 0, REG_OK, 0, 1, END);
1007 test_exec("xd", 0, REG_OK, 0, 1, END);
1008 test_comp("x[^abc]", 0, 0);
1009 test_exec("xa", 0, REG_NOMATCH);
1010 test_exec("xb", 0, REG_NOMATCH);
1011 test_exec("xc", 0, REG_NOMATCH);
1012 test_exec("xd", 0, REG_OK, 0, 2, END);
1013
1014 test_comp("[()+?*\\]+", REG_EXTENDED, 0);
1015 test_exec("x\\*?+()x", 0, REG_OK, 1, 7, END);
1016
1017 /* Standard character classes. */
1018 test_comp("[[:alnum:]]+", REG_EXTENDED, 0);
1019 test_exec("%abc123890XYZ=", 0, REG_OK, 1, 13, END);
1020 test_comp("[[:cntrl:]]+", REG_EXTENDED, 0);
1021 test_exec("%\n\t\015\f ", 0, REG_OK, 1, 5, END);
1022 test_comp("[[:lower:]]+", REG_EXTENDED, 0);
1023 test_exec("AbcdE", 0, REG_OK, 1, 4, END);
1024 test_comp("[[:lower:]]+", REG_EXTENDED | REG_ICASE, 0);
1025 test_exec("AbcdE", 0, REG_OK, 0, 5, END);
1026 test_comp("[[:space:]]+", REG_EXTENDED, 0);
1027 test_exec("x \t\f\nx", 0, REG_OK, 1, 5, END);
1028 test_comp("[[:alpha:]]+", REG_EXTENDED, 0);
1029 test_exec("%abC123890xyz=", 0, REG_OK, 1, 4, END);
1030 test_comp("[[:digit:]]+", REG_EXTENDED, 0);
1031 test_exec("%abC123890xyz=", 0, REG_OK, 4, 10, END);
1032 test_comp("[^[:digit:]]+", REG_EXTENDED, 0);
1033 test_exec("%abC123890xyz=", 0, REG_OK, 0, 4, END);
1034 test_comp("[[:print:]]+", REG_EXTENDED, 0);
1035 test_exec("\n %abC12\f", 0, REG_OK, 1, 8, END);
1036 test_comp("[[:upper:]]+", REG_EXTENDED, 0);
1037 test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 3, 27, END);
1038 test_comp("[[:upper:]]+", REG_EXTENDED | REG_ICASE, 0);
1039 test_exec("\n aBCDEFGHIJKLMNOPQRSTUVWXYz", 0, REG_OK, 2, 28, END);
1040 #ifdef HAVE_ISWBLANK
1041 #ifdef HAVE_ISBLANK
1042 test_comp("[[:blank:]]+", REG_EXTENDED, 0);
1043 test_exec("\na \t b", 0, REG_OK, 2, 5, END);
1044 #endif /* HAVE_ISBLANK */
1045 #endif /* HAVE_ISWBLANK */
1046 test_comp("[[:graph:]]+", REG_EXTENDED, 0);
1047 test_exec("\n %abC12\f", 0, REG_OK, 2, 8, END);
1048 test_comp("[[:punct:]]+", REG_EXTENDED, 0);
1049 test_exec("a~!@#$%^&*()_+=-`[]{};':\"|\\,./?>< ",
1050 0, REG_OK, 1, 33, END);
1051 test_comp("[[:xdigit:]]+", REG_EXTENDED, 0);
1052 test_exec("-0123456789ABCDEFabcdef", 0, REG_OK, 1, 23, END);
1053 test_comp("[[:bogus-character-class-name:]", REG_EXTENDED, REG_ECTYPE);
1054
1055
1056 /* Range expressions (assuming that the C locale is being used). */
1057 test_comp("[a-z]+", REG_EXTENDED, 0);
1058 test_exec("ABCabcxyzABC", 0, REG_OK, 3, 9, END);
1059 test_comp("[z-a]+", REG_EXTENDED, REG_ERANGE);
1060 test_comp("[a-b-c]", 0, REG_ERANGE);
1061 test_comp("[a-a]+", REG_EXTENDED, 0);
1062 test_exec("zaaaaab", 0, REG_OK, 1, 6, END);
1063 test_comp("[--Z]+", REG_EXTENDED, 0);
1064 test_exec("!ABC-./XYZ~", 0, REG_OK, 1, 10, END);
1065 test_comp("[*--]", 0, 0);
1066 test_exec("-", 0, REG_OK, 0, 1, END);
1067 test_exec("*", 0, REG_OK, 0, 1, END);
1068 test_comp("[*--Z]+", REG_EXTENDED, 0);
1069 test_exec("!+*,---ABC", 0, REG_OK, 1, 7, END);
1070 test_comp("[a-]+", REG_EXTENDED, 0);
1071 test_exec("xa-a--a-ay", 0, REG_OK, 1, 9, END);
1072
1073 /* REG_ICASE and character sets. */
1074 test_comp("[a-c]*", REG_ICASE | REG_EXTENDED, 0);
1075 test_exec("cABbage", 0, REG_OK, 0, 5, END);
1076 test_comp("[^a-c]*", REG_ICASE | REG_EXTENDED, 0);
1077 test_exec("tObAcCo", 0, REG_OK, 0, 2, END);
1078 test_comp("[A-C]*", REG_ICASE | REG_EXTENDED, 0);
1079 test_exec("cABbage", 0, REG_OK, 0, 5, END);
1080 test_comp("[^A-C]*", REG_ICASE | REG_EXTENDED, 0);
1081 test_exec("tObAcCo", 0, REG_OK, 0, 2, END);
1082
1083 /* Complex character sets. */
1084 test_comp("[[:digit:]a-z#$%]+", REG_EXTENDED, 0);
1085 test_exec("__abc#lmn012$x%yz789*", 0, REG_OK, 2, 20, END);
1086 test_comp("[[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0);
1087 test_exec("__abcLMN012x%#$yz789*", 0, REG_OK, 2, 20, END);
1088 test_comp("[^[:digit:]a-z#$%]+", REG_EXTENDED, 0);
1089 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END);
1090 test_comp("[^[:digit:]a-z#$%]+", REG_ICASE | REG_EXTENDED, 0);
1091 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 18, 23, END);
1092 test_comp("[^[:digit:]#$%[:xdigit:]]+", REG_ICASE | REG_EXTENDED, 0);
1093 test_exec("abc#lmn012$x%yz789--@*,abc", 0, REG_OK, 4, 7, END);
1094 test_comp("[^-]+", REG_EXTENDED, 0);
1095 test_exec("---afd*(&,ml---", 0, REG_OK, 3, 12, END);
1096 test_comp("[^--Z]+", REG_EXTENDED, 0);
1097 test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 12, END);
1098 test_comp("[^--Z]+", REG_ICASE | REG_EXTENDED, 0);
1099 test_exec("---AFD*(&,ml---", 0, REG_OK, 6, 10, END);
1100
1101 /* Unsupported things (equivalence classes and multicharacter collating
1102 elements) */
1103 test_comp("[[.foo.]]", 0, REG_ECOLLATE);
1104 test_comp("[[=foo=]]", 0, REG_ECOLLATE);
1105 test_comp("[[..]]", 0, REG_ECOLLATE);
1106 test_comp("[[==]]", 0, REG_ECOLLATE);
1107 test_comp("[[.]]", 0, REG_ECOLLATE);
1108 test_comp("[[=]]", 0, REG_ECOLLATE);
1109 test_comp("[[.]", 0, REG_ECOLLATE);
1110 test_comp("[[=]", 0, REG_ECOLLATE);
1111 test_comp("[[.", 0, REG_ECOLLATE);
1112 test_comp("[[=", 0, REG_ECOLLATE);
1113
1114
1115
1116 /* Miscellaneous tests. */
1117 test_comp("abc\\(\\(de\\)\\(fg\\)\\)hi", 0, 0);
1118 test_exec("xabcdefghiy", 0, REG_OK, 1, 10, 4, 8, 4, 6, 6, 8, END);
1119
1120 test_comp("abc*def", 0, 0);
1121 test_exec("xabdefy", 0, REG_OK, 1, 6, END);
1122 test_exec("xabcdefy", 0, REG_OK, 1, 7, END);
1123 test_exec("xabcccccccdefy", 0, REG_OK, 1, 13, END);
1124
1125 test_comp("abc\\(def\\)*ghi", 0, 0);
1126 test_exec("xabcghiy", 0, REG_OK, 1, 7, -1, -1, END);
1127 test_exec("xabcdefghi", 0, REG_OK, 1, 10, 4, 7, END);
1128 test_exec("xabcdefdefdefghi", 0, REG_OK, 1, 16, 10, 13, END);
1129
1130 test_comp("a?", REG_EXTENDED, REG_OK);
1131 test_exec("aaaaa", 0, REG_OK, 0, 1, END);
1132 test_exec("xaaaaa", 0, REG_OK, 0, 0, END);
1133 test_comp("a+", REG_EXTENDED, REG_OK);
1134 test_exec("aaaaa", 0, REG_OK, 0, 5, END);
1135 test_exec("xaaaaa", 0, REG_OK, 1, 6, END);
1136
1137
1138 /*
1139 * Test anchors and their behaviour with the REG_NEWLINE compilation
1140 * flag and the REG_NOTBOL, REG_NOTEOL execution flags.
1141 */
1142
1143 /* Normally, `^' matches the empty string at the beginning of input.
1144 If REG_NOTBOL is used, `^' does not match at the beginning of input. */
1145 test_comp("^abc", 0, 0);
1146 test_exec("abcdef", 0, REG_OK, 0, 3, END);
1147 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH);
1148 test_exec("xyzabcdef", 0, REG_NOMATCH);
1149 test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH);
1150 test_exec("\nabcdef", 0, REG_NOMATCH);
1151 test_exec("\nabcdef", REG_NOTBOL, REG_NOMATCH);
1152
1153 /* Normally, `$' matches the empty string at the end of input.
1154 If REG_NOTEOL is used, `$' does not match at the end of input. */
1155 test_comp("abc$", 0, 0);
1156 test_exec("defabc", 0, REG_OK, 3, 6, END);
1157 test_exec("defabc", REG_NOTEOL, REG_NOMATCH);
1158 test_exec("defabcxyz", 0, REG_NOMATCH);
1159 test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH);
1160 test_exec("defabc\n", 0, REG_NOMATCH);
1161 test_exec("defabc\n", REG_NOTEOL, REG_NOMATCH);
1162
1163 test_comp("^abc$", 0, 0);
1164 test_exec("abc", 0, REG_OK, 0, 3, END);
1165 test_exec("abc", REG_NOTBOL, REG_NOMATCH);
1166 test_exec("abc", REG_NOTEOL, REG_NOMATCH);
1167 test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH);
1168 test_exec("\nabc\n", 0, REG_NOMATCH);
1169 test_exec("defabc\n", 0, REG_NOMATCH);
1170 test_exec("\nabcdef", 0, REG_NOMATCH);
1171 test_exec("abcdef", 0, REG_NOMATCH);
1172 test_exec("defabc", 0, REG_NOMATCH);
1173 test_exec("abc\ndef", 0, REG_NOMATCH);
1174 test_exec("def\nabc", 0, REG_NOMATCH);
1175
1176 /* If REG_NEWLINE is used, `^' matches the empty string immediately after
1177 a newline, regardless of whether execution flags contain REG_NOTBOL.
1178 Similarly, if REG_NEWLINE is used, `$' matches the empty string
1179 immediately before a newline, regardless of execution flags. */
1180 test_comp("^abc", REG_NEWLINE, 0);
1181 test_exec("abcdef", 0, REG_OK, 0, 3, END);
1182 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH);
1183 test_exec("xyzabcdef", 0, REG_NOMATCH);
1184 test_exec("xyzabcdef", REG_NOTBOL, REG_NOMATCH);
1185 test_exec("\nabcdef", 0, REG_OK, 1, 4, END);
1186 test_exec("\nabcdef", REG_NOTBOL, 0, 1, 4, END);
1187 test_comp("abc$", REG_NEWLINE, 0);
1188 test_exec("defabc", 0, REG_OK, 3, 6, END);
1189 test_exec("defabc", REG_NOTEOL, REG_NOMATCH);
1190 test_exec("defabcxyz", 0, REG_NOMATCH);
1191 test_exec("defabcxyz", REG_NOTEOL, REG_NOMATCH);
1192 test_exec("defabc\n", 0, REG_OK, 3, 6, END);
1193 test_exec("defabc\n", REG_NOTEOL, 0, 3, 6, END);
1194 test_comp("^abc$", REG_NEWLINE, 0);
1195 test_exec("abc", 0, REG_OK, 0, 3, END);
1196 test_exec("abc", REG_NOTBOL, REG_NOMATCH);
1197 test_exec("abc", REG_NOTEOL, REG_NOMATCH);
1198 test_exec("abc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH);
1199 test_exec("\nabc\n", 0, REG_OK, 1, 4, END);
1200 test_exec("defabc\n", 0, REG_NOMATCH);
1201 test_exec("\nabcdef", 0, REG_NOMATCH);
1202 test_exec("abcdef", 0, REG_NOMATCH);
1203 test_exec("abcdef", REG_NOTBOL, REG_NOMATCH);
1204 test_exec("defabc", 0, REG_NOMATCH);
1205 test_exec("defabc", REG_NOTEOL, REG_NOMATCH);
1206 test_exec("abc\ndef", 0, REG_OK, 0, 3, END);
1207 test_exec("abc\ndef", REG_NOTBOL, REG_NOMATCH);
1208 test_exec("abc\ndef", REG_NOTEOL, 0, 0, 3, END);
1209 test_exec("abc\ndef", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH);
1210 test_exec("def\nabc", 0, REG_OK, 4, 7, END);
1211 test_exec("def\nabc", REG_NOTBOL, 0, 4, 7, END);
1212 test_exec("def\nabc", REG_NOTEOL, REG_NOMATCH);
1213 test_exec("def\nabc", REG_NOTBOL | REG_NOTEOL, REG_NOMATCH);
1214
1215 /* With BRE syntax, `^' has a special meaning only at the beginning of the
1216 RE or the beginning of a parenthesized subexpression. */
1217 test_comp("a\\{0,1\\}^bc", 0, 0);
1218 test_exec("bc", 0, REG_NOMATCH);
1219 test_exec("^bc", 0, REG_OK, 0, 3, END);
1220 test_exec("abc", 0, REG_NOMATCH);
1221 test_exec("a^bc", 0, REG_OK, 0, 4, END);
1222 test_comp("a\\{0,1\\}\\(^bc\\)", 0, 0);
1223 test_exec("bc", 0, REG_OK, 0, 2, 0, 2, END);
1224 test_exec("^bc", 0, REG_NOMATCH);
1225 test_exec("abc", 0, REG_NOMATCH);
1226 test_exec("a^bc", 0, REG_NOMATCH);
1227 test_comp("(^a", 0, 0);
1228 test_exec("(^a", 0, REG_OK, 0, 3, END);
1229
1230 /* With BRE syntax, `$' has a special meaning only at the end of the
1231 RE or the end of a parenthesized subexpression. */
1232 test_comp("ab$c\\{0,1\\}", 0, 0);
1233 test_exec("ab", 0, REG_NOMATCH);
1234 test_exec("ab$", 0, REG_OK, 0, 3, END);
1235 test_exec("abc", 0, REG_NOMATCH);
1236 test_exec("ab$c", 0, REG_OK, 0, 4, END);
1237 test_comp("\\(ab$\\)c\\{0,1\\}", 0, 0);
1238 test_exec("ab", 0, REG_OK, 0, 2, 0, 2, END);
1239 test_exec("ab$", 0, REG_NOMATCH);
1240 test_exec("abc", 0, REG_NOMATCH);
1241 test_exec("ab$c", 0, REG_NOMATCH);
1242 test_comp("a$)", 0, 0);
1243 test_exec("a$)", 0, REG_OK, 0, 3, END);
1244
1245 /* Miscellaneous tests for `^' and `$'. */
1246 test_comp("foo^$", REG_EXTENDED, 0);
1247 test_exec("foo", 0, REG_NOMATCH);
1248 test_comp("x$\n^y", REG_EXTENDED | REG_NEWLINE, 0);
1249 test_exec("foo\nybarx\nyes\n", 0, REG_OK, 8, 11, END);
1250 test_comp("^$", 0, 0);
1251 test_exec("x", 0, REG_NOMATCH);
1252 test_exec("", 0, REG_OK, 0, 0, END);
1253 test_exec("\n", 0, REG_NOMATCH);
1254 test_comp("^$", REG_NEWLINE, 0);
1255 test_exec("x", 0, REG_NOMATCH);
1256 test_exec("", 0, REG_OK, 0, 0, END);
1257 test_exec("\n", 0, REG_OK, 0, 0, END);
1258
1259 /* REG_NEWLINE causes `.' not to match newlines. */
1260 test_comp(".*", 0, 0);
1261 test_exec("ab\ncd", 0, REG_OK, 0, 5, END);
1262 test_comp(".*", REG_NEWLINE, 0);
1263 test_exec("ab\ncd", 0, REG_OK, 0, 2, END);
1264
1265 /*
1266 * Tests for nonstandard syntax extensions.
1267 */
1268
1269 /* Zero width assertions. */
1270 test_comp("\\<x", REG_EXTENDED, 0);
1271 test_exec("aax xaa", 0, REG_OK, 4, 5, END);
1272 test_exec("xaa", 0, REG_OK, 0, 1, END);
1273 test_comp("x\\>", REG_EXTENDED, 0);
1274 test_exec("axx xaa", 0, REG_OK, 2, 3, END);
1275 test_exec("aax", 0, REG_OK, 2, 3, END);
1276 test_comp("\\bx", REG_EXTENDED, 0);
1277 test_exec("axx xaa", 0, REG_OK, 4, 5, END);
1278 test_exec("aax", 0, REG_NOMATCH);
1279 test_exec("xax", 0, REG_OK, 0, 1, END);
1280 test_comp("x\\b", REG_EXTENDED, 0);
1281 test_exec("axx xaa", 0, REG_OK, 2, 3, END);
1282 test_exec("aax", 0, REG_OK, 2, 3, END);
1283 test_exec("xaa", 0, REG_NOMATCH);
1284 test_comp("\\Bx", REG_EXTENDED, 0);
1285 test_exec("aax xxa", 0, REG_OK, 2, 3, END);
1286 test_comp("\\Bx\\b", REG_EXTENDED, 0);
1287 test_exec("aax xxx", 0, REG_OK, 2, 3, END);
1288 test_comp("\\<.", REG_EXTENDED, 0);
1289 test_exec(";xaa", 0, REG_OK, 1, 2, END);
1290
1291 /* Shorthands for character classes. */
1292 test_comp("\\w+", REG_EXTENDED, 0);
1293 test_exec(",.(a23_Nt-�o)", 0, REG_OK, 3, 9, END);
1294 test_comp("\\d+", REG_EXTENDED, 0);
1295 test_exec("uR120_4=v4", 0, REG_OK, 2, 5, END);
1296 test_comp("\\D+", REG_EXTENDED, 0);
1297 test_exec("120d_=vA4s", 0, REG_OK, 3, 8, END);
1298
1299 /* Quoted special characters. */
1300 test_comp("\\t", REG_EXTENDED, 0);
1301 test_comp("\\e", REG_EXTENDED, 0);
1302
1303 /* Test the \x1B and \x{263a} extensions for specifying 8 bit and wide
1304 characters in hexadecimal. */
1305 test_comp("\\x41", REG_EXTENDED, 0);
1306 test_exec("ABC", 0, REG_OK, 0, 1, END);
1307 test_comp("\\x5", REG_EXTENDED, 0);
1308 test_exec("\005", 0, REG_OK, 0, 1, END);
1309 test_comp("\\x5r", REG_EXTENDED, 0);
1310 test_exec("\005r", 0, REG_OK, 0, 2, END);
1311 test_comp("\\x", REG_EXTENDED, 0);
1312 test_nexec("\000", 1, 0, REG_OK, 0, 1, END);
1313 test_comp("\\xr", REG_EXTENDED, 0);
1314 test_nexec("\000r", 2, 0, REG_OK, 0, 2, END);
1315 test_comp("\\x{41}", REG_EXTENDED, 0);
1316 test_exec("ABC", 0, REG_OK, 0, 1, END);
1317 test_comp("\\x{5}", REG_EXTENDED, 0);
1318 test_exec("\005", 0, REG_OK, 0, 1, END);
1319 test_comp("\\x{5}r", REG_EXTENDED, 0);
1320 test_exec("\005r", 0, REG_OK, 0, 2, END);
1321 test_comp("\\x{}", REG_EXTENDED, 0);
1322 test_nexec("\000", 1, 0, REG_OK, 0, 1, END);
1323 test_comp("\\x{}r", REG_EXTENDED, 0);
1324 test_nexec("\000r", 2, 0, REG_OK, 0, 2, END);
1325
1326 /* Tests for (?inrU-inrU) and (?inrU-inrU:) */
1327 test_comp("foo(?i)bar", REG_EXTENDED, 0);
1328 test_exec("fooBaR", 0, REG_OK, 0, 6, END);
1329 test_comp("foo(?i)bar|zap", REG_EXTENDED, 0);
1330 test_exec("fooBaR", 0, REG_OK, 0, 6, END);
1331 test_exec("foozap", 0, REG_OK, 0, 6, END);
1332 test_exec("foozAp", 0, REG_OK, 0, 6, END);
1333 test_exec("zap", 0, REG_NOMATCH);
1334 test_comp("foo(?-i:zap)zot", REG_EXTENDED | REG_ICASE, 0);
1335 test_exec("FoOzapZOt", 0, REG_OK, 0, 9, END);
1336 test_exec("FoOzApZOt", 0, REG_NOMATCH);
1337 test_comp("foo(?i:bar|zap)", REG_EXTENDED, 0);
1338 test_exec("foozap", 0, REG_OK, 0, 6, END);
1339 test_exec("foobar", 0, REG_OK, 0, 6, END);
1340 test_exec("foobAr", 0, REG_OK, 0, 6, END);
1341 test_exec("fooZaP", 0, REG_OK, 0, 6, END);
1342 test_comp("foo(?U:o*)(o*)", REG_EXTENDED, 0);
1343 test_exec("foooo", 0, REG_OK, 0, 5, 3, 5, END);
1344
1345 /* Test comment syntax. */
1346 test_comp("foo(?# This here is a comment. )bar", REG_EXTENDED, 0);
1347 test_exec("foobar", 0, REG_OK, 0, 6, END);
1348
1349 /* Tests for \Q and \E. */
1350 test_comp("\\((\\Q)?:\\<[^$\\E)", REG_EXTENDED, 0);
1351 test_exec("()?:\\<[^$", 0, REG_OK, 0, 9, 1, 9, END);
1352 test_comp("\\Qabc\\E.*", REG_EXTENDED, 0);
1353 test_exec("abcdef", 0, REG_OK, 0, 6, END);
1354 test_comp("\\Qabc\\E.*|foo", REG_EXTENDED, 0);
1355 test_exec("parabc123wxyz", 0, REG_OK, 3, 13, END);
1356 test_exec("fooabc123wxyz", 0, REG_OK, 0, 3, END);
1357
1358 /*
1359 * Test bounded repetitions.
1360 */
1361
1362 test_comp("a{0,0}", REG_EXTENDED, REG_OK);
1363 test_exec("aaa", 0, REG_OK, 0, 0, END);
1364 test_comp("a{0,1}", REG_EXTENDED, REG_OK);
1365 test_exec("aaa", 0, REG_OK, 0, 1, END);
1366 test_comp("a{1,1}", REG_EXTENDED, REG_OK);
1367 test_exec("aaa", 0, REG_OK, 0, 1, END);
1368 test_comp("a{1,3}", REG_EXTENDED, REG_OK);
1369 test_exec("xaaaaa", 0, REG_OK, 1, 4, END);
1370 test_comp("a{0,3}", REG_EXTENDED, REG_OK);
1371 test_exec("aaaaa", 0, REG_OK, 0, 3, END);
1372 test_comp("a{0,}", REG_EXTENDED, REG_OK);
1373 test_exec("", 0, REG_OK, 0, 0, END);
1374 test_exec("a", 0, REG_OK, 0, 1, END);
1375 test_exec("aa", 0, REG_OK, 0, 2, END);
1376 test_exec("aaa", 0, REG_OK, 0, 3, END);
1377 test_comp("a{1,}", REG_EXTENDED, REG_OK);
1378 test_exec("", 0, REG_NOMATCH);
1379 test_exec("a", 0, REG_OK, 0, 1, END);
1380 test_exec("aa", 0, REG_OK, 0, 2, END);
1381 test_exec("aaa", 0, REG_OK, 0, 3, END);
1382 test_comp("a{2,}", REG_EXTENDED, REG_OK);
1383 test_exec("", 0, REG_NOMATCH);
1384 test_exec("a", 0, REG_NOMATCH);
1385 test_exec("aa", 0, REG_OK, 0, 2, END);
1386 test_exec("aaa", 0, REG_OK, 0, 3, END);
1387 test_comp("a{3,}", REG_EXTENDED, REG_OK);
1388 test_exec("", 0, REG_NOMATCH);
1389 test_exec("a", 0, REG_NOMATCH);
1390 test_exec("aa", 0, REG_NOMATCH);
1391 test_exec("aaa", 0, REG_OK, 0, 3, END);
1392 test_exec("aaaa", 0, REG_OK, 0, 4, END);
1393 test_exec("aaaaa", 0, REG_OK, 0, 5, END);
1394 test_exec("aaaaaa", 0, REG_OK, 0, 6, END);
1395 test_exec("aaaaaaa", 0, REG_OK, 0, 7, END);
1396
1397 test_comp("a{5,10}", REG_EXTENDED, REG_OK);
1398 test_comp("a{6,6}", REG_EXTENDED, REG_OK);
1399 test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END);
1400 test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END);
1401 test_exec("xxaaaaa", 0, REG_NOMATCH);
1402 test_comp("a{5,6}", REG_EXTENDED, REG_OK);
1403 test_exec("aaaaaaaaaaaa", 0, REG_OK, 0, 6, END);
1404 test_exec("xxaaaaaaaaaaaa", 0, REG_OK, 2, 8, END);
1405 test_exec("xxaaaaa", 0, REG_OK, 2, 7, END);
1406 test_exec("xxaaaa", 0, REG_NOMATCH);
1407
1408 /* Trickier ones... */
1409 test_comp("([ab]{5,10})*b", REG_EXTENDED, REG_OK);
1410 test_exec("bbbbbabaaaaab", 0, REG_OK, 0, 13, 5, 12, END);
1411 test_exec("bbbbbbaaaaab", 0, REG_OK, 0, 12, 5, 11, END);
1412 test_exec("bbbbbbaaaab", 0, REG_OK, 0, 11, 0, 10, END);
1413 test_exec("bbbbbbaaab", 0, REG_OK, 0, 10, 0, 9, END);
1414 test_exec("bbbbbbaab", 0, REG_OK, 0, 9, 0, 8, END);
1415 test_exec("bbbbbbab", 0, REG_OK, 0, 8, 0, 7, END);
1416
1417 test_comp("([ab]*)(ab[ab]{5,10})ba", REG_EXTENDED, REG_OK);
1418 test_exec("abbabbbabaabbbbbbbbbbbbbabaaaabab", 0, REG_OK,
1419 0, 10, 0, 0, 0, 8, END);
1420 test_exec("abbabbbabaabbbbbbbbbbbbabaaaaabab", 0, REG_OK,
1421 0, 32, 0, 23, 23, 30, END);
1422 test_exec("abbabbbabaabbbbbbbbbbbbabaaaabab", 0, REG_OK,
1423 0, 24, 0, 10, 10, 22, END);
1424 test_exec("abbabbbabaabbbbbbbbbbbba", 0, REG_OK,
1425 0, 24, 0, 10, 10, 22, END);
1426
1427 /* Test repeating something that has submatches inside. */
1428 test_comp("(a){0,5}", REG_EXTENDED, 0);
1429 test_exec("", 0, REG_OK, 0, 0, -1, -1, END);
1430 test_exec("a", 0, REG_OK, 0, 1, 0, 1, END);
1431 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END);
1432 test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END);
1433 test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END);
1434 test_exec("aaaaa", 0, REG_OK, 0, 5, 4, 5, END);
1435 test_exec("aaaaaa", 0, REG_OK, 0, 5, 4, 5, END);
1436
1437 test_comp("(a){2,3}", REG_EXTENDED, 0);
1438 test_exec("", 0, REG_NOMATCH);
1439 test_exec("a", 0, REG_NOMATCH);
1440 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, END);
1441 test_exec("aaa", 0, REG_OK, 0, 3, 2, 3, END);
1442 test_exec("aaaa", 0, REG_OK, 0, 3, 2, 3, END);
1443
1444 test_comp("\\(a\\)\\{4\\}", 0, 0);
1445 test_exec("aaaa", 0, REG_OK, 0, 4, 3, 4, END);
1446
1447 test_comp("\\(a*\\)\\{2\\}", 0, 0);
1448 test_exec("a", 0, REG_OK, 0, 1, 1, 1, END);
1449
1450 test_comp("((..)|(.)){2}", REG_EXTENDED, 0);
1451 test_exec("aa", 0, REG_OK, 0, 2, 1, 2, -1, -1, 1, 2, END);
1452
1453 /* Nested repeats. */
1454 test_comp("(.){2}{3}", REG_EXTENDED, 0);
1455 test_exec("xxxxx", 0, REG_NOMATCH);
1456 test_exec("xxxxxx", 0, REG_OK, 0, 6, 5, 6, END);
1457 test_comp("(..){2}{3}", REG_EXTENDED, 0);
1458 test_exec("xxxxxxxxxxx", 0, REG_NOMATCH);
1459 test_exec("xxxxxxxxxxxx", 0, REG_OK, 0, 12, 10, 12, END);
1460 test_comp("((..){2}.){3}", REG_EXTENDED, 0);
1461 test_exec("xxxxxxxxxxxxxx", 0, REG_NOMATCH);
1462 test_exec("xxxxxxxxxxxxxxx", 0, REG_OK, 0, 15, 10, 15, 12, 14, END);
1463 test_comp("((..){1,2}.){3}", REG_EXTENDED, 0);
1464 test_exec("xxxxxxxx", 0, REG_NOMATCH);
1465 test_exec("xxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END);
1466 test_exec("xxxxxxxxxx", 0, REG_OK, 0, 9, 6, 9, 6, 8, END);
1467 test_exec("xxxxxxxxxxx", 0, REG_OK, 0, 11, 8, 11, 8, 10, END);
1468 test_comp("a{2}{2}x", REG_EXTENDED, 0);
1469 test_exec("", 0, REG_NOMATCH);
1470 test_exec("x", 0, REG_NOMATCH);
1471 test_exec("ax", 0, REG_NOMATCH);
1472 test_exec("aax", 0, REG_NOMATCH);
1473 test_exec("aaax", 0, REG_NOMATCH);
1474 test_exec("aaaax", 0, REG_OK, 0, 5, END);
1475 test_exec("aaaaax", 0, REG_OK, 1, 6, END);
1476 test_exec("aaaaaax", 0, REG_OK, 2, 7, END);
1477 test_exec("aaaaaaax", 0, REG_OK, 3, 8, END);
1478 test_exec("aaaaaaaax", 0, REG_OK, 4, 9, END);
1479
1480 /* Repeats with iterations inside. */
1481 test_comp("([a-z]+){2,5}", REG_EXTENDED, 0);
1482 test_exec("a\n", 0, REG_NOMATCH);
1483 test_exec("aa\n", 0, REG_OK, 0, 2, 1, 2, END);
1484
1485 /* Multiple repeats in one regexp. */
1486 test_comp("a{3}b{3}", REG_EXTENDED, 0);
1487 test_exec("aaabbb", 0, REG_OK, 0, 6, END);
1488 test_exec("aaabbbb", 0, REG_OK, 0, 6, END);
1489 test_exec("aaaabbb", 0, REG_OK, 1, 7, END);
1490 test_exec("aabbb", 0, REG_NOMATCH);
1491 test_exec("aaabb", 0, REG_NOMATCH);
1492
1493 /* Test that different types of repetitions work correctly when used
1494 in the same regexp. */
1495 test_comp("a{2}{2}xb+xc*xd?x", REG_EXTENDED, 0);
1496 test_exec("aaaaxbxcxdx", 0, REG_OK, 0, 11, END);
1497 test_exec("aaaxbxcxdx", 0, REG_NOMATCH);
1498 test_exec("aabxcxdx", 0, REG_NOMATCH);
1499 test_exec("aaaacxdx", 0, REG_NOMATCH);
1500 test_exec("aaaaxbdx", 0, REG_NOMATCH);
1501 test_comp("^!packet [0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3} [0-9]+",
1502 REG_EXTENDED, 0);
1503 test_exec("!packet 10.0.2.4 12765 ei voittoa", 0, REG_OK, 0, 22, END);
1504
1505 /*
1506 * Back referencing tests.
1507 */
1508 test_comp("([a-z]*) \\1", REG_EXTENDED, 0);
1509 test_exec("foobar foobar", 0, REG_OK, 0, 13, 0, 6, END);
1510
1511 /* Searching for a leftmost longest square (repeated string) */
1512 test_comp("(.*)\\1", REG_EXTENDED, 0);
1513 test_exec("foobarfoobar", 0, REG_OK, 0, 12, 0, 6, END);
1514
1515 test_comp("a(b)*c\\1", REG_EXTENDED, 0);
1516 test_exec("acb", 0, REG_OK, 0, 2, -1, -1, END);
1517 test_exec("abbcbbb", 0, REG_OK, 0, 5, 2, 3, END);
1518 test_exec("abbdbd", 0, REG_NOMATCH);
1519
1520 test_comp("([a-c]*)\\1", REG_EXTENDED, 0);
1521 test_exec("abcacdef", 0, REG_OK, 0, 0, 0, 0, END);
1522 test_exec("abcabcabcd", 0, REG_OK, 0, 6, 0, 3, END);
1523
1524 test_comp("\\(a*\\)*\\(x\\)\\(\\1\\)", 0, 0);
1525 test_exec("x", 0, REG_OK, 0, 1, 0, 0, 0, 1, 1, 1, END);
1526 #if KNOWN_BUG
1527 test_exec("ax", 0, REG_OK, 0, 2, 1, 1, 1, 2, 2, 2, END);
1528 #endif
1529
1530 test_comp("(a)\\1{1,2}", REG_EXTENDED, 0);
1531 test_exec("aabc", 0, REG_OK, 0, 2, 0, 1, END);
1532
1533 test_comp("((.*)\\1)+", REG_EXTENDED, 0);
1534 test_exec("aa", 0, REG_OK, 0, 2, 0, 2, 0, 1, END);
1535
1536 #if KNOWN_BUG
1537 test_comp("()(\\1\\1)*", REG_EXTENDED, 0);
1538 test_exec("", 0, REG_OK, 0, 0, 0, 0, 0, 0, END);
1539 #endif
1540
1541 /* Check that back references work with REG_NOSUB. */
1542 test_comp("(o)\\1", REG_EXTENDED | REG_NOSUB, 0);
1543 test_exec("foobar", 0, REG_OK, END);
1544 test_comp("(o)\\1", REG_EXTENDED, 0);
1545 test_exec("foobar", 0, REG_OK, 1, 3, 1, 2, END);
1546 test_comp("(o)\\1", REG_EXTENDED, 0);
1547 test_exec("fobar", 0, REG_NOMATCH);
1548
1549 test_comp("\\1foo", REG_EXTENDED, REG_ESUBREG);
1550 test_comp("\\1foo(bar)", REG_EXTENDED, 0);
1551
1552 /* Back reference with zero-width assertion. */
1553 test_comp("(.)\\1$", REG_EXTENDED, 0);
1554 test_exec("foox", 0, REG_NOMATCH);
1555 test_exec("foo", 0, REG_OK, 1, 3, 1, 2, END);
1556
1557 /* Back references together with {}. */
1558 test_comp("([0-9]{5})\\1", REG_EXTENDED, 0);
1559 test_exec("12345", 0, REG_NOMATCH);
1560 test_exec("1234512345", 0, REG_OK, 0, 10, 0, 5, END);
1561 test_comp("([0-9]{4})\\1", REG_EXTENDED, 0);
1562 test_exec("1234", 0, REG_NOMATCH);
1563 test_exec("12341234", 0, REG_OK, 0, 8, 0, 4, END);
1564
1565 /*
1566 * Test minimal repetitions (non-greedy repetitions)
1567 */
1568 avoid_eflags = REG_BACKTRACKING_MATCHER | REG_APPROX_MATCHER;
1569
1570 /* Basic tests. */
1571 test_comp(".*?", REG_EXTENDED, 0);
1572 test_exec("abcd", 0, REG_OK, 0, 0, END);
1573 test_comp(".+?", REG_EXTENDED, 0);
1574 test_exec("abcd", 0, REG_OK, 0, 1, END);
1575 test_comp(".??", REG_EXTENDED, 0);
1576 test_exec("abcd", 0, REG_OK, 0, 0, END);
1577 test_comp(".{2,5}?", REG_EXTENDED, 0);
1578 test_exec("abcd", 0, REG_OK, 0, 2, END);
1579
1580 /* More complicated. */
1581 test_comp("<b>(.*?)</b>", REG_EXTENDED, 0);
1582 test_exec("<b>text1</b><b>text2</b>", 0, REG_OK, 0, 12, 3, 8, END);
1583 test_comp("a(.*?)(foo|bar|zap)", REG_EXTENDED, 0);
1584 test_exec("hubba wooga-booga zabar gafoo wazap", 0, REG_OK,
1585 4, 23, 5, 20, 20, 23, END);
1586
1587 /* Test REG_UNGREEDY. */
1588 test_comp(".*", REG_EXTENDED | REG_UNGREEDY, 0);
1589 test_exec("abcd", 0, REG_OK, 0, 0, END);
1590 test_comp(".*?", REG_EXTENDED | REG_UNGREEDY, 0);
1591 test_exec("abcd", 0, REG_OK, 0, 4, END);
1592
1593 avoid_eflags = 0;
1594
1595
1596 /*
1597 * Error reporting tests.
1598 */
1599
1600 test_comp("\\", REG_EXTENDED, REG_EESCAPE);
1601 test_comp("\\\\", REG_EXTENDED, REG_OK);
1602 test_exec("\\", 0, REG_OK, 0, 1, END);
1603 test_comp("(", REG_EXTENDED, REG_EPAREN);
1604 test_comp("(aaa", REG_EXTENDED, REG_EPAREN);
1605 test_comp(")", REG_EXTENDED, REG_OK);
1606 test_exec(")", 0, REG_OK, 0, 1, END);
1607 test_comp("a{1", REG_EXTENDED, REG_EBRACE);
1608 test_comp("a{1,x}", REG_EXTENDED, REG_BADBR);
1609 test_comp("a{1x}", REG_EXTENDED, REG_BADBR);
1610 test_comp("a{1,0}", REG_EXTENDED, REG_BADBR);
1611 test_comp("a{x}", REG_EXTENDED, REG_BADBR);
1612 test_comp("a{}", REG_EXTENDED, REG_BADBR);
1613
1614
1615 test_comp("\\", 0, REG_EESCAPE);
1616 test_comp("\\(", 0, REG_EPAREN);
1617 test_comp("\\)", 0, REG_EPAREN);
1618 test_comp("a\\{1", 0, REG_EBRACE);
1619 test_comp("a\\{1,x\\}", 0, REG_BADBR);
1620 test_comp("a\\{1x\\}", 0, REG_BADBR);
1621 test_comp("a\\{1,0\\}", 0, REG_BADBR);
1622 test_comp("a\\{x\\}", 0, REG_BADBR);
1623 test_comp("a\\{\\}", 0, REG_BADBR);
1624
1625
1626
1627
1628 /*
1629 * Internationalization tests.
1630 */
1631
1632 /* This same test with the correct locale is below. */
1633 test_comp("��+", REG_EXTENDED, 0);
1634 test_exec("���ξޤϡ�����������������", 0, REG_OK, 10, 13, END);
1635
1636 #if !defined(WIN32) && !defined(__OpenBSD__)
1637 if (setlocale(LC_CTYPE, "en_US.ISO-8859-1") != NULL)
1638 {
1639 printf("\nTesting LC_CTYPE en_US.ISO-8859-1\n");
1640 test_comp("aBCdeFghiJKlmnoPQRstuvWXyZ���", REG_ICASE, 0);
1641 test_exec("abCDefGhiJKlmNoPqRStuVwXyz���", 0, REG_OK, 0, 29, END);
1642 }
1643
1644 #ifdef TRE_MULTIBYTE
1645 if (setlocale(LC_CTYPE, "ja_JP.eucjp") != NULL)
1646 {
1647 printf("\nTesting LC_CTYPE ja_JP.eucjp\n");
1648 /* I tried to make a test where implementations not aware of multibyte
1649 character sets will fail. I have no idea what the Japanese text here
1650 means; I took it from http://www.ipsec.co.jp/. */
1651 test_comp("��+", REG_EXTENDED, 0);
1652 test_exec("���ξޤϡ�����������������", 0, REG_OK, 10, 12, END);
1653
1654 test_comp("a", REG_EXTENDED, 0);
1655 test_nexec("foo\000bar", 7, 0, REG_OK, 5, 6, END);
1656 test_comp("c$", REG_EXTENDED, 0);
1657 test_exec("abc", 0, REG_OK, 2, 3, END);
1658 }
1659 #endif /* TRE_MULTIBYTE */
1660 #endif
1661
1662 tre_regfree(&reobj);
1663
1664 printf("\n");
1665 if (comp_errors || exec_errors)
1666 printf("%d (%d + %d) out of %d tests FAILED!\n",
1667 comp_errors + exec_errors, comp_errors, exec_errors,
1668 comp_tests + exec_tests);
1669 else
1670 printf("All %d tests passed.\n", comp_tests + exec_tests);
1671
1672
1673 #ifdef MALLOC_DEBUGGING
1674 if (xmalloc_dump_leaks())
1675 return 1;
1676 #endif /* MALLOC_DEBUGGING */
1677
1678 return comp_errors || exec_errors;
1679 }
1680
1681 /* EOF */
1682