xref: /netbsd-src/sys/external/bsd/sljit/dist/regex_src/regexMain.c (revision 06eb4e7bdb1e14f0c368bf8554cee763517c4736)
177d68377Salnsn /*
277d68377Salnsn  *    Stack-less Just-In-Time compiler
377d68377Salnsn  *
4*06eb4e7bSalnsn  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
577d68377Salnsn  *
677d68377Salnsn  * Redistribution and use in source and binary forms, with or without modification, are
777d68377Salnsn  * permitted provided that the following conditions are met:
877d68377Salnsn  *
977d68377Salnsn  *   1. Redistributions of source code must retain the above copyright notice, this list of
1077d68377Salnsn  *      conditions and the following disclaimer.
1177d68377Salnsn  *
1277d68377Salnsn  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
1377d68377Salnsn  *      of conditions and the following disclaimer in the documentation and/or other materials
1477d68377Salnsn  *      provided with the distribution.
1577d68377Salnsn  *
1677d68377Salnsn  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
1777d68377Salnsn  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1877d68377Salnsn  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
1977d68377Salnsn  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2077d68377Salnsn  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
2177d68377Salnsn  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
2277d68377Salnsn  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2377d68377Salnsn  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
2477d68377Salnsn  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2577d68377Salnsn  */
2677d68377Salnsn 
27e5292e6bSalnsn /* Must be the first one. Must not depend on any other include. */
28*06eb4e7bSalnsn #include "sljitLir.h"
2977d68377Salnsn #include "regexJIT.h"
3077d68377Salnsn 
3177d68377Salnsn #include <stdio.h>
3277d68377Salnsn 
33e5292e6bSalnsn #if defined _WIN32 || defined _WIN64
34e5292e6bSalnsn #define COLOR_RED
35e5292e6bSalnsn #define COLOR_GREEN
36e5292e6bSalnsn #define COLOR_ARCH
37e5292e6bSalnsn #define COLOR_DEFAULT
38e5292e6bSalnsn #else
39e5292e6bSalnsn #define COLOR_RED "\33[31m"
40e5292e6bSalnsn #define COLOR_GREEN "\33[32m"
41e5292e6bSalnsn #define COLOR_ARCH "\33[33m"
42e5292e6bSalnsn #define COLOR_DEFAULT "\33[0m"
43e5292e6bSalnsn #endif
44e5292e6bSalnsn 
4577d68377Salnsn #ifdef REGEX_USE_8BIT_CHARS
4677d68377Salnsn #define S(str)	str
4777d68377Salnsn #else
4877d68377Salnsn #define S(str)	L##str
4977d68377Salnsn #endif
5077d68377Salnsn 
5177d68377Salnsn #ifdef REGEX_MATCH_VERBOSE
/*
 * Debug helper: compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE,
 * feeds `string` to regex_continue_match_debug() and prints the values
 * reported by regex_get_result().
 *
 * Both arguments must be zero-terminated; their lengths are computed here.
 * Every failure is reported on stdout and the function simply returns.
 *
 * NOTE(review): the '%s' conversions assume regex_char_t is plain char
 * (REGEX_USE_8BIT_CHARS) -- confirm before using in a wide-character build.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	struct regex_machine *compiled;
	struct regex_match *state;
	regex_char_t *pattern_end;
	regex_char_t *string_end;
	int error;
	int begin, end, id;

	/* Locate the terminating zero to obtain the pattern length. */
	for (pattern_end = pattern; *pattern_end; pattern_end++)
		;

	printf("Start test '%s' matches to '%s'\n", pattern, string);
	compiled = regex_compile(pattern, pattern_end - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!compiled) {
		/* No error code was reported, yet no machine came back. */
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	state = regex_begin_match(compiled);
	if (!state) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(compiled);
		return;
	}

	/* Same length computation for the subject string. */
	for (string_end = string; *string_end; string_end++)
		;

	regex_continue_match_debug(state, string, string_end - string);

	begin = regex_get_result(state, &end, &id);
	printf("Math returns: %3d->%3d [%3d]\n", begin, end, id);

	/* Release the match state before the machine it was created from. */
	regex_free_match(state);
	regex_free_machine(compiled);
}
9577d68377Salnsn #endif
9677d68377Salnsn 
9777d68377Salnsn struct test_case {
9877d68377Salnsn 	int begin;	/* Expected begin. */
9977d68377Salnsn 	int end;	/* Expected end. */
10077d68377Salnsn 	int id;		/* Expected id. */
10177d68377Salnsn 	int finished;	/* -1 : don't care, 0 : false, 1 : true. */
10277d68377Salnsn 	int flags;	/* REGEX_MATCH_* */
10377d68377Salnsn 	const regex_char_t *pattern;	/* NULL : use the previous pattern. */
10477d68377Salnsn 	const regex_char_t *string;	/* NULL : end of tests. */
10577d68377Salnsn };
10677d68377Salnsn 
run_tests(struct test_case * test,int verbose,int silent)107e5292e6bSalnsn void run_tests(struct test_case* test, int verbose, int silent)
10877d68377Salnsn {
10977d68377Salnsn 	int error;
11077d68377Salnsn 	const regex_char_t *ptr;
11177d68377Salnsn 	struct regex_machine* machine = NULL;
11277d68377Salnsn 	struct regex_match* match;
11377d68377Salnsn 	int begin, end, id, finished;
11477d68377Salnsn 	int success = 0, fail = 0;
11577d68377Salnsn 
116e5292e6bSalnsn 	if (!verbose && !silent)
117e5292e6bSalnsn 		printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
118e5292e6bSalnsn 
11977d68377Salnsn 	for ( ; test->string ; test++) {
120e5292e6bSalnsn 		if (verbose)
12177d68377Salnsn 			printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
12277d68377Salnsn 		fail++;
12377d68377Salnsn 
12477d68377Salnsn 		if (test->pattern) {
12577d68377Salnsn 			if (machine)
12677d68377Salnsn 				regex_free_machine(machine);
12777d68377Salnsn 
12877d68377Salnsn 			ptr = test->pattern;
12977d68377Salnsn 			while (*ptr)
13077d68377Salnsn 				ptr++;
13177d68377Salnsn 
13277d68377Salnsn 			machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
13377d68377Salnsn 
13477d68377Salnsn 			if (error) {
135e5292e6bSalnsn 				if (!verbose)
136e5292e6bSalnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
13777d68377Salnsn 				printf("ABORT: Error %d\n", error);
13877d68377Salnsn 				return;
13977d68377Salnsn 			}
14077d68377Salnsn 			if (!machine) {
141e5292e6bSalnsn 				if (!verbose)
142e5292e6bSalnsn 					printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
14377d68377Salnsn 				printf("ABORT: machine must be exists. Report this bug, please\n");
14477d68377Salnsn 				return;
14577d68377Salnsn 			}
14677d68377Salnsn 		}
14777d68377Salnsn 		else if (test->flags != 0) {
148e5292e6bSalnsn 			if (!verbose)
149e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
15077d68377Salnsn 			printf("ABORT: flag must be 0 if no pattern\n");
15177d68377Salnsn 			return;
15277d68377Salnsn 		}
15377d68377Salnsn 
15477d68377Salnsn 		ptr = test->string;
15577d68377Salnsn 		while (*ptr)
15677d68377Salnsn 			ptr++;
15777d68377Salnsn 
15877d68377Salnsn 		match = regex_begin_match(machine);
15977d68377Salnsn #ifdef REGEX_MATCH_VERBOSE
16077d68377Salnsn 		if (!match) {
161e5292e6bSalnsn 			if (!verbose)
162e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
16377d68377Salnsn 			printf("ABORT: Not enough memory for matching\n");
16477d68377Salnsn 			regex_free_machine(machine);
16577d68377Salnsn 			return;
16677d68377Salnsn 		}
16777d68377Salnsn 		regex_continue_match_debug(match, test->string, ptr - test->string);
16877d68377Salnsn 		begin = regex_get_result(match, &end, &id);
16977d68377Salnsn 		finished = regex_is_match_finished(match);
17077d68377Salnsn 
17177d68377Salnsn 		if (begin != test->begin || end != test->end || id != test->id) {
172e5292e6bSalnsn 			if (!verbose)
173e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
17477d68377Salnsn 			printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
17577d68377Salnsn 			continue;
17677d68377Salnsn 		}
17777d68377Salnsn 		if (test->finished != -1 && test->finished != !!finished) {
178e5292e6bSalnsn 			if (!verbose)
179e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
18077d68377Salnsn 			printf("FAIL A: finish check\n");
18177d68377Salnsn 			continue;
18277d68377Salnsn 		}
18377d68377Salnsn #endif
18477d68377Salnsn 
18577d68377Salnsn 		regex_reset_match(match);
18677d68377Salnsn 		regex_continue_match(match, test->string, ptr - test->string);
18777d68377Salnsn 		begin = regex_get_result(match, &end, &id);
18877d68377Salnsn 		finished = regex_is_match_finished(match);
18977d68377Salnsn 		regex_free_match(match);
19077d68377Salnsn 
19177d68377Salnsn 		if (begin != test->begin || end != test->end || id != test->id) {
192e5292e6bSalnsn 			if (!verbose)
193e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
19477d68377Salnsn 			printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
19577d68377Salnsn 			continue;
19677d68377Salnsn 		}
19777d68377Salnsn 		if (test->finished != -1 && test->finished != !!finished) {
198e5292e6bSalnsn 			if (!verbose)
199e5292e6bSalnsn 				printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
20077d68377Salnsn 			printf("FAIL B: finish check\n");
20177d68377Salnsn 			continue;
20277d68377Salnsn 		}
20377d68377Salnsn 
204e5292e6bSalnsn 		if (verbose)
20577d68377Salnsn 			printf("SUCCESS\n");
20677d68377Salnsn 		fail--;
20777d68377Salnsn 		success++;
20877d68377Salnsn 	}
20977d68377Salnsn 	if (machine)
21077d68377Salnsn 		regex_free_machine(machine);
21177d68377Salnsn 
21299e10043Salnsn 	printf("REGEX tests: ");
21377d68377Salnsn 	if (fail == 0)
21499e10043Salnsn 		printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
21577d68377Salnsn 	else
21699e10043Salnsn 		printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
21799e10043Salnsn 	printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
21877d68377Salnsn }
21977d68377Salnsn 
22077d68377Salnsn /* Testing. */
22177d68377Salnsn 
22277d68377Salnsn static struct test_case tests[] = {
22377d68377Salnsn { 3, 7, 0, -1, 0,
22477d68377Salnsn   S("text"), S("is textile") },
22577d68377Salnsn { 0, 10, 0, -1, 0,
22677d68377Salnsn   S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
22777d68377Salnsn { -1, 0, 0, 1, 0,
22877d68377Salnsn   S("^a+"), S("saaaa") },
22977d68377Salnsn { 3, 6, 0, 0, 0,
23077d68377Salnsn   S("(a+|b+)$"), S("saabbb") },
23177d68377Salnsn { 1, 6, 0, 0, 0,
23277d68377Salnsn   S("(a+|b+){,2}$"), S("saabbb") },
23377d68377Salnsn { 1, 6, 0, 1, 0,
23477d68377Salnsn   S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
23577d68377Salnsn { 1, 6, 0, 1, 0,
23677d68377Salnsn   S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
23777d68377Salnsn { -1, 0, 0, 1, 0,
23877d68377Salnsn   S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
23977d68377Salnsn { 0, 3, 1, -1, 0,
24077d68377Salnsn   S("^(ab{001!})?c"), S("abcde") },
24177d68377Salnsn { 1, 15, 2, -1, 0,
24277d68377Salnsn   S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
24377d68377Salnsn { 2, 9, 0, -1, 0,
24477d68377Salnsn   NULL, S("cacaadaadaa") },
24577d68377Salnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
24677d68377Salnsn   S("(((ab?c|d{1})))"), S("ad") },
24777d68377Salnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
24877d68377Salnsn   S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
24977d68377Salnsn { 1, 6, 0, 0, REGEX_MATCH_END,
25077d68377Salnsn   S("(a+(bb|cc?)?){4,}"), S("maaaac") },
25177d68377Salnsn { 3, 12, 1, 0, REGEX_MATCH_END,
25277d68377Salnsn   S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
25377d68377Salnsn { 1, 2, 3, -1, 0,
25477d68377Salnsn   S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
25577d68377Salnsn { 1, 4, 2, 1, 0,
25677d68377Salnsn   NULL, S("sxxaxxxaccacca") },
25777d68377Salnsn { 0, 2, 1, 1, 0,
25877d68377Salnsn   NULL, S("ccdcdcdddddcdccccd") },
25977d68377Salnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
26077d68377Salnsn   S("^a+a+a+"), S("aaaaaa") },
26177d68377Salnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
26277d68377Salnsn   S("a+a+a+"), S("bbaaaaaa") },
26377d68377Salnsn { 1, 4, 0, 1, 0,
26477d68377Salnsn   S("baa|a+"), S("sbaaaaaa") },
26577d68377Salnsn { 0, 6, 0, 1, 0,
26677d68377Salnsn   S("baaa|baa|sbaaaa"), S("sbaaaaa") },
26777d68377Salnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
26877d68377Salnsn   S("baaa|baa"), S("xbaaa") },
26977d68377Salnsn { 0, 0, 3, 1, 0,
27077d68377Salnsn   S("{3!}"), S("xx") },
27177d68377Salnsn { 0, 0, 1, 1, 0,
27277d68377Salnsn   S("{1!}(a{2!})*"), S("xx") },
27377d68377Salnsn { 0, 2, 2, 0, 0,
27477d68377Salnsn   NULL, S("aa") },
27577d68377Salnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
27677d68377Salnsn   S("{1!}(a{2!})*"), S("aaxx") },
27777d68377Salnsn { 4, 12, 0, 1, 0,
27877d68377Salnsn   S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
27977d68377Salnsn { 3, 7, 1, 1, 0,
28077d68377Salnsn   S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
28177d68377Salnsn { 0, 8, 3, 0, 0,
28277d68377Salnsn   S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
28377d68377Salnsn { 0, 9, 0, 0, 0,
28477d68377Salnsn   NULL, S("x-y[-][]x") },
28577d68377Salnsn { 2, 8, 0, 1, 0,
28677d68377Salnsn   S("<(/{1!})?[^>]+>"), S("  <html></html> ") },
28777d68377Salnsn { 2, 9, 1, 1, 0,
28877d68377Salnsn   NULL, S("  </html><html> ") },
28977d68377Salnsn { 2, 9, 0, 1, 0,
29077d68377Salnsn   S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
29177d68377Salnsn { 1, 4, 0, 1, 0,
29277d68377Salnsn   S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
29377d68377Salnsn { 4, 11, 0, 0, 0,
29477d68377Salnsn   NULL, S("ssaymmaa_ccl") },
29577d68377Salnsn { 3, 6, 0, 1, REGEX_NEWLINE,
29677d68377Salnsn   S(".a[^k]"), S("\na\nxa\ns") },
29777d68377Salnsn { 0, 2, 0, 1, REGEX_NEWLINE,
29877d68377Salnsn   S("^a+"), S("aa\n") },
29977d68377Salnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
30077d68377Salnsn   NULL, S("\naaa\n") },
30177d68377Salnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
30277d68377Salnsn   NULL, S("\n\na\n") },
30377d68377Salnsn { 0, 2, 0, 1, REGEX_NEWLINE,
30477d68377Salnsn   S("a+$"), S("aa\n") },
30577d68377Salnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
30677d68377Salnsn   NULL, S("aaa") },
30777d68377Salnsn { 2, 4, 1, 1, REGEX_NEWLINE,
30877d68377Salnsn   S("^a(a{1!})*$"), S("\n\naa\n\n") },
30977d68377Salnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
31077d68377Salnsn   NULL, S("a") },
31177d68377Salnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
31277d68377Salnsn   NULL, S("ab\nba") },
31377d68377Salnsn { -1, 0, 0, 0, 0,
31477d68377Salnsn   NULL, NULL }
31577d68377Salnsn };
31677d68377Salnsn 
main(int argc,char * argv[])31777d68377Salnsn int main(int argc, char* argv[])
31877d68377Salnsn {
319e5292e6bSalnsn 	int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
320e5292e6bSalnsn 
32177d68377Salnsn /*	verbose_test("a((b)((c|d))|)c|"); */
32277d68377Salnsn /*	verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
32377d68377Salnsn /*	verbose_test("{3!}({3})({0!}){,"); */
32477d68377Salnsn /*	verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
32577d68377Salnsn /*	verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
32677d68377Salnsn /*	verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
32777d68377Salnsn 
328e5292e6bSalnsn 	run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
329*06eb4e7bSalnsn 
330*06eb4e7bSalnsn 	sljit_free_unused_memory_exec();
331*06eb4e7bSalnsn 
33277d68377Salnsn 	return 0;
33377d68377Salnsn }
334