177d68377Salnsn /*
277d68377Salnsn * Stack-less Just-In-Time compiler
377d68377Salnsn *
4*06eb4e7bSalnsn * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
577d68377Salnsn *
677d68377Salnsn * Redistribution and use in source and binary forms, with or without modification, are
777d68377Salnsn * permitted provided that the following conditions are met:
877d68377Salnsn *
977d68377Salnsn * 1. Redistributions of source code must retain the above copyright notice, this list of
1077d68377Salnsn * conditions and the following disclaimer.
1177d68377Salnsn *
1277d68377Salnsn * 2. Redistributions in binary form must reproduce the above copyright notice, this list
1377d68377Salnsn * of conditions and the following disclaimer in the documentation and/or other materials
1477d68377Salnsn * provided with the distribution.
1577d68377Salnsn *
1677d68377Salnsn * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
1777d68377Salnsn * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
1877d68377Salnsn * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
1977d68377Salnsn * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2077d68377Salnsn * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
2177d68377Salnsn * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
2277d68377Salnsn * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2377d68377Salnsn * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
2477d68377Salnsn * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2577d68377Salnsn */
2677d68377Salnsn
27e5292e6bSalnsn /* Must be the first one. Must not depend on any other include. */
28*06eb4e7bSalnsn #include "sljitLir.h"
2977d68377Salnsn #include "regexJIT.h"
3077d68377Salnsn
3177d68377Salnsn #include <stdio.h>
3277d68377Salnsn
/*
 * Terminal colour sequences used when printing the test summary.
 * On Windows builds the macros expand to nothing, so the adjacent string
 * literals they are pasted between are emitted unchanged.
 */
#if !defined(_WIN32) && !defined(_WIN64)
#define COLOR_RED "\33[31m"
#define COLOR_GREEN "\33[32m"
#define COLOR_ARCH "\33[33m"
#define COLOR_DEFAULT "\33[0m"
#else
#define COLOR_RED
#define COLOR_GREEN
#define COLOR_ARCH
#define COLOR_DEFAULT
#endif
44e5292e6bSalnsn
/*
 * S("...") spells a string literal in the character width selected by
 * REGEX_USE_8BIT_CHARS: a plain literal when it is defined, otherwise a
 * wide (L"...") literal.
 */
#ifndef REGEX_USE_8BIT_CHARS
#define S(str) L##str
#else
#define S(str) str
#endif
5077d68377Salnsn
#ifdef REGEX_MATCH_VERBOSE
/*
 * Compiles `pattern` with REGEX_MATCH_VERBOSE | REGEX_NEWLINE, runs it once
 * over `string` through regex_continue_match_debug() and prints the
 * (begin, end, id) triple reported by regex_get_result().
 *
 * Both arguments are zero-terminated regex_char_t strings; their lengths are
 * computed here. Nothing is returned: every outcome, including compile
 * errors and allocation failure, is reported on stdout.
 *
 * Only available when REGEX_MATCH_VERBOSE is defined.
 */
void verbose_test(regex_char_t *pattern, regex_char_t *string)
{
	/* Initialised so the check below never reads an indeterminate value. */
	int error = 0;
	regex_char_t *ptr;
	struct regex_machine* machine;
	struct regex_match* match;
	int begin, end, id;

	/* Find the terminator to get the pattern length. */
	ptr = pattern;
	while (*ptr)
		ptr++;

	/*
	 * regex_char_t is a wide character type unless REGEX_USE_8BIT_CHARS is
	 * defined (see S()), so the conversion specifier must follow it.
	 */
#ifdef REGEX_USE_8BIT_CHARS
	printf("Start test '%s' matches to '%s'\n", pattern, string);
#else
	printf("Start test '%ls' matches to '%ls'\n", pattern, string);
#endif
	machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error);

	if (error) {
		printf("WARNING: Error %d\n", error);
		return;
	}
	if (!machine) {
		printf("ERROR: machine must be exists. Report this bug, please\n");
		return;
	}

	match = regex_begin_match(machine);
	if (!match) {
		printf("WARNING: Not enough memory for matching\n");
		regex_free_machine(machine);
		return;
	}

	/* Same terminator scan for the subject string. */
	ptr = string;
	while (*ptr)
		ptr++;

	regex_continue_match_debug(match, string, ptr - string);

	begin = regex_get_result(match, &end, &id);
	printf("Match returns: %3d->%3d [%3d]\n", begin, end, id);

	regex_free_match(match);
	regex_free_machine(machine);
}
#endif
9677d68377Salnsn
9777d68377Salnsn struct test_case {
9877d68377Salnsn int begin; /* Expected begin. */
9977d68377Salnsn int end; /* Expected end. */
10077d68377Salnsn int id; /* Expected id. */
10177d68377Salnsn int finished; /* -1 : don't care, 0 : false, 1 : true. */
10277d68377Salnsn int flags; /* REGEX_MATCH_* */
10377d68377Salnsn const regex_char_t *pattern; /* NULL : use the previous pattern. */
10477d68377Salnsn const regex_char_t *string; /* NULL : end of tests. */
10577d68377Salnsn };
10677d68377Salnsn
run_tests(struct test_case * test,int verbose,int silent)107e5292e6bSalnsn void run_tests(struct test_case* test, int verbose, int silent)
10877d68377Salnsn {
10977d68377Salnsn int error;
11077d68377Salnsn const regex_char_t *ptr;
11177d68377Salnsn struct regex_machine* machine = NULL;
11277d68377Salnsn struct regex_match* match;
11377d68377Salnsn int begin, end, id, finished;
11477d68377Salnsn int success = 0, fail = 0;
11577d68377Salnsn
116e5292e6bSalnsn if (!verbose && !silent)
117e5292e6bSalnsn printf("Pass -v to enable verbose, -s to disable this hint.\n\n");
118e5292e6bSalnsn
11977d68377Salnsn for ( ; test->string ; test++) {
120e5292e6bSalnsn if (verbose)
12177d68377Salnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
12277d68377Salnsn fail++;
12377d68377Salnsn
12477d68377Salnsn if (test->pattern) {
12577d68377Salnsn if (machine)
12677d68377Salnsn regex_free_machine(machine);
12777d68377Salnsn
12877d68377Salnsn ptr = test->pattern;
12977d68377Salnsn while (*ptr)
13077d68377Salnsn ptr++;
13177d68377Salnsn
13277d68377Salnsn machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error);
13377d68377Salnsn
13477d68377Salnsn if (error) {
135e5292e6bSalnsn if (!verbose)
136e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
13777d68377Salnsn printf("ABORT: Error %d\n", error);
13877d68377Salnsn return;
13977d68377Salnsn }
14077d68377Salnsn if (!machine) {
141e5292e6bSalnsn if (!verbose)
142e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
14377d68377Salnsn printf("ABORT: machine must be exists. Report this bug, please\n");
14477d68377Salnsn return;
14577d68377Salnsn }
14677d68377Salnsn }
14777d68377Salnsn else if (test->flags != 0) {
148e5292e6bSalnsn if (!verbose)
149e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
15077d68377Salnsn printf("ABORT: flag must be 0 if no pattern\n");
15177d68377Salnsn return;
15277d68377Salnsn }
15377d68377Salnsn
15477d68377Salnsn ptr = test->string;
15577d68377Salnsn while (*ptr)
15677d68377Salnsn ptr++;
15777d68377Salnsn
15877d68377Salnsn match = regex_begin_match(machine);
15977d68377Salnsn #ifdef REGEX_MATCH_VERBOSE
16077d68377Salnsn if (!match) {
161e5292e6bSalnsn if (!verbose)
162e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
16377d68377Salnsn printf("ABORT: Not enough memory for matching\n");
16477d68377Salnsn regex_free_machine(machine);
16577d68377Salnsn return;
16677d68377Salnsn }
16777d68377Salnsn regex_continue_match_debug(match, test->string, ptr - test->string);
16877d68377Salnsn begin = regex_get_result(match, &end, &id);
16977d68377Salnsn finished = regex_is_match_finished(match);
17077d68377Salnsn
17177d68377Salnsn if (begin != test->begin || end != test->end || id != test->id) {
172e5292e6bSalnsn if (!verbose)
173e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
17477d68377Salnsn printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
17577d68377Salnsn continue;
17677d68377Salnsn }
17777d68377Salnsn if (test->finished != -1 && test->finished != !!finished) {
178e5292e6bSalnsn if (!verbose)
179e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
18077d68377Salnsn printf("FAIL A: finish check\n");
18177d68377Salnsn continue;
18277d68377Salnsn }
18377d68377Salnsn #endif
18477d68377Salnsn
18577d68377Salnsn regex_reset_match(match);
18677d68377Salnsn regex_continue_match(match, test->string, ptr - test->string);
18777d68377Salnsn begin = regex_get_result(match, &end, &id);
18877d68377Salnsn finished = regex_is_match_finished(match);
18977d68377Salnsn regex_free_match(match);
19077d68377Salnsn
19177d68377Salnsn if (begin != test->begin || end != test->end || id != test->id) {
192e5292e6bSalnsn if (!verbose)
193e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
19477d68377Salnsn printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id);
19577d68377Salnsn continue;
19677d68377Salnsn }
19777d68377Salnsn if (test->finished != -1 && test->finished != !!finished) {
198e5292e6bSalnsn if (!verbose)
199e5292e6bSalnsn printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string);
20077d68377Salnsn printf("FAIL B: finish check\n");
20177d68377Salnsn continue;
20277d68377Salnsn }
20377d68377Salnsn
204e5292e6bSalnsn if (verbose)
20577d68377Salnsn printf("SUCCESS\n");
20677d68377Salnsn fail--;
20777d68377Salnsn success++;
20877d68377Salnsn }
20977d68377Salnsn if (machine)
21077d68377Salnsn regex_free_machine(machine);
21177d68377Salnsn
21299e10043Salnsn printf("REGEX tests: ");
21377d68377Salnsn if (fail == 0)
21499e10043Salnsn printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " ");
21577d68377Salnsn else
21699e10043Salnsn printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail));
21799e10043Salnsn printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name());
21877d68377Salnsn }
21977d68377Salnsn
22077d68377Salnsn /* Testing. */
22177d68377Salnsn
22277d68377Salnsn static struct test_case tests[] = {
22377d68377Salnsn { 3, 7, 0, -1, 0,
22477d68377Salnsn S("text"), S("is textile") },
22577d68377Salnsn { 0, 10, 0, -1, 0,
22677d68377Salnsn S("^(ab|c)*?d+(es)?"), S("abccabddeses") },
22777d68377Salnsn { -1, 0, 0, 1, 0,
22877d68377Salnsn S("^a+"), S("saaaa") },
22977d68377Salnsn { 3, 6, 0, 0, 0,
23077d68377Salnsn S("(a+|b+)$"), S("saabbb") },
23177d68377Salnsn { 1, 6, 0, 0, 0,
23277d68377Salnsn S("(a+|b+){,2}$"), S("saabbb") },
23377d68377Salnsn { 1, 6, 0, 1, 0,
23477d68377Salnsn S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") },
23577d68377Salnsn { 1, 6, 0, 1, 0,
23677d68377Salnsn S("(abc(aa)?|(cab+){2})"), S("cabcaa") },
23777d68377Salnsn { -1, 0, 0, 1, 0,
23877d68377Salnsn S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") },
23977d68377Salnsn { 0, 3, 1, -1, 0,
24077d68377Salnsn S("^(ab{001!})?c"), S("abcde") },
24177d68377Salnsn { 1, 15, 2, -1, 0,
24277d68377Salnsn S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") },
24377d68377Salnsn { 2, 9, 0, -1, 0,
24477d68377Salnsn NULL, S("cacaadaadaa") },
24577d68377Salnsn { -1, 0, 0, -1, REGEX_MATCH_BEGIN,
24677d68377Salnsn S("(((ab?c|d{1})))"), S("ad") },
24777d68377Salnsn { 0, 9, 3, -1, REGEX_MATCH_BEGIN,
24877d68377Salnsn S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") },
24977d68377Salnsn { 1, 6, 0, 0, REGEX_MATCH_END,
25077d68377Salnsn S("(a+(bb|cc?)?){4,}"), S("maaaac") },
25177d68377Salnsn { 3, 12, 1, 0, REGEX_MATCH_END,
25277d68377Salnsn S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") },
25377d68377Salnsn { 1, 2, 3, -1, 0,
25477d68377Salnsn S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") },
25577d68377Salnsn { 1, 4, 2, 1, 0,
25677d68377Salnsn NULL, S("sxxaxxxaccacca") },
25777d68377Salnsn { 0, 2, 1, 1, 0,
25877d68377Salnsn NULL, S("ccdcdcdddddcdccccd") },
25977d68377Salnsn { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY,
26077d68377Salnsn S("^a+a+a+"), S("aaaaaa") },
26177d68377Salnsn { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY,
26277d68377Salnsn S("a+a+a+"), S("bbaaaaaa") },
26377d68377Salnsn { 1, 4, 0, 1, 0,
26477d68377Salnsn S("baa|a+"), S("sbaaaaaa") },
26577d68377Salnsn { 0, 6, 0, 1, 0,
26677d68377Salnsn S("baaa|baa|sbaaaa"), S("sbaaaaa") },
26777d68377Salnsn { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY,
26877d68377Salnsn S("baaa|baa"), S("xbaaa") },
26977d68377Salnsn { 0, 0, 3, 1, 0,
27077d68377Salnsn S("{3!}"), S("xx") },
27177d68377Salnsn { 0, 0, 1, 1, 0,
27277d68377Salnsn S("{1!}(a{2!})*"), S("xx") },
27377d68377Salnsn { 0, 2, 2, 0, 0,
27477d68377Salnsn NULL, S("aa") },
27577d68377Salnsn { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY,
27677d68377Salnsn S("{1!}(a{2!})*"), S("aaxx") },
27777d68377Salnsn { 4, 12, 0, 1, 0,
27877d68377Salnsn S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") },
27977d68377Salnsn { 3, 7, 1, 1, 0,
28077d68377Salnsn S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") },
28177d68377Salnsn { 0, 8, 3, 0, 0,
28277d68377Salnsn S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") },
28377d68377Salnsn { 0, 9, 0, 0, 0,
28477d68377Salnsn NULL, S("x-y[-][]x") },
28577d68377Salnsn { 2, 8, 0, 1, 0,
28677d68377Salnsn S("<(/{1!})?[^>]+>"), S(" <html></html> ") },
28777d68377Salnsn { 2, 9, 1, 1, 0,
28877d68377Salnsn NULL, S(" </html><html> ") },
28977d68377Salnsn { 2, 9, 0, 1, 0,
29077d68377Salnsn S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") },
29177d68377Salnsn { 1, 4, 0, 1, 0,
29277d68377Salnsn S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") },
29377d68377Salnsn { 4, 11, 0, 0, 0,
29477d68377Salnsn NULL, S("ssaymmaa_ccl") },
29577d68377Salnsn { 3, 6, 0, 1, REGEX_NEWLINE,
29677d68377Salnsn S(".a[^k]"), S("\na\nxa\ns") },
29777d68377Salnsn { 0, 2, 0, 1, REGEX_NEWLINE,
29877d68377Salnsn S("^a+"), S("aa\n") },
29977d68377Salnsn { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */,
30077d68377Salnsn NULL, S("\naaa\n") },
30177d68377Salnsn { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */,
30277d68377Salnsn NULL, S("\n\na\n") },
30377d68377Salnsn { 0, 2, 0, 1, REGEX_NEWLINE,
30477d68377Salnsn S("a+$"), S("aa\n") },
30577d68377Salnsn { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */,
30677d68377Salnsn NULL, S("aaa") },
30777d68377Salnsn { 2, 4, 1, 1, REGEX_NEWLINE,
30877d68377Salnsn S("^a(a{1!})*$"), S("\n\naa\n\n") },
30977d68377Salnsn { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */,
31077d68377Salnsn NULL, S("a") },
31177d68377Salnsn { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */,
31277d68377Salnsn NULL, S("ab\nba") },
31377d68377Salnsn { -1, 0, 0, 0, 0,
31477d68377Salnsn NULL, NULL }
31577d68377Salnsn };
31677d68377Salnsn
main(int argc,char * argv[])31777d68377Salnsn int main(int argc, char* argv[])
31877d68377Salnsn {
319e5292e6bSalnsn int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0');
320e5292e6bSalnsn
32177d68377Salnsn /* verbose_test("a((b)((c|d))|)c|"); */
32277d68377Salnsn /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */
32377d68377Salnsn /* verbose_test("{3!}({3})({0!}){,"); */
32477d68377Salnsn /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */
32577d68377Salnsn /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */
32677d68377Salnsn /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */
32777d68377Salnsn
328e5292e6bSalnsn run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's');
329*06eb4e7bSalnsn
330*06eb4e7bSalnsn sljit_free_unused_memory_exec();
331*06eb4e7bSalnsn
33277d68377Salnsn return 0;
33377d68377Salnsn }
334