1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, are 7 * permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this list of 10 * conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 * of conditions and the following disclaimer in the documentation and/or other materials 14 * provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* Must be the first one. Must not depend on any other include. */ 28 #include "regexJIT.h" 29 30 #include <stdio.h> 31 32 #if defined _WIN32 || defined _WIN64 33 #define COLOR_RED 34 #define COLOR_GREEN 35 #define COLOR_ARCH 36 #define COLOR_DEFAULT 37 #else 38 #define COLOR_RED "\33[31m" 39 #define COLOR_GREEN "\33[32m" 40 #define COLOR_ARCH "\33[33m" 41 #define COLOR_DEFAULT "\33[0m" 42 #endif 43 44 #ifdef REGEX_USE_8BIT_CHARS 45 #define S(str) str 46 #else 47 #define S(str) L##str 48 #endif 49 50 #ifdef REGEX_MATCH_VERBOSE 51 void verbose_test(regex_char_t *pattern, regex_char_t *string) 52 { 53 int error; 54 regex_char_t *ptr; 55 struct regex_machine* machine; 56 struct regex_match* match; 57 int begin, end, id; 58 59 ptr = pattern; 60 while (*ptr) 61 ptr++; 62 63 printf("Start test '%s' matches to '%s'\n", pattern, string); 64 machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error); 65 66 if (error) { 67 printf("WARNING: Error %d\n", error); 68 return; 69 } 70 if (!machine) { 71 printf("ERROR: machine must be exists. Report this bug, please\n"); 72 return; 73 } 74 75 match = regex_begin_match(machine); 76 if (!match) { 77 printf("WARNING: Not enough memory for matching\n"); 78 regex_free_machine(machine); 79 return; 80 } 81 82 ptr = string; 83 while (*ptr) 84 ptr++; 85 86 regex_continue_match_debug(match, string, ptr - string); 87 88 begin = regex_get_result(match, &end, &id); 89 printf("Math returns: %3d->%3d [%3d]\n", begin, end, id); 90 91 regex_free_match(match); 92 regex_free_machine(machine); 93 } 94 #endif 95 96 struct test_case { 97 int begin; /* Expected begin. */ 98 int end; /* Expected end. */ 99 int id; /* Expected id. */ 100 int finished; /* -1 : don't care, 0 : false, 1 : true. */ 101 int flags; /* REGEX_MATCH_* */ 102 const regex_char_t *pattern; /* NULL : use the previous pattern. */ 103 const regex_char_t *string; /* NULL : end of tests. */ 104 }; 105 106 void run_tests(struct test_case* test, int verbose, int silent) 107 { 108 int error; 109 const regex_char_t *ptr; 110 struct regex_machine* machine = NULL; 111 struct regex_match* match; 112 int begin, end, id, finished; 113 int success = 0, fail = 0; 114 115 if (!verbose && !silent) 116 printf("Pass -v to enable verbose, -s to disable this hint.\n\n"); 117 118 for ( ; test->string ; test++) { 119 if (verbose) 120 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 121 fail++; 122 123 if (test->pattern) { 124 if (machine) 125 regex_free_machine(machine); 126 127 ptr = test->pattern; 128 while (*ptr) 129 ptr++; 130 131 machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error); 132 133 if (error) { 134 if (!verbose) 135 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 136 printf("ABORT: Error %d\n", error); 137 return; 138 } 139 if (!machine) { 140 if (!verbose) 141 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 142 printf("ABORT: machine must be exists. Report this bug, please\n"); 143 return; 144 } 145 } 146 else if (test->flags != 0) { 147 if (!verbose) 148 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 149 printf("ABORT: flag must be 0 if no pattern\n"); 150 return; 151 } 152 153 ptr = test->string; 154 while (*ptr) 155 ptr++; 156 157 match = regex_begin_match(machine); 158 #ifdef REGEX_MATCH_VERBOSE 159 if (!match) { 160 if (!verbose) 161 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 162 printf("ABORT: Not enough memory for matching\n"); 163 regex_free_machine(machine); 164 return; 165 } 166 regex_continue_match_debug(match, test->string, ptr - test->string); 167 begin = regex_get_result(match, &end, &id); 168 finished = regex_is_match_finished(match); 169 170 if (begin != test->begin || end != test->end || id != test->id) { 171 if (!verbose) 172 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 173 printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 174 continue; 175 } 176 if (test->finished != -1 && test->finished != !!finished) { 177 if (!verbose) 178 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 179 printf("FAIL A: finish check\n"); 180 continue; 181 } 182 #endif 183 184 regex_reset_match(match); 185 regex_continue_match(match, test->string, ptr - test->string); 186 begin = regex_get_result(match, &end, &id); 187 finished = regex_is_match_finished(match); 188 regex_free_match(match); 189 190 if (begin != test->begin || end != test->end || id != test->id) { 191 if (!verbose) 192 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 193 printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 194 continue; 195 } 196 if (test->finished != -1 && test->finished != !!finished) { 197 if (!verbose) 198 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 199 printf("FAIL B: finish check\n"); 200 continue; 201 } 202 203 if (verbose) 204 printf("SUCCESS\n"); 205 fail--; 206 success++; 207 } 208 if (machine) 209 regex_free_machine(machine); 210 211 printf("REGEX tests: "); 212 if (fail == 0) 213 printf("all tests are " COLOR_GREEN "PASSED" COLOR_DEFAULT " "); 214 else 215 printf(COLOR_RED "%d" COLOR_DEFAULT " (" COLOR_RED "%d%%" COLOR_DEFAULT ") tests are failed ", fail, fail * 100 / (success + fail)); 216 printf("on " COLOR_ARCH "%s" COLOR_DEFAULT "\n", regex_get_platform_name()); 217 } 218 219 /* Testing. */ 220 221 static struct test_case tests[] = { 222 { 3, 7, 0, -1, 0, 223 S("text"), S("is textile") }, 224 { 0, 10, 0, -1, 0, 225 S("^(ab|c)*?d+(es)?"), S("abccabddeses") }, 226 { -1, 0, 0, 1, 0, 227 S("^a+"), S("saaaa") }, 228 { 3, 6, 0, 0, 0, 229 S("(a+|b+)$"), S("saabbb") }, 230 { 1, 6, 0, 0, 0, 231 S("(a+|b+){,2}$"), S("saabbb") }, 232 { 1, 6, 0, 1, 0, 233 S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") }, 234 { 1, 6, 0, 1, 0, 235 S("(abc(aa)?|(cab+){2})"), S("cabcaa") }, 236 { -1, 0, 0, 1, 0, 237 S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") }, 238 { 0, 3, 1, -1, 0, 239 S("^(ab{001!})?c"), S("abcde") }, 240 { 1, 15, 2, -1, 0, 241 S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") }, 242 { 2, 9, 0, -1, 0, 243 NULL, S("cacaadaadaa") }, 244 { -1, 0, 0, -1, REGEX_MATCH_BEGIN, 245 S("(((ab?c|d{1})))"), S("ad") }, 246 { 0, 9, 3, -1, REGEX_MATCH_BEGIN, 247 S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") }, 248 { 1, 6, 0, 0, REGEX_MATCH_END, 249 S("(a+(bb|cc?)?){4,}"), S("maaaac") }, 250 { 3, 12, 1, 0, REGEX_MATCH_END, 251 S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") }, 252 { 1, 2, 3, -1, 0, 253 S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") }, 254 { 1, 4, 2, 1, 0, 255 NULL, S("sxxaxxxaccacca") }, 256 { 0, 2, 1, 1, 0, 257 NULL, S("ccdcdcdddddcdccccd") }, 258 { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY, 259 S("^a+a+a+"), S("aaaaaa") }, 260 { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY, 261 S("a+a+a+"), S("bbaaaaaa") }, 262 { 1, 4, 0, 1, 0, 263 S("baa|a+"), S("sbaaaaaa") }, 264 { 0, 6, 0, 1, 0, 265 S("baaa|baa|sbaaaa"), S("sbaaaaa") }, 266 { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY, 267 S("baaa|baa"), S("xbaaa") }, 268 { 0, 0, 3, 1, 0, 269 S("{3!}"), S("xx") }, 270 { 0, 0, 1, 1, 0, 271 S("{1!}(a{2!})*"), S("xx") }, 272 { 0, 2, 2, 0, 0, 273 NULL, S("aa") }, 274 { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY, 275 S("{1!}(a{2!})*"), S("aaxx") }, 276 { 4, 12, 0, 1, 0, 277 S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") }, 278 { 3, 7, 1, 1, 0, 279 S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") }, 280 { 0, 8, 3, 0, 0, 281 S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") }, 282 { 0, 9, 0, 0, 0, 283 NULL, S("x-y[-][]x") }, 284 { 2, 8, 0, 1, 0, 285 S("<(/{1!})?[^>]+>"), S(" <html></html> ") }, 286 { 2, 9, 1, 1, 0, 287 NULL, S(" </html><html> ") }, 288 { 2, 9, 0, 1, 0, 289 S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") }, 290 { 1, 4, 0, 1, 0, 291 S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") }, 292 { 4, 11, 0, 0, 0, 293 NULL, S("ssaymmaa_ccl") }, 294 { 3, 6, 0, 1, REGEX_NEWLINE, 295 S(".a[^k]"), S("\na\nxa\ns") }, 296 { 0, 2, 0, 1, REGEX_NEWLINE, 297 S("^a+"), S("aa\n") }, 298 { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */, 299 NULL, S("\naaa\n") }, 300 { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */, 301 NULL, S("\n\na\n") }, 302 { 0, 2, 0, 1, REGEX_NEWLINE, 303 S("a+$"), S("aa\n") }, 304 { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */, 305 NULL, S("aaa") }, 306 { 2, 4, 1, 1, REGEX_NEWLINE, 307 S("^a(a{1!})*$"), S("\n\naa\n\n") }, 308 { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */, 309 NULL, S("a") }, 310 { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */, 311 NULL, S("ab\nba") }, 312 { -1, 0, 0, 0, 0, 313 NULL, NULL } 314 }; 315 316 int main(int argc, char* argv[]) 317 { 318 int has_arg = (argc >= 2 && argv[1][0] == '-' && argv[1][2] == '\0'); 319 320 /* verbose_test("a((b)((c|d))|)c|"); */ 321 /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */ 322 /* verbose_test("{3!}({3})({0!}){,"); */ 323 /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */ 324 /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */ 325 /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */ 326 327 run_tests(tests, has_arg && argv[1][1] == 'v', has_arg && argv[1][1] == 's'); 328 return 0; 329 } 330