1 /* 2 * Stack-less Just-In-Time compiler 3 * 4 * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without modification, are 7 * permitted provided that the following conditions are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright notice, this list of 10 * conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list 13 * of conditions and the following disclaimer in the documentation and/or other materials 14 * provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "regexJIT.h" 28 29 #include <stdio.h> 30 31 #ifdef REGEX_USE_8BIT_CHARS 32 #define S(str) str 33 #else 34 #define S(str) L##str 35 #endif 36 37 #ifdef REGEX_MATCH_VERBOSE 38 void verbose_test(regex_char_t *pattern, regex_char_t *string) 39 { 40 int error; 41 regex_char_t *ptr; 42 struct regex_machine* machine; 43 struct regex_match* match; 44 int begin, end, id; 45 46 ptr = pattern; 47 while (*ptr) 48 ptr++; 49 50 printf("Start test '%s' matches to '%s'\n", pattern, string); 51 machine = regex_compile(pattern, ptr - pattern, REGEX_MATCH_VERBOSE | REGEX_NEWLINE, &error); 52 53 if (error) { 54 printf("WARNING: Error %d\n", error); 55 return; 56 } 57 if (!machine) { 58 printf("ERROR: machine must be exists. Report this bug, please\n"); 59 return; 60 } 61 62 match = regex_begin_match(machine); 63 if (!match) { 64 printf("WARNING: Not enough memory for matching\n"); 65 regex_free_machine(machine); 66 return; 67 } 68 69 ptr = string; 70 while (*ptr) 71 ptr++; 72 73 regex_continue_match_debug(match, string, ptr - string); 74 75 begin = regex_get_result(match, &end, &id); 76 printf("Math returns: %3d->%3d [%3d]\n", begin, end, id); 77 78 regex_free_match(match); 79 regex_free_machine(machine); 80 } 81 #endif 82 83 struct test_case { 84 int begin; /* Expected begin. */ 85 int end; /* Expected end. */ 86 int id; /* Expected id. */ 87 int finished; /* -1 : don't care, 0 : false, 1 : true. */ 88 int flags; /* REGEX_MATCH_* */ 89 const regex_char_t *pattern; /* NULL : use the previous pattern. */ 90 const regex_char_t *string; /* NULL : end of tests. */ 91 }; 92 93 void run_tests(struct test_case* test) 94 { 95 int error; 96 const regex_char_t *ptr; 97 struct regex_machine* machine = NULL; 98 struct regex_match* match; 99 int begin, end, id, finished; 100 int success = 0, fail = 0; 101 102 for ( ; test->string ; test++) { 103 printf("test: '%s' '%s': ", test->pattern ? test->pattern : "[[REUSE]]", test->string); 104 fail++; 105 106 if (test->pattern) { 107 if (machine) 108 regex_free_machine(machine); 109 110 ptr = test->pattern; 111 while (*ptr) 112 ptr++; 113 114 machine = regex_compile(test->pattern, ptr - test->pattern, test->flags, &error); 115 116 if (error) { 117 printf("ABORT: Error %d\n", error); 118 return; 119 } 120 if (!machine) { 121 printf("ABORT: machine must be exists. Report this bug, please\n"); 122 return; 123 } 124 } 125 else if (test->flags != 0) { 126 printf("ABORT: flag must be 0 if no pattern\n"); 127 return; 128 } 129 130 ptr = test->string; 131 while (*ptr) 132 ptr++; 133 134 match = regex_begin_match(machine); 135 #ifdef REGEX_MATCH_VERBOSE 136 if (!match) { 137 printf("ABORT: Not enough memory for matching\n"); 138 regex_free_machine(machine); 139 return; 140 } 141 regex_continue_match_debug(match, test->string, ptr - test->string); 142 begin = regex_get_result(match, &end, &id); 143 finished = regex_is_match_finished(match); 144 145 if (begin != test->begin || end != test->end || id != test->id) { 146 printf("FAIL A: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 147 continue; 148 } 149 if (test->finished != -1 && test->finished != !!finished) { 150 printf("FAIL A: finish check\n"); 151 continue; 152 } 153 #endif 154 155 regex_reset_match(match); 156 regex_continue_match(match, test->string, ptr - test->string); 157 begin = regex_get_result(match, &end, &id); 158 finished = regex_is_match_finished(match); 159 regex_free_match(match); 160 161 if (begin != test->begin || end != test->end || id != test->id) { 162 printf("FAIL B: begin: %d != %d || end: %d != %d || id: %d != %d\n", test->begin, begin, test->end, end, test->id, id); 163 continue; 164 } 165 if (test->finished != -1 && test->finished != !!finished) { 166 printf("FAIL B: finish check\n"); 167 continue; 168 } 169 170 printf("SUCCESS\n"); 171 fail--; 172 success++; 173 } 174 if (machine) 175 regex_free_machine(machine); 176 177 printf("On %s: ", regex_get_platform_name()); 178 if (fail == 0) 179 printf("All tests are passed!\n"); 180 else 181 printf("Successful test ratio: %d%%.\n", success * 100 / (success + fail)); 182 } 183 184 /* Testing. */ 185 186 static struct test_case tests[] = { 187 { 3, 7, 0, -1, 0, 188 S("text"), S("is textile") }, 189 { 0, 10, 0, -1, 0, 190 S("^(ab|c)*?d+(es)?"), S("abccabddeses") }, 191 { -1, 0, 0, 1, 0, 192 S("^a+"), S("saaaa") }, 193 { 3, 6, 0, 0, 0, 194 S("(a+|b+)$"), S("saabbb") }, 195 { 1, 6, 0, 0, 0, 196 S("(a+|b+){,2}$"), S("saabbb") }, 197 { 1, 6, 0, 1, 0, 198 S("(abcde|bc)(a+*|(b|c){2}+){0}"), S("babcdeaaaaaaaa") }, 199 { 1, 6, 0, 1, 0, 200 S("(abc(aa)?|(cab+){2})"), S("cabcaa") }, 201 { -1, 0, 0, 1, 0, 202 S("^(abc(aa)?|(cab+){2})$"), S("cabcaa") }, 203 { 0, 3, 1, -1, 0, 204 S("^(ab{001!})?c"), S("abcde") }, 205 { 1, 15, 2, -1, 0, 206 S("(c?(a|bb{2!}){2,3}()+d){2,3}"), S("ccabbadbbadcaadcaad") }, 207 { 2, 9, 0, -1, 0, 208 NULL, S("cacaadaadaa") }, 209 { -1, 0, 0, -1, REGEX_MATCH_BEGIN, 210 S("(((ab?c|d{1})))"), S("ad") }, 211 { 0, 9, 3, -1, REGEX_MATCH_BEGIN, 212 S("^((a{1!}|b{2!}|c{3!}){3,6}d)+"), S("cabadbacddaa") }, 213 { 1, 6, 0, 0, REGEX_MATCH_END, 214 S("(a+(bb|cc?)?){4,}"), S("maaaac") }, 215 { 3, 12, 1, 0, REGEX_MATCH_END, 216 S("(x+x+{02,03}(x+|{1!})){03,06}$"), S("aaaxxxxxxxxx") }, 217 { 1, 2, 3, -1, 0, 218 S("((c{1!})?|x+{2!}|{3!})(a|c)"), S("scs") }, 219 { 1, 4, 2, 1, 0, 220 NULL, S("sxxaxxxaccacca") }, 221 { 0, 2, 1, 1, 0, 222 NULL, S("ccdcdcdddddcdccccd") }, 223 { 0, 3, 0, -1, REGEX_MATCH_NON_GREEDY, 224 S("^a+a+a+"), S("aaaaaa") }, 225 { 2, 5, 0, -1, REGEX_MATCH_NON_GREEDY, 226 S("a+a+a+"), S("bbaaaaaa") }, 227 { 1, 4, 0, 1, 0, 228 S("baa|a+"), S("sbaaaaaa") }, 229 { 0, 6, 0, 1, 0, 230 S("baaa|baa|sbaaaa"), S("sbaaaaa") }, 231 { 1, 4, 0, 1, REGEX_MATCH_NON_GREEDY, 232 S("baaa|baa"), S("xbaaa") }, 233 { 0, 0, 3, 1, 0, 234 S("{3!}"), S("xx") }, 235 { 0, 0, 1, 1, 0, 236 S("{1!}(a{2!})*"), S("xx") }, 237 { 0, 2, 2, 0, 0, 238 NULL, S("aa") }, 239 { 0, 0, 1, 1, REGEX_MATCH_NON_GREEDY, 240 S("{1!}(a{2!})*"), S("aaxx") }, 241 { 4, 12, 0, 1, 0, 242 S("(.[]-]){3}[^]-]{2}"), S("ax-xs-[][]lmn") }, 243 { 3, 7, 1, 1, 0, 244 S("([ABC]|[abc]{1!}){3,5}"), S("AbSAabbx") }, 245 { 0, 8, 3, 0, 0, 246 S("^[x\\-y[\\]]+([[\\]]{3!})*$"), S("x-y[-][]") }, 247 { 0, 9, 0, 0, 0, 248 NULL, S("x-y[-][]x") }, 249 { 2, 8, 0, 1, 0, 250 S("<(/{1!})?[^>]+>"), S(" <html></html> ") }, 251 { 2, 9, 1, 1, 0, 252 NULL, S(" </html><html> ") }, 253 { 2, 9, 0, 1, 0, 254 S("[A-Z0-9a-z]+"), S("[(Iden9aA)]") }, 255 { 1, 4, 0, 1, 0, 256 S("[^x-y]+[a-c_]{2,3}"), S("x_a_y") }, 257 { 4, 11, 0, 0, 0, 258 NULL, S("ssaymmaa_ccl") }, 259 { 3, 6, 0, 1, REGEX_NEWLINE, 260 S(".a[^k]"), S("\na\nxa\ns") }, 261 { 0, 2, 0, 1, REGEX_NEWLINE, 262 S("^a+"), S("aa\n") }, 263 { 1, 4, 0, 1, 0 /* =REGEX_NEWLINE */, 264 NULL, S("\naaa\n") }, 265 { 2, 3, 0, 1, 0 /* =REGEX_NEWLINE */, 266 NULL, S("\n\na\n") }, 267 { 0, 2, 0, 1, REGEX_NEWLINE, 268 S("a+$"), S("aa\n") }, 269 { 0, 3, 0, 0, 0 /* =REGEX_NEWLINE */, 270 NULL, S("aaa") }, 271 { 2, 4, 1, 1, REGEX_NEWLINE, 272 S("^a(a{1!})*$"), S("\n\naa\n\n") }, 273 { 0, 1, 0, 0, 0 /* REGEX_NEWLINE */, 274 NULL, S("a") }, 275 { -1, 0, 0, -1, 0 /* REGEX_NEWLINE */, 276 NULL, S("ab\nba") }, 277 { -1, 0, 0, 0, 0, 278 NULL, NULL } 279 }; 280 281 int main(int argc, char* argv[]) 282 { 283 /* verbose_test("a((b)((c|d))|)c|"); */ 284 /* verbose_test("Xa{009,0010}Xb{,7}Xc{5,}Xd{,}Xe{1,}Xf{,1}X"); */ 285 /* verbose_test("{3!}({3})({0!}){,"); */ 286 /* verbose_test("(s(ab){2,4}t){2,}*S(a*(b)(c()|)d+){3,4}{0,0}*M"); */ 287 /* verbose_test("^a({2!})*b+(a|{1!}b)+d$"); */ 288 /* verbose_test("((a|b|c)*(xy)+)+", "asbcxyxy"); */ 289 290 run_tests(tests); 291 return 0; 292 } 293 294