/*
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

#include <string.h>

/*
 * Check the syntax of the regular expression 'c' without compiling it,
 * treating bytes as in the 'C' locale.
 *
 * The scanner recognises backslash escapes and \1-\9 back references,
 * ( ) groups, | alternation, the ^ and $ anchors, the * + ? repetition
 * operators, {m}, {m,} and {m,n} bounds (values up to 255), and bracket
 * expressions including [. .] collating elements, [= =] equivalence
 * classes and [: :] character classes.
 *
 * Returns the number of '(' groups seen when the expression is accepted,
 * or -1 when 'c' is NULL, empty or malformed.
 */
int
isc_regex_validate(const char *c) {
	enum {
		none, parse_bracket, parse_bound,
		parse_ce, parse_ec, parse_cc
	} state = none;
	/* Well known character classes. */
	static const char *const cc[] = {
		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
		":print:", ":xdigit:"
	};
	int seen_comma = 0;		/* ',' seen inside the current bound */
	int seen_high = 0;		/* upper limit digits seen in the bound */
	int seen_char = 0;		/* bracket expression has a member */
	int seen_ec = 0;		/* equivalence class is non-empty */
	int seen_ce = 0;		/* collating element is non-empty */
	int have_atom = 0;		/* something repeatable precedes us */
	int group = 0;			/* currently open '(' groups */
	int range = 0;			/* 2: just saw '-', 1: range end consumed */
	int sub = 0;			/* total '(' groups seen so far */
	int empty_ok = 0;		/* "()" is allowed until a '|' is seen */
	int neg = 0;			/* bracket expression starts with '^' */
	int was_multiple = 0;		/* previous item cannot be repeated */
	unsigned int low = 0;		/* bound lower limit */
	unsigned int high = 0;		/* bound upper limit */
	const char *ccname = NULL;	/* leading ':' of the class name */
	int range_start = 0;		/* byte value (0-255) of last member */

	if (c == NULL || *c == 0)
		return (-1);

	while (*c != 0) {
		switch (state) {
		case none:
			switch (*c) {
			case '\\':	/* make literal */
				++c;
				switch (*c) {
				case '1': case '2': case '3':
				case '4': case '5': case '6':
				case '7': case '8': case '9':
					/* Back reference to a group not yet seen. */
					if ((*c - '0') > sub)
						return (-1);
					have_atom = 1;
					was_multiple = 0;
					break;
				case 0:
					/* Backslash at end of string. */
					return (-1);
				default:
					goto literal;
				}
				++c;
				break;
			case '[':	/* bracket start */
				++c;
				neg = 0;
				was_multiple = 0;
				seen_char = 0;
				state = parse_bracket;
				break;
			case '{':	/* bound start */
				switch (c[1]) {
				case '0': case '1': case '2': case '3':
				case '4': case '5': case '6': case '7':
				case '8': case '9':
					if (!have_atom)
						return (-1);
					if (was_multiple)
						return (-1);
					seen_comma = 0;
					seen_high = 0;
					low = high = 0;
					state = parse_bound;
					break;
				default:
					/* '{' not followed by a digit is literal. */
					goto literal;
				}
				++c;
				have_atom = 1;
				was_multiple = 1;
				break;
			case '}':
				goto literal;
			case '(':	/* group start */
				have_atom = 0;
				was_multiple = 0;
				empty_ok = 1;
				++group;
				++sub;
				++c;
				break;
			case ')':	/* group end */
				if (group && !have_atom && !empty_ok)
					return (-1);
				have_atom = 1;
				was_multiple = 0;
				/* An unmatched ')' does not drive the count negative. */
				if (group != 0)
					--group;
				++c;
				break;
			case '|':	/* alternative separator */
				if (!have_atom)
					return (-1);
				have_atom = 0;
				empty_ok = 0;
				was_multiple = 0;
				++c;
				break;
			case '^':
			case '$':
				/* Anchors count as atoms but may not be repeated. */
				have_atom = 1;
				was_multiple = 1;
				++c;
				break;
			case '+':
			case '*':
			case '?':
				if (was_multiple)
					return (-1);
				if (!have_atom)
					return (-1);
				have_atom = 1;
				was_multiple = 1;
				++c;
				break;
			case '.':
			default:
			literal:
				have_atom = 1;
				was_multiple = 0;
				++c;
				break;
			}
			break;
		case parse_bound:
			switch (*c) {
			case '0': case '1': case '2': case '3': case '4':
			case '5': case '6': case '7': case '8': case '9':
				if (!seen_comma) {
					low = low * 10 + (unsigned int)(*c - '0');
					if (low > 255)
						return (-1);
				} else {
					seen_high = 1;
					high = high * 10 + (unsigned int)(*c - '0');
					if (high > 255)
						return (-1);
				}
				++c;
				break;
			case ',':
				if (seen_comma)
					return (-1);
				seen_comma = 1;
				++c;
				break;
			case '}':
				if (seen_high && low > high)
					return (-1);
				seen_comma = 0;
				state = none;
				++c;
				break;
			default:
				/* Anything else, including a nested '{'. */
				return (-1);
			}
			break;
		case parse_bracket:
			switch (*c) {
			case '^':
				/* Only a leading '^' negates; later ones are members. */
				if (seen_char || neg)
					goto inside;
				neg = 1;
				++c;
				break;
			case '-':
				if (range == 2)
					goto inside;
				if (!seen_char)
					goto inside;
				/* A range end cannot start another range. */
				if (range == 1)
					return (-1);
				range = 2;
				++c;
				break;
			case '[':
				++c;
				switch (*c) {
				case '.':	/* collating element */
					if (range != 0)
						--range;
					++c;
					state = parse_ce;
					seen_ce = 0;
					break;
				case '=':	/* equivalence class */
					if (range == 2)
						return (-1);
					++c;
					state = parse_ec;
					seen_ec = 0;
					break;
				case ':':	/* character class */
					if (range == 2)
						return (-1);
					ccname = c;
					++c;
					state = parse_cc;
					break;
				default:
					/*
					 * Plain '[': c already points at the
					 * next character, which is examined
					 * on the next pass.
					 */
					break;
				}
				seen_char = 1;
				break;
			case ']':
				if (!c[1] && !seen_char)
					return (-1);
				/* A leading ']' is an ordinary member. */
				if (!seen_char)
					goto inside;
				++c;
				range = 0;
				have_atom = 1;
				state = none;
				break;
			default:
			inside:
				seen_char = 1;
				/* Reject descending ranges such as [z-a]. */
				if (range == 2 && (*c & 0xff) < range_start)
					return (-1);
				if (range != 0)
					--range;
				range_start = *c & 0xff;
				++c;
				break;
			}
			break;
		case parse_ce:
			/*
			 * range_start is set to 256 once the element holds more
			 * than one character; no byte value can reach that, so
			 * a range starting there always fails the range check.
			 */
			switch (*c) {
			case '.':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ce)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				default:
					/* The '.' was part of the element itself. */
					if (seen_ce)
						range_start = 256;
					else
						range_start = '.';
					seen_ce = 1;
					break;
				}
				break;
			default:
				if (seen_ce)
					range_start = 256;
				else
					/*
					 * Mask to 0-255 like the 'inside' path
					 * so bytes >= 0x80 compare the same
					 * whether plain char is signed or not.
					 */
					range_start = *c & 0xff;
				seen_ce = 1;
				++c;
				break;
			}
			break;
		case parse_ec:
			switch (*c) {
			case '=':
				++c;
				switch (*c) {
				case ']':
					if (!seen_ec)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				default:
					/* The '=' was part of the class itself. */
					seen_ec = 1;
					break;
				}
				break;
			default:
				seen_ec = 1;
				++c;
				break;
			}
			break;
		case parse_cc:
			switch (*c) {
			case ':':
				++c;
				switch (*c) {
				case ']': {
					/* Name spans both colons, e.g. ":alpha:". */
					size_t namelen = (size_t)(c - ccname);
					size_t i;
					int found = 0;

					for (i = 0; i < sizeof(cc) / sizeof(cc[0]); i++) {
						if (strlen(cc[i]) == namelen &&
						    strncmp(cc[i], ccname, namelen) == 0) {
							found = 1;
							break;
						}
					}
					if (!found)
						return (-1);
					++c;
					state = parse_bracket;
					break;
				}
				default:
					break;
				}
				break;
			default:
				++c;
				break;
			}
			break;
		}
	}
	if (group != 0)
		return (-1);
	if (state != none)
		return (-1);
	if (!have_atom)
		return (-1);
	return (sub);
}