//===-------------------------- regex.cpp ---------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
94684ddb6SLionel Sambuc
104684ddb6SLionel Sambuc #include "regex"
114684ddb6SLionel Sambuc #include "algorithm"
124684ddb6SLionel Sambuc #include "iterator"
134684ddb6SLionel Sambuc
144684ddb6SLionel Sambuc _LIBCPP_BEGIN_NAMESPACE_STD
154684ddb6SLionel Sambuc
164684ddb6SLionel Sambuc static
174684ddb6SLionel Sambuc const char*
make_error_type_string(regex_constants::error_type ecode)184684ddb6SLionel Sambuc make_error_type_string(regex_constants::error_type ecode)
194684ddb6SLionel Sambuc {
204684ddb6SLionel Sambuc switch (ecode)
214684ddb6SLionel Sambuc {
224684ddb6SLionel Sambuc case regex_constants::error_collate:
234684ddb6SLionel Sambuc return "The expression contained an invalid collating element name.";
244684ddb6SLionel Sambuc case regex_constants::error_ctype:
254684ddb6SLionel Sambuc return "The expression contained an invalid character class name.";
264684ddb6SLionel Sambuc case regex_constants::error_escape:
274684ddb6SLionel Sambuc return "The expression contained an invalid escaped character, or a "
284684ddb6SLionel Sambuc "trailing escape.";
294684ddb6SLionel Sambuc case regex_constants::error_backref:
304684ddb6SLionel Sambuc return "The expression contained an invalid back reference.";
314684ddb6SLionel Sambuc case regex_constants::error_brack:
324684ddb6SLionel Sambuc return "The expression contained mismatched [ and ].";
334684ddb6SLionel Sambuc case regex_constants::error_paren:
344684ddb6SLionel Sambuc return "The expression contained mismatched ( and ).";
354684ddb6SLionel Sambuc case regex_constants::error_brace:
364684ddb6SLionel Sambuc return "The expression contained mismatched { and }.";
374684ddb6SLionel Sambuc case regex_constants::error_badbrace:
384684ddb6SLionel Sambuc return "The expression contained an invalid range in a {} expression.";
394684ddb6SLionel Sambuc case regex_constants::error_range:
404684ddb6SLionel Sambuc return "The expression contained an invalid character range, "
414684ddb6SLionel Sambuc "such as [b-a] in most encodings.";
424684ddb6SLionel Sambuc case regex_constants::error_space:
434684ddb6SLionel Sambuc return "There was insufficient memory to convert the expression into "
444684ddb6SLionel Sambuc "a finite state machine.";
454684ddb6SLionel Sambuc case regex_constants::error_badrepeat:
464684ddb6SLionel Sambuc return "One of *?+{ was not preceded by a valid regular expression.";
474684ddb6SLionel Sambuc case regex_constants::error_complexity:
484684ddb6SLionel Sambuc return "The complexity of an attempted match against a regular "
494684ddb6SLionel Sambuc "expression exceeded a pre-set level.";
504684ddb6SLionel Sambuc case regex_constants::error_stack:
514684ddb6SLionel Sambuc return "There was insufficient memory to determine whether the regular "
524684ddb6SLionel Sambuc "expression could match the specified character sequence.";
534684ddb6SLionel Sambuc case regex_constants::__re_err_grammar:
544684ddb6SLionel Sambuc return "An invalid regex grammar has been requested.";
554684ddb6SLionel Sambuc case regex_constants::__re_err_empty:
564684ddb6SLionel Sambuc return "An empty regex is not allowed in the POSIX grammar.";
574684ddb6SLionel Sambuc default:
584684ddb6SLionel Sambuc break;
594684ddb6SLionel Sambuc }
604684ddb6SLionel Sambuc return "Unknown error type";
614684ddb6SLionel Sambuc }
624684ddb6SLionel Sambuc
regex_error(regex_constants::error_type ecode)634684ddb6SLionel Sambuc regex_error::regex_error(regex_constants::error_type ecode)
644684ddb6SLionel Sambuc : runtime_error(make_error_type_string(ecode)),
654684ddb6SLionel Sambuc __code_(ecode)
664684ddb6SLionel Sambuc {}
674684ddb6SLionel Sambuc
~regex_error()684684ddb6SLionel Sambuc regex_error::~regex_error() throw() {}
694684ddb6SLionel Sambuc
704684ddb6SLionel Sambuc namespace {
714684ddb6SLionel Sambuc
// Clang's -Wpadded diagnostic is silenced around this definition only.
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wpadded"
#endif

// One row of the collating-element lookup table: a symbolic element name
// paired with the single character it denotes.
struct collationnames
{
    const char* elem_;  // NUL-terminated element name, used as the search key
    char char_;         // character the name stands for
};

#ifdef __clang__
#pragma clang diagnostic pop
#endif
864684ddb6SLionel Sambuc
874684ddb6SLionel Sambuc const collationnames collatenames[] =
884684ddb6SLionel Sambuc {
894684ddb6SLionel Sambuc {"A", 0x41},
904684ddb6SLionel Sambuc {"B", 0x42},
914684ddb6SLionel Sambuc {"C", 0x43},
924684ddb6SLionel Sambuc {"D", 0x44},
934684ddb6SLionel Sambuc {"E", 0x45},
944684ddb6SLionel Sambuc {"F", 0x46},
954684ddb6SLionel Sambuc {"G", 0x47},
964684ddb6SLionel Sambuc {"H", 0x48},
974684ddb6SLionel Sambuc {"I", 0x49},
984684ddb6SLionel Sambuc {"J", 0x4a},
994684ddb6SLionel Sambuc {"K", 0x4b},
1004684ddb6SLionel Sambuc {"L", 0x4c},
1014684ddb6SLionel Sambuc {"M", 0x4d},
1024684ddb6SLionel Sambuc {"N", 0x4e},
1034684ddb6SLionel Sambuc {"NUL", 0x00},
1044684ddb6SLionel Sambuc {"O", 0x4f},
1054684ddb6SLionel Sambuc {"P", 0x50},
1064684ddb6SLionel Sambuc {"Q", 0x51},
1074684ddb6SLionel Sambuc {"R", 0x52},
1084684ddb6SLionel Sambuc {"S", 0x53},
1094684ddb6SLionel Sambuc {"T", 0x54},
1104684ddb6SLionel Sambuc {"U", 0x55},
1114684ddb6SLionel Sambuc {"V", 0x56},
1124684ddb6SLionel Sambuc {"W", 0x57},
1134684ddb6SLionel Sambuc {"X", 0x58},
1144684ddb6SLionel Sambuc {"Y", 0x59},
1154684ddb6SLionel Sambuc {"Z", 0x5a},
1164684ddb6SLionel Sambuc {"a", 0x61},
1174684ddb6SLionel Sambuc {"alert", 0x07},
1184684ddb6SLionel Sambuc {"ampersand", 0x26},
1194684ddb6SLionel Sambuc {"apostrophe", 0x27},
1204684ddb6SLionel Sambuc {"asterisk", 0x2a},
1214684ddb6SLionel Sambuc {"b", 0x62},
1224684ddb6SLionel Sambuc {"backslash", 0x5c},
1234684ddb6SLionel Sambuc {"backspace", 0x08},
1244684ddb6SLionel Sambuc {"c", 0x63},
1254684ddb6SLionel Sambuc {"carriage-return", 0x0d},
1264684ddb6SLionel Sambuc {"circumflex", 0x5e},
1274684ddb6SLionel Sambuc {"circumflex-accent", 0x5e},
1284684ddb6SLionel Sambuc {"colon", 0x3a},
1294684ddb6SLionel Sambuc {"comma", 0x2c},
1304684ddb6SLionel Sambuc {"commercial-at", 0x40},
1314684ddb6SLionel Sambuc {"d", 0x64},
1324684ddb6SLionel Sambuc {"dollar-sign", 0x24},
1334684ddb6SLionel Sambuc {"e", 0x65},
1344684ddb6SLionel Sambuc {"eight", 0x38},
1354684ddb6SLionel Sambuc {"equals-sign", 0x3d},
1364684ddb6SLionel Sambuc {"exclamation-mark", 0x21},
1374684ddb6SLionel Sambuc {"f", 0x66},
1384684ddb6SLionel Sambuc {"five", 0x35},
1394684ddb6SLionel Sambuc {"form-feed", 0x0c},
1404684ddb6SLionel Sambuc {"four", 0x34},
1414684ddb6SLionel Sambuc {"full-stop", 0x2e},
1424684ddb6SLionel Sambuc {"g", 0x67},
1434684ddb6SLionel Sambuc {"grave-accent", 0x60},
1444684ddb6SLionel Sambuc {"greater-than-sign", 0x3e},
1454684ddb6SLionel Sambuc {"h", 0x68},
1464684ddb6SLionel Sambuc {"hyphen", 0x2d},
1474684ddb6SLionel Sambuc {"hyphen-minus", 0x2d},
1484684ddb6SLionel Sambuc {"i", 0x69},
1494684ddb6SLionel Sambuc {"j", 0x6a},
1504684ddb6SLionel Sambuc {"k", 0x6b},
1514684ddb6SLionel Sambuc {"l", 0x6c},
1524684ddb6SLionel Sambuc {"left-brace", 0x7b},
1534684ddb6SLionel Sambuc {"left-curly-bracket", 0x7b},
1544684ddb6SLionel Sambuc {"left-parenthesis", 0x28},
1554684ddb6SLionel Sambuc {"left-square-bracket", 0x5b},
1564684ddb6SLionel Sambuc {"less-than-sign", 0x3c},
1574684ddb6SLionel Sambuc {"low-line", 0x5f},
1584684ddb6SLionel Sambuc {"m", 0x6d},
1594684ddb6SLionel Sambuc {"n", 0x6e},
1604684ddb6SLionel Sambuc {"newline", 0x0a},
1614684ddb6SLionel Sambuc {"nine", 0x39},
1624684ddb6SLionel Sambuc {"number-sign", 0x23},
1634684ddb6SLionel Sambuc {"o", 0x6f},
1644684ddb6SLionel Sambuc {"one", 0x31},
1654684ddb6SLionel Sambuc {"p", 0x70},
1664684ddb6SLionel Sambuc {"percent-sign", 0x25},
1674684ddb6SLionel Sambuc {"period", 0x2e},
1684684ddb6SLionel Sambuc {"plus-sign", 0x2b},
1694684ddb6SLionel Sambuc {"q", 0x71},
1704684ddb6SLionel Sambuc {"question-mark", 0x3f},
1714684ddb6SLionel Sambuc {"quotation-mark", 0x22},
1724684ddb6SLionel Sambuc {"r", 0x72},
1734684ddb6SLionel Sambuc {"reverse-solidus", 0x5c},
1744684ddb6SLionel Sambuc {"right-brace", 0x7d},
1754684ddb6SLionel Sambuc {"right-curly-bracket", 0x7d},
1764684ddb6SLionel Sambuc {"right-parenthesis", 0x29},
1774684ddb6SLionel Sambuc {"right-square-bracket", 0x5d},
1784684ddb6SLionel Sambuc {"s", 0x73},
1794684ddb6SLionel Sambuc {"semicolon", 0x3b},
1804684ddb6SLionel Sambuc {"seven", 0x37},
1814684ddb6SLionel Sambuc {"six", 0x36},
1824684ddb6SLionel Sambuc {"slash", 0x2f},
1834684ddb6SLionel Sambuc {"solidus", 0x2f},
1844684ddb6SLionel Sambuc {"space", 0x20},
1854684ddb6SLionel Sambuc {"t", 0x74},
1864684ddb6SLionel Sambuc {"tab", 0x09},
1874684ddb6SLionel Sambuc {"three", 0x33},
1884684ddb6SLionel Sambuc {"tilde", 0x7e},
1894684ddb6SLionel Sambuc {"two", 0x32},
1904684ddb6SLionel Sambuc {"u", 0x75},
1914684ddb6SLionel Sambuc {"underscore", 0x5f},
1924684ddb6SLionel Sambuc {"v", 0x76},
1934684ddb6SLionel Sambuc {"vertical-line", 0x7c},
1944684ddb6SLionel Sambuc {"vertical-tab", 0x0b},
1954684ddb6SLionel Sambuc {"w", 0x77},
1964684ddb6SLionel Sambuc {"x", 0x78},
1974684ddb6SLionel Sambuc {"y", 0x79},
1984684ddb6SLionel Sambuc {"z", 0x7a},
1994684ddb6SLionel Sambuc {"zero", 0x30}
2004684ddb6SLionel Sambuc };
2014684ddb6SLionel Sambuc
202*0a6a1f1dSLionel Sambuc #if defined(__clang__)
2034684ddb6SLionel Sambuc #pragma clang diagnostic push
2044684ddb6SLionel Sambuc #pragma clang diagnostic ignored "-Wpadded"
205*0a6a1f1dSLionel Sambuc #endif
2064684ddb6SLionel Sambuc
2074684ddb6SLionel Sambuc struct classnames
2084684ddb6SLionel Sambuc {
2094684ddb6SLionel Sambuc const char* elem_;
210*0a6a1f1dSLionel Sambuc regex_traits<char>::char_class_type mask_;
2114684ddb6SLionel Sambuc };
2124684ddb6SLionel Sambuc
213*0a6a1f1dSLionel Sambuc #if defined(__clang__)
2144684ddb6SLionel Sambuc #pragma clang diagnostic pop
215*0a6a1f1dSLionel Sambuc #endif
2164684ddb6SLionel Sambuc
2174684ddb6SLionel Sambuc const classnames ClassNames[] =
2184684ddb6SLionel Sambuc {
2194684ddb6SLionel Sambuc {"alnum", ctype_base::alnum},
2204684ddb6SLionel Sambuc {"alpha", ctype_base::alpha},
2214684ddb6SLionel Sambuc {"blank", ctype_base::blank},
2224684ddb6SLionel Sambuc {"cntrl", ctype_base::cntrl},
2234684ddb6SLionel Sambuc {"d", ctype_base::digit},
2244684ddb6SLionel Sambuc {"digit", ctype_base::digit},
2254684ddb6SLionel Sambuc {"graph", ctype_base::graph},
2264684ddb6SLionel Sambuc {"lower", ctype_base::lower},
2274684ddb6SLionel Sambuc {"print", ctype_base::print},
2284684ddb6SLionel Sambuc {"punct", ctype_base::punct},
2294684ddb6SLionel Sambuc {"s", ctype_base::space},
2304684ddb6SLionel Sambuc {"space", ctype_base::space},
2314684ddb6SLionel Sambuc {"upper", ctype_base::upper},
2324684ddb6SLionel Sambuc {"w", regex_traits<char>::__regex_word},
2334684ddb6SLionel Sambuc {"xdigit", ctype_base::xdigit}
2344684ddb6SLionel Sambuc };
2354684ddb6SLionel Sambuc
2364684ddb6SLionel Sambuc struct use_strcmp
2374684ddb6SLionel Sambuc {
operator ()__anonc0c1ff130111::use_strcmp2384684ddb6SLionel Sambuc bool operator()(const collationnames& x, const char* y)
2394684ddb6SLionel Sambuc {return strcmp(x.elem_, y) < 0;}
operator ()__anonc0c1ff130111::use_strcmp2404684ddb6SLionel Sambuc bool operator()(const classnames& x, const char* y)
2414684ddb6SLionel Sambuc {return strcmp(x.elem_, y) < 0;}
2424684ddb6SLionel Sambuc };
2434684ddb6SLionel Sambuc
2444684ddb6SLionel Sambuc }
2454684ddb6SLionel Sambuc
2464684ddb6SLionel Sambuc string
__get_collation_name(const char * s)2474684ddb6SLionel Sambuc __get_collation_name(const char* s)
2484684ddb6SLionel Sambuc {
2494684ddb6SLionel Sambuc const collationnames* i =
2504684ddb6SLionel Sambuc _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp());
2514684ddb6SLionel Sambuc string r;
2524684ddb6SLionel Sambuc if (i != end(collatenames) && strcmp(s, i->elem_) == 0)
2534684ddb6SLionel Sambuc r = char(i->char_);
2544684ddb6SLionel Sambuc return r;
2554684ddb6SLionel Sambuc }
2564684ddb6SLionel Sambuc
257*0a6a1f1dSLionel Sambuc regex_traits<char>::char_class_type
__get_classname(const char * s,bool __icase)2584684ddb6SLionel Sambuc __get_classname(const char* s, bool __icase)
2594684ddb6SLionel Sambuc {
2604684ddb6SLionel Sambuc const classnames* i =
2614684ddb6SLionel Sambuc _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp());
262*0a6a1f1dSLionel Sambuc regex_traits<char>::char_class_type r = 0;
2634684ddb6SLionel Sambuc if (i != end(ClassNames) && strcmp(s, i->elem_) == 0)
2644684ddb6SLionel Sambuc {
2654684ddb6SLionel Sambuc r = i->mask_;
2664684ddb6SLionel Sambuc if (r == regex_traits<char>::__regex_word)
2674684ddb6SLionel Sambuc r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower;
2684684ddb6SLionel Sambuc else if (__icase)
2694684ddb6SLionel Sambuc {
2704684ddb6SLionel Sambuc if (r & (ctype_base::lower | ctype_base::upper))
2714684ddb6SLionel Sambuc r |= ctype_base::alpha;
2724684ddb6SLionel Sambuc }
2734684ddb6SLionel Sambuc }
2744684ddb6SLionel Sambuc return r;
2754684ddb6SLionel Sambuc }
2764684ddb6SLionel Sambuc
2774684ddb6SLionel Sambuc template <>
2784684ddb6SLionel Sambuc void
__exec(__state & __s) const2794684ddb6SLionel Sambuc __match_any_but_newline<char>::__exec(__state& __s) const
2804684ddb6SLionel Sambuc {
2814684ddb6SLionel Sambuc if (__s.__current_ != __s.__last_)
2824684ddb6SLionel Sambuc {
2834684ddb6SLionel Sambuc switch (*__s.__current_)
2844684ddb6SLionel Sambuc {
2854684ddb6SLionel Sambuc case '\r':
2864684ddb6SLionel Sambuc case '\n':
2874684ddb6SLionel Sambuc __s.__do_ = __state::__reject;
2884684ddb6SLionel Sambuc __s.__node_ = nullptr;
2894684ddb6SLionel Sambuc break;
2904684ddb6SLionel Sambuc default:
2914684ddb6SLionel Sambuc __s.__do_ = __state::__accept_and_consume;
2924684ddb6SLionel Sambuc ++__s.__current_;
2934684ddb6SLionel Sambuc __s.__node_ = this->first();
2944684ddb6SLionel Sambuc break;
2954684ddb6SLionel Sambuc }
2964684ddb6SLionel Sambuc }
2974684ddb6SLionel Sambuc else
2984684ddb6SLionel Sambuc {
2994684ddb6SLionel Sambuc __s.__do_ = __state::__reject;
3004684ddb6SLionel Sambuc __s.__node_ = nullptr;
3014684ddb6SLionel Sambuc }
3024684ddb6SLionel Sambuc }
3034684ddb6SLionel Sambuc
3044684ddb6SLionel Sambuc template <>
3054684ddb6SLionel Sambuc void
__exec(__state & __s) const3064684ddb6SLionel Sambuc __match_any_but_newline<wchar_t>::__exec(__state& __s) const
3074684ddb6SLionel Sambuc {
3084684ddb6SLionel Sambuc if (__s.__current_ != __s.__last_)
3094684ddb6SLionel Sambuc {
3104684ddb6SLionel Sambuc switch (*__s.__current_)
3114684ddb6SLionel Sambuc {
3124684ddb6SLionel Sambuc case '\r':
3134684ddb6SLionel Sambuc case '\n':
3144684ddb6SLionel Sambuc case 0x2028:
3154684ddb6SLionel Sambuc case 0x2029:
3164684ddb6SLionel Sambuc __s.__do_ = __state::__reject;
3174684ddb6SLionel Sambuc __s.__node_ = nullptr;
3184684ddb6SLionel Sambuc break;
3194684ddb6SLionel Sambuc default:
3204684ddb6SLionel Sambuc __s.__do_ = __state::__accept_and_consume;
3214684ddb6SLionel Sambuc ++__s.__current_;
3224684ddb6SLionel Sambuc __s.__node_ = this->first();
3234684ddb6SLionel Sambuc break;
3244684ddb6SLionel Sambuc }
3254684ddb6SLionel Sambuc }
3264684ddb6SLionel Sambuc else
3274684ddb6SLionel Sambuc {
3284684ddb6SLionel Sambuc __s.__do_ = __state::__reject;
3294684ddb6SLionel Sambuc __s.__node_ = nullptr;
3304684ddb6SLionel Sambuc }
3314684ddb6SLionel Sambuc }
3324684ddb6SLionel Sambuc
3334684ddb6SLionel Sambuc _LIBCPP_END_NAMESPACE_STD
334