1*4684ddb6SLionel Sambuc //===-------------------------- regex.cpp ---------------------------------===// 2*4684ddb6SLionel Sambuc // 3*4684ddb6SLionel Sambuc // The LLVM Compiler Infrastructure 4*4684ddb6SLionel Sambuc // 5*4684ddb6SLionel Sambuc // This file is dual licensed under the MIT and the University of Illinois Open 6*4684ddb6SLionel Sambuc // Source Licenses. See LICENSE.TXT for details. 7*4684ddb6SLionel Sambuc // 8*4684ddb6SLionel Sambuc //===----------------------------------------------------------------------===// 9*4684ddb6SLionel Sambuc 10*4684ddb6SLionel Sambuc #include "regex" 11*4684ddb6SLionel Sambuc #include "algorithm" 12*4684ddb6SLionel Sambuc #include "iterator" 13*4684ddb6SLionel Sambuc 14*4684ddb6SLionel Sambuc _LIBCPP_BEGIN_NAMESPACE_STD 15*4684ddb6SLionel Sambuc 16*4684ddb6SLionel Sambuc static 17*4684ddb6SLionel Sambuc const char* 18*4684ddb6SLionel Sambuc make_error_type_string(regex_constants::error_type ecode) 19*4684ddb6SLionel Sambuc { 20*4684ddb6SLionel Sambuc switch (ecode) 21*4684ddb6SLionel Sambuc { 22*4684ddb6SLionel Sambuc case regex_constants::error_collate: 23*4684ddb6SLionel Sambuc return "The expression contained an invalid collating element name."; 24*4684ddb6SLionel Sambuc case regex_constants::error_ctype: 25*4684ddb6SLionel Sambuc return "The expression contained an invalid character class name."; 26*4684ddb6SLionel Sambuc case regex_constants::error_escape: 27*4684ddb6SLionel Sambuc return "The expression contained an invalid escaped character, or a " 28*4684ddb6SLionel Sambuc "trailing escape."; 29*4684ddb6SLionel Sambuc case regex_constants::error_backref: 30*4684ddb6SLionel Sambuc return "The expression contained an invalid back reference."; 31*4684ddb6SLionel Sambuc case regex_constants::error_brack: 32*4684ddb6SLionel Sambuc return "The expression contained mismatched [ and ]."; 33*4684ddb6SLionel Sambuc case regex_constants::error_paren: 34*4684ddb6SLionel Sambuc return "The expression contained mismatched ( and )."; 35*4684ddb6SLionel Sambuc case regex_constants::error_brace: 36*4684ddb6SLionel Sambuc return "The expression contained mismatched { and }."; 37*4684ddb6SLionel Sambuc case regex_constants::error_badbrace: 38*4684ddb6SLionel Sambuc return "The expression contained an invalid range in a {} expression."; 39*4684ddb6SLionel Sambuc case regex_constants::error_range: 40*4684ddb6SLionel Sambuc return "The expression contained an invalid character range, " 41*4684ddb6SLionel Sambuc "such as [b-a] in most encodings."; 42*4684ddb6SLionel Sambuc case regex_constants::error_space: 43*4684ddb6SLionel Sambuc return "There was insufficient memory to convert the expression into " 44*4684ddb6SLionel Sambuc "a finite state machine."; 45*4684ddb6SLionel Sambuc case regex_constants::error_badrepeat: 46*4684ddb6SLionel Sambuc return "One of *?+{ was not preceded by a valid regular expression."; 47*4684ddb6SLionel Sambuc case regex_constants::error_complexity: 48*4684ddb6SLionel Sambuc return "The complexity of an attempted match against a regular " 49*4684ddb6SLionel Sambuc "expression exceeded a pre-set level."; 50*4684ddb6SLionel Sambuc case regex_constants::error_stack: 51*4684ddb6SLionel Sambuc return "There was insufficient memory to determine whether the regular " 52*4684ddb6SLionel Sambuc "expression could match the specified character sequence."; 53*4684ddb6SLionel Sambuc case regex_constants::__re_err_grammar: 54*4684ddb6SLionel Sambuc return "An invalid regex grammar has been requested."; 55*4684ddb6SLionel Sambuc case regex_constants::__re_err_empty: 56*4684ddb6SLionel Sambuc return "An empty regex is not allowed in the POSIX grammar."; 57*4684ddb6SLionel Sambuc default: 58*4684ddb6SLionel Sambuc break; 59*4684ddb6SLionel Sambuc } 60*4684ddb6SLionel Sambuc return "Unknown error type"; 61*4684ddb6SLionel Sambuc } 62*4684ddb6SLionel Sambuc 63*4684ddb6SLionel Sambuc regex_error::regex_error(regex_constants::error_type ecode) 64*4684ddb6SLionel Sambuc : runtime_error(make_error_type_string(ecode)), 65*4684ddb6SLionel Sambuc __code_(ecode) 66*4684ddb6SLionel Sambuc {} 67*4684ddb6SLionel Sambuc 68*4684ddb6SLionel Sambuc regex_error::~regex_error() throw() {} 69*4684ddb6SLionel Sambuc 70*4684ddb6SLionel Sambuc namespace { 71*4684ddb6SLionel Sambuc 72*4684ddb6SLionel Sambuc #pragma clang diagnostic push 73*4684ddb6SLionel Sambuc #pragma clang diagnostic ignored "-Wpadded" 74*4684ddb6SLionel Sambuc 75*4684ddb6SLionel Sambuc struct collationnames 76*4684ddb6SLionel Sambuc { 77*4684ddb6SLionel Sambuc const char* elem_; 78*4684ddb6SLionel Sambuc char char_; 79*4684ddb6SLionel Sambuc }; 80*4684ddb6SLionel Sambuc 81*4684ddb6SLionel Sambuc #pragma clang diagnostic pop 82*4684ddb6SLionel Sambuc 83*4684ddb6SLionel Sambuc const collationnames collatenames[] = 84*4684ddb6SLionel Sambuc { 85*4684ddb6SLionel Sambuc {"A", 0x41}, 86*4684ddb6SLionel Sambuc {"B", 0x42}, 87*4684ddb6SLionel Sambuc {"C", 0x43}, 88*4684ddb6SLionel Sambuc {"D", 0x44}, 89*4684ddb6SLionel Sambuc {"E", 0x45}, 90*4684ddb6SLionel Sambuc {"F", 0x46}, 91*4684ddb6SLionel Sambuc {"G", 0x47}, 92*4684ddb6SLionel Sambuc {"H", 0x48}, 93*4684ddb6SLionel Sambuc {"I", 0x49}, 94*4684ddb6SLionel Sambuc {"J", 0x4a}, 95*4684ddb6SLionel Sambuc {"K", 0x4b}, 96*4684ddb6SLionel Sambuc {"L", 0x4c}, 97*4684ddb6SLionel Sambuc {"M", 0x4d}, 98*4684ddb6SLionel Sambuc {"N", 0x4e}, 99*4684ddb6SLionel Sambuc {"NUL", 0x00}, 100*4684ddb6SLionel Sambuc {"O", 0x4f}, 101*4684ddb6SLionel Sambuc {"P", 0x50}, 102*4684ddb6SLionel Sambuc {"Q", 0x51}, 103*4684ddb6SLionel Sambuc {"R", 0x52}, 104*4684ddb6SLionel Sambuc {"S", 0x53}, 105*4684ddb6SLionel Sambuc {"T", 0x54}, 106*4684ddb6SLionel Sambuc {"U", 0x55}, 107*4684ddb6SLionel Sambuc {"V", 0x56}, 108*4684ddb6SLionel Sambuc {"W", 0x57}, 109*4684ddb6SLionel Sambuc {"X", 0x58}, 110*4684ddb6SLionel Sambuc {"Y", 0x59}, 111*4684ddb6SLionel Sambuc {"Z", 0x5a}, 112*4684ddb6SLionel Sambuc {"a", 0x61}, 113*4684ddb6SLionel Sambuc {"alert", 0x07}, 114*4684ddb6SLionel Sambuc {"ampersand", 0x26}, 115*4684ddb6SLionel Sambuc {"apostrophe", 0x27}, 116*4684ddb6SLionel Sambuc {"asterisk", 0x2a}, 117*4684ddb6SLionel Sambuc {"b", 0x62}, 118*4684ddb6SLionel Sambuc {"backslash", 0x5c}, 119*4684ddb6SLionel Sambuc {"backspace", 0x08}, 120*4684ddb6SLionel Sambuc {"c", 0x63}, 121*4684ddb6SLionel Sambuc {"carriage-return", 0x0d}, 122*4684ddb6SLionel Sambuc {"circumflex", 0x5e}, 123*4684ddb6SLionel Sambuc {"circumflex-accent", 0x5e}, 124*4684ddb6SLionel Sambuc {"colon", 0x3a}, 125*4684ddb6SLionel Sambuc {"comma", 0x2c}, 126*4684ddb6SLionel Sambuc {"commercial-at", 0x40}, 127*4684ddb6SLionel Sambuc {"d", 0x64}, 128*4684ddb6SLionel Sambuc {"dollar-sign", 0x24}, 129*4684ddb6SLionel Sambuc {"e", 0x65}, 130*4684ddb6SLionel Sambuc {"eight", 0x38}, 131*4684ddb6SLionel Sambuc {"equals-sign", 0x3d}, 132*4684ddb6SLionel Sambuc {"exclamation-mark", 0x21}, 133*4684ddb6SLionel Sambuc {"f", 0x66}, 134*4684ddb6SLionel Sambuc {"five", 0x35}, 135*4684ddb6SLionel Sambuc {"form-feed", 0x0c}, 136*4684ddb6SLionel Sambuc {"four", 0x34}, 137*4684ddb6SLionel Sambuc {"full-stop", 0x2e}, 138*4684ddb6SLionel Sambuc {"g", 0x67}, 139*4684ddb6SLionel Sambuc {"grave-accent", 0x60}, 140*4684ddb6SLionel Sambuc {"greater-than-sign", 0x3e}, 141*4684ddb6SLionel Sambuc {"h", 0x68}, 142*4684ddb6SLionel Sambuc {"hyphen", 0x2d}, 143*4684ddb6SLionel Sambuc {"hyphen-minus", 0x2d}, 144*4684ddb6SLionel Sambuc {"i", 0x69}, 145*4684ddb6SLionel Sambuc {"j", 0x6a}, 146*4684ddb6SLionel Sambuc {"k", 0x6b}, 147*4684ddb6SLionel Sambuc {"l", 0x6c}, 148*4684ddb6SLionel Sambuc {"left-brace", 0x7b}, 149*4684ddb6SLionel Sambuc {"left-curly-bracket", 0x7b}, 150*4684ddb6SLionel Sambuc {"left-parenthesis", 0x28}, 151*4684ddb6SLionel Sambuc {"left-square-bracket", 0x5b}, 152*4684ddb6SLionel Sambuc {"less-than-sign", 0x3c}, 153*4684ddb6SLionel Sambuc {"low-line", 0x5f}, 154*4684ddb6SLionel Sambuc {"m", 0x6d}, 155*4684ddb6SLionel Sambuc {"n", 0x6e}, 156*4684ddb6SLionel Sambuc {"newline", 0x0a}, 157*4684ddb6SLionel Sambuc {"nine", 0x39}, 158*4684ddb6SLionel Sambuc {"number-sign", 0x23}, 159*4684ddb6SLionel Sambuc {"o", 0x6f}, 160*4684ddb6SLionel Sambuc {"one", 0x31}, 161*4684ddb6SLionel Sambuc {"p", 0x70}, 162*4684ddb6SLionel Sambuc {"percent-sign", 0x25}, 163*4684ddb6SLionel Sambuc {"period", 0x2e}, 164*4684ddb6SLionel Sambuc {"plus-sign", 0x2b}, 165*4684ddb6SLionel Sambuc {"q", 0x71}, 166*4684ddb6SLionel Sambuc {"question-mark", 0x3f}, 167*4684ddb6SLionel Sambuc {"quotation-mark", 0x22}, 168*4684ddb6SLionel Sambuc {"r", 0x72}, 169*4684ddb6SLionel Sambuc {"reverse-solidus", 0x5c}, 170*4684ddb6SLionel Sambuc {"right-brace", 0x7d}, 171*4684ddb6SLionel Sambuc {"right-curly-bracket", 0x7d}, 172*4684ddb6SLionel Sambuc {"right-parenthesis", 0x29}, 173*4684ddb6SLionel Sambuc {"right-square-bracket", 0x5d}, 174*4684ddb6SLionel Sambuc {"s", 0x73}, 175*4684ddb6SLionel Sambuc {"semicolon", 0x3b}, 176*4684ddb6SLionel Sambuc {"seven", 0x37}, 177*4684ddb6SLionel Sambuc {"six", 0x36}, 178*4684ddb6SLionel Sambuc {"slash", 0x2f}, 179*4684ddb6SLionel Sambuc {"solidus", 0x2f}, 180*4684ddb6SLionel Sambuc {"space", 0x20}, 181*4684ddb6SLionel Sambuc {"t", 0x74}, 182*4684ddb6SLionel Sambuc {"tab", 0x09}, 183*4684ddb6SLionel Sambuc {"three", 0x33}, 184*4684ddb6SLionel Sambuc {"tilde", 0x7e}, 185*4684ddb6SLionel Sambuc {"two", 0x32}, 186*4684ddb6SLionel Sambuc {"u", 0x75}, 187*4684ddb6SLionel Sambuc {"underscore", 0x5f}, 188*4684ddb6SLionel Sambuc {"v", 0x76}, 189*4684ddb6SLionel Sambuc {"vertical-line", 0x7c}, 190*4684ddb6SLionel Sambuc {"vertical-tab", 0x0b}, 191*4684ddb6SLionel Sambuc {"w", 0x77}, 192*4684ddb6SLionel Sambuc {"x", 0x78}, 193*4684ddb6SLionel Sambuc {"y", 0x79}, 194*4684ddb6SLionel Sambuc {"z", 0x7a}, 195*4684ddb6SLionel Sambuc {"zero", 0x30} 196*4684ddb6SLionel Sambuc }; 197*4684ddb6SLionel Sambuc 198*4684ddb6SLionel Sambuc #pragma clang diagnostic push 199*4684ddb6SLionel Sambuc #pragma clang diagnostic ignored "-Wpadded" 200*4684ddb6SLionel Sambuc 201*4684ddb6SLionel Sambuc struct classnames 202*4684ddb6SLionel Sambuc { 203*4684ddb6SLionel Sambuc const char* elem_; 204*4684ddb6SLionel Sambuc ctype_base::mask mask_; 205*4684ddb6SLionel Sambuc }; 206*4684ddb6SLionel Sambuc 207*4684ddb6SLionel Sambuc #pragma clang diagnostic pop 208*4684ddb6SLionel Sambuc 209*4684ddb6SLionel Sambuc const classnames ClassNames[] = 210*4684ddb6SLionel Sambuc { 211*4684ddb6SLionel Sambuc {"alnum", ctype_base::alnum}, 212*4684ddb6SLionel Sambuc {"alpha", ctype_base::alpha}, 213*4684ddb6SLionel Sambuc {"blank", ctype_base::blank}, 214*4684ddb6SLionel Sambuc {"cntrl", ctype_base::cntrl}, 215*4684ddb6SLionel Sambuc {"d", ctype_base::digit}, 216*4684ddb6SLionel Sambuc {"digit", ctype_base::digit}, 217*4684ddb6SLionel Sambuc {"graph", ctype_base::graph}, 218*4684ddb6SLionel Sambuc {"lower", ctype_base::lower}, 219*4684ddb6SLionel Sambuc {"print", ctype_base::print}, 220*4684ddb6SLionel Sambuc {"punct", ctype_base::punct}, 221*4684ddb6SLionel Sambuc {"s", ctype_base::space}, 222*4684ddb6SLionel Sambuc {"space", ctype_base::space}, 223*4684ddb6SLionel Sambuc {"upper", ctype_base::upper}, 224*4684ddb6SLionel Sambuc {"w", regex_traits<char>::__regex_word}, 225*4684ddb6SLionel Sambuc {"xdigit", ctype_base::xdigit} 226*4684ddb6SLionel Sambuc }; 227*4684ddb6SLionel Sambuc 228*4684ddb6SLionel Sambuc struct use_strcmp 229*4684ddb6SLionel Sambuc { 230*4684ddb6SLionel Sambuc bool operator()(const collationnames& x, const char* y) 231*4684ddb6SLionel Sambuc {return strcmp(x.elem_, y) < 0;} 232*4684ddb6SLionel Sambuc bool operator()(const classnames& x, const char* y) 233*4684ddb6SLionel Sambuc {return strcmp(x.elem_, y) < 0;} 234*4684ddb6SLionel Sambuc }; 235*4684ddb6SLionel Sambuc 236*4684ddb6SLionel Sambuc } 237*4684ddb6SLionel Sambuc 238*4684ddb6SLionel Sambuc string 239*4684ddb6SLionel Sambuc __get_collation_name(const char* s) 240*4684ddb6SLionel Sambuc { 241*4684ddb6SLionel Sambuc const collationnames* i = 242*4684ddb6SLionel Sambuc _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 243*4684ddb6SLionel Sambuc string r; 244*4684ddb6SLionel Sambuc if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 245*4684ddb6SLionel Sambuc r = char(i->char_); 246*4684ddb6SLionel Sambuc return r; 247*4684ddb6SLionel Sambuc } 248*4684ddb6SLionel Sambuc 249*4684ddb6SLionel Sambuc ctype_base::mask 250*4684ddb6SLionel Sambuc __get_classname(const char* s, bool __icase) 251*4684ddb6SLionel Sambuc { 252*4684ddb6SLionel Sambuc const classnames* i = 253*4684ddb6SLionel Sambuc _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 254*4684ddb6SLionel Sambuc ctype_base::mask r = 0; 255*4684ddb6SLionel Sambuc if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 256*4684ddb6SLionel Sambuc { 257*4684ddb6SLionel Sambuc r = i->mask_; 258*4684ddb6SLionel Sambuc if (r == regex_traits<char>::__regex_word) 259*4684ddb6SLionel Sambuc r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 260*4684ddb6SLionel Sambuc else if (__icase) 261*4684ddb6SLionel Sambuc { 262*4684ddb6SLionel Sambuc if (r & (ctype_base::lower | ctype_base::upper)) 263*4684ddb6SLionel Sambuc r |= ctype_base::alpha; 264*4684ddb6SLionel Sambuc } 265*4684ddb6SLionel Sambuc } 266*4684ddb6SLionel Sambuc return r; 267*4684ddb6SLionel Sambuc } 268*4684ddb6SLionel Sambuc 269*4684ddb6SLionel Sambuc template <> 270*4684ddb6SLionel Sambuc void 271*4684ddb6SLionel Sambuc __match_any_but_newline<char>::__exec(__state& __s) const 272*4684ddb6SLionel Sambuc { 273*4684ddb6SLionel Sambuc if (__s.__current_ != __s.__last_) 274*4684ddb6SLionel Sambuc { 275*4684ddb6SLionel Sambuc switch (*__s.__current_) 276*4684ddb6SLionel Sambuc { 277*4684ddb6SLionel Sambuc case '\r': 278*4684ddb6SLionel Sambuc case '\n': 279*4684ddb6SLionel Sambuc __s.__do_ = __state::__reject; 280*4684ddb6SLionel Sambuc __s.__node_ = nullptr; 281*4684ddb6SLionel Sambuc break; 282*4684ddb6SLionel Sambuc default: 283*4684ddb6SLionel Sambuc __s.__do_ = __state::__accept_and_consume; 284*4684ddb6SLionel Sambuc ++__s.__current_; 285*4684ddb6SLionel Sambuc __s.__node_ = this->first(); 286*4684ddb6SLionel Sambuc break; 287*4684ddb6SLionel Sambuc } 288*4684ddb6SLionel Sambuc } 289*4684ddb6SLionel Sambuc else 290*4684ddb6SLionel Sambuc { 291*4684ddb6SLionel Sambuc __s.__do_ = __state::__reject; 292*4684ddb6SLionel Sambuc __s.__node_ = nullptr; 293*4684ddb6SLionel Sambuc } 294*4684ddb6SLionel Sambuc } 295*4684ddb6SLionel Sambuc 296*4684ddb6SLionel Sambuc template <> 297*4684ddb6SLionel Sambuc void 298*4684ddb6SLionel Sambuc __match_any_but_newline<wchar_t>::__exec(__state& __s) const 299*4684ddb6SLionel Sambuc { 300*4684ddb6SLionel Sambuc if (__s.__current_ != __s.__last_) 301*4684ddb6SLionel Sambuc { 302*4684ddb6SLionel Sambuc switch (*__s.__current_) 303*4684ddb6SLionel Sambuc { 304*4684ddb6SLionel Sambuc case '\r': 305*4684ddb6SLionel Sambuc case '\n': 306*4684ddb6SLionel Sambuc case 0x2028: 307*4684ddb6SLionel Sambuc case 0x2029: 308*4684ddb6SLionel Sambuc __s.__do_ = __state::__reject; 309*4684ddb6SLionel Sambuc __s.__node_ = nullptr; 310*4684ddb6SLionel Sambuc break; 311*4684ddb6SLionel Sambuc default: 312*4684ddb6SLionel Sambuc __s.__do_ = __state::__accept_and_consume; 313*4684ddb6SLionel Sambuc ++__s.__current_; 314*4684ddb6SLionel Sambuc __s.__node_ = this->first(); 315*4684ddb6SLionel Sambuc break; 316*4684ddb6SLionel Sambuc } 317*4684ddb6SLionel Sambuc } 318*4684ddb6SLionel Sambuc else 319*4684ddb6SLionel Sambuc { 320*4684ddb6SLionel Sambuc __s.__do_ = __state::__reject; 321*4684ddb6SLionel Sambuc __s.__node_ = nullptr; 322*4684ddb6SLionel Sambuc } 323*4684ddb6SLionel Sambuc } 324*4684ddb6SLionel Sambuc 325*4684ddb6SLionel Sambuc _LIBCPP_END_NAMESPACE_STD 326