// class template regex -*- C++ -*-

// Copyright (C) 2010, 2011 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/**
 * @file bits/regex_constants.h
 * @brief Constant definitions for the std regex library.
 *
 * This is an internal header file, included by other library headers.
 * Do not attempt to use it directly. @headername{regex}
 */

namespace std _GLIBCXX_VISIBILITY(default)
{
/**
 * @namespace std::regex_constants
 * @brief ISO C++-0x entities sub namespace for regex.
 */
namespace regex_constants
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  /**
   * @name 5.1 Regular Expression Syntax Options
   */
  //@{
  // Bit positions of the syntax option flags.  Each enumerator is used as
  // the shift count of the corresponding `1 << _S_xxx' constant below;
  // _S_syntax_last trails the flag positions and so equals their count.
  enum __syntax_option
  {
    _S_icase,
    _S_nosubs,
    _S_optimize,
    _S_collate,
    _S_ECMAScript,
    _S_basic,
    _S_extended,
    _S_awk,
    _S_grep,
    _S_egrep,
    _S_syntax_last
  };

  /**
   * @brief This is a bitmask type indicating how to interpret the regex.
   *
   * The @c syntax_option_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   *
   * A valid value of type syntax_option_type shall have exactly one of the
   * elements @c ECMAScript, @c basic, @c extended, @c awk, @c grep, @c egrep
   * %set.
   */
  typedef unsigned int syntax_option_type;

  /**
   * Specifies that the matching of regular expressions against a character
   * sequence shall be performed without regard to case.
   */
  static constexpr syntax_option_type icase = 1 << _S_icase;

  /**
   * Specifies that when a regular expression is matched against a character
   * container sequence, no sub-expression matches are to be stored in the
   * supplied match_results structure.
   */
  static constexpr syntax_option_type nosubs = 1 << _S_nosubs;

  /**
   * Specifies that the regular expression engine should pay more attention to
   * the speed with which regular expressions are matched, and less to the
   * speed with which regular expression objects are constructed. Otherwise
   * it has no detectable effect on the program output.
   */
  static constexpr syntax_option_type optimize = 1 << _S_optimize;

  /**
   * Specifies that character ranges of the form [a-b] should be locale
   * sensitive.
   */
  static constexpr syntax_option_type collate = 1 << _S_collate;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
   * Language Specification, Standard Ecma-262, third edition, 1999], as
   * modified in section [28.13]. This grammar is similar to that defined
   * in the PERL scripting language but extended with elements found in the
   * POSIX regular expression grammar.
   */
  static constexpr syntax_option_type ECMAScript = 1 << _S_ECMAScript;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
   * Portable Operating System Interface (POSIX), Base Definitions and
   * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
   * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
   */
  static constexpr syntax_option_type basic = 1 << _S_basic;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
   * Portable Operating System Interface (POSIX), Base Definitions and Headers,
   * Section 9, Regular Expressions.
   */
  static constexpr syntax_option_type extended = 1 << _S_extended;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
   * identical to syntax_option_type extended, except that C-style escape
   * sequences are supported. These sequences are:
   * \\\\, \\a, \\b, \\f, \\n, \\r, \\t, \\v, \\', ',
   * and \\ddd (where ddd is one, two, or three octal digits).
   */
  static constexpr syntax_option_type awk = 1 << _S_awk;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
   * identical to syntax_option_type basic, except that newlines are treated
   * as whitespace.
   */
  static constexpr syntax_option_type grep = 1 << _S_grep;

  /**
   * Specifies that the grammar recognized by the regular expression engine is
   * that used by POSIX utility grep when given the -E option in
   * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
   * extended, except that newlines are treated as whitespace.
   */
  static constexpr syntax_option_type egrep = 1 << _S_egrep;

  //@}

  /**
   * @name 5.2 Matching Rules
   *
   * Matching a regular expression against a sequence of characters [first,
   * last) proceeds according to the rules of the grammar specified for the
   * regular expression object, modified according to the effects listed
   * below for any bitmask elements set.
   *
   */
  //@{

  // Bit positions of the match and format flags.  Each enumerator is used
  // as the shift count of the corresponding `1 << _S_xxx' constant below;
  // _S_match_flag_last gives the width of the match_flag_type bitset.
  enum __match_flag
  {
    _S_not_bol,
    _S_not_eol,
    _S_not_bow,
    _S_not_eow,
    _S_any,
    _S_not_null,
    _S_continuous,
    _S_prev_avail,
    _S_sed,
    _S_no_copy,
    _S_first_only,
    _S_match_flag_last
  };

  /**
   * @brief This is a bitmask type indicating regex matching rules.
   *
   * The @c match_flag_type is implementation defined but it is valid to
   * perform bitwise operations on these values and expect the right thing to
   * happen.
   */
  typedef std::bitset<_S_match_flag_last> match_flag_type;

  /**
   * The default matching rules.
   */
  static constexpr match_flag_type match_default = 0;

  /**
   * The first character in the sequence [first, last) is treated as though it
   * is not at the beginning of a line, so the character (^) in the regular
   * expression shall not match [first, first).
   */
  static constexpr match_flag_type match_not_bol = 1 << _S_not_bol;

  /**
   * The last character in the sequence [first, last) is treated as though it
   * is not at the end of a line, so the character ($) in the regular
   * expression shall not match [last, last).
   */
  static constexpr match_flag_type match_not_eol = 1 << _S_not_eol;

  /**
   * The expression \\b is not matched against the sub-sequence
   * [first,first).
   */
  static constexpr match_flag_type match_not_bow = 1 << _S_not_bow;

  /**
   * The expression \\b should not be matched against the sub-sequence
   * [last,last).
   */
  static constexpr match_flag_type match_not_eow = 1 << _S_not_eow;

  /**
   * If more than one match is possible then any match is an acceptable
   * result.
   */
  static constexpr match_flag_type match_any = 1 << _S_any;

  /**
   * The expression does not match an empty sequence.
   */
  static constexpr match_flag_type match_not_null = 1 << _S_not_null;

  /**
   * The expression only matches a sub-sequence that begins at first.
   */
  static constexpr match_flag_type match_continuous = 1 << _S_continuous;

  /**
   * --first is a valid iterator position. When this flag is set then the
   * flags match_not_bol and match_not_bow are ignored by the regular
   * expression algorithms 28.11 and iterators 28.12.
   */
  static constexpr match_flag_type match_prev_avail = 1 << _S_prev_avail;

  /**
   * When a regular expression match is to be replaced by a new string, the
   * new string is constructed using the rules used by the ECMAScript replace
   * function in ECMA-262 [Ecma International, ECMAScript Language
   * Specification, Standard Ecma-262, third edition, 1999], part 15.5.4.11
   * String.prototype.replace. In addition, during search and replace
   * operations all non-overlapping occurrences of the regular expression
   * are located and replaced, and sections of the input that did not match
   * the expression are copied unchanged to the output string.
   *
   * Format strings (from ECMA-262 [15.5.4.11]):
   * @li $$  The dollar-sign itself ($)
   * @li $&  The matched substring.
   * @li $`  The portion of @a string that precedes the matched substring.
   *         This would be match_results::prefix().
   * @li $'  The portion of @a string that follows the matched substring.
   *         This would be match_results::suffix().
   * @li $n  The nth capture, where n is in [1,9] and $n is not followed by a
   *         decimal digit. If n <= match_results::size() and the nth capture
   *         is undefined, use the empty string instead. If n >
   *         match_results::size(), the result is implementation-defined.
   * @li $nn The nnth capture, where nn is a two-digit decimal number on
   *         [01, 99]. If nn <= match_results::size() and the nth capture is
   *         undefined, use the empty string instead. If
   *         nn > match_results::size(), the result is implementation-defined.
   */
  static constexpr match_flag_type format_default = 0;

  /**
   * When a regular expression match is to be replaced by a new string, the
   * new string is constructed using the rules used by the POSIX sed utility
   * in IEEE Std 1003.1-2001 [IEEE, Information Technology -- Portable
   * Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
   */
  static constexpr match_flag_type format_sed = 1 << _S_sed;

  /**
   * During a search and replace operation, sections of the character
   * container sequence being searched that do not match the regular
   * expression shall not be copied to the output string.
   */
  static constexpr match_flag_type format_no_copy = 1 << _S_no_copy;

  /**
   * When specified during a search and replace operation, only the first
   * occurrence of the regular expression shall be replaced.
   */
  static constexpr match_flag_type format_first_only = 1 << _S_first_only;

  //@}

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace regex_constants
} // namespace