/*    invlist_inline.h
 *
 *    Copyright (C) 2012 by Larry Wall and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 */

#ifndef PERL_INVLIST_INLINE_H_
#define PERL_INVLIST_INLINE_H_

#if defined(PERL_IN_UTF8_C)             \
 || defined(PERL_IN_REGCOMP_ANY)        \
 || defined(PERL_IN_REGEXEC_C)          \
 || defined(PERL_IN_TOKE_C)             \
 || defined(PERL_IN_PP_C)               \
 || defined(PERL_IN_OP_C)               \
 || defined(PERL_IN_DOOP_C)

/* An element is in an inversion list iff its index is even numbered: 0, 2, 4,
 * etc */
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))

/* This converts to/from our UVs to what the SV code is expecting: bytes. */
#define TO_INTERNAL_SIZE(x) ((x) * sizeof(UV))
#define FROM_INTERNAL_SIZE(x) ((x)/ sizeof(UV))

PERL_STATIC_INLINE bool
S_is_invlist(const SV* const invlist)
{
    /* Returns TRUE iff <invlist> is non-NULL and its SV type is SVt_INVLIST */

    return invlist != NULL && SvTYPE(invlist) == SVt_INVLIST;
}

PERL_STATIC_INLINE bool*
S_get_invlist_offset_addr(SV* invlist)
{
    /* Return the address of the field that says whether the inversion list is
     * offset (it contains 1) or not (contains 0) */
    PERL_ARGS_ASSERT_GET_INVLIST_OFFSET_ADDR;

    assert(is_invlist(invlist));

    return &(((XINVLIST*) SvANY(invlist))->is_offset);
}

PERL_STATIC_INLINE UV
S__invlist_len(SV* const invlist)
{
    /* Returns the current number of elements stored in the inversion list's
     * array */

    PERL_ARGS_ASSERT__INVLIST_LEN;

    assert(is_invlist(invlist));

    /* SvCUR is kept in bytes.  When the list is offset, the value of the
     * offset field (1) is subtracted from the element count so that the
     * skipped leading element is not included in the returned length */
    return (SvCUR(invlist) == 0)
           ? 0
           : FROM_INTERNAL_SIZE(SvCUR(invlist)) - *get_invlist_offset_addr(invlist);
}

PERL_STATIC_INLINE bool
S__invlist_contains_cp(SV* const invlist, const UV cp)
{
    /* Does <invlist> contain code point <cp> as part of the set? */

    IV index;

    /* Validate the arguments before they are handed to the search */
    PERL_ARGS_ASSERT__INVLIST_CONTAINS_CP;

    index = _invlist_search(invlist, cp);

    /* A negative index is treated as "not found"; otherwise <cp> is in the
     * set only if the element found begins a matching (even-indexed) range */
    return index >= 0 && ELEMENT_RANGE_MATCHES_INVLIST(index);
}

PERL_STATIC_INLINE UV*
S_invlist_array(SV* const invlist)
{
    /* Returns the pointer to the inversion list's array.  Every time the
     * length changes, this needs to be called in case malloc or realloc moved
     * it */

    PERL_ARGS_ASSERT_INVLIST_ARRAY;

    /* Must not be empty.  If these fail, you probably didn't check for <len>
     * being non-zero before trying to get the array */
    assert(_invlist_len(invlist));

    /* The very first element always contains zero.  The array begins either
     * there, or if the inversion list is offset, at the element after it.
     * The offset header field determines which; it contains 0 or 1 to indicate
     * how much additionally to add.
     *
     * Check the whole first element, not merely its first byte, so that the
     * assertion really verifies the invariant stated above */
    assert(0 == *((UV *) SvPVX(invlist)));
    return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
}

#endif
#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C) || defined(PERL_IN_DOOP_C)

PERL_STATIC_INLINE void
S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
{
    /* Grow the maximum size of an inversion list */

    PERL_ARGS_ASSERT_INVLIST_EXTEND;

    assert(SvTYPE(invlist) == SVt_INVLIST);

    /* Add one to account for the zero element at the beginning which may not
     * be counted by the calling parameters */
    SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
}

PERL_STATIC_INLINE void
S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
{
    /* Sets the current number of elements stored in the inversion list.
     * Updates SvCUR correspondingly.  <offset> is added to <len> before
     * converting to bytes, except that a <len> of 0 always yields an SvCUR of
     * 0 */
    PERL_UNUSED_CONTEXT;
    PERL_ARGS_ASSERT_INVLIST_SET_LEN;

    assert(SvTYPE(invlist) == SVt_INVLIST);

    SvCUR_set(invlist,
              (len == 0)
               ? 0
               : TO_INTERNAL_SIZE(len + offset));

    /* The new length must fit in the allocated space; an SvLEN of 0 is
     * exempted from this check */
    assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
}

PERL_STATIC_INLINE SV*
S_add_cp_to_invlist(pTHX_ SV* invlist, const UV cp) {
    /* Adds the single code point <cp> to <invlist>, as the range cp..cp, and
     * returns whatever _add_range_to_invlist() returns */

    return _add_range_to_invlist(invlist, cp, cp);
}

PERL_STATIC_INLINE UV
S_invlist_highest(SV* const invlist)
{
    /* Returns the highest code point that matches an inversion list.  This API
     * has an ambiguity, as it returns 0 either when the highest is actually
     * 0, or when the list is empty.  If this distinction matters to you, check
     * for emptiness before calling this function */

    UV len;
    UV *array;

    PERL_ARGS_ASSERT_INVLIST_HIGHEST;

    len = _invlist_len(invlist);
    if (len == 0) {
        return 0;
    }

    array = invlist_array(invlist);

    /* The last element in the array in the inversion list always starts a
     * range that goes to infinity.  That range may be for code points that are
     * matched in the inversion list, or it may be for ones that aren't
     * matched.  In the latter case, the highest code point in the set is one
     * less than the beginning of this range; otherwise it is the final element
     * of this range: infinity */
    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
           ? UV_MAX
           : array[len - 1] - 1;
}

#  if defined(PERL_IN_REGCOMP_ANY)

PERL_STATIC_INLINE UV
S_invlist_highest_range_start(SV* const invlist)
{
    /* Returns the lowest code point of the highest range in the inversion
     * list parameter.  This API has an ambiguity: it returns 0 either when
     * the lowest such point is actually 0 or when the list is empty.  If this
     * distinction matters to you, check for emptiness before calling this
     * function. */

    UV len;
    UV *array;

    PERL_ARGS_ASSERT_INVLIST_HIGHEST_RANGE_START;

    len = _invlist_len(invlist);
    if (len == 0) {
        return 0;
    }

    array = invlist_array(invlist);

    /* The last element in the array in the inversion list always starts a
     * range that goes to infinity.  That range may be for code points that are
     * matched in the inversion list, or it may be for ones that aren't
     * matched.  In the first case, the lowest code point in the matching range
     * is the one that started the range.  In the other case, the final
     * matching range begins at the next element down (which may be 0 in the
     * edge case). */
    return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
           ? array[len - 1]
           : len == 1
             ? 0
             : array[len - 2];
}

#  endif
#endif
#if defined(PERL_IN_REGCOMP_ANY) || defined(PERL_IN_OP_C)

PERL_STATIC_INLINE STRLEN*
S_get_invlist_iter_addr(SV* invlist)
{
    /* Return the address of the STRLEN field that contains the current
     * iteration position */

    PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR;

    assert(is_invlist(invlist));

    return &(((XINVLIST*) SvANY(invlist))->iterator);
}

PERL_STATIC_INLINE void
S_invlist_iterinit(SV* invlist)     /* Initialize iterator for invlist */
{
    PERL_ARGS_ASSERT_INVLIST_ITERINIT;

    /* Position 0 is the first element of the array */
    *get_invlist_iter_addr(invlist) = 0;
}

PERL_STATIC_INLINE void
S_invlist_iterfinish(SV* invlist)
{
    /* Terminate iterator for invlist.  This is to catch development errors.
     * Any iteration that is interrupted before completed should call this
     * function.  Functions that add code points anywhere else but to the end
     * of an inversion list assert that they are not in the middle of an
     * iteration.  If they were, the addition would make the iteration
     * problematical: if the iteration hadn't reached the place where things
     * were being added, it would be ok */

    PERL_ARGS_ASSERT_INVLIST_ITERFINISH;

    /* (STRLEN) UV_MAX is the sentinel meaning "no iteration in progress" */
    *get_invlist_iter_addr(invlist) = (STRLEN) UV_MAX;
}

STATIC bool
S_invlist_iternext(SV* invlist, UV* start, UV* end)
{
    /* A C<invlist_iterinit> call on <invlist> must be used to set this up.
     * This call sets in <*start> and <*end>, the next range in <invlist>.
     * Returns <TRUE> if successful and the next call will return the next
     * range; <FALSE> if it was already at the end of the list.  If the
     * latter, <*start> and <*end> are unchanged, and the iterator is marked
     * as finished, so C<invlist_iterinit> must be called again before the
     * list can be iterated over anew */

    STRLEN* pos;
    UV len;
    UV *array;

    PERL_ARGS_ASSERT_INVLIST_ITERNEXT;

    pos = get_invlist_iter_addr(invlist);
    len = _invlist_len(invlist);

    if (*pos >= len) {
        *pos = (STRLEN) UV_MAX;  /* Force iterinit() to be required next time */
        return FALSE;
    }

    array = invlist_array(invlist);

    *start = array[(*pos)++];

    /* If there is no following element, the range extends to infinity;
     * otherwise it ends just before the value of the following element */
    if (*pos >= len) {
        *end = UV_MAX;
    }
    else {
        *end = array[(*pos)++] - 1;
    }

    return TRUE;
}

#endif

#ifndef PERL_IN_REGCOMP_ANY

/* These symbols are only needed later in regcomp.c */
#  undef TO_INTERNAL_SIZE
#  undef FROM_INTERNAL_SIZE
#endif

#ifdef PERL_IN_REGCOMP_ANY
PERL_STATIC_INLINE
bool
S_invlist_is_iterating(const SV* const invlist)
{
    /* Returns TRUE iff the iteration position of <invlist> is anything other
     * than the "finished" sentinel (STRLEN) UV_MAX */

    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;

    /* get_invlist_iter_addr()'s sv is non-const only because it returns a
     * value that can be used to modify the invlist, it doesn't modify the
     * invlist itself */
    return *(get_invlist_iter_addr((SV*)invlist)) < (STRLEN) UV_MAX;
}

PERL_STATIC_INLINE
SV *
S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
{
    /* Get the contents of an inversion list into a string SV so that they can
     * be printed out.  If 'traditional_style' is TRUE, it uses the format
     * traditionally done for debug tracing; otherwise it uses a format
     * suitable for just copying to the output, with blanks between ranges and
     * a dash between range components */

    UV start, end;
    SV* output;
    const char intra_range_delimiter = (traditional_style ? '\t' : '-');
    const char inter_range_delimiter = (traditional_style ? '\n' : ' ');

    /* Check the preconditions before allocating anything */
    PERL_ARGS_ASSERT_INVLIST_CONTENTS;

    assert(! invlist_is_iterating(invlist));

    /* The traditional format begins with a newline */
    if (traditional_style) {
        output = newSVpvs("\n");
    }
    else {
        output = newSVpvs("");
    }

    invlist_iterinit(invlist);
    while (invlist_iternext(invlist, &start, &end)) {
        if (end == UV_MAX) {
            /* Final range, extending to infinity */
            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
                                          start, intra_range_delimiter,
                                                 inter_range_delimiter);
        }
        else if (end != start) {
            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
                                          start,
                                                   intra_range_delimiter,
                                          end, inter_range_delimiter);
        }
        else {
            /* Single code point: print it just once */
            Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
                                          start, inter_range_delimiter);
        }
    }

    if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
        SvCUR_set(output, SvCUR(output) - 1);
    }

    return output;
}

PERL_STATIC_INLINE
UV
S_invlist_lowest(SV* const invlist)
{
    /* Returns the lowest code point that matches an inversion list.  This API
     * has an ambiguity, as it returns 0 either when the lowest is actually
     * 0, or when the list is empty.  If this distinction matters to you, check
     * for emptiness before calling this function */

    UV len;
    UV *array;

    PERL_ARGS_ASSERT_INVLIST_LOWEST;

    len = _invlist_len(invlist);
    if (len == 0) {
        return 0;
    }

    array = invlist_array(invlist);

    /* Element 0 starts the first matching range */
    return array[0];
}

#endif

#endif /* PERL_INVLIST_INLINE_H_ */