1 /* pp_pack.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * He still hopefully carried some of his gear in his pack: a small tinder-box, 13 * two small shallow pans, the smaller fitting into the larger; inside them a 14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and 15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, 16 * some salt. 17 * 18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"] 19 */ 20 21 /* This file contains pp ("push/pop") functions that 22 * execute the opcodes that make up a perl program. A typical pp function 23 * expects to find its arguments on the stack, and usually pushes its 24 * results onto the stack, hence the 'pp' terminology. Each OP structure 25 * contains a pointer to the relevant pp_foo() function. 26 * 27 * This particular file just contains pp_pack() and pp_unpack(). See the 28 * other pp*.c files for the rest of the pp_ functions. 29 */ 30 31 #include "EXTERN.h" 32 #define PERL_IN_PP_PACK_C 33 #include "perl.h" 34 35 /* Types used by pack/unpack */ 36 typedef enum { 37 e_no_len, /* no length */ 38 e_number, /* number, [] */ 39 e_star /* asterisk */ 40 } howlen_t; 41 42 typedef struct tempsym { 43 const char* patptr; /* current template char */ 44 const char* patend; /* one after last char */ 45 const char* grpbeg; /* 1st char of ()-group */ 46 const char* grpend; /* end of ()-group */ 47 I32 code; /* template code (!<>) */ 48 I32 length; /* length/repeat count */ 49 howlen_t howlen; /* how length is given */ 50 int level; /* () nesting level */ 51 U32 flags; /* /=4, comma=2, pack=1 */ 52 /* and group modifiers */ 53 STRLEN strbeg; /* offset of group start */ 54 struct tempsym *previous; /* previous group */ 55 } tempsym_t; 56 57 #define TEMPSYM_INIT(symptr, p, e, f) \ 58 STMT_START { \ 59 (symptr)->patptr = (p); \ 60 (symptr)->patend = (e); \ 61 (symptr)->grpbeg = NULL; \ 62 (symptr)->grpend = NULL; \ 63 (symptr)->grpend = NULL; \ 64 (symptr)->code = 0; \ 65 (symptr)->length = 0; \ 66 (symptr)->howlen = e_no_len; \ 67 (symptr)->level = 0; \ 68 (symptr)->flags = (f); \ 69 (symptr)->strbeg = 0; \ 70 (symptr)->previous = NULL; \ 71 } STMT_END 72 73 typedef union { 74 NV nv; 75 U8 bytes[sizeof(NV)]; 76 } NV_bytes; 77 78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 79 typedef union { 80 long double ld; 81 U8 bytes[sizeof(long double)]; 82 } ld_bytes; 83 #endif 84 85 #ifndef CHAR_BIT 86 # define CHAR_BIT 8 87 #endif 88 /* Maximum number of bytes to which a byte can grow due to upgrade */ 89 #define UTF8_EXPAND 2 90 91 /* 92 * Offset for integer pack/unpack. 93 * 94 * On architectures where I16 and I32 aren't really 16 and 32 bits, 95 * which for now are all Crays, pack and unpack have to play games. 96 */ 97 98 /* 99 * These values are required for portability of pack() output. 100 * If they're not right on your machine, then pack() and unpack() 101 * wouldn't work right anyway; you'll need to apply the Cray hack. 102 * (I'd like to check them with #if, but you can't use sizeof() in 103 * the preprocessor.) --??? 104 */ 105 /* 106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE 107 defines are now in config.h. --Andy Dougherty April 1998 108 */ 109 #define SIZE16 2 110 #define SIZE32 4 111 112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). 113 --jhi Feb 1999 */ 114 115 #if U16SIZE > SIZE16 || U32SIZE > SIZE32 116 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 117 # define OFF16(p) ((char*)(p)) 118 # define OFF32(p) ((char*)(p)) 119 # else 120 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 121 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) 122 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) 123 # else 124 ++++ bad cray byte order 125 # endif 126 # endif 127 #else 128 # define OFF16(p) ((char *) (p)) 129 # define OFF32(p) ((char *) (p)) 130 #endif 131 132 #define PUSH16(utf8, cur, p, needs_swap) \ 133 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap) 134 #define PUSH32(utf8, cur, p, needs_swap) \ 135 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap) 136 137 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 138 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN) 139 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 140 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN) 141 #else 142 # error "Unsupported byteorder" 143 /* Need to add code here to re-instate mixed endian support. 144 NEEDS_SWAP would need to hold a flag indicating which action to 145 take, and S_reverse_copy and the code in S_utf8_to_bytes would need 146 logic adding to deal with any mixed-endian transformations needed. 147 */ 148 #endif 149 150 /* Only to be used inside a loop (see the break) */ 151 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \ 152 STMT_START { \ 153 if (UNLIKELY(utf8)) { \ 154 if (!S_utf8_to_bytes(aTHX_ &s, strend, \ 155 (char *) (buf), len, datumtype)) break; \ 156 } else { \ 157 if (UNLIKELY(needs_swap)) \ 158 S_reverse_copy(s, (char *) (buf), len); \ 159 else \ 160 Copy(s, (char *) (buf), len, char); \ 161 s += len; \ 162 } \ 163 } STMT_END 164 165 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \ 166 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap) 167 168 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \ 169 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap) 170 171 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \ 172 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap) 173 174 #define PUSH_VAR(utf8, aptr, var, needs_swap) \ 175 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap) 176 177 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */ 178 #define MAX_SUB_TEMPLATE_LEVEL 100 179 180 /* flags (note that type modifiers can also be used as flags!) */ 181 #define FLAG_WAS_UTF8 0x40 182 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */ 183 #define FLAG_UNPACK_ONLY_ONE 0x10 184 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */ 185 #define FLAG_SLASH 0x04 186 #define FLAG_COMMA 0x02 187 #define FLAG_PACK 0x01 188 189 STATIC SV * 190 S_mul128(pTHX_ SV *sv, U8 m) 191 { 192 STRLEN len; 193 char *s = SvPV(sv, len); 194 char *t; 195 196 PERL_ARGS_ASSERT_MUL128; 197 198 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ 199 SV * const tmpNew = newSVpvs("0000000000"); 200 201 sv_catsv(tmpNew, sv); 202 SvREFCNT_dec(sv); /* free old sv */ 203 sv = tmpNew; 204 s = SvPV(sv, len); 205 } 206 t = s + len - 1; 207 while (!*t) /* trailing '\0'? */ 208 t--; 209 while (t > s) { 210 const U32 i = ((*t - '0') << 7) + m; 211 *(t--) = '0' + (char)(i % 10); 212 m = (char)(i / 10); 213 } 214 return (sv); 215 } 216 217 /* Explosives and implosives. */ 218 219 #define ISUUCHAR(ch) (NATIVE_TO_LATIN1(ch) >= NATIVE_TO_LATIN1(' ') \ 220 && NATIVE_TO_LATIN1(ch) < NATIVE_TO_LATIN1('a')) 221 222 /* type modifiers */ 223 #define TYPE_IS_SHRIEKING 0x100 224 #define TYPE_IS_BIG_ENDIAN 0x200 225 #define TYPE_IS_LITTLE_ENDIAN 0x400 226 #define TYPE_IS_PACK 0x800 227 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN) 228 #define TYPE_MODIFIERS(t) ((t) & ~0xFF) 229 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF) 230 231 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK) 232 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK) 233 234 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP(" 235 236 #define PACK_SIZE_CANNOT_CSUM 0x80 237 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ 238 #define PACK_SIZE_MASK 0x3F 239 240 #include "packsizetables.inc" 241 242 static void 243 S_reverse_copy(const char *src, char *dest, STRLEN len) 244 { 245 dest += len; 246 while (len--) 247 *--dest = *src++; 248 } 249 250 STATIC U8 251 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) 252 { 253 STRLEN retlen; 254 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, 255 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 256 /* We try to process malformed UTF-8 as much as possible (preferably with 257 warnings), but these two mean we make no progress in the string and 258 might enter an infinite loop */ 259 if (retlen == (STRLEN) -1 || retlen == 0) 260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", 261 (int) TYPE_NO_MODIFIERS(datumtype)); 262 if (val >= 0x100) { 263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 264 "Character in '%c' format wrapped in unpack", 265 (int) TYPE_NO_MODIFIERS(datumtype)); 266 val &= 0xff; 267 } 268 *s += retlen; 269 return (U8)val; 270 } 271 272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ 274 *(U8 *)(s)++) 275 276 STATIC bool 277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) 278 { 279 UV val; 280 STRLEN retlen; 281 const char *from = *s; 282 int bad = 0; 283 const U32 flags = ckWARN(WARN_UTF8) ? 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY); 285 const bool needs_swap = NEEDS_SWAP(datumtype); 286 287 if (UNLIKELY(needs_swap)) 288 buf += buf_len; 289 290 for (;buf_len > 0; buf_len--) { 291 if (from >= end) return FALSE; 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); 293 if (retlen == (STRLEN) -1 || retlen == 0) { 294 from += UTF8SKIP(from); 295 bad |= 1; 296 } else from += retlen; 297 if (val >= 0x100) { 298 bad |= 2; 299 val &= 0xff; 300 } 301 if (UNLIKELY(needs_swap)) 302 *(U8 *)--buf = (U8)val; 303 else 304 *(U8 *)buf++ = (U8)val; 305 } 306 /* We have enough characters for the buffer. Did we have problems ? */ 307 if (bad) { 308 if (bad & 1) { 309 /* Rewalk the string fragment while warning */ 310 const char *ptr; 311 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; 312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { 313 if (ptr >= end) break; 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags); 315 } 316 if (from > end) from = end; 317 } 318 if ((bad & 2)) 319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ? 320 WARN_PACK : WARN_UNPACK), 321 "Character(s) in '%c' format wrapped in %s", 322 (int) TYPE_NO_MODIFIERS(datumtype), 323 datumtype & TYPE_IS_PACK ? "pack" : "unpack"); 324 } 325 *s = from; 326 return TRUE; 327 } 328 329 STATIC char * 330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { 331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8; 332 333 if (UNLIKELY(needs_swap)) { 334 const U8 *p = start + len; 335 while (p-- > start) { 336 append_utf8_from_native_byte(*p, (U8 **) & dest); 337 } 338 } else { 339 const U8 * const end = start + len; 340 while (start < end) { 341 append_utf8_from_native_byte(*start, (U8 **) & dest); 342 start++; 343 } 344 } 345 return dest; 346 } 347 348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \ 349 STMT_START { \ 350 if (UNLIKELY(utf8)) \ 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \ 352 else { \ 353 if (UNLIKELY(needs_swap)) \ 354 S_reverse_copy((char *)(buf), cur, len); \ 355 else \ 356 Copy(buf, cur, len, char); \ 357 (cur) += (len); \ 358 } \ 359 } STMT_END 360 361 #define SAFE_UTF8_EXPAND(var) \ 362 STMT_START { \ 363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \ 364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 365 (var) = (var) * UTF8_EXPAND; \ 366 } STMT_END 367 368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \ 369 STMT_START { \ 370 if (SSize_t_MAX / (item_size) < (item_count)) \ 371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \ 373 } STMT_END 374 375 #define GROWING(utf8, cat, start, cur, in_len) \ 376 STMT_START { \ 377 STRLEN glen = (in_len); \ 378 STRLEN catcur = (STRLEN)((cur) - (start)); \ 379 if (utf8) SAFE_UTF8_EXPAND(glen); \ 380 if (SSize_t_MAX - glen < catcur) \ 381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 382 if (catcur + glen >= SvLEN(cat)) { \ 383 (start) = sv_exp_grow(cat, glen); \ 384 (cur) = (start) + SvCUR(cat); \ 385 } \ 386 } STMT_END 387 388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \ 389 STMT_START { \ 390 const STRLEN glen = (in_len); \ 391 STRLEN gl = glen; \ 392 if (utf8) SAFE_UTF8_EXPAND(gl); \ 393 if ((cur) + gl >= (start) + SvLEN(cat)) { \ 394 *cur = '\0'; \ 395 SvCUR_set((cat), (cur) - (start)); \ 396 (start) = sv_exp_grow(cat, gl); \ 397 (cur) = (start) + SvCUR(cat); \ 398 } \ 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \ 400 } STMT_END 401 402 #define PUSH_BYTE(utf8, s, byte) \ 403 STMT_START { \ 404 if (utf8) { \ 405 const U8 au8 = (byte); \ 406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\ 407 } else *(U8 *)(s)++ = (byte); \ 408 } STMT_END 409 410 /* Only to be used inside a loop (see the break) */ 411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \ 412 STMT_START { \ 413 STRLEN retlen; \ 414 if (str >= end) break; \ 415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ 416 if (retlen == (STRLEN) -1 || retlen == 0) { \ 417 *cur = '\0'; \ 418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ 419 } \ 420 str += retlen; \ 421 } STMT_END 422 423 static const char *_action( const tempsym_t* symptr ) 424 { 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); 426 } 427 428 /* Returns the sizeof() struct described by pat */ 429 STATIC I32 430 S_measure_struct(pTHX_ tempsym_t* symptr) 431 { 432 I32 total = 0; 433 434 PERL_ARGS_ASSERT_MEASURE_STRUCT; 435 436 while (next_symbol(symptr)) { 437 I32 len; 438 int size; 439 440 switch (symptr->howlen) { 441 case e_star: 442 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", 443 _action( symptr ) ); 444 445 default: 446 /* e_no_len and e_number */ 447 len = symptr->length; 448 break; 449 } 450 451 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK; 452 if (!size) { 453 int star; 454 /* endianness doesn't influence the size of a type */ 455 switch(TYPE_NO_ENDIANNESS(symptr->code)) { 456 default: 457 Perl_croak(aTHX_ "Invalid type '%c' in %s", 458 (int)TYPE_NO_MODIFIERS(symptr->code), 459 _action( symptr ) ); 460 case '.' | TYPE_IS_SHRIEKING: 461 case '@' | TYPE_IS_SHRIEKING: 462 case '@': 463 case '.': 464 case '/': 465 case 'U': /* XXXX Is it correct? */ 466 case 'w': 467 case 'u': 468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", 469 (int) TYPE_NO_MODIFIERS(symptr->code), 470 _action( symptr ) ); 471 case '%': 472 size = 0; 473 break; 474 case '(': 475 { 476 tempsym_t savsym = *symptr; 477 symptr->patptr = savsym.grpbeg; 478 symptr->patend = savsym.grpend; 479 /* XXXX Theoretically, we need to measure many times at 480 different positions, since the subexpression may contain 481 alignment commands, but be not of aligned length. 482 Need to detect this and croak(). */ 483 size = measure_struct(symptr); 484 *symptr = savsym; 485 break; 486 } 487 case 'X' | TYPE_IS_SHRIEKING: 488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. 489 */ 490 if (!len) /* Avoid division by 0 */ 491 len = 1; 492 len = total % len; /* Assumed: the start is aligned. */ 493 /* FALLTHROUGH */ 494 case 'X': 495 size = -1; 496 if (total < len) 497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) ); 498 break; 499 case 'x' | TYPE_IS_SHRIEKING: 500 if (!len) /* Avoid division by 0 */ 501 len = 1; 502 star = total % len; /* Assumed: the start is aligned. */ 503 if (star) /* Other portable ways? */ 504 len = len - star; 505 else 506 len = 0; 507 /* FALLTHROUGH */ 508 case 'x': 509 case 'A': 510 case 'Z': 511 case 'a': 512 size = 1; 513 break; 514 case 'B': 515 case 'b': 516 len = (len + 7)/8; 517 size = 1; 518 break; 519 case 'H': 520 case 'h': 521 len = (len + 1)/2; 522 size = 1; 523 break; 524 525 case 'P': 526 len = 1; 527 size = sizeof(char*); 528 break; 529 } 530 } 531 total += len * size; 532 } 533 return total; 534 } 535 536 537 /* locate matching closing parenthesis or bracket 538 * returns char pointer to char after match, or NULL 539 */ 540 STATIC const char * 541 S_group_end(pTHX_ const char *patptr, const char *patend, char ender) 542 { 543 PERL_ARGS_ASSERT_GROUP_END; 544 545 while (patptr < patend) { 546 const char c = *patptr++; 547 548 if (isSPACE(c)) 549 continue; 550 else if (c == ender) 551 return patptr-1; 552 else if (c == '#') { 553 while (patptr < patend && *patptr != '\n') 554 patptr++; 555 continue; 556 } else if (c == '(') 557 patptr = group_end(patptr, patend, ')') + 1; 558 else if (c == '[') 559 patptr = group_end(patptr, patend, ']') + 1; 560 } 561 Perl_croak(aTHX_ "No group ending character '%c' found in template", 562 ender); 563 NOT_REACHED; /* NOTREACHED */ 564 } 565 566 567 /* Convert unsigned decimal number to binary. 568 * Expects a pointer to the first digit and address of length variable 569 * Advances char pointer to 1st non-digit char and returns number 570 */ 571 STATIC const char * 572 S_get_num(pTHX_ const char *patptr, I32 *lenptr ) 573 { 574 I32 len = *patptr++ - '0'; 575 576 PERL_ARGS_ASSERT_GET_NUM; 577 578 while (isDIGIT(*patptr)) { 579 if (len >= 0x7FFFFFFF/10) 580 Perl_croak(aTHX_ "pack/unpack repeat count overflow"); 581 len = (len * 10) + (*patptr++ - '0'); 582 } 583 *lenptr = len; 584 return patptr; 585 } 586 587 /* The marvellous template parsing routine: Using state stored in *symptr, 588 * locates next template code and count 589 */ 590 STATIC bool 591 S_next_symbol(pTHX_ tempsym_t* symptr ) 592 { 593 const char* patptr = symptr->patptr; 594 const char* const patend = symptr->patend; 595 596 PERL_ARGS_ASSERT_NEXT_SYMBOL; 597 598 symptr->flags &= ~FLAG_SLASH; 599 600 while (patptr < patend) { 601 if (isSPACE(*patptr)) 602 patptr++; 603 else if (*patptr == '#') { 604 patptr++; 605 while (patptr < patend && *patptr != '\n') 606 patptr++; 607 if (patptr < patend) 608 patptr++; 609 } else { 610 /* We should have found a template code */ 611 I32 code = *patptr++ & 0xFF; 612 U32 inherited_modifiers = 0; 613 614 if (code == ','){ /* grandfather in commas but with a warning */ 615 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ 616 symptr->flags |= FLAG_COMMA; 617 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 618 "Invalid type ',' in %s", _action( symptr ) ); 619 } 620 continue; 621 } 622 623 /* for '(', skip to ')' */ 624 if (code == '(') { 625 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) 626 Perl_croak(aTHX_ "()-group starts with a count in %s", 627 _action( symptr ) ); 628 symptr->grpbeg = patptr; 629 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); 630 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) 631 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", 632 _action( symptr ) ); 633 } 634 635 /* look for group modifiers to inherit */ 636 if (TYPE_ENDIANNESS(symptr->flags)) { 637 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) 638 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); 639 } 640 641 /* look for modifiers */ 642 while (patptr < patend) { 643 const char *allowed; 644 I32 modifier; 645 switch (*patptr) { 646 case '!': 647 modifier = TYPE_IS_SHRIEKING; 648 allowed = "sSiIlLxXnNvV@."; 649 break; 650 case '>': 651 modifier = TYPE_IS_BIG_ENDIAN; 652 allowed = ENDIANNESS_ALLOWED_TYPES; 653 break; 654 case '<': 655 modifier = TYPE_IS_LITTLE_ENDIAN; 656 allowed = ENDIANNESS_ALLOWED_TYPES; 657 break; 658 default: 659 allowed = ""; 660 modifier = 0; 661 break; 662 } 663 664 if (modifier == 0) 665 break; 666 667 if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) 668 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, 669 allowed, _action( symptr ) ); 670 671 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) 672 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", 673 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) ); 674 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == 675 TYPE_ENDIANNESS_MASK) 676 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", 677 *patptr, _action( symptr ) ); 678 679 if ((code & modifier)) { 680 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 681 "Duplicate modifier '%c' after '%c' in %s", 682 *patptr, (int) TYPE_NO_MODIFIERS(code), 683 _action( symptr ) ); 684 } 685 686 code |= modifier; 687 patptr++; 688 } 689 690 /* inherit modifiers */ 691 code |= inherited_modifiers; 692 693 /* look for count and/or / */ 694 if (patptr < patend) { 695 if (isDIGIT(*patptr)) { 696 patptr = get_num( patptr, &symptr->length ); 697 symptr->howlen = e_number; 698 699 } else if (*patptr == '*') { 700 patptr++; 701 symptr->howlen = e_star; 702 703 } else if (*patptr == '[') { 704 const char* lenptr = ++patptr; 705 symptr->howlen = e_number; 706 patptr = group_end( patptr, patend, ']' ) + 1; 707 /* what kind of [] is it? */ 708 if (isDIGIT(*lenptr)) { 709 lenptr = get_num( lenptr, &symptr->length ); 710 if( *lenptr != ']' ) 711 Perl_croak(aTHX_ "Malformed integer in [] in %s", 712 _action( symptr ) ); 713 } else { 714 tempsym_t savsym = *symptr; 715 symptr->patend = patptr-1; 716 symptr->patptr = lenptr; 717 savsym.length = measure_struct(symptr); 718 *symptr = savsym; 719 } 720 } else { 721 symptr->howlen = e_no_len; 722 symptr->length = 1; 723 } 724 725 /* try to find / */ 726 while (patptr < patend) { 727 if (isSPACE(*patptr)) 728 patptr++; 729 else if (*patptr == '#') { 730 patptr++; 731 while (patptr < patend && *patptr != '\n') 732 patptr++; 733 if (patptr < patend) 734 patptr++; 735 } else { 736 if (*patptr == '/') { 737 symptr->flags |= FLAG_SLASH; 738 patptr++; 739 if (patptr < patend && 740 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) 741 Perl_croak(aTHX_ "'/' does not take a repeat count in %s", 742 _action( symptr ) ); 743 } 744 break; 745 } 746 } 747 } else { 748 /* at end - no count, no / */ 749 symptr->howlen = e_no_len; 750 symptr->length = 1; 751 } 752 753 symptr->code = code; 754 symptr->patptr = patptr; 755 return TRUE; 756 } 757 } 758 symptr->patptr = patptr; 759 return FALSE; 760 } 761 762 /* 763 There is no way to cleanly handle the case where we should process the 764 string per byte in its upgraded form while it's really in downgraded form 765 (e.g. estimates like strend-s as an upper bound for the number of 766 characters left wouldn't work). So if we foresee the need of this 767 (pattern starts with U or contains U0), we want to work on the encoded 768 version of the string. Users are advised to upgrade their pack string 769 themselves if they need to do a lot of unpacks like this on it 770 */ 771 STATIC bool 772 need_utf8(const char *pat, const char *patend) 773 { 774 bool first = TRUE; 775 776 PERL_ARGS_ASSERT_NEED_UTF8; 777 778 while (pat < patend) { 779 if (pat[0] == '#') { 780 pat++; 781 pat = (const char *) memchr(pat, '\n', patend-pat); 782 if (!pat) return FALSE; 783 } else if (pat[0] == 'U') { 784 if (first || pat[1] == '0') return TRUE; 785 } else first = FALSE; 786 pat++; 787 } 788 return FALSE; 789 } 790 791 STATIC char 792 first_symbol(const char *pat, const char *patend) { 793 PERL_ARGS_ASSERT_FIRST_SYMBOL; 794 795 while (pat < patend) { 796 if (pat[0] != '#') return pat[0]; 797 pat++; 798 pat = (const char *) memchr(pat, '\n', patend-pat); 799 if (!pat) return 0; 800 pat++; 801 } 802 return 0; 803 } 804 805 /* 806 807 =head1 Pack and Unpack 808 809 =for apidoc unpackstring 810 811 The engine implementing the C<unpack()> Perl function. 812 813 Using the template C<pat..patend>, this function unpacks the string 814 C<s..strend> into a number of mortal SVs, which it pushes onto the perl 815 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and 816 C<SPAGAIN> after the call to this function). It returns the number of 817 pushed elements. 818 819 The C<strend> and C<patend> pointers should point to the byte following the 820 last character of each string. 821 822 Although this function returns its values on the perl argument stack, it 823 doesn't take any parameters from that stack (and thus in particular 824 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for 825 example). 826 827 =cut */ 828 829 I32 830 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags) 831 { 832 tempsym_t sym; 833 834 PERL_ARGS_ASSERT_UNPACKSTRING; 835 836 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; 837 else if (need_utf8(pat, patend)) { 838 /* We probably should try to avoid this in case a scalar context call 839 wouldn't get to the "U0" */ 840 STRLEN len = strend - s; 841 s = (char *) bytes_to_utf8((U8 *) s, &len); 842 SAVEFREEPV(s); 843 strend = s + len; 844 flags |= FLAG_DO_UTF8; 845 } 846 847 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) 848 flags |= FLAG_PARSE_UTF8; 849 850 TEMPSYM_INIT(&sym, pat, patend, flags); 851 852 return unpack_rec(&sym, s, s, strend, NULL ); 853 } 854 855 STATIC I32 856 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) 857 { 858 dSP; 859 SV *sv = NULL; 860 const I32 start_sp_offset = SP - PL_stack_base; 861 howlen_t howlen; 862 I32 checksum = 0; 863 UV cuv = 0; 864 NV cdouble = 0.0; 865 const int bits_in_uv = CHAR_BIT * sizeof(cuv); 866 bool beyond = FALSE; 867 bool explicit_length; 868 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; 869 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 870 871 PERL_ARGS_ASSERT_UNPACK_REC; 872 873 symptr->strbeg = s - strbeg; 874 875 while (next_symbol(symptr)) { 876 packprops_t props; 877 I32 len; 878 I32 datumtype = symptr->code; 879 bool needs_swap; 880 /* do first one only unless in list context 881 / is implemented by unpacking the count, then popping it from the 882 stack, so must check that we're not in the middle of a / */ 883 if ( unpack_only_one 884 && (SP - PL_stack_base == start_sp_offset + 1) 885 && (datumtype != '/') ) /* XXX can this be omitted */ 886 break; 887 888 switch (howlen = symptr->howlen) { 889 case e_star: 890 len = strend - strbeg; /* long enough */ 891 break; 892 default: 893 /* e_no_len and e_number */ 894 len = symptr->length; 895 break; 896 } 897 898 explicit_length = TRUE; 899 redo_switch: 900 beyond = s >= strend; 901 902 props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 903 if (props) { 904 /* props nonzero means we can process this letter. */ 905 const long size = props & PACK_SIZE_MASK; 906 const long howmany = (strend - s) / size; 907 if (len > howmany) 908 len = howmany; 909 910 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) { 911 if (len && unpack_only_one) len = 1; 912 EXTEND(SP, len); 913 EXTEND_MORTAL(len); 914 } 915 } 916 917 needs_swap = NEEDS_SWAP(datumtype); 918 919 switch(TYPE_NO_ENDIANNESS(datumtype)) { 920 default: 921 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) ); 922 923 case '%': 924 if (howlen == e_no_len) 925 len = 16; /* len is not specified */ 926 checksum = len; 927 cuv = 0; 928 cdouble = 0; 929 continue; 930 931 case '(': 932 { 933 tempsym_t savsym = *symptr; 934 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 935 symptr->flags |= group_modifiers; 936 symptr->patend = savsym.grpend; 937 symptr->previous = &savsym; 938 symptr->level++; 939 PUTBACK; 940 if (len && unpack_only_one) len = 1; 941 while (len--) { 942 symptr->patptr = savsym.grpbeg; 943 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 944 else symptr->flags &= ~FLAG_PARSE_UTF8; 945 unpack_rec(symptr, s, strbeg, strend, &s); 946 if (s == strend && savsym.howlen == e_star) 947 break; /* No way to continue */ 948 } 949 SPAGAIN; 950 savsym.flags = symptr->flags & ~group_modifiers; 951 *symptr = savsym; 952 break; 953 } 954 case '.' | TYPE_IS_SHRIEKING: 955 case '.': { 956 const char *from; 957 SV *sv; 958 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); 959 if (howlen == e_star) from = strbeg; 960 else if (len <= 0) from = s; 961 else { 962 tempsym_t *group = symptr; 963 964 while (--len && group) group = group->previous; 965 from = group ? strbeg + group->strbeg : strbeg; 966 } 967 sv = from <= s ? 968 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) : 969 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s))); 970 mXPUSHs(sv); 971 break; 972 } 973 case '@' | TYPE_IS_SHRIEKING: 974 case '@': 975 s = strbeg + symptr->strbeg; 976 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 977 { 978 while (len > 0) { 979 if (s >= strend) 980 Perl_croak(aTHX_ "'@' outside of string in unpack"); 981 s += UTF8SKIP(s); 982 len--; 983 } 984 if (s > strend) 985 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); 986 } else { 987 if (strend-s < len) 988 Perl_croak(aTHX_ "'@' outside of string in unpack"); 989 s += len; 990 } 991 break; 992 case 'X' | TYPE_IS_SHRIEKING: 993 if (!len) /* Avoid division by 0 */ 994 len = 1; 995 if (utf8) { 996 const char *hop, *last; 997 I32 l = len; 998 hop = last = strbeg; 999 while (hop < s) { 1000 hop += UTF8SKIP(hop); 1001 if (--l == 0) { 1002 last = hop; 1003 l = len; 1004 } 1005 } 1006 if (last > s) 1007 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1008 s = last; 1009 break; 1010 } 1011 len = (s - strbeg) % len; 1012 /* FALLTHROUGH */ 1013 case 'X': 1014 if (utf8) { 1015 while (len > 0) { 1016 if (s <= strbeg) 1017 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1018 while (--s, UTF8_IS_CONTINUATION(*s)) { 1019 if (s <= strbeg) 1020 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1021 } 1022 len--; 1023 } 1024 } else { 1025 if (len > s - strbeg) 1026 Perl_croak(aTHX_ "'X' outside of string in unpack" ); 1027 s -= len; 1028 } 1029 break; 1030 case 'x' | TYPE_IS_SHRIEKING: { 1031 I32 ai32; 1032 if (!len) /* Avoid division by 0 */ 1033 len = 1; 1034 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len; 1035 else ai32 = (s - strbeg) % len; 1036 if (ai32 == 0) break; 1037 len -= ai32; 1038 } 1039 /* FALLTHROUGH */ 1040 case 'x': 1041 if (utf8) { 1042 while (len>0) { 1043 if (s >= strend) 1044 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1045 s += UTF8SKIP(s); 1046 len--; 1047 } 1048 } else { 1049 if (len > strend - s) 1050 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1051 s += len; 1052 } 1053 break; 1054 case '/': 1055 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 1056 1057 case 'A': 1058 case 'Z': 1059 case 'a': 1060 if (checksum) { 1061 /* Preliminary length estimate is assumed done in 'W' */ 1062 if (len > strend - s) len = strend - s; 1063 goto W_checksum; 1064 } 1065 if (utf8) { 1066 I32 l; 1067 const char *hop; 1068 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) { 1069 if (hop >= strend) { 1070 if (hop > strend) 1071 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1072 break; 1073 } 1074 } 1075 if (hop > strend) 1076 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1077 len = hop - s; 1078 } else if (len > strend - s) 1079 len = strend - s; 1080 1081 if (datumtype == 'Z') { 1082 /* 'Z' strips stuff after first null */ 1083 const char *ptr, *end; 1084 end = s + len; 1085 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break; 1086 sv = newSVpvn(s, ptr-s); 1087 if (howlen == e_star) /* exact for 'Z*' */ 1088 len = ptr-s + (ptr != strend ? 1 : 0); 1089 } else if (datumtype == 'A') { 1090 /* 'A' strips both nulls and spaces */ 1091 const char *ptr; 1092 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { 1093 for (ptr = s+len-1; ptr >= s; ptr--) 1094 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && 1095 !isSPACE_utf8(ptr)) break; 1096 if (ptr >= s) ptr += UTF8SKIP(ptr); 1097 else ptr++; 1098 if (ptr > s+len) 1099 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1100 } else { 1101 for (ptr = s+len-1; ptr >= s; ptr--) 1102 if (*ptr != 0 && !isSPACE(*ptr)) break; 1103 ptr++; 1104 } 1105 sv = newSVpvn(s, ptr-s); 1106 } else sv = newSVpvn(s, len); 1107 1108 if (utf8) { 1109 SvUTF8_on(sv); 1110 /* Undo any upgrade done due to need_utf8() */ 1111 if (!(symptr->flags & FLAG_WAS_UTF8)) 1112 sv_utf8_downgrade(sv, 0); 1113 } 1114 mXPUSHs(sv); 1115 s += len; 1116 break; 1117 case 'B': 1118 case 'b': { 1119 char *str; 1120 if (howlen == e_star || len > (strend - s) * 8) 1121 len = (strend - s) * 8; 1122 if (checksum) { 1123 if (utf8) 1124 while (len >= 8 && s < strend) { 1125 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)]; 1126 len -= 8; 1127 } 1128 else 1129 while (len >= 8) { 1130 cuv += PL_bitcount[*(U8 *)s++]; 1131 len -= 8; 1132 } 1133 if (len && s < strend) { 1134 U8 bits; 1135 bits = SHIFT_BYTE(utf8, s, strend, datumtype); 1136 if (datumtype == 'b') 1137 while (len-- > 0) { 1138 if (bits & 1) cuv++; 1139 bits >>= 1; 1140 } 1141 else 1142 while (len-- > 0) { 1143 if (bits & 0x80) cuv++; 1144 bits <<= 1; 1145 } 1146 } 1147 break; 1148 } 1149 1150 sv = sv_2mortal(newSV(len ? len : 1)); 1151 SvPOK_on(sv); 1152 str = SvPVX(sv); 1153 if (datumtype == 'b') { 1154 U8 bits = 0; 1155 const I32 ai32 = len; 1156 for (len = 0; len < ai32; len++) { 1157 if (len & 7) bits >>= 1; 1158 else if (utf8) { 1159 if (s >= strend) break; 1160 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1161 } else bits = *(U8 *) s++; 1162 *str++ = bits & 1 ? '1' : '0'; 1163 } 1164 } else { 1165 U8 bits = 0; 1166 const I32 ai32 = len; 1167 for (len = 0; len < ai32; len++) { 1168 if (len & 7) bits <<= 1; 1169 else if (utf8) { 1170 if (s >= strend) break; 1171 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1172 } else bits = *(U8 *) s++; 1173 *str++ = bits & 0x80 ? '1' : '0'; 1174 } 1175 } 1176 *str = '\0'; 1177 SvCUR_set(sv, str - SvPVX_const(sv)); 1178 XPUSHs(sv); 1179 break; 1180 } 1181 case 'H': 1182 case 'h': { 1183 char *str = NULL; 1184 /* Preliminary length estimate, acceptable for utf8 too */ 1185 if (howlen == e_star || len > (strend - s) * 2) 1186 len = (strend - s) * 2; 1187 if (!checksum) { 1188 sv = sv_2mortal(newSV(len ? len : 1)); 1189 SvPOK_on(sv); 1190 str = SvPVX(sv); 1191 } 1192 if (datumtype == 'h') { 1193 U8 bits = 0; 1194 I32 ai32 = len; 1195 for (len = 0; len < ai32; len++) { 1196 if (len & 1) bits >>= 4; 1197 else if (utf8) { 1198 if (s >= strend) break; 1199 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1200 } else bits = * (U8 *) s++; 1201 if (!checksum) 1202 *str++ = PL_hexdigit[bits & 15]; 1203 } 1204 } else { 1205 U8 bits = 0; 1206 const I32 ai32 = len; 1207 for (len = 0; len < ai32; len++) { 1208 if (len & 1) bits <<= 4; 1209 else if (utf8) { 1210 if (s >= strend) break; 1211 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1212 } else bits = *(U8 *) s++; 1213 if (!checksum) 1214 *str++ = PL_hexdigit[(bits >> 4) & 15]; 1215 } 1216 } 1217 if (!checksum) { 1218 *str = '\0'; 1219 SvCUR_set(sv, str - SvPVX_const(sv)); 1220 XPUSHs(sv); 1221 } 1222 break; 1223 } 1224 case 'C': 1225 if (len == 0) { 1226 if (explicit_length) 1227 /* Switch to "character" mode */ 1228 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 1229 break; 1230 } 1231 /* FALLTHROUGH */ 1232 case 'c': 1233 while (len-- > 0 && s < strend) { 1234 int aint; 1235 if (utf8) 1236 { 1237 STRLEN retlen; 1238 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1239 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1240 if (retlen == (STRLEN) -1 || retlen == 0) 1241 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1242 s += retlen; 1243 } 1244 else 1245 aint = *(U8 *)(s)++; 1246 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ 1247 aint -= 256; 1248 if (!checksum) 1249 mPUSHi(aint); 1250 else if (checksum > bits_in_uv) 1251 cdouble += (NV)aint; 1252 else 1253 cuv += aint; 1254 } 1255 break; 1256 case 'W': 1257 W_checksum: 1258 if (utf8) { 1259 while (len-- > 0 && s < strend) { 1260 STRLEN retlen; 1261 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1262 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1263 if (retlen == (STRLEN) -1 || retlen == 0) 1264 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1265 s += retlen; 1266 if (!checksum) 1267 mPUSHu(val); 1268 else if (checksum > bits_in_uv) 1269 cdouble += (NV) val; 1270 else 1271 cuv += val; 1272 } 1273 } else if (!checksum) 1274 while (len-- > 0) { 1275 const U8 ch = *(U8 *) s++; 1276 mPUSHu(ch); 1277 } 1278 else if (checksum > bits_in_uv) 1279 while (len-- > 0) cdouble += (NV) *(U8 *) s++; 1280 else 1281 while (len-- > 0) cuv += *(U8 *) s++; 1282 break; 1283 case 'U': 1284 if (len == 0) { 1285 if (explicit_length && howlen != e_star) { 1286 /* Switch to "bytes in UTF-8" mode */ 1287 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0; 1288 else 1289 /* Should be impossible due to the need_utf8() test */ 1290 Perl_croak(aTHX_ "U0 mode on a byte string"); 1291 } 1292 break; 1293 } 1294 if (len > strend - s) len = strend - s; 1295 if (!checksum) { 1296 if (len && unpack_only_one) len = 1; 1297 EXTEND(SP, len); 1298 EXTEND_MORTAL(len); 1299 } 1300 while (len-- > 0 && s < strend) { 1301 STRLEN retlen; 1302 UV auv; 1303 if (utf8) { 1304 U8 result[UTF8_MAXLEN]; 1305 const char *ptr = s; 1306 STRLEN len; 1307 /* Bug: warns about bad utf8 even if we are short on bytes 1308 and will break out of the loop */ 1309 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 1310 'U')) 1311 break; 1312 len = UTF8SKIP(result); 1313 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, 1314 (char *) &result[1], len-1, 'U')) break; 1315 auv = NATIVE_TO_UNI(utf8n_to_uvchr(result, 1316 len, 1317 &retlen, 1318 UTF8_ALLOW_DEFAULT)); 1319 s = ptr; 1320 } else { 1321 auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s, 1322 strend - s, 1323 &retlen, 1324 UTF8_ALLOW_DEFAULT)); 1325 if (retlen == (STRLEN) -1 || retlen == 0) 1326 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1327 s += retlen; 1328 } 1329 if (!checksum) 1330 mPUSHu(auv); 1331 else if (checksum > bits_in_uv) 1332 cdouble += (NV) auv; 1333 else 1334 cuv += auv; 1335 } 1336 break; 1337 case 's' | TYPE_IS_SHRIEKING: 1338 #if SHORTSIZE != SIZE16 1339 while (len-- > 0) { 1340 short ashort; 1341 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap); 1342 if (!checksum) 1343 mPUSHi(ashort); 1344 else if (checksum > bits_in_uv) 1345 cdouble += (NV)ashort; 1346 else 1347 cuv += ashort; 1348 } 1349 break; 1350 #else 1351 /* FALLTHROUGH */ 1352 #endif 1353 case 's': 1354 while (len-- > 0) { 1355 I16 ai16; 1356 1357 #if U16SIZE > SIZE16 1358 ai16 = 0; 1359 #endif 1360 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1361 #if U16SIZE > SIZE16 1362 if (ai16 > 32767) 1363 ai16 -= 65536; 1364 #endif 1365 if (!checksum) 1366 mPUSHi(ai16); 1367 else if (checksum > bits_in_uv) 1368 cdouble += (NV)ai16; 1369 else 1370 cuv += ai16; 1371 } 1372 break; 1373 case 'S' | TYPE_IS_SHRIEKING: 1374 #if SHORTSIZE != SIZE16 1375 while (len-- > 0) { 1376 unsigned short aushort; 1377 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap, 1378 needs_swap); 1379 if (!checksum) 1380 mPUSHu(aushort); 1381 else if (checksum > bits_in_uv) 1382 cdouble += (NV)aushort; 1383 else 1384 cuv += aushort; 1385 } 1386 break; 1387 #else 1388 /* FALLTHROUGH */ 1389 #endif 1390 case 'v': 1391 case 'n': 1392 case 'S': 1393 while (len-- > 0) { 1394 U16 au16; 1395 #if U16SIZE > SIZE16 1396 au16 = 0; 1397 #endif 1398 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap); 1399 if (datumtype == 'n') 1400 au16 = PerlSock_ntohs(au16); 1401 if (datumtype == 'v') 1402 au16 = vtohs(au16); 1403 if (!checksum) 1404 mPUSHu(au16); 1405 else if (checksum > bits_in_uv) 1406 cdouble += (NV) au16; 1407 else 1408 cuv += au16; 1409 } 1410 break; 1411 case 'v' | TYPE_IS_SHRIEKING: 1412 case 'n' | TYPE_IS_SHRIEKING: 1413 while (len-- > 0) { 1414 I16 ai16; 1415 # if U16SIZE > SIZE16 1416 ai16 = 0; 1417 # endif 1418 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1419 /* There should never be any byte-swapping here. */ 1420 assert(!TYPE_ENDIANNESS(datumtype)); 1421 if (datumtype == ('n' | TYPE_IS_SHRIEKING)) 1422 ai16 = (I16) PerlSock_ntohs((U16) ai16); 1423 if (datumtype == ('v' | TYPE_IS_SHRIEKING)) 1424 ai16 = (I16) vtohs((U16) ai16); 1425 if (!checksum) 1426 mPUSHi(ai16); 1427 else if (checksum > bits_in_uv) 1428 cdouble += (NV) ai16; 1429 else 1430 cuv += ai16; 1431 } 1432 break; 1433 case 'i': 1434 case 'i' | TYPE_IS_SHRIEKING: 1435 while (len-- > 0) { 1436 int aint; 1437 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap); 1438 if (!checksum) 1439 mPUSHi(aint); 1440 else if (checksum > bits_in_uv) 1441 cdouble += (NV)aint; 1442 else 1443 cuv += aint; 1444 } 1445 break; 1446 case 'I': 1447 case 'I' | TYPE_IS_SHRIEKING: 1448 while (len-- > 0) { 1449 unsigned int auint; 1450 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap); 1451 if (!checksum) 1452 mPUSHu(auint); 1453 else if (checksum > bits_in_uv) 1454 cdouble += (NV)auint; 1455 else 1456 cuv += auint; 1457 } 1458 break; 1459 case 'j': 1460 while (len-- > 0) { 1461 IV aiv; 1462 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap); 1463 if (!checksum) 1464 mPUSHi(aiv); 1465 else if (checksum > bits_in_uv) 1466 cdouble += (NV)aiv; 1467 else 1468 cuv += aiv; 1469 } 1470 break; 1471 case 'J': 1472 while (len-- > 0) { 1473 UV auv; 1474 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap); 1475 if (!checksum) 1476 mPUSHu(auv); 1477 else if (checksum > bits_in_uv) 1478 cdouble += (NV)auv; 1479 else 1480 cuv += auv; 1481 } 1482 break; 1483 case 'l' | TYPE_IS_SHRIEKING: 1484 #if LONGSIZE != SIZE32 1485 while (len-- > 0) { 1486 long along; 1487 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap); 1488 if (!checksum) 1489 mPUSHi(along); 1490 else if (checksum > bits_in_uv) 1491 cdouble += (NV)along; 1492 else 1493 cuv += along; 1494 } 1495 break; 1496 #else 1497 /* FALLTHROUGH */ 1498 #endif 1499 case 'l': 1500 while (len-- > 0) { 1501 I32 ai32; 1502 #if U32SIZE > SIZE32 1503 ai32 = 0; 1504 #endif 1505 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1506 #if U32SIZE > SIZE32 1507 if (ai32 > 2147483647) ai32 -= 4294967296; 1508 #endif 1509 if (!checksum) 1510 mPUSHi(ai32); 1511 else if (checksum > bits_in_uv) 1512 cdouble += (NV)ai32; 1513 else 1514 cuv += ai32; 1515 } 1516 break; 1517 case 'L' | TYPE_IS_SHRIEKING: 1518 #if LONGSIZE != SIZE32 1519 while (len-- > 0) { 1520 unsigned long aulong; 1521 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap); 1522 if (!checksum) 1523 mPUSHu(aulong); 1524 else if (checksum > bits_in_uv) 1525 cdouble += (NV)aulong; 1526 else 1527 cuv += aulong; 1528 } 1529 break; 1530 #else 1531 /* FALLTHROUGH */ 1532 #endif 1533 case 'V': 1534 case 'N': 1535 case 'L': 1536 while (len-- > 0) { 1537 U32 au32; 1538 #if U32SIZE > SIZE32 1539 au32 = 0; 1540 #endif 1541 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap); 1542 if (datumtype == 'N') 1543 au32 = PerlSock_ntohl(au32); 1544 if (datumtype == 'V') 1545 au32 = vtohl(au32); 1546 if (!checksum) 1547 mPUSHu(au32); 1548 else if (checksum > bits_in_uv) 1549 cdouble += (NV)au32; 1550 else 1551 cuv += au32; 1552 } 1553 break; 1554 case 'V' | TYPE_IS_SHRIEKING: 1555 case 'N' | TYPE_IS_SHRIEKING: 1556 while (len-- > 0) { 1557 I32 ai32; 1558 #if U32SIZE > SIZE32 1559 ai32 = 0; 1560 #endif 1561 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1562 /* There should never be any byte swapping here. */ 1563 assert(!TYPE_ENDIANNESS(datumtype)); 1564 if (datumtype == ('N' | TYPE_IS_SHRIEKING)) 1565 ai32 = (I32)PerlSock_ntohl((U32)ai32); 1566 if (datumtype == ('V' | TYPE_IS_SHRIEKING)) 1567 ai32 = (I32)vtohl((U32)ai32); 1568 if (!checksum) 1569 mPUSHi(ai32); 1570 else if (checksum > bits_in_uv) 1571 cdouble += (NV)ai32; 1572 else 1573 cuv += ai32; 1574 } 1575 break; 1576 case 'p': 1577 while (len-- > 0) { 1578 const char *aptr; 1579 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1580 /* newSVpv generates undef if aptr is NULL */ 1581 mPUSHs(newSVpv(aptr, 0)); 1582 } 1583 break; 1584 case 'w': 1585 { 1586 UV auv = 0; 1587 U32 bytes = 0; 1588 1589 while (len > 0 && s < strend) { 1590 U8 ch; 1591 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1592 auv = (auv << 7) | (ch & 0x7f); 1593 /* UTF8_IS_XXXXX not right here because this is a BER, not 1594 * UTF-8 format - using constant 0x80 */ 1595 if (ch < 0x80) { 1596 bytes = 0; 1597 mPUSHu(auv); 1598 len--; 1599 auv = 0; 1600 continue; 1601 } 1602 if (++bytes >= sizeof(UV)) { /* promote to string */ 1603 const char *t; 1604 1605 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); 1606 while (s < strend) { 1607 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1608 sv = mul128(sv, (U8)(ch & 0x7f)); 1609 if (!(ch & 0x80)) { 1610 bytes = 0; 1611 break; 1612 } 1613 } 1614 t = SvPV_nolen_const(sv); 1615 while (*t == '0') 1616 t++; 1617 sv_chop(sv, t); 1618 mPUSHs(sv); 1619 len--; 1620 auv = 0; 1621 } 1622 } 1623 if ((s >= strend) && bytes) 1624 Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); 1625 } 1626 break; 1627 case 'P': 1628 if (symptr->howlen == e_star) 1629 Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); 1630 EXTEND(SP, 1); 1631 if (s + sizeof(char*) <= strend) { 1632 char *aptr; 1633 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1634 /* newSVpvn generates undef if aptr is NULL */ 1635 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP)); 1636 } 1637 break; 1638 #if defined(HAS_QUAD) && IVSIZE >= 8 1639 case 'q': 1640 while (len-- > 0) { 1641 Quad_t aquad; 1642 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap); 1643 if (!checksum) 1644 mPUSHs(newSViv((IV)aquad)); 1645 else if (checksum > bits_in_uv) 1646 cdouble += (NV)aquad; 1647 else 1648 cuv += aquad; 1649 } 1650 break; 1651 case 'Q': 1652 while (len-- > 0) { 1653 Uquad_t auquad; 1654 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap); 1655 if (!checksum) 1656 mPUSHs(newSVuv((UV)auquad)); 1657 else if (checksum > bits_in_uv) 1658 cdouble += (NV)auquad; 1659 else 1660 cuv += auquad; 1661 } 1662 break; 1663 #endif 1664 /* float and double added gnb@melba.bby.oz.au 22/11/89 */ 1665 case 'f': 1666 while (len-- > 0) { 1667 float afloat; 1668 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap); 1669 if (!checksum) 1670 mPUSHn(afloat); 1671 else 1672 cdouble += afloat; 1673 } 1674 break; 1675 case 'd': 1676 while (len-- > 0) { 1677 double adouble; 1678 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap); 1679 if (!checksum) 1680 mPUSHn(adouble); 1681 else 1682 cdouble += adouble; 1683 } 1684 break; 1685 case 'F': 1686 while (len-- > 0) { 1687 NV_bytes anv; 1688 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), 1689 datumtype, needs_swap); 1690 if (!checksum) 1691 mPUSHn(anv.nv); 1692 else 1693 cdouble += anv.nv; 1694 } 1695 break; 1696 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 1697 case 'D': 1698 while (len-- > 0) { 1699 ld_bytes aldouble; 1700 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, 1701 sizeof(aldouble.bytes), datumtype, needs_swap); 1702 /* The most common long double format, the x86 80-bit 1703 * extended precision, has either 2 or 6 unused bytes, 1704 * which may contain garbage, which may contain 1705 * unintentional data. While we do zero the bytes of 1706 * the long double data in pack(), here in unpack() we 1707 * don't, because it's really hard to envision that 1708 * reading the long double off aldouble would be 1709 * affected by the unused bytes. 1710 * 1711 * Note that trying to unpack 'long doubles' of 'long 1712 * doubles' packed in another system is in the general 1713 * case doomed without having more detail. */ 1714 if (!checksum) 1715 mPUSHn(aldouble.ld); 1716 else 1717 cdouble += aldouble.ld; 1718 } 1719 break; 1720 #endif 1721 case 'u': 1722 if (!checksum) { 1723 const STRLEN l = (STRLEN) (strend - s) * 3 / 4; 1724 sv = sv_2mortal(newSV(l)); 1725 if (l) SvPOK_on(sv); 1726 } 1727 1728 /* Note that all legal uuencoded strings are ASCII printables, so 1729 * have the same representation under UTF-8 vs not. This means we 1730 * can ignore UTF8ness on legal input. For illegal we stop at the 1731 * first failure, and don't report where/what that is, so again we 1732 * can ignore UTF8ness */ 1733 1734 while (s < strend && *s != ' ' && ISUUCHAR(*s)) { 1735 I32 a, b, c, d; 1736 char hunk[3]; 1737 1738 len = PL_uudmap[*(U8*)s++] & 077; 1739 while (len > 0) { 1740 if (s < strend && ISUUCHAR(*s)) 1741 a = PL_uudmap[*(U8*)s++] & 077; 1742 else 1743 a = 0; 1744 if (s < strend && ISUUCHAR(*s)) 1745 b = PL_uudmap[*(U8*)s++] & 077; 1746 else 1747 b = 0; 1748 if (s < strend && ISUUCHAR(*s)) 1749 c = PL_uudmap[*(U8*)s++] & 077; 1750 else 1751 c = 0; 1752 if (s < strend && ISUUCHAR(*s)) 1753 d = PL_uudmap[*(U8*)s++] & 077; 1754 else 1755 d = 0; 1756 hunk[0] = (char)((a << 2) | (b >> 4)); 1757 hunk[1] = (char)((b << 4) | (c >> 2)); 1758 hunk[2] = (char)((c << 6) | d); 1759 if (!checksum) 1760 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 1761 len -= 3; 1762 } 1763 if (*s == '\n') 1764 s++; 1765 else /* possible checksum byte */ 1766 if (s + 1 < strend && s[1] == '\n') 1767 s += 2; 1768 } 1769 if (!checksum) 1770 XPUSHs(sv); 1771 break; 1772 } /* End of switch */ 1773 1774 if (checksum) { 1775 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) || 1776 (checksum > bits_in_uv && 1777 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) { 1778 NV trouble, anv; 1779 1780 anv = (NV) (1 << (checksum & 15)); 1781 while (checksum >= 16) { 1782 checksum -= 16; 1783 anv *= 65536.0; 1784 } 1785 while (cdouble < 0.0) 1786 cdouble += anv; 1787 cdouble = Perl_modf(cdouble / anv, &trouble); 1788 #ifdef LONGDOUBLE_DOUBLEDOUBLE 1789 /* Workaround for powerpc doubledouble modfl bug: 1790 * close to 1.0L and -1.0L cdouble is 0, and trouble 1791 * is cdouble / anv. */ 1792 if (trouble != Perl_ceil(trouble)) { 1793 cdouble = trouble; 1794 if (cdouble > 1.0L) cdouble -= 1.0L; 1795 if (cdouble < -1.0L) cdouble += 1.0L; 1796 } 1797 #endif 1798 cdouble *= anv; 1799 sv = newSVnv(cdouble); 1800 } 1801 else { 1802 if (checksum < bits_in_uv) { 1803 UV mask = ((UV)1 << checksum) - 1; 1804 cuv &= mask; 1805 } 1806 sv = newSVuv(cuv); 1807 } 1808 mXPUSHs(sv); 1809 checksum = 0; 1810 } 1811 1812 if (symptr->flags & FLAG_SLASH){ 1813 if (SP - PL_stack_base - start_sp_offset <= 0) 1814 break; 1815 if( next_symbol(symptr) ){ 1816 if( symptr->howlen == e_number ) 1817 Perl_croak(aTHX_ "Count after length/code in unpack" ); 1818 if( beyond ){ 1819 /* ...end of char buffer then no decent length available */ 1820 Perl_croak(aTHX_ "length/code after end of string in unpack" ); 1821 } else { 1822 /* take top of stack (hope it's numeric) */ 1823 len = POPi; 1824 if( len < 0 ) 1825 Perl_croak(aTHX_ "Negative '/' count in unpack" ); 1826 } 1827 } else { 1828 Perl_croak(aTHX_ "Code missing after '/' in unpack" ); 1829 } 1830 datumtype = symptr->code; 1831 explicit_length = FALSE; 1832 goto redo_switch; 1833 } 1834 } 1835 1836 if (new_s) 1837 *new_s = s; 1838 PUTBACK; 1839 return SP - PL_stack_base - start_sp_offset; 1840 } 1841 1842 PP(pp_unpack) 1843 { 1844 dSP; 1845 dPOPPOPssrl; 1846 U8 gimme = GIMME_V; 1847 STRLEN llen; 1848 STRLEN rlen; 1849 const char *pat = SvPV_const(left, llen); 1850 const char *s = SvPV_const(right, rlen); 1851 const char *strend = s + rlen; 1852 const char *patend = pat + llen; 1853 I32 cnt; 1854 1855 PUTBACK; 1856 cnt = unpackstring(pat, patend, s, strend, 1857 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) 1858 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0)); 1859 1860 SPAGAIN; 1861 if ( !cnt && gimme == G_SCALAR ) 1862 PUSHs(&PL_sv_undef); 1863 RETURN; 1864 } 1865 1866 STATIC U8 * 1867 doencodes(U8 *h, const U8 *s, I32 len) 1868 { 1869 *h++ = PL_uuemap[len]; 1870 while (len > 2) { 1871 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1872 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))]; 1873 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; 1874 *h++ = PL_uuemap[(077 & (s[2] & 077))]; 1875 s += 3; 1876 len -= 3; 1877 } 1878 if (len > 0) { 1879 const U8 r = (len > 1 ? s[1] : '\0'); 1880 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1881 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))]; 1882 *h++ = PL_uuemap[(077 & ((r << 2) & 074))]; 1883 *h++ = PL_uuemap[0]; 1884 } 1885 *h++ = '\n'; 1886 return h; 1887 } 1888 1889 STATIC SV * 1890 S_is_an_int(pTHX_ const char *s, STRLEN l) 1891 { 1892 SV *result = newSVpvn(s, l); 1893 char *const result_c = SvPV_nolen(result); /* convenience */ 1894 char *out = result_c; 1895 bool skip = 1; 1896 bool ignore = 0; 1897 1898 PERL_ARGS_ASSERT_IS_AN_INT; 1899 1900 while (*s) { 1901 switch (*s) { 1902 case ' ': 1903 break; 1904 case '+': 1905 if (!skip) { 1906 SvREFCNT_dec(result); 1907 return (NULL); 1908 } 1909 break; 1910 case '0': 1911 case '1': 1912 case '2': 1913 case '3': 1914 case '4': 1915 case '5': 1916 case '6': 1917 case '7': 1918 case '8': 1919 case '9': 1920 skip = 0; 1921 if (!ignore) { 1922 *(out++) = *s; 1923 } 1924 break; 1925 case '.': 1926 ignore = 1; 1927 break; 1928 default: 1929 SvREFCNT_dec(result); 1930 return (NULL); 1931 } 1932 s++; 1933 } 1934 *(out++) = '\0'; 1935 SvCUR_set(result, out - result_c); 1936 return (result); 1937 } 1938 1939 /* pnum must be '\0' terminated */ 1940 STATIC int 1941 S_div128(pTHX_ SV *pnum, bool *done) 1942 { 1943 STRLEN len; 1944 char * const s = SvPV(pnum, len); 1945 char *t = s; 1946 int m = 0; 1947 1948 PERL_ARGS_ASSERT_DIV128; 1949 1950 *done = 1; 1951 while (*t) { 1952 const int i = m * 10 + (*t - '0'); 1953 const int r = (i >> 7); /* r < 10 */ 1954 m = i & 0x7F; 1955 if (r) { 1956 *done = 0; 1957 } 1958 *(t++) = '0' + r; 1959 } 1960 *(t++) = '\0'; 1961 SvCUR_set(pnum, (STRLEN) (t - s)); 1962 return (m); 1963 } 1964 1965 /* 1966 =for apidoc packlist 1967 1968 The engine implementing C<pack()> Perl function. 1969 1970 =cut 1971 */ 1972 1973 void 1974 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist ) 1975 { 1976 tempsym_t sym; 1977 1978 PERL_ARGS_ASSERT_PACKLIST; 1979 1980 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); 1981 1982 /* We're going to do changes through SvPVX(cat). Make sure it's valid. 1983 Also make sure any UTF8 flag is loaded */ 1984 SvPV_force_nolen(cat); 1985 if (DO_UTF8(cat)) 1986 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; 1987 1988 (void)pack_rec( cat, &sym, beglist, endlist ); 1989 } 1990 1991 /* like sv_utf8_upgrade, but also repoint the group start markers */ 1992 STATIC void 1993 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { 1994 STRLEN len; 1995 tempsym_t *group; 1996 const char *from_ptr, *from_start, *from_end, **marks, **m; 1997 char *to_start, *to_ptr; 1998 1999 if (SvUTF8(sv)) return; 2000 2001 from_start = SvPVX_const(sv); 2002 from_end = from_start + SvCUR(sv); 2003 for (from_ptr = from_start; from_ptr < from_end; from_ptr++) 2004 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break; 2005 if (from_ptr == from_end) { 2006 /* Simple case: no character needs to be changed */ 2007 SvUTF8_on(sv); 2008 return; 2009 } 2010 2011 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; 2012 Newx(to_start, len, char); 2013 Copy(from_start, to_start, from_ptr-from_start, char); 2014 to_ptr = to_start + (from_ptr-from_start); 2015 2016 Newx(marks, sym_ptr->level+2, const char *); 2017 for (group=sym_ptr; group; group = group->previous) 2018 marks[group->level] = from_start + group->strbeg; 2019 marks[sym_ptr->level+1] = from_end+1; 2020 for (m = marks; *m < from_ptr; m++) 2021 *m = to_start + (*m-from_start); 2022 2023 for (;from_ptr < from_end; from_ptr++) { 2024 while (*m == from_ptr) *m++ = to_ptr; 2025 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); 2026 } 2027 *to_ptr = 0; 2028 2029 while (*m == from_ptr) *m++ = to_ptr; 2030 if (m != marks + sym_ptr->level+1) { 2031 Safefree(marks); 2032 Safefree(to_start); 2033 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, " 2034 "level=%d", m, marks, sym_ptr->level); 2035 } 2036 for (group=sym_ptr; group; group = group->previous) 2037 group->strbeg = marks[group->level] - to_start; 2038 Safefree(marks); 2039 2040 if (SvOOK(sv)) { 2041 if (SvIVX(sv)) { 2042 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); 2043 from_start -= SvIVX(sv); 2044 SvIV_set(sv, 0); 2045 } 2046 SvFLAGS(sv) &= ~SVf_OOK; 2047 } 2048 if (SvLEN(sv) != 0) 2049 Safefree(from_start); 2050 SvPV_set(sv, to_start); 2051 SvCUR_set(sv, to_ptr - to_start); 2052 SvLEN_set(sv, len); 2053 SvUTF8_on(sv); 2054 } 2055 2056 /* Exponential string grower. Makes string extension effectively O(n) 2057 needed says how many extra bytes we need (not counting the final '\0') 2058 Only grows the string if there is an actual lack of space 2059 */ 2060 STATIC char * 2061 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) { 2062 const STRLEN cur = SvCUR(sv); 2063 const STRLEN len = SvLEN(sv); 2064 STRLEN extend; 2065 2066 PERL_ARGS_ASSERT_SV_EXP_GROW; 2067 2068 if (len - cur > needed) return SvPVX(sv); 2069 extend = needed > len ? needed : len; 2070 return SvGROW(sv, len+extend+1); 2071 } 2072 2073 static SV * 2074 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype) 2075 { 2076 SvGETMAGIC(sv); 2077 if (UNLIKELY(SvAMAGIC(sv))) 2078 sv = sv_2num(sv); 2079 if (UNLIKELY(isinfnansv(sv))) { 2080 const I32 c = TYPE_NO_MODIFIERS(datumtype); 2081 const NV nv = SvNV_nomg(sv); 2082 if (c == 'w') 2083 Perl_croak(aTHX_ "Cannot compress %"NVgf" in pack", nv); 2084 else 2085 Perl_croak(aTHX_ "Cannot pack %"NVgf" with '%c'", nv, (int) c); 2086 } 2087 return sv; 2088 } 2089 2090 #define SvIV_no_inf(sv,d) \ 2091 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv)) 2092 #define SvUV_no_inf(sv,d) \ 2093 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv)) 2094 2095 STATIC 2096 SV ** 2097 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) 2098 { 2099 tempsym_t lookahead; 2100 I32 items = endlist - beglist; 2101 bool found = next_symbol(symptr); 2102 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 2103 bool warn_utf8 = ckWARN(WARN_UTF8); 2104 char* from; 2105 2106 PERL_ARGS_ASSERT_PACK_REC; 2107 2108 if (symptr->level == 0 && found && symptr->code == 'U') { 2109 marked_upgrade(aTHX_ cat, symptr); 2110 symptr->flags |= FLAG_DO_UTF8; 2111 utf8 = 0; 2112 } 2113 symptr->strbeg = SvCUR(cat); 2114 2115 while (found) { 2116 SV *fromstr; 2117 STRLEN fromlen; 2118 I32 len; 2119 SV *lengthcode = NULL; 2120 I32 datumtype = symptr->code; 2121 howlen_t howlen = symptr->howlen; 2122 char *start = SvPVX(cat); 2123 char *cur = start + SvCUR(cat); 2124 bool needs_swap; 2125 2126 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no) 2127 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no) 2128 2129 switch (howlen) { 2130 case e_star: 2131 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 2132 0 : items; 2133 break; 2134 default: 2135 /* e_no_len and e_number */ 2136 len = symptr->length; 2137 break; 2138 } 2139 2140 if (len) { 2141 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 2142 2143 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) { 2144 /* We can process this letter. */ 2145 STRLEN size = props & PACK_SIZE_MASK; 2146 GROWING2(utf8, cat, start, cur, size, (STRLEN)len); 2147 } 2148 } 2149 2150 /* Look ahead for next symbol. Do we have code/code? */ 2151 lookahead = *symptr; 2152 found = next_symbol(&lookahead); 2153 if (symptr->flags & FLAG_SLASH) { 2154 IV count; 2155 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack"); 2156 if (strchr("aAZ", lookahead.code)) { 2157 if (lookahead.howlen == e_number) count = lookahead.length; 2158 else { 2159 if (items > 0) { 2160 count = sv_len_utf8(*beglist); 2161 } 2162 else count = 0; 2163 if (lookahead.code == 'Z') count++; 2164 } 2165 } else { 2166 if (lookahead.howlen == e_number && lookahead.length < items) 2167 count = lookahead.length; 2168 else count = items; 2169 } 2170 lookahead.howlen = e_number; 2171 lookahead.length = count; 2172 lengthcode = sv_2mortal(newSViv(count)); 2173 } 2174 2175 needs_swap = NEEDS_SWAP(datumtype); 2176 2177 /* Code inside the switch must take care to properly update 2178 cat (CUR length and '\0' termination) if it updated *cur and 2179 doesn't simply leave using break */ 2180 switch (TYPE_NO_ENDIANNESS(datumtype)) { 2181 default: 2182 Perl_croak(aTHX_ "Invalid type '%c' in pack", 2183 (int) TYPE_NO_MODIFIERS(datumtype)); 2184 case '%': 2185 Perl_croak(aTHX_ "'%%' may not be used in pack"); 2186 2187 case '.' | TYPE_IS_SHRIEKING: 2188 case '.': 2189 if (howlen == e_star) from = start; 2190 else if (len == 0) from = cur; 2191 else { 2192 tempsym_t *group = symptr; 2193 2194 while (--len && group) group = group->previous; 2195 from = group ? start + group->strbeg : start; 2196 } 2197 fromstr = NEXTFROM; 2198 len = SvIV_no_inf(fromstr, datumtype); 2199 goto resize; 2200 case '@' | TYPE_IS_SHRIEKING: 2201 case '@': 2202 from = start + symptr->strbeg; 2203 resize: 2204 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 2205 if (len >= 0) { 2206 while (len && from < cur) { 2207 from += UTF8SKIP(from); 2208 len--; 2209 } 2210 if (from > cur) 2211 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2212 if (len) { 2213 /* Here we know from == cur */ 2214 grow: 2215 GROWING(0, cat, start, cur, len); 2216 Zero(cur, len, char); 2217 cur += len; 2218 } else if (from < cur) { 2219 len = cur - from; 2220 goto shrink; 2221 } else goto no_change; 2222 } else { 2223 cur = from; 2224 len = -len; 2225 goto utf8_shrink; 2226 } 2227 else { 2228 len -= cur - from; 2229 if (len > 0) goto grow; 2230 if (len == 0) goto no_change; 2231 len = -len; 2232 goto shrink; 2233 } 2234 break; 2235 2236 case '(': { 2237 tempsym_t savsym = *symptr; 2238 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 2239 symptr->flags |= group_modifiers; 2240 symptr->patend = savsym.grpend; 2241 symptr->level++; 2242 symptr->previous = &lookahead; 2243 while (len--) { 2244 U32 was_utf8; 2245 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 2246 else symptr->flags &= ~FLAG_PARSE_UTF8; 2247 was_utf8 = SvUTF8(cat); 2248 symptr->patptr = savsym.grpbeg; 2249 beglist = pack_rec(cat, symptr, beglist, endlist); 2250 if (SvUTF8(cat) != was_utf8) 2251 /* This had better be an upgrade while in utf8==0 mode */ 2252 utf8 = 1; 2253 2254 if (savsym.howlen == e_star && beglist == endlist) 2255 break; /* No way to continue */ 2256 } 2257 items = endlist - beglist; 2258 lookahead.flags = symptr->flags & ~group_modifiers; 2259 goto no_change; 2260 } 2261 case 'X' | TYPE_IS_SHRIEKING: 2262 if (!len) /* Avoid division by 0 */ 2263 len = 1; 2264 if (utf8) { 2265 char *hop, *last; 2266 I32 l = len; 2267 hop = last = start; 2268 while (hop < cur) { 2269 hop += UTF8SKIP(hop); 2270 if (--l == 0) { 2271 last = hop; 2272 l = len; 2273 } 2274 } 2275 if (last > cur) 2276 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2277 cur = last; 2278 break; 2279 } 2280 len = (cur-start) % len; 2281 /* FALLTHROUGH */ 2282 case 'X': 2283 if (utf8) { 2284 if (len < 1) goto no_change; 2285 utf8_shrink: 2286 while (len > 0) { 2287 if (cur <= start) 2288 Perl_croak(aTHX_ "'%c' outside of string in pack", 2289 (int) TYPE_NO_MODIFIERS(datumtype)); 2290 while (--cur, UTF8_IS_CONTINUATION(*cur)) { 2291 if (cur <= start) 2292 Perl_croak(aTHX_ "'%c' outside of string in pack", 2293 (int) TYPE_NO_MODIFIERS(datumtype)); 2294 } 2295 len--; 2296 } 2297 } else { 2298 shrink: 2299 if (cur - start < len) 2300 Perl_croak(aTHX_ "'%c' outside of string in pack", 2301 (int) TYPE_NO_MODIFIERS(datumtype)); 2302 cur -= len; 2303 } 2304 if (cur < start+symptr->strbeg) { 2305 /* Make sure group starts don't point into the void */ 2306 tempsym_t *group; 2307 const STRLEN length = cur-start; 2308 for (group = symptr; 2309 group && length < group->strbeg; 2310 group = group->previous) group->strbeg = length; 2311 lookahead.strbeg = length; 2312 } 2313 break; 2314 case 'x' | TYPE_IS_SHRIEKING: { 2315 I32 ai32; 2316 if (!len) /* Avoid division by 0 */ 2317 len = 1; 2318 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len; 2319 else ai32 = (cur - start) % len; 2320 if (ai32 == 0) goto no_change; 2321 len -= ai32; 2322 } 2323 /* FALLTHROUGH */ 2324 case 'x': 2325 goto grow; 2326 case 'A': 2327 case 'Z': 2328 case 'a': { 2329 const char *aptr; 2330 2331 fromstr = NEXTFROM; 2332 aptr = SvPV_const(fromstr, fromlen); 2333 if (DO_UTF8(fromstr)) { 2334 const char *end, *s; 2335 2336 if (!utf8 && !SvUTF8(cat)) { 2337 marked_upgrade(aTHX_ cat, symptr); 2338 lookahead.flags |= FLAG_DO_UTF8; 2339 lookahead.strbeg = symptr->strbeg; 2340 utf8 = 1; 2341 start = SvPVX(cat); 2342 cur = start + SvCUR(cat); 2343 } 2344 if (howlen == e_star) { 2345 if (utf8) goto string_copy; 2346 len = fromlen+1; 2347 } 2348 s = aptr; 2349 end = aptr + fromlen; 2350 fromlen = datumtype == 'Z' ? len-1 : len; 2351 while ((I32) fromlen > 0 && s < end) { 2352 s += UTF8SKIP(s); 2353 fromlen--; 2354 } 2355 if (s > end) 2356 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2357 if (utf8) { 2358 len = fromlen; 2359 if (datumtype == 'Z') len++; 2360 fromlen = s-aptr; 2361 len += fromlen; 2362 2363 goto string_copy; 2364 } 2365 fromlen = len - fromlen; 2366 if (datumtype == 'Z') fromlen--; 2367 if (howlen == e_star) { 2368 len = fromlen; 2369 if (datumtype == 'Z') len++; 2370 } 2371 GROWING(0, cat, start, cur, len); 2372 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen, 2373 datumtype | TYPE_IS_PACK)) 2374 Perl_croak(aTHX_ "panic: predicted utf8 length not available, " 2375 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf, 2376 (int)datumtype, aptr, end, cur, (UV)fromlen); 2377 cur += fromlen; 2378 len -= fromlen; 2379 } else if (utf8) { 2380 if (howlen == e_star) { 2381 len = fromlen; 2382 if (datumtype == 'Z') len++; 2383 } 2384 if (len <= (I32) fromlen) { 2385 fromlen = len; 2386 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2387 } 2388 /* assumes a byte expands to at most UTF8_EXPAND bytes on 2389 upgrade, so: 2390 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ 2391 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); 2392 len -= fromlen; 2393 while (fromlen > 0) { 2394 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); 2395 aptr++; 2396 fromlen--; 2397 } 2398 } else { 2399 string_copy: 2400 if (howlen == e_star) { 2401 len = fromlen; 2402 if (datumtype == 'Z') len++; 2403 } 2404 if (len <= (I32) fromlen) { 2405 fromlen = len; 2406 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2407 } 2408 GROWING(0, cat, start, cur, len); 2409 Copy(aptr, cur, fromlen, char); 2410 cur += fromlen; 2411 len -= fromlen; 2412 } 2413 memset(cur, datumtype == 'A' ? ' ' : '\0', len); 2414 cur += len; 2415 SvTAINT(cat); 2416 break; 2417 } 2418 case 'B': 2419 case 'b': { 2420 const char *str, *end; 2421 I32 l, field_len; 2422 U8 bits; 2423 bool utf8_source; 2424 U32 utf8_flags; 2425 2426 fromstr = NEXTFROM; 2427 str = SvPV_const(fromstr, fromlen); 2428 end = str + fromlen; 2429 if (DO_UTF8(fromstr)) { 2430 utf8_source = TRUE; 2431 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2432 } else { 2433 utf8_source = FALSE; 2434 utf8_flags = 0; /* Unused, but keep compilers happy */ 2435 } 2436 if (howlen == e_star) len = fromlen; 2437 field_len = (len+7)/8; 2438 GROWING(utf8, cat, start, cur, field_len); 2439 if (len > (I32)fromlen) len = fromlen; 2440 bits = 0; 2441 l = 0; 2442 if (datumtype == 'B') 2443 while (l++ < len) { 2444 if (utf8_source) { 2445 UV val = 0; 2446 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2447 bits |= val & 1; 2448 } else bits |= *str++ & 1; 2449 if (l & 7) bits <<= 1; 2450 else { 2451 PUSH_BYTE(utf8, cur, bits); 2452 bits = 0; 2453 } 2454 } 2455 else 2456 /* datumtype == 'b' */ 2457 while (l++ < len) { 2458 if (utf8_source) { 2459 UV val = 0; 2460 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2461 if (val & 1) bits |= 0x80; 2462 } else if (*str++ & 1) 2463 bits |= 0x80; 2464 if (l & 7) bits >>= 1; 2465 else { 2466 PUSH_BYTE(utf8, cur, bits); 2467 bits = 0; 2468 } 2469 } 2470 l--; 2471 if (l & 7) { 2472 if (datumtype == 'B') 2473 bits <<= 7 - (l & 7); 2474 else 2475 bits >>= 7 - (l & 7); 2476 PUSH_BYTE(utf8, cur, bits); 2477 l += 7; 2478 } 2479 /* Determine how many chars are left in the requested field */ 2480 l /= 8; 2481 if (howlen == e_star) field_len = 0; 2482 else field_len -= l; 2483 Zero(cur, field_len, char); 2484 cur += field_len; 2485 break; 2486 } 2487 case 'H': 2488 case 'h': { 2489 const char *str, *end; 2490 I32 l, field_len; 2491 U8 bits; 2492 bool utf8_source; 2493 U32 utf8_flags; 2494 2495 fromstr = NEXTFROM; 2496 str = SvPV_const(fromstr, fromlen); 2497 end = str + fromlen; 2498 if (DO_UTF8(fromstr)) { 2499 utf8_source = TRUE; 2500 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2501 } else { 2502 utf8_source = FALSE; 2503 utf8_flags = 0; /* Unused, but keep compilers happy */ 2504 } 2505 if (howlen == e_star) len = fromlen; 2506 field_len = (len+1)/2; 2507 GROWING(utf8, cat, start, cur, field_len); 2508 if (!utf8_source && len > (I32)fromlen) len = fromlen; 2509 bits = 0; 2510 l = 0; 2511 if (datumtype == 'H') 2512 while (l++ < len) { 2513 if (utf8_source) { 2514 UV val = 0; 2515 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2516 if (val < 256 && isALPHA(val)) 2517 bits |= (val + 9) & 0xf; 2518 else 2519 bits |= val & 0xf; 2520 } else if (isALPHA(*str)) 2521 bits |= (*str++ + 9) & 0xf; 2522 else 2523 bits |= *str++ & 0xf; 2524 if (l & 1) bits <<= 4; 2525 else { 2526 PUSH_BYTE(utf8, cur, bits); 2527 bits = 0; 2528 } 2529 } 2530 else 2531 while (l++ < len) { 2532 if (utf8_source) { 2533 UV val = 0; 2534 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2535 if (val < 256 && isALPHA(val)) 2536 bits |= ((val + 9) & 0xf) << 4; 2537 else 2538 bits |= (val & 0xf) << 4; 2539 } else if (isALPHA(*str)) 2540 bits |= ((*str++ + 9) & 0xf) << 4; 2541 else 2542 bits |= (*str++ & 0xf) << 4; 2543 if (l & 1) bits >>= 4; 2544 else { 2545 PUSH_BYTE(utf8, cur, bits); 2546 bits = 0; 2547 } 2548 } 2549 l--; 2550 if (l & 1) { 2551 PUSH_BYTE(utf8, cur, bits); 2552 l++; 2553 } 2554 /* Determine how many chars are left in the requested field */ 2555 l /= 2; 2556 if (howlen == e_star) field_len = 0; 2557 else field_len -= l; 2558 Zero(cur, field_len, char); 2559 cur += field_len; 2560 break; 2561 } 2562 case 'c': 2563 while (len-- > 0) { 2564 IV aiv; 2565 fromstr = NEXTFROM; 2566 aiv = SvIV_no_inf(fromstr, datumtype); 2567 if ((-128 > aiv || aiv > 127)) 2568 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2569 "Character in 'c' format wrapped in pack"); 2570 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2571 } 2572 break; 2573 case 'C': 2574 if (len == 0) { 2575 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 2576 break; 2577 } 2578 while (len-- > 0) { 2579 IV aiv; 2580 fromstr = NEXTFROM; 2581 aiv = SvIV_no_inf(fromstr, datumtype); 2582 if ((0 > aiv || aiv > 0xff)) 2583 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2584 "Character in 'C' format wrapped in pack"); 2585 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2586 } 2587 break; 2588 case 'W': { 2589 char *end; 2590 U8 in_bytes = (U8)IN_BYTES; 2591 2592 end = start+SvLEN(cat)-1; 2593 if (utf8) end -= UTF8_MAXLEN-1; 2594 while (len-- > 0) { 2595 UV auv; 2596 fromstr = NEXTFROM; 2597 auv = SvUV_no_inf(fromstr, datumtype); 2598 if (in_bytes) auv = auv % 0x100; 2599 if (utf8) { 2600 W_utf8: 2601 if (cur > end) { 2602 *cur = '\0'; 2603 SvCUR_set(cat, cur - start); 2604 2605 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2606 end = start+SvLEN(cat)-UTF8_MAXLEN; 2607 } 2608 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, 2609 auv, 2610 warn_utf8 ? 2611 0 : UNICODE_ALLOW_ANY); 2612 } else { 2613 if (auv >= 0x100) { 2614 if (!SvUTF8(cat)) { 2615 *cur = '\0'; 2616 SvCUR_set(cat, cur - start); 2617 marked_upgrade(aTHX_ cat, symptr); 2618 lookahead.flags |= FLAG_DO_UTF8; 2619 lookahead.strbeg = symptr->strbeg; 2620 utf8 = 1; 2621 start = SvPVX(cat); 2622 cur = start + SvCUR(cat); 2623 end = start+SvLEN(cat)-UTF8_MAXLEN; 2624 goto W_utf8; 2625 } 2626 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2627 "Character in 'W' format wrapped in pack"); 2628 auv &= 0xff; 2629 } 2630 if (cur >= end) { 2631 *cur = '\0'; 2632 SvCUR_set(cat, cur - start); 2633 GROWING(0, cat, start, cur, len+1); 2634 end = start+SvLEN(cat)-1; 2635 } 2636 *(U8 *) cur++ = (U8)auv; 2637 } 2638 } 2639 break; 2640 } 2641 case 'U': { 2642 char *end; 2643 2644 if (len == 0) { 2645 if (!(symptr->flags & FLAG_DO_UTF8)) { 2646 marked_upgrade(aTHX_ cat, symptr); 2647 lookahead.flags |= FLAG_DO_UTF8; 2648 lookahead.strbeg = symptr->strbeg; 2649 } 2650 utf8 = 0; 2651 goto no_change; 2652 } 2653 2654 end = start+SvLEN(cat); 2655 if (!utf8) end -= UTF8_MAXLEN; 2656 while (len-- > 0) { 2657 UV auv; 2658 fromstr = NEXTFROM; 2659 auv = SvUV_no_inf(fromstr, datumtype); 2660 if (utf8) { 2661 U8 buffer[UTF8_MAXLEN], *endb; 2662 endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 2663 warn_utf8 ? 2664 0 : UNICODE_ALLOW_ANY); 2665 if (cur+(endb-buffer)*UTF8_EXPAND >= end) { 2666 *cur = '\0'; 2667 SvCUR_set(cat, cur - start); 2668 GROWING(0, cat, start, cur, 2669 len+(endb-buffer)*UTF8_EXPAND); 2670 end = start+SvLEN(cat); 2671 } 2672 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0); 2673 } else { 2674 if (cur >= end) { 2675 *cur = '\0'; 2676 SvCUR_set(cat, cur - start); 2677 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2678 end = start+SvLEN(cat)-UTF8_MAXLEN; 2679 } 2680 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, UNI_TO_NATIVE(auv), 2681 warn_utf8 ? 2682 0 : UNICODE_ALLOW_ANY); 2683 } 2684 } 2685 break; 2686 } 2687 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ 2688 case 'f': 2689 while (len-- > 0) { 2690 float afloat; 2691 NV anv; 2692 fromstr = NEXTFROM; 2693 anv = SvNV(fromstr); 2694 # if defined(VMS) && !defined(_IEEE_FP) 2695 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2696 * on Alpha; fake it if we don't have them. 2697 */ 2698 if (anv > FLT_MAX) 2699 afloat = FLT_MAX; 2700 else if (anv < -FLT_MAX) 2701 afloat = -FLT_MAX; 2702 else afloat = (float)anv; 2703 # else 2704 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2705 if(Perl_isnan(anv)) 2706 afloat = (float)NV_NAN; 2707 else 2708 #endif 2709 /* a simple cast to float is undefined if outside 2710 * the range of values that can be represented */ 2711 afloat = (float)(anv > FLT_MAX ? NV_INF : 2712 anv < -FLT_MAX ? -NV_INF : anv); 2713 # endif 2714 PUSH_VAR(utf8, cur, afloat, needs_swap); 2715 } 2716 break; 2717 case 'd': 2718 while (len-- > 0) { 2719 double adouble; 2720 NV anv; 2721 fromstr = NEXTFROM; 2722 anv = SvNV(fromstr); 2723 # if defined(VMS) && !defined(_IEEE_FP) 2724 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2725 * on Alpha; fake it if we don't have them. 2726 */ 2727 if (anv > DBL_MAX) 2728 adouble = DBL_MAX; 2729 else if (anv < -DBL_MAX) 2730 adouble = -DBL_MAX; 2731 else adouble = (double)anv; 2732 # else 2733 adouble = (double)anv; 2734 # endif 2735 PUSH_VAR(utf8, cur, adouble, needs_swap); 2736 } 2737 break; 2738 case 'F': { 2739 NV_bytes anv; 2740 Zero(&anv, 1, NV); /* can be long double with unused bits */ 2741 while (len-- > 0) { 2742 fromstr = NEXTFROM; 2743 #ifdef __GNUC__ 2744 /* to work round a gcc/x86 bug; don't use SvNV */ 2745 anv.nv = sv_2nv(fromstr); 2746 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \ 2747 && LONG_DOUBLESIZE > 10 2748 /* GCC sometimes overwrites the padding in the 2749 assignment above */ 2750 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8); 2751 # endif 2752 #else 2753 anv.nv = SvNV(fromstr); 2754 #endif 2755 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap); 2756 } 2757 break; 2758 } 2759 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 2760 case 'D': { 2761 ld_bytes aldouble; 2762 /* long doubles can have unused bits, which may be nonzero */ 2763 Zero(&aldouble, 1, long double); 2764 while (len-- > 0) { 2765 fromstr = NEXTFROM; 2766 # ifdef __GNUC__ 2767 /* to work round a gcc/x86 bug; don't use SvNV */ 2768 aldouble.ld = (long double)sv_2nv(fromstr); 2769 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10 2770 /* GCC sometimes overwrites the padding in the 2771 assignment above */ 2772 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8); 2773 # endif 2774 # else 2775 aldouble.ld = (long double)SvNV(fromstr); 2776 # endif 2777 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes), 2778 needs_swap); 2779 } 2780 break; 2781 } 2782 #endif 2783 case 'n' | TYPE_IS_SHRIEKING: 2784 case 'n': 2785 while (len-- > 0) { 2786 I16 ai16; 2787 fromstr = NEXTFROM; 2788 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2789 ai16 = PerlSock_htons(ai16); 2790 PUSH16(utf8, cur, &ai16, FALSE); 2791 } 2792 break; 2793 case 'v' | TYPE_IS_SHRIEKING: 2794 case 'v': 2795 while (len-- > 0) { 2796 I16 ai16; 2797 fromstr = NEXTFROM; 2798 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2799 ai16 = htovs(ai16); 2800 PUSH16(utf8, cur, &ai16, FALSE); 2801 } 2802 break; 2803 case 'S' | TYPE_IS_SHRIEKING: 2804 #if SHORTSIZE != SIZE16 2805 while (len-- > 0) { 2806 unsigned short aushort; 2807 fromstr = NEXTFROM; 2808 aushort = SvUV_no_inf(fromstr, datumtype); 2809 PUSH_VAR(utf8, cur, aushort, needs_swap); 2810 } 2811 break; 2812 #else 2813 /* FALLTHROUGH */ 2814 #endif 2815 case 'S': 2816 while (len-- > 0) { 2817 U16 au16; 2818 fromstr = NEXTFROM; 2819 au16 = (U16)SvUV_no_inf(fromstr, datumtype); 2820 PUSH16(utf8, cur, &au16, needs_swap); 2821 } 2822 break; 2823 case 's' | TYPE_IS_SHRIEKING: 2824 #if SHORTSIZE != SIZE16 2825 while (len-- > 0) { 2826 short ashort; 2827 fromstr = NEXTFROM; 2828 ashort = SvIV_no_inf(fromstr, datumtype); 2829 PUSH_VAR(utf8, cur, ashort, needs_swap); 2830 } 2831 break; 2832 #else 2833 /* FALLTHROUGH */ 2834 #endif 2835 case 's': 2836 while (len-- > 0) { 2837 I16 ai16; 2838 fromstr = NEXTFROM; 2839 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2840 PUSH16(utf8, cur, &ai16, needs_swap); 2841 } 2842 break; 2843 case 'I': 2844 case 'I' | TYPE_IS_SHRIEKING: 2845 while (len-- > 0) { 2846 unsigned int auint; 2847 fromstr = NEXTFROM; 2848 auint = SvUV_no_inf(fromstr, datumtype); 2849 PUSH_VAR(utf8, cur, auint, needs_swap); 2850 } 2851 break; 2852 case 'j': 2853 while (len-- > 0) { 2854 IV aiv; 2855 fromstr = NEXTFROM; 2856 aiv = SvIV_no_inf(fromstr, datumtype); 2857 PUSH_VAR(utf8, cur, aiv, needs_swap); 2858 } 2859 break; 2860 case 'J': 2861 while (len-- > 0) { 2862 UV auv; 2863 fromstr = NEXTFROM; 2864 auv = SvUV_no_inf(fromstr, datumtype); 2865 PUSH_VAR(utf8, cur, auv, needs_swap); 2866 } 2867 break; 2868 case 'w': 2869 while (len-- > 0) { 2870 NV anv; 2871 fromstr = NEXTFROM; 2872 S_sv_check_infnan(aTHX_ fromstr, datumtype); 2873 anv = SvNV_nomg(fromstr); 2874 2875 if (anv < 0) { 2876 *cur = '\0'; 2877 SvCUR_set(cat, cur - start); 2878 Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); 2879 } 2880 2881 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, 2882 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as 2883 any negative IVs will have already been got by the croak() 2884 above. IOK is untrue for fractions, so we test them 2885 against UV_MAX_P1. */ 2886 if (SvIOK(fromstr) || anv < UV_MAX_P1) { 2887 char buf[(sizeof(UV)*CHAR_BIT)/7+1]; 2888 char *in = buf + sizeof(buf); 2889 UV auv = SvUV_nomg(fromstr); 2890 2891 do { 2892 *--in = (char)((auv & 0x7f) | 0x80); 2893 auv >>= 7; 2894 } while (auv); 2895 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2896 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2897 in, (buf + sizeof(buf)) - in); 2898 } else if (SvPOKp(fromstr)) 2899 goto w_string; 2900 else if (SvNOKp(fromstr)) { 2901 /* 10**NV_MAX_10_EXP is the largest power of 10 2902 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable 2903 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: 2904 x = (NV_MAX_10_EXP+1) * log (10) / log (128) 2905 And with that many bytes only Inf can overflow. 2906 Some C compilers are strict about integral constant 2907 expressions so we conservatively divide by a slightly 2908 smaller integer instead of multiplying by the exact 2909 floating-point value. 2910 */ 2911 #ifdef NV_MAX_10_EXP 2912 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */ 2913 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */ 2914 #else 2915 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */ 2916 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */ 2917 #endif 2918 char *in = buf + sizeof(buf); 2919 2920 anv = Perl_floor(anv); 2921 do { 2922 const NV next = Perl_floor(anv / 128); 2923 if (in <= buf) /* this cannot happen ;-) */ 2924 Perl_croak(aTHX_ "Cannot compress integer in pack"); 2925 *--in = (unsigned char)(anv - (next * 128)) | 0x80; 2926 anv = next; 2927 } while (anv > 0); 2928 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2929 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2930 in, (buf + sizeof(buf)) - in); 2931 } else { 2932 const char *from; 2933 char *result, *in; 2934 SV *norm; 2935 STRLEN len; 2936 bool done; 2937 2938 w_string: 2939 /* Copy string and check for compliance */ 2940 from = SvPV_nomg_const(fromstr, len); 2941 if ((norm = is_an_int(from, len)) == NULL) 2942 Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); 2943 2944 Newx(result, len, char); 2945 in = result + len; 2946 done = FALSE; 2947 while (!done) *--in = div128(norm, &done) | 0x80; 2948 result[len - 1] &= 0x7F; /* clear continue bit */ 2949 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2950 in, (result + len) - in); 2951 Safefree(result); 2952 SvREFCNT_dec(norm); /* free norm */ 2953 } 2954 } 2955 break; 2956 case 'i': 2957 case 'i' | TYPE_IS_SHRIEKING: 2958 while (len-- > 0) { 2959 int aint; 2960 fromstr = NEXTFROM; 2961 aint = SvIV_no_inf(fromstr, datumtype); 2962 PUSH_VAR(utf8, cur, aint, needs_swap); 2963 } 2964 break; 2965 case 'N' | TYPE_IS_SHRIEKING: 2966 case 'N': 2967 while (len-- > 0) { 2968 U32 au32; 2969 fromstr = NEXTFROM; 2970 au32 = SvUV_no_inf(fromstr, datumtype); 2971 au32 = PerlSock_htonl(au32); 2972 PUSH32(utf8, cur, &au32, FALSE); 2973 } 2974 break; 2975 case 'V' | TYPE_IS_SHRIEKING: 2976 case 'V': 2977 while (len-- > 0) { 2978 U32 au32; 2979 fromstr = NEXTFROM; 2980 au32 = SvUV_no_inf(fromstr, datumtype); 2981 au32 = htovl(au32); 2982 PUSH32(utf8, cur, &au32, FALSE); 2983 } 2984 break; 2985 case 'L' | TYPE_IS_SHRIEKING: 2986 #if LONGSIZE != SIZE32 2987 while (len-- > 0) { 2988 unsigned long aulong; 2989 fromstr = NEXTFROM; 2990 aulong = SvUV_no_inf(fromstr, datumtype); 2991 PUSH_VAR(utf8, cur, aulong, needs_swap); 2992 } 2993 break; 2994 #else 2995 /* Fall though! */ 2996 #endif 2997 case 'L': 2998 while (len-- > 0) { 2999 U32 au32; 3000 fromstr = NEXTFROM; 3001 au32 = SvUV_no_inf(fromstr, datumtype); 3002 PUSH32(utf8, cur, &au32, needs_swap); 3003 } 3004 break; 3005 case 'l' | TYPE_IS_SHRIEKING: 3006 #if LONGSIZE != SIZE32 3007 while (len-- > 0) { 3008 long along; 3009 fromstr = NEXTFROM; 3010 along = SvIV_no_inf(fromstr, datumtype); 3011 PUSH_VAR(utf8, cur, along, needs_swap); 3012 } 3013 break; 3014 #else 3015 /* Fall though! */ 3016 #endif 3017 case 'l': 3018 while (len-- > 0) { 3019 I32 ai32; 3020 fromstr = NEXTFROM; 3021 ai32 = SvIV_no_inf(fromstr, datumtype); 3022 PUSH32(utf8, cur, &ai32, needs_swap); 3023 } 3024 break; 3025 #if defined(HAS_QUAD) && IVSIZE >= 8 3026 case 'Q': 3027 while (len-- > 0) { 3028 Uquad_t auquad; 3029 fromstr = NEXTFROM; 3030 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype); 3031 PUSH_VAR(utf8, cur, auquad, needs_swap); 3032 } 3033 break; 3034 case 'q': 3035 while (len-- > 0) { 3036 Quad_t aquad; 3037 fromstr = NEXTFROM; 3038 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype); 3039 PUSH_VAR(utf8, cur, aquad, needs_swap); 3040 } 3041 break; 3042 #endif 3043 case 'P': 3044 len = 1; /* assume SV is correct length */ 3045 GROWING(utf8, cat, start, cur, sizeof(char *)); 3046 /* FALLTHROUGH */ 3047 case 'p': 3048 while (len-- > 0) { 3049 const char *aptr; 3050 3051 fromstr = NEXTFROM; 3052 SvGETMAGIC(fromstr); 3053 if (!SvOK(fromstr)) aptr = NULL; 3054 else { 3055 /* XXX better yet, could spirit away the string to 3056 * a safe spot and hang on to it until the result 3057 * of pack() (and all copies of the result) are 3058 * gone. 3059 */ 3060 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) && 3061 !SvREADONLY(fromstr)))) { 3062 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3063 "Attempt to pack pointer to temporary value"); 3064 } 3065 if (SvPOK(fromstr) || SvNIOK(fromstr)) 3066 aptr = SvPV_nomg_const_nolen(fromstr); 3067 else 3068 aptr = SvPV_force_flags_nolen(fromstr, 0); 3069 } 3070 PUSH_VAR(utf8, cur, aptr, needs_swap); 3071 } 3072 break; 3073 case 'u': { 3074 const char *aptr, *aend; 3075 bool from_utf8; 3076 3077 fromstr = NEXTFROM; 3078 if (len <= 2) len = 45; 3079 else len = len / 3 * 3; 3080 if (len >= 64) { 3081 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3082 "Field too wide in 'u' format in pack"); 3083 len = 63; 3084 } 3085 aptr = SvPV_const(fromstr, fromlen); 3086 from_utf8 = DO_UTF8(fromstr); 3087 if (from_utf8) { 3088 aend = aptr + fromlen; 3089 fromlen = sv_len_utf8_nomg(fromstr); 3090 } else aend = NULL; /* Unused, but keep compilers happy */ 3091 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2); 3092 while (fromlen > 0) { 3093 U8 *end; 3094 I32 todo; 3095 U8 hunk[1+63/3*4+1]; 3096 3097 if ((I32)fromlen > len) 3098 todo = len; 3099 else 3100 todo = fromlen; 3101 if (from_utf8) { 3102 char buffer[64]; 3103 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo, 3104 'u' | TYPE_IS_PACK)) { 3105 *cur = '\0'; 3106 SvCUR_set(cat, cur - start); 3107 Perl_croak(aTHX_ "panic: string is shorter than advertised, " 3108 "aptr=%p, aend=%p, buffer=%p, todo=%ld", 3109 aptr, aend, buffer, (long) todo); 3110 } 3111 end = doencodes(hunk, (const U8 *)buffer, todo); 3112 } else { 3113 end = doencodes(hunk, (const U8 *)aptr, todo); 3114 aptr += todo; 3115 } 3116 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0); 3117 fromlen -= todo; 3118 } 3119 break; 3120 } 3121 } 3122 *cur = '\0'; 3123 SvCUR_set(cat, cur - start); 3124 no_change: 3125 *symptr = lookahead; 3126 } 3127 return beglist; 3128 } 3129 #undef NEXTFROM 3130 3131 3132 PP(pp_pack) 3133 { 3134 dSP; dMARK; dORIGMARK; dTARGET; 3135 SV *cat = TARG; 3136 STRLEN fromlen; 3137 SV *pat_sv = *++MARK; 3138 const char *pat = SvPV_const(pat_sv, fromlen); 3139 const char *patend = pat + fromlen; 3140 3141 MARK++; 3142 sv_setpvs(cat, ""); 3143 SvUTF8_off(cat); 3144 3145 packlist(cat, pat, patend, MARK, SP + 1); 3146 3147 SvSETMAGIC(cat); 3148 SP = ORIGMARK; 3149 PUSHs(cat); 3150 RETURN; 3151 } 3152 3153 /* 3154 * ex: set ts=8 sts=4 sw=4 et: 3155 */ 3156