1 /* pp_pack.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * He still hopefully carried some of his gear in his pack: a small tinder-box, 13 * two small shallow pans, the smaller fitting into the larger; inside them a 14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and 15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, 16 * some salt. 17 * 18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"] 19 */ 20 21 /* This file contains pp ("push/pop") functions that 22 * execute the opcodes that make up a perl program. A typical pp function 23 * expects to find its arguments on the stack, and usually pushes its 24 * results onto the stack, hence the 'pp' terminology. Each OP structure 25 * contains a pointer to the relevant pp_foo() function. 26 * 27 * This particular file just contains pp_pack() and pp_unpack(). See the 28 * other pp*.c files for the rest of the pp_ functions. 29 */ 30 31 #include "EXTERN.h" 32 #define PERL_IN_PP_PACK_C 33 #include "perl.h" 34 35 /* Types used by pack/unpack */ 36 typedef enum { 37 e_no_len, /* no length */ 38 e_number, /* number, [] */ 39 e_star /* asterisk */ 40 } howlen_t; 41 42 typedef struct tempsym { 43 const char* patptr; /* current template char */ 44 const char* patend; /* one after last char */ 45 const char* grpbeg; /* 1st char of ()-group */ 46 const char* grpend; /* end of ()-group */ 47 I32 code; /* template code (!<>) */ 48 U32 flags; /* /=4, comma=2, pack=1 */ 49 /* and group modifiers */ 50 SSize_t length; /* length/repeat count */ 51 howlen_t howlen; /* how length is given */ 52 int level; /* () nesting level */ 53 STRLEN strbeg; /* offset of group start */ 54 struct tempsym *previous; /* previous group */ 55 } tempsym_t; 56 57 #define TEMPSYM_INIT(symptr, p, e, f) \ 58 STMT_START { \ 59 (symptr)->patptr = (p); \ 60 (symptr)->patend = (e); \ 61 (symptr)->grpbeg = NULL; \ 62 (symptr)->grpend = NULL; \ 63 (symptr)->grpend = NULL; \ 64 (symptr)->code = 0; \ 65 (symptr)->length = 0; \ 66 (symptr)->howlen = e_no_len; \ 67 (symptr)->level = 0; \ 68 (symptr)->flags = (f); \ 69 (symptr)->strbeg = 0; \ 70 (symptr)->previous = NULL; \ 71 } STMT_END 72 73 typedef union { 74 NV nv; 75 U8 bytes[sizeof(NV)]; 76 } NV_bytes; 77 78 #if defined(HAS_LONG_DOUBLE) 79 typedef union { 80 long double ld; 81 U8 bytes[sizeof(long double)]; 82 } ld_bytes; 83 #endif 84 85 #ifndef CHAR_BIT 86 # define CHAR_BIT 8 87 #endif 88 /* Maximum number of bytes to which a byte can grow due to upgrade */ 89 #define UTF8_EXPAND 2 90 91 /* 92 * Offset for integer pack/unpack. 93 * 94 * On architectures where I16 and I32 aren't really 16 and 32 bits, 95 * which for now are all Crays, pack and unpack have to play games. 96 */ 97 98 /* 99 * These values are required for portability of pack() output. 100 * If they're not right on your machine, then pack() and unpack() 101 * wouldn't work right anyway; you'll need to apply the Cray hack. 102 * (I'd like to check them with #if, but you can't use sizeof() in 103 * the preprocessor.) --??? 104 */ 105 /* 106 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE 107 defines are now in config.h. --Andy Dougherty April 1998 108 */ 109 #define SIZE16 2 110 #define SIZE32 4 111 112 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). 113 --jhi Feb 1999 */ 114 115 #if U16SIZE <= SIZE16 && U32SIZE <= SIZE32 116 # define OFF16(p) ((char *) (p)) 117 # define OFF32(p) ((char *) (p)) 118 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 119 # define OFF16(p) ((char*)(p)) 120 # define OFF32(p) ((char*)(p)) 121 #elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 122 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) 123 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) 124 #else 125 # error "bad cray byte order" 126 #endif 127 128 #define PUSH16(utf8, cur, p, needs_swap) \ 129 PUSH_BYTES(utf8, cur, OFF16(p), SIZE16, needs_swap) 130 #define PUSH32(utf8, cur, p, needs_swap) \ 131 PUSH_BYTES(utf8, cur, OFF32(p), SIZE32, needs_swap) 132 133 #if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 134 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_LITTLE_ENDIAN) 135 #elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 136 # define NEEDS_SWAP(d) (TYPE_ENDIANNESS(d) == TYPE_IS_BIG_ENDIAN) 137 #else 138 # error "Unsupported byteorder" 139 /* Need to add code here to re-instate mixed endian support. 140 NEEDS_SWAP would need to hold a flag indicating which action to 141 take, and S_reverse_copy and the code in S_utf8_to_bytes would need 142 logic adding to deal with any mixed-endian transformations needed. 143 */ 144 #endif 145 146 /* Only to be used inside a loop (see the break) */ 147 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype, needs_swap) \ 148 STMT_START { \ 149 if (UNLIKELY(utf8)) { \ 150 if (!S_utf8_to_bytes(aTHX_ &s, strend, \ 151 (char *) (buf), len, datumtype)) break; \ 152 } else { \ 153 if (UNLIKELY(needs_swap)) \ 154 S_reverse_copy(s, (char *) (buf), len); \ 155 else \ 156 Copy(s, (char *) (buf), len, char); \ 157 s += len; \ 158 } \ 159 } STMT_END 160 161 #define SHIFT16(utf8, s, strend, p, datumtype, needs_swap) \ 162 SHIFT_BYTES(utf8, s, strend, OFF16(p), SIZE16, datumtype, needs_swap) 163 164 #define SHIFT32(utf8, s, strend, p, datumtype, needs_swap) \ 165 SHIFT_BYTES(utf8, s, strend, OFF32(p), SIZE32, datumtype, needs_swap) 166 167 #define SHIFT_VAR(utf8, s, strend, var, datumtype, needs_swap) \ 168 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype, needs_swap) 169 170 #define PUSH_VAR(utf8, aptr, var, needs_swap) \ 171 PUSH_BYTES(utf8, aptr, &(var), sizeof(var), needs_swap) 172 173 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */ 174 #define MAX_SUB_TEMPLATE_LEVEL 100 175 176 /* flags (note that type modifiers can also be used as flags!) */ 177 #define FLAG_WAS_UTF8 0x40 178 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */ 179 #define FLAG_UNPACK_ONLY_ONE 0x10 180 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */ 181 #define FLAG_SLASH 0x04 182 #define FLAG_COMMA 0x02 183 #define FLAG_PACK 0x01 184 185 STATIC SV * 186 S_mul128(pTHX_ SV *sv, U8 m) 187 { 188 STRLEN len; 189 char *s = SvPV(sv, len); 190 char *t; 191 192 PERL_ARGS_ASSERT_MUL128; 193 194 if (! memBEGINs(s, len, "0000")) { /* need to grow sv */ 195 SV * const tmpNew = newSVpvs("0000000000"); 196 197 sv_catsv(tmpNew, sv); 198 SvREFCNT_dec(sv); /* free old sv */ 199 sv = tmpNew; 200 s = SvPV(sv, len); 201 } 202 t = s + len - 1; 203 while (!*t) /* trailing '\0'? */ 204 t--; 205 while (t > s) { 206 const U32 i = ((*t - '0') << 7) + m; 207 *(t--) = '0' + (char)(i % 10); 208 m = (char)(i / 10); 209 } 210 return (sv); 211 } 212 213 /* Explosives and implosives. */ 214 215 #define ISUUCHAR(ch) inRANGE(NATIVE_TO_LATIN1(ch), \ 216 NATIVE_TO_LATIN1(' '), \ 217 NATIVE_TO_LATIN1('a') - 1) 218 219 /* type modifiers */ 220 #define TYPE_IS_SHRIEKING 0x100 221 #define TYPE_IS_BIG_ENDIAN 0x200 222 #define TYPE_IS_LITTLE_ENDIAN 0x400 223 #define TYPE_IS_PACK 0x800 224 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN) 225 #define TYPE_MODIFIERS(t) ((t) & ~0xFF) 226 #define TYPE_NO_MODIFIERS(t) ((U8) (t)) 227 228 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK) 229 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK) 230 231 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP(" 232 233 #define PACK_SIZE_CANNOT_CSUM 0x80 234 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ 235 #define PACK_SIZE_MASK 0x3F 236 237 #include "packsizetables.inc" 238 239 static void 240 S_reverse_copy(const char *src, char *dest, STRLEN len) 241 { 242 dest += len; 243 while (len--) 244 *--dest = *src++; 245 } 246 247 STATIC U8 248 utf8_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) 249 { 250 STRLEN retlen; 251 UV val; 252 253 if (*s >= end) { 254 goto croak; 255 } 256 val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, 257 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 258 if (retlen == (STRLEN) -1) 259 croak: 260 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", 261 (int) TYPE_NO_MODIFIERS(datumtype)); 262 if (val >= 0x100) { 263 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 264 "Character in '%c' format wrapped in unpack", 265 (int) TYPE_NO_MODIFIERS(datumtype)); 266 val = (U8) val; 267 } 268 *s += retlen; 269 return (U8)val; 270 } 271 272 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ 273 utf8_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ 274 *(U8 *)(s)++) 275 276 STATIC bool 277 S_utf8_to_bytes(pTHX_ const char **s, const char *end, const char *buf, SSize_t buf_len, I32 datumtype) 278 { 279 UV val; 280 STRLEN retlen; 281 const char *from = *s; 282 int bad = 0; 283 const U32 flags = ckWARN(WARN_UTF8) ? 284 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY); 285 const bool needs_swap = NEEDS_SWAP(datumtype); 286 287 if (UNLIKELY(needs_swap)) 288 buf += buf_len; 289 290 for (;buf_len > 0; buf_len--) { 291 if (from >= end) return FALSE; 292 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); 293 if (retlen == (STRLEN) -1) { 294 from += UTF8_SAFE_SKIP(from, end); 295 bad |= 1; 296 } else from += retlen; 297 if (val >= 0x100) { 298 bad |= 2; 299 val = (U8) val; 300 } 301 if (UNLIKELY(needs_swap)) 302 *(U8 *)--buf = (U8)val; 303 else 304 *(U8 *)buf++ = (U8)val; 305 } 306 /* We have enough characters for the buffer. Did we have problems ? */ 307 if (bad) { 308 if (bad & 1) { 309 /* Rewalk the string fragment while warning */ 310 const char *ptr; 311 const U32 flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; 312 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { 313 if (ptr >= end) break; 314 utf8n_to_uvchr((U8 *) ptr, end-ptr, &retlen, flags); 315 } 316 if (from > end) from = end; 317 } 318 if ((bad & 2)) 319 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ? 320 WARN_PACK : WARN_UNPACK), 321 "Character(s) in '%c' format wrapped in %s", 322 (int) TYPE_NO_MODIFIERS(datumtype), 323 datumtype & TYPE_IS_PACK ? "pack" : "unpack"); 324 } 325 *s = from; 326 return TRUE; 327 } 328 329 STATIC char * 330 S_my_bytes_to_utf8(const U8 *start, STRLEN len, char *dest, const bool needs_swap) { 331 PERL_ARGS_ASSERT_MY_BYTES_TO_UTF8; 332 333 if (UNLIKELY(needs_swap)) { 334 const U8 *p = start + len; 335 while (p-- > start) { 336 append_utf8_from_native_byte(*p, (U8 **) & dest); 337 } 338 } else { 339 const U8 * const end = start + len; 340 while (start < end) { 341 append_utf8_from_native_byte(*start, (U8 **) & dest); 342 start++; 343 } 344 } 345 return dest; 346 } 347 348 #define PUSH_BYTES(utf8, cur, buf, len, needs_swap) \ 349 STMT_START { \ 350 if (UNLIKELY(utf8)) \ 351 (cur) = my_bytes_to_utf8((U8 *) buf, len, (cur), needs_swap); \ 352 else { \ 353 if (UNLIKELY(needs_swap)) \ 354 S_reverse_copy((char *)(buf), cur, len); \ 355 else \ 356 Copy(buf, cur, len, char); \ 357 (cur) += (len); \ 358 } \ 359 } STMT_END 360 361 #define SAFE_UTF8_EXPAND(var) \ 362 STMT_START { \ 363 if ((var) > SSize_t_MAX / UTF8_EXPAND) \ 364 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 365 (var) = (var) * UTF8_EXPAND; \ 366 } STMT_END 367 368 #define GROWING2(utf8, cat, start, cur, item_size, item_count) \ 369 STMT_START { \ 370 if (SSize_t_MAX / (item_size) < (item_count)) \ 371 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 372 GROWING((utf8), (cat), (start), (cur), (item_size) * (item_count)); \ 373 } STMT_END 374 375 #define GROWING(utf8, cat, start, cur, in_len) \ 376 STMT_START { \ 377 STRLEN glen = (in_len); \ 378 STRLEN catcur = (STRLEN)((cur) - (start)); \ 379 if (utf8) SAFE_UTF8_EXPAND(glen); \ 380 if (SSize_t_MAX - glen < catcur) \ 381 Perl_croak(aTHX_ "%s", "Out of memory during pack()"); \ 382 if (catcur + glen >= SvLEN(cat)) { \ 383 (start) = sv_exp_grow(cat, glen); \ 384 (cur) = (start) + SvCUR(cat); \ 385 } \ 386 } STMT_END 387 388 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \ 389 STMT_START { \ 390 const STRLEN glen = (in_len); \ 391 STRLEN gl = glen; \ 392 if (utf8) SAFE_UTF8_EXPAND(gl); \ 393 if ((cur) + gl >= (start) + SvLEN(cat)) { \ 394 *cur = '\0'; \ 395 SvCUR_set((cat), (cur) - (start)); \ 396 (start) = sv_exp_grow(cat, gl); \ 397 (cur) = (start) + SvCUR(cat); \ 398 } \ 399 PUSH_BYTES(utf8, cur, buf, glen, 0); \ 400 } STMT_END 401 402 #define PUSH_BYTE(utf8, s, byte) \ 403 STMT_START { \ 404 if (utf8) { \ 405 const U8 au8 = (byte); \ 406 (s) = my_bytes_to_utf8(&au8, 1, (s), 0);\ 407 } else *(U8 *)(s)++ = (byte); \ 408 } STMT_END 409 410 /* Only to be used inside a loop (see the break) */ 411 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \ 412 STMT_START { \ 413 STRLEN retlen; \ 414 if (str >= end) break; \ 415 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ 416 if (retlen == (STRLEN) -1) { \ 417 *cur = '\0'; \ 418 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ 419 } \ 420 str += retlen; \ 421 } STMT_END 422 423 static const char *_action( const tempsym_t* symptr ) 424 { 425 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); 426 } 427 428 /* Returns the sizeof() struct described by pat */ 429 STATIC SSize_t 430 S_measure_struct(pTHX_ tempsym_t* symptr) 431 { 432 SSize_t total = 0; 433 434 PERL_ARGS_ASSERT_MEASURE_STRUCT; 435 436 while (next_symbol(symptr)) { 437 SSize_t len, size; 438 439 switch (symptr->howlen) { 440 case e_star: 441 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", 442 _action( symptr ) ); 443 444 default: 445 /* e_no_len and e_number */ 446 len = symptr->length; 447 break; 448 } 449 450 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK; 451 if (!size) { 452 SSize_t star; 453 /* endianness doesn't influence the size of a type */ 454 switch(TYPE_NO_ENDIANNESS(symptr->code)) { 455 default: 456 /* diag_listed_as: Invalid type '%s' in %s */ 457 Perl_croak(aTHX_ "Invalid type '%c' in %s", 458 (int)TYPE_NO_MODIFIERS(symptr->code), 459 _action( symptr ) ); 460 case '.' | TYPE_IS_SHRIEKING: 461 case '@' | TYPE_IS_SHRIEKING: 462 case '@': 463 case '.': 464 case '/': 465 case 'U': /* XXXX Is it correct? */ 466 case 'w': 467 case 'u': 468 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", 469 (int) TYPE_NO_MODIFIERS(symptr->code), 470 _action( symptr ) ); 471 case '%': 472 size = 0; 473 break; 474 case '(': 475 { 476 tempsym_t savsym = *symptr; 477 symptr->patptr = savsym.grpbeg; 478 symptr->patend = savsym.grpend; 479 /* XXXX Theoretically, we need to measure many times at 480 different positions, since the subexpression may contain 481 alignment commands, but be not of aligned length. 482 Need to detect this and croak(). */ 483 size = measure_struct(symptr); 484 *symptr = savsym; 485 break; 486 } 487 case 'X' | TYPE_IS_SHRIEKING: 488 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. 489 */ 490 if (!len) /* Avoid division by 0 */ 491 len = 1; 492 len = total % len; /* Assumed: the start is aligned. */ 493 /* FALLTHROUGH */ 494 case 'X': 495 size = -1; 496 if (total < len) 497 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) ); 498 break; 499 case 'x' | TYPE_IS_SHRIEKING: 500 if (!len) /* Avoid division by 0 */ 501 len = 1; 502 star = total % len; /* Assumed: the start is aligned. */ 503 if (star) /* Other portable ways? */ 504 len = len - star; 505 else 506 len = 0; 507 /* FALLTHROUGH */ 508 case 'x': 509 case 'A': 510 case 'Z': 511 case 'a': 512 size = 1; 513 break; 514 case 'B': 515 case 'b': 516 len = (len + 7)/8; 517 size = 1; 518 break; 519 case 'H': 520 case 'h': 521 len = (len + 1)/2; 522 size = 1; 523 break; 524 525 case 'P': 526 len = 1; 527 size = sizeof(char*); 528 break; 529 } 530 } 531 total += len * size; 532 } 533 return total; 534 } 535 536 537 /* locate matching closing parenthesis or bracket 538 * returns char pointer to char after match, or NULL 539 */ 540 STATIC const char * 541 S_group_end(pTHX_ const char *patptr, const char *patend, char ender) 542 { 543 PERL_ARGS_ASSERT_GROUP_END; 544 545 while (patptr < patend) { 546 const char c = *patptr++; 547 548 if (isSPACE(c)) 549 continue; 550 else if (c == ender) 551 return patptr-1; 552 else if (c == '#') { 553 while (patptr < patend && *patptr != '\n') 554 patptr++; 555 continue; 556 } else if (c == '(') 557 patptr = group_end(patptr, patend, ')') + 1; 558 else if (c == '[') 559 patptr = group_end(patptr, patend, ']') + 1; 560 } 561 Perl_croak(aTHX_ "No group ending character '%c' found in template", 562 ender); 563 NOT_REACHED; /* NOTREACHED */ 564 } 565 566 567 /* Convert unsigned decimal number to binary. 568 * Expects a pointer to the first digit and address of length variable 569 * Advances char pointer to 1st non-digit char and returns number 570 */ 571 STATIC const char * 572 S_get_num(pTHX_ const char *patptr, SSize_t *lenptr ) 573 { 574 SSize_t len = *patptr++ - '0'; 575 576 PERL_ARGS_ASSERT_GET_NUM; 577 578 while (isDIGIT(*patptr)) { 579 SSize_t nlen = (len * 10) + (*patptr++ - '0'); 580 if (nlen < 0 || nlen/10 != len) 581 Perl_croak(aTHX_ "pack/unpack repeat count overflow"); 582 len = nlen; 583 } 584 *lenptr = len; 585 return patptr; 586 } 587 588 /* The marvellous template parsing routine: Using state stored in *symptr, 589 * locates next template code and count 590 */ 591 STATIC bool 592 S_next_symbol(pTHX_ tempsym_t* symptr ) 593 { 594 const char* patptr = symptr->patptr; 595 const char* const patend = symptr->patend; 596 597 PERL_ARGS_ASSERT_NEXT_SYMBOL; 598 599 symptr->flags &= ~FLAG_SLASH; 600 601 while (patptr < patend) { 602 if (isSPACE(*patptr)) 603 patptr++; 604 else if (*patptr == '#') { 605 patptr++; 606 while (patptr < patend && *patptr != '\n') 607 patptr++; 608 if (patptr < patend) 609 patptr++; 610 } else { 611 /* We should have found a template code */ 612 I32 code = (U8) *patptr++; 613 U32 inherited_modifiers = 0; 614 615 /* unrecognised characters in pack/unpack formats were made fatal in 616 * 5.004, with an exception added in 5.004_04 for ',' to "just" warn: */ 617 if (code == ','){ 618 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ 619 symptr->flags |= FLAG_COMMA; 620 /* diag_listed_as: Invalid type '%s' in %s */ 621 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 622 "Invalid type ',' in %s", _action( symptr ) ); 623 } 624 continue; 625 } 626 627 /* for '(', skip to ')' */ 628 if (code == '(') { 629 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) 630 Perl_croak(aTHX_ "()-group starts with a count in %s", 631 _action( symptr ) ); 632 symptr->grpbeg = patptr; 633 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); 634 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) 635 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", 636 _action( symptr ) ); 637 } 638 639 /* look for group modifiers to inherit */ 640 if (TYPE_ENDIANNESS(symptr->flags)) { 641 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) 642 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); 643 } 644 645 /* look for modifiers */ 646 while (patptr < patend) { 647 const char *allowed; 648 I32 modifier; 649 switch (*patptr) { 650 case '!': 651 modifier = TYPE_IS_SHRIEKING; 652 allowed = "sSiIlLxXnNvV@."; 653 break; 654 case '>': 655 modifier = TYPE_IS_BIG_ENDIAN; 656 allowed = ENDIANNESS_ALLOWED_TYPES; 657 break; 658 case '<': 659 modifier = TYPE_IS_LITTLE_ENDIAN; 660 allowed = ENDIANNESS_ALLOWED_TYPES; 661 break; 662 default: 663 allowed = ""; 664 modifier = 0; 665 break; 666 } 667 668 if (modifier == 0) 669 break; 670 671 if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) 672 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, 673 allowed, _action( symptr ) ); 674 675 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) 676 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", 677 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) ); 678 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == 679 TYPE_ENDIANNESS_MASK) 680 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", 681 *patptr, _action( symptr ) ); 682 683 if ((code & modifier)) { 684 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 685 "Duplicate modifier '%c' after '%c' in %s", 686 *patptr, (int) TYPE_NO_MODIFIERS(code), 687 _action( symptr ) ); 688 } 689 690 code |= modifier; 691 patptr++; 692 } 693 694 /* inherit modifiers */ 695 code |= inherited_modifiers; 696 697 /* look for count and/or / */ 698 if (patptr < patend) { 699 if (isDIGIT(*patptr)) { 700 patptr = get_num( patptr, &symptr->length ); 701 symptr->howlen = e_number; 702 703 } else if (*patptr == '*') { 704 patptr++; 705 symptr->howlen = e_star; 706 707 } else if (*patptr == '[') { 708 const char* lenptr = ++patptr; 709 symptr->howlen = e_number; 710 patptr = group_end( patptr, patend, ']' ) + 1; 711 /* what kind of [] is it? */ 712 if (isDIGIT(*lenptr)) { 713 lenptr = get_num( lenptr, &symptr->length ); 714 if( *lenptr != ']' ) 715 Perl_croak(aTHX_ "Malformed integer in [] in %s", 716 _action( symptr ) ); 717 } else { 718 tempsym_t savsym = *symptr; 719 symptr->patend = patptr-1; 720 symptr->patptr = lenptr; 721 savsym.length = measure_struct(symptr); 722 *symptr = savsym; 723 } 724 } else { 725 symptr->howlen = e_no_len; 726 symptr->length = 1; 727 } 728 729 /* try to find / */ 730 while (patptr < patend) { 731 if (isSPACE(*patptr)) 732 patptr++; 733 else if (*patptr == '#') { 734 patptr++; 735 while (patptr < patend && *patptr != '\n') 736 patptr++; 737 if (patptr < patend) 738 patptr++; 739 } else { 740 if (*patptr == '/') { 741 symptr->flags |= FLAG_SLASH; 742 patptr++; 743 if (patptr < patend && 744 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) 745 Perl_croak(aTHX_ "'/' does not take a repeat count in %s", 746 _action( symptr ) ); 747 } 748 break; 749 } 750 } 751 } else { 752 /* at end - no count, no / */ 753 symptr->howlen = e_no_len; 754 symptr->length = 1; 755 } 756 757 symptr->code = code; 758 symptr->patptr = patptr; 759 return TRUE; 760 } 761 } 762 symptr->patptr = patptr; 763 return FALSE; 764 } 765 766 /* 767 There is no way to cleanly handle the case where we should process the 768 string per byte in its upgraded form while it's really in downgraded form 769 (e.g. estimates like strend-s as an upper bound for the number of 770 characters left wouldn't work). So if we foresee the need of this 771 (pattern starts with U or contains U0), we want to work on the encoded 772 version of the string. Users are advised to upgrade their pack string 773 themselves if they need to do a lot of unpacks like this on it 774 */ 775 STATIC bool 776 need_utf8(const char *pat, const char *patend) 777 { 778 bool first = TRUE; 779 780 PERL_ARGS_ASSERT_NEED_UTF8; 781 782 while (pat < patend) { 783 if (pat[0] == '#') { 784 pat++; 785 pat = (const char *) memchr(pat, '\n', patend-pat); 786 if (!pat) return FALSE; 787 } else if (pat[0] == 'U') { 788 if (first || pat[1] == '0') return TRUE; 789 } else first = FALSE; 790 pat++; 791 } 792 return FALSE; 793 } 794 795 STATIC char 796 first_symbol(const char *pat, const char *patend) { 797 PERL_ARGS_ASSERT_FIRST_SYMBOL; 798 799 while (pat < patend) { 800 if (pat[0] != '#') return pat[0]; 801 pat++; 802 pat = (const char *) memchr(pat, '\n', patend-pat); 803 if (!pat) return 0; 804 pat++; 805 } 806 return 0; 807 } 808 809 /* 810 811 =for apidoc unpackstring 812 813 The engine implementing the C<unpack()> Perl function. 814 815 Using the template C<pat..patend>, this function unpacks the string 816 C<s..strend> into a number of mortal SVs, which it pushes onto the perl 817 argument (C<@_>) stack (so you will need to issue a C<PUTBACK> before and 818 C<SPAGAIN> after the call to this function). It returns the number of 819 pushed elements. 820 821 The C<strend> and C<patend> pointers should point to the byte following the 822 last character of each string. 823 824 Although this function returns its values on the perl argument stack, it 825 doesn't take any parameters from that stack (and thus in particular 826 there's no need to do a C<PUSHMARK> before calling it, unlike L</call_pv> for 827 example). 828 829 =cut */ 830 831 SSize_t 832 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags) 833 { 834 tempsym_t sym; 835 836 PERL_ARGS_ASSERT_UNPACKSTRING; 837 838 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; 839 else if (need_utf8(pat, patend)) { 840 /* We probably should try to avoid this in case a scalar context call 841 wouldn't get to the "U0" */ 842 STRLEN len = strend - s; 843 s = (char *) bytes_to_utf8((U8 *) s, &len); 844 SAVEFREEPV(s); 845 strend = s + len; 846 flags |= FLAG_DO_UTF8; 847 } 848 849 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) 850 flags |= FLAG_PARSE_UTF8; 851 852 TEMPSYM_INIT(&sym, pat, patend, flags); 853 854 return unpack_rec(&sym, s, s, strend, NULL ); 855 } 856 857 STATIC SSize_t 858 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) 859 { 860 dSP; 861 SV *sv = NULL; 862 const SSize_t start_sp_offset = SP - PL_stack_base; 863 howlen_t howlen; 864 SSize_t checksum = 0; 865 UV cuv = 0; 866 NV cdouble = 0.0; 867 const SSize_t bits_in_uv = CHAR_BIT * sizeof(cuv); 868 bool beyond = FALSE; 869 bool explicit_length; 870 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; 871 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 872 873 PERL_ARGS_ASSERT_UNPACK_REC; 874 875 symptr->strbeg = s - strbeg; 876 877 while (next_symbol(symptr)) { 878 packprops_t props; 879 SSize_t len; 880 I32 datumtype = symptr->code; 881 bool needs_swap; 882 /* do first one only unless in list context 883 / is implemented by unpacking the count, then popping it from the 884 stack, so must check that we're not in the middle of a / */ 885 if ( unpack_only_one 886 && (SP - PL_stack_base == start_sp_offset + 1) 887 && (datumtype != '/') ) /* XXX can this be omitted */ 888 break; 889 890 switch (howlen = symptr->howlen) { 891 case e_star: 892 len = strend - strbeg; /* long enough */ 893 break; 894 default: 895 /* e_no_len and e_number */ 896 len = symptr->length; 897 break; 898 } 899 900 explicit_length = TRUE; 901 redo_switch: 902 beyond = s >= strend; 903 904 props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 905 if (props) { 906 /* props nonzero means we can process this letter. */ 907 const SSize_t size = props & PACK_SIZE_MASK; 908 const SSize_t howmany = (strend - s) / size; 909 if (len > howmany) 910 len = howmany; 911 912 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) { 913 if (len && unpack_only_one) len = 1; 914 EXTEND(SP, len); 915 EXTEND_MORTAL(len); 916 } 917 } 918 919 needs_swap = NEEDS_SWAP(datumtype); 920 921 switch(TYPE_NO_ENDIANNESS(datumtype)) { 922 default: 923 /* diag_listed_as: Invalid type '%s' in %s */ 924 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) ); 925 926 case '%': 927 if (howlen == e_no_len) 928 len = 16; /* len is not specified */ 929 checksum = len; 930 cuv = 0; 931 cdouble = 0; 932 continue; 933 934 case '(': 935 { 936 tempsym_t savsym = *symptr; 937 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 938 symptr->flags |= group_modifiers; 939 symptr->patend = savsym.grpend; 940 symptr->previous = &savsym; 941 symptr->level++; 942 PUTBACK; 943 if (len && unpack_only_one) len = 1; 944 while (len--) { 945 symptr->patptr = savsym.grpbeg; 946 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 947 else symptr->flags &= ~FLAG_PARSE_UTF8; 948 unpack_rec(symptr, s, strbeg, strend, &s); 949 if (s == strend && savsym.howlen == e_star) 950 break; /* No way to continue */ 951 } 952 SPAGAIN; 953 savsym.flags = symptr->flags & ~group_modifiers; 954 *symptr = savsym; 955 break; 956 } 957 case '.' | TYPE_IS_SHRIEKING: 958 case '.': { 959 const char *from; 960 SV *sv; 961 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); 962 if (howlen == e_star) from = strbeg; 963 else if (len <= 0) from = s; 964 else { 965 tempsym_t *group = symptr; 966 967 while (--len && group) group = group->previous; 968 from = group ? strbeg + group->strbeg : strbeg; 969 } 970 sv = from <= s ? 971 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) : 972 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s))); 973 mXPUSHs(sv); 974 break; 975 } 976 case '@' | TYPE_IS_SHRIEKING: 977 case '@': 978 s = strbeg + symptr->strbeg; 979 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 980 { 981 while (len > 0) { 982 if (s >= strend) 983 Perl_croak(aTHX_ "'@' outside of string in unpack"); 984 s += UTF8SKIP(s); 985 len--; 986 } 987 if (s > strend) 988 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); 989 } else { 990 if (strend-s < len) 991 Perl_croak(aTHX_ "'@' outside of string in unpack"); 992 s += len; 993 } 994 break; 995 case 'X' | TYPE_IS_SHRIEKING: 996 if (!len) /* Avoid division by 0 */ 997 len = 1; 998 if (utf8) { 999 const char *hop, *last; 1000 SSize_t l = len; 1001 hop = last = strbeg; 1002 while (hop < s) { 1003 hop += UTF8SKIP(hop); 1004 if (--l == 0) { 1005 last = hop; 1006 l = len; 1007 } 1008 } 1009 if (last > s) 1010 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1011 s = last; 1012 break; 1013 } 1014 len = (s - strbeg) % len; 1015 /* FALLTHROUGH */ 1016 case 'X': 1017 if (utf8) { 1018 while (len > 0) { 1019 if (s <= strbeg) 1020 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1021 while (--s, UTF8_IS_CONTINUATION(*s)) { 1022 if (s <= strbeg) 1023 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1024 } 1025 len--; 1026 } 1027 } else { 1028 if (len > s - strbeg) 1029 Perl_croak(aTHX_ "'X' outside of string in unpack" ); 1030 s -= len; 1031 } 1032 break; 1033 case 'x' | TYPE_IS_SHRIEKING: { 1034 SSize_t ai32; 1035 if (!len) /* Avoid division by 0 */ 1036 len = 1; 1037 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len; 1038 else ai32 = (s - strbeg) % len; 1039 if (ai32 == 0) break; 1040 len -= ai32; 1041 } 1042 /* FALLTHROUGH */ 1043 case 'x': 1044 if (utf8) { 1045 while (len>0) { 1046 if (s >= strend) 1047 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1048 s += UTF8SKIP(s); 1049 len--; 1050 } 1051 } else { 1052 if (len > strend - s) 1053 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1054 s += len; 1055 } 1056 break; 1057 case '/': 1058 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 1059 1060 case 'A': 1061 case 'Z': 1062 case 'a': 1063 if (checksum) { 1064 /* Preliminary length estimate is assumed done in 'W' */ 1065 if (len > strend - s) len = strend - s; 1066 goto W_checksum; 1067 } 1068 if (utf8) { 1069 SSize_t l; 1070 const char *hop; 1071 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) { 1072 if (hop >= strend) { 1073 if (hop > strend) 1074 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1075 break; 1076 } 1077 } 1078 if (hop > strend) 1079 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1080 len = hop - s; 1081 } else if (len > strend - s) 1082 len = strend - s; 1083 1084 if (datumtype == 'Z') { 1085 /* 'Z' strips stuff after first null */ 1086 const char *ptr, *end; 1087 end = s + len; 1088 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break; 1089 sv = newSVpvn(s, ptr-s); 1090 if (howlen == e_star) /* exact for 'Z*' */ 1091 len = ptr-s + (ptr != strend ? 1 : 0); 1092 } else if (datumtype == 'A') { 1093 /* 'A' strips both nulls and spaces */ 1094 const char *ptr; 1095 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { 1096 for (ptr = s+len-1; ptr >= s; ptr--) { 1097 if ( *ptr != 0 1098 && !UTF8_IS_CONTINUATION(*ptr) 1099 && !isSPACE_utf8_safe(ptr, strend)) 1100 { 1101 break; 1102 } 1103 } 1104 if (ptr >= s) ptr += UTF8SKIP(ptr); 1105 else ptr++; 1106 if (ptr > s+len) 1107 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1108 } else { 1109 for (ptr = s+len-1; ptr >= s; ptr--) 1110 if (*ptr != 0 && !isSPACE(*ptr)) break; 1111 ptr++; 1112 } 1113 sv = newSVpvn(s, ptr-s); 1114 } else sv = newSVpvn(s, len); 1115 1116 if (utf8) { 1117 SvUTF8_on(sv); 1118 /* Undo any upgrade done due to need_utf8() */ 1119 if (!(symptr->flags & FLAG_WAS_UTF8)) 1120 sv_utf8_downgrade(sv, 0); 1121 } 1122 mXPUSHs(sv); 1123 s += len; 1124 break; 1125 case 'B': 1126 case 'b': { 1127 char *str; 1128 if (howlen == e_star || len > (strend - s) * 8) 1129 len = (strend - s) * 8; 1130 if (checksum) { 1131 if (utf8) 1132 while (len >= 8 && s < strend) { 1133 cuv += PL_bitcount[utf8_to_byte(aTHX_ &s, strend, datumtype)]; 1134 len -= 8; 1135 } 1136 else 1137 while (len >= 8) { 1138 cuv += PL_bitcount[*(U8 *)s++]; 1139 len -= 8; 1140 } 1141 if (len && s < strend) { 1142 U8 bits; 1143 bits = SHIFT_BYTE(utf8, s, strend, datumtype); 1144 if (datumtype == 'b') 1145 while (len-- > 0) { 1146 if (bits & 1) cuv++; 1147 bits >>= 1; 1148 } 1149 else 1150 while (len-- > 0) { 1151 if (bits & 0x80) cuv++; 1152 bits <<= 1; 1153 } 1154 } 1155 break; 1156 } 1157 1158 sv = sv_2mortal(newSV(len ? len : 1)); 1159 SvPOK_on(sv); 1160 str = SvPVX(sv); 1161 if (datumtype == 'b') { 1162 U8 bits = 0; 1163 const SSize_t ai32 = len; 1164 for (len = 0; len < ai32; len++) { 1165 if (len & 7) bits >>= 1; 1166 else if (utf8) { 1167 if (s >= strend) break; 1168 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1169 } else bits = *(U8 *) s++; 1170 *str++ = bits & 1 ? '1' : '0'; 1171 } 1172 } else { 1173 U8 bits = 0; 1174 const SSize_t ai32 = len; 1175 for (len = 0; len < ai32; len++) { 1176 if (len & 7) bits <<= 1; 1177 else if (utf8) { 1178 if (s >= strend) break; 1179 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1180 } else bits = *(U8 *) s++; 1181 *str++ = bits & 0x80 ? '1' : '0'; 1182 } 1183 } 1184 *str = '\0'; 1185 SvCUR_set(sv, str - SvPVX_const(sv)); 1186 XPUSHs(sv); 1187 break; 1188 } 1189 case 'H': 1190 case 'h': { 1191 char *str = NULL; 1192 /* Preliminary length estimate, acceptable for utf8 too */ 1193 if (howlen == e_star || len > (strend - s) * 2) 1194 len = (strend - s) * 2; 1195 if (!checksum) { 1196 sv = sv_2mortal(newSV(len ? len : 1)); 1197 SvPOK_on(sv); 1198 str = SvPVX(sv); 1199 } 1200 if (datumtype == 'h') { 1201 U8 bits = 0; 1202 SSize_t ai32 = len; 1203 for (len = 0; len < ai32; len++) { 1204 if (len & 1) bits >>= 4; 1205 else if (utf8) { 1206 if (s >= strend) break; 1207 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1208 } else bits = * (U8 *) s++; 1209 if (!checksum) 1210 *str++ = PL_hexdigit[bits & 15]; 1211 } 1212 } else { 1213 U8 bits = 0; 1214 const SSize_t ai32 = len; 1215 for (len = 0; len < ai32; len++) { 1216 if (len & 1) bits <<= 4; 1217 else if (utf8) { 1218 if (s >= strend) break; 1219 bits = utf8_to_byte(aTHX_ &s, strend, datumtype); 1220 } else bits = *(U8 *) s++; 1221 if (!checksum) 1222 *str++ = PL_hexdigit[(bits >> 4) & 15]; 1223 } 1224 } 1225 if (!checksum) { 1226 *str = '\0'; 1227 SvCUR_set(sv, str - SvPVX_const(sv)); 1228 XPUSHs(sv); 1229 } 1230 break; 1231 } 1232 case 'C': 1233 if (len == 0) { 1234 if (explicit_length) 1235 /* Switch to "character" mode */ 1236 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 1237 break; 1238 } 1239 /* FALLTHROUGH */ 1240 case 'c': 1241 while (len-- > 0 && s < strend) { 1242 int aint; 1243 if (utf8) 1244 { 1245 STRLEN retlen; 1246 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1247 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1248 if (retlen == (STRLEN) -1) 1249 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1250 s += retlen; 1251 } 1252 else 1253 aint = *(U8 *)(s)++; 1254 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ 1255 aint -= 256; 1256 if (!checksum) 1257 mPUSHi(aint); 1258 else if (checksum > bits_in_uv) 1259 cdouble += (NV)aint; 1260 else 1261 cuv += aint; 1262 } 1263 break; 1264 case 'W': 1265 W_checksum: 1266 if (utf8) { 1267 while (len-- > 0 && s < strend) { 1268 STRLEN retlen; 1269 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1270 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1271 if (retlen == (STRLEN) -1) 1272 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1273 s += retlen; 1274 if (!checksum) 1275 mPUSHu(val); 1276 else if (checksum > bits_in_uv) 1277 cdouble += (NV) val; 1278 else 1279 cuv += val; 1280 } 1281 } else if (!checksum) 1282 while (len-- > 0) { 1283 const U8 ch = *(U8 *) s++; 1284 mPUSHu(ch); 1285 } 1286 else if (checksum > bits_in_uv) 1287 while (len-- > 0) cdouble += (NV) *(U8 *) s++; 1288 else 1289 while (len-- > 0) cuv += *(U8 *) s++; 1290 break; 1291 case 'U': 1292 if (len == 0) { 1293 if (explicit_length && howlen != e_star) { 1294 /* Switch to "bytes in UTF-8" mode */ 1295 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0; 1296 else 1297 /* Should be impossible due to the need_utf8() test */ 1298 Perl_croak(aTHX_ "U0 mode on a byte string"); 1299 } 1300 break; 1301 } 1302 if (len > strend - s) len = strend - s; 1303 if (!checksum) { 1304 if (len && unpack_only_one) len = 1; 1305 EXTEND(SP, len); 1306 EXTEND_MORTAL(len); 1307 } 1308 while (len-- > 0 && s < strend) { 1309 STRLEN retlen; 1310 UV auv; 1311 if (utf8) { 1312 U8 result[UTF8_MAXLEN+1]; 1313 const char *ptr = s; 1314 STRLEN len; 1315 /* Bug: warns about bad utf8 even if we are short on bytes 1316 and will break out of the loop */ 1317 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 1318 'U')) 1319 break; 1320 len = UTF8SKIP(result); 1321 if (!S_utf8_to_bytes(aTHX_ &ptr, strend, 1322 (char *) &result[1], len-1, 'U')) break; 1323 auv = utf8n_to_uvchr(result, len, &retlen, 1324 UTF8_ALLOW_DEFAULT); 1325 s = ptr; 1326 } else { 1327 auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen, 1328 UTF8_ALLOW_DEFAULT); 1329 if (retlen == (STRLEN) -1) 1330 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1331 s += retlen; 1332 } 1333 if (!checksum) 1334 mPUSHu(auv); 1335 else if (checksum > bits_in_uv) 1336 cdouble += (NV) auv; 1337 else 1338 cuv += auv; 1339 } 1340 break; 1341 case 's' | TYPE_IS_SHRIEKING: 1342 #if SHORTSIZE != SIZE16 1343 while (len-- > 0) { 1344 short ashort; 1345 SHIFT_VAR(utf8, s, strend, ashort, datumtype, needs_swap); 1346 if (!checksum) 1347 mPUSHi(ashort); 1348 else if (checksum > bits_in_uv) 1349 cdouble += (NV)ashort; 1350 else 1351 cuv += ashort; 1352 } 1353 break; 1354 #else 1355 /* FALLTHROUGH */ 1356 #endif 1357 case 's': 1358 while (len-- > 0) { 1359 I16 ai16; 1360 1361 #if U16SIZE > SIZE16 1362 ai16 = 0; 1363 #endif 1364 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1365 #if U16SIZE > SIZE16 1366 if (ai16 > 32767) 1367 ai16 -= 65536; 1368 #endif 1369 if (!checksum) 1370 mPUSHi(ai16); 1371 else if (checksum > bits_in_uv) 1372 cdouble += (NV)ai16; 1373 else 1374 cuv += ai16; 1375 } 1376 break; 1377 case 'S' | TYPE_IS_SHRIEKING: 1378 #if SHORTSIZE != SIZE16 1379 while (len-- > 0) { 1380 unsigned short aushort; 1381 SHIFT_VAR(utf8, s, strend, aushort, datumtype, needs_swap); 1382 if (!checksum) 1383 mPUSHu(aushort); 1384 else if (checksum > bits_in_uv) 1385 cdouble += (NV)aushort; 1386 else 1387 cuv += aushort; 1388 } 1389 break; 1390 #else 1391 /* FALLTHROUGH */ 1392 #endif 1393 case 'v': 1394 case 'n': 1395 case 'S': 1396 while (len-- > 0) { 1397 U16 au16; 1398 #if U16SIZE > SIZE16 1399 au16 = 0; 1400 #endif 1401 SHIFT16(utf8, s, strend, &au16, datumtype, needs_swap); 1402 if (datumtype == 'n') 1403 au16 = PerlSock_ntohs(au16); 1404 if (datumtype == 'v') 1405 au16 = vtohs(au16); 1406 if (!checksum) 1407 mPUSHu(au16); 1408 else if (checksum > bits_in_uv) 1409 cdouble += (NV) au16; 1410 else 1411 cuv += au16; 1412 } 1413 break; 1414 case 'v' | TYPE_IS_SHRIEKING: 1415 case 'n' | TYPE_IS_SHRIEKING: 1416 while (len-- > 0) { 1417 I16 ai16; 1418 # if U16SIZE > SIZE16 1419 ai16 = 0; 1420 # endif 1421 SHIFT16(utf8, s, strend, &ai16, datumtype, needs_swap); 1422 /* There should never be any byte-swapping here. */ 1423 assert(!TYPE_ENDIANNESS(datumtype)); 1424 if (datumtype == ('n' | TYPE_IS_SHRIEKING)) 1425 ai16 = (I16) PerlSock_ntohs((U16) ai16); 1426 if (datumtype == ('v' | TYPE_IS_SHRIEKING)) 1427 ai16 = (I16) vtohs((U16) ai16); 1428 if (!checksum) 1429 mPUSHi(ai16); 1430 else if (checksum > bits_in_uv) 1431 cdouble += (NV) ai16; 1432 else 1433 cuv += ai16; 1434 } 1435 break; 1436 case 'i': 1437 case 'i' | TYPE_IS_SHRIEKING: 1438 while (len-- > 0) { 1439 int aint; 1440 SHIFT_VAR(utf8, s, strend, aint, datumtype, needs_swap); 1441 if (!checksum) 1442 mPUSHi(aint); 1443 else if (checksum > bits_in_uv) 1444 cdouble += (NV)aint; 1445 else 1446 cuv += aint; 1447 } 1448 break; 1449 case 'I': 1450 case 'I' | TYPE_IS_SHRIEKING: 1451 while (len-- > 0) { 1452 unsigned int auint; 1453 SHIFT_VAR(utf8, s, strend, auint, datumtype, needs_swap); 1454 if (!checksum) 1455 mPUSHu(auint); 1456 else if (checksum > bits_in_uv) 1457 cdouble += (NV)auint; 1458 else 1459 cuv += auint; 1460 } 1461 break; 1462 case 'j': 1463 while (len-- > 0) { 1464 IV aiv; 1465 SHIFT_VAR(utf8, s, strend, aiv, datumtype, needs_swap); 1466 if (!checksum) 1467 mPUSHi(aiv); 1468 else if (checksum > bits_in_uv) 1469 cdouble += (NV)aiv; 1470 else 1471 cuv += aiv; 1472 } 1473 break; 1474 case 'J': 1475 while (len-- > 0) { 1476 UV auv; 1477 SHIFT_VAR(utf8, s, strend, auv, datumtype, needs_swap); 1478 if (!checksum) 1479 mPUSHu(auv); 1480 else if (checksum > bits_in_uv) 1481 cdouble += (NV)auv; 1482 else 1483 cuv += auv; 1484 } 1485 break; 1486 case 'l' | TYPE_IS_SHRIEKING: 1487 #if LONGSIZE != SIZE32 1488 while (len-- > 0) { 1489 long along; 1490 SHIFT_VAR(utf8, s, strend, along, datumtype, needs_swap); 1491 if (!checksum) 1492 mPUSHi(along); 1493 else if (checksum > bits_in_uv) 1494 cdouble += (NV)along; 1495 else 1496 cuv += along; 1497 } 1498 break; 1499 #else 1500 /* FALLTHROUGH */ 1501 #endif 1502 case 'l': 1503 while (len-- > 0) { 1504 I32 ai32; 1505 #if U32SIZE > SIZE32 1506 ai32 = 0; 1507 #endif 1508 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1509 #if U32SIZE > SIZE32 1510 if (ai32 > 2147483647) ai32 -= 4294967296; 1511 #endif 1512 if (!checksum) 1513 mPUSHi(ai32); 1514 else if (checksum > bits_in_uv) 1515 cdouble += (NV)ai32; 1516 else 1517 cuv += ai32; 1518 } 1519 break; 1520 case 'L' | TYPE_IS_SHRIEKING: 1521 #if LONGSIZE != SIZE32 1522 while (len-- > 0) { 1523 unsigned long aulong; 1524 SHIFT_VAR(utf8, s, strend, aulong, datumtype, needs_swap); 1525 if (!checksum) 1526 mPUSHu(aulong); 1527 else if (checksum > bits_in_uv) 1528 cdouble += (NV)aulong; 1529 else 1530 cuv += aulong; 1531 } 1532 break; 1533 #else 1534 /* FALLTHROUGH */ 1535 #endif 1536 case 'V': 1537 case 'N': 1538 case 'L': 1539 while (len-- > 0) { 1540 U32 au32; 1541 #if U32SIZE > SIZE32 1542 au32 = 0; 1543 #endif 1544 SHIFT32(utf8, s, strend, &au32, datumtype, needs_swap); 1545 if (datumtype == 'N') 1546 au32 = PerlSock_ntohl(au32); 1547 if (datumtype == 'V') 1548 au32 = vtohl(au32); 1549 if (!checksum) 1550 mPUSHu(au32); 1551 else if (checksum > bits_in_uv) 1552 cdouble += (NV)au32; 1553 else 1554 cuv += au32; 1555 } 1556 break; 1557 case 'V' | TYPE_IS_SHRIEKING: 1558 case 'N' | TYPE_IS_SHRIEKING: 1559 while (len-- > 0) { 1560 I32 ai32; 1561 #if U32SIZE > SIZE32 1562 ai32 = 0; 1563 #endif 1564 SHIFT32(utf8, s, strend, &ai32, datumtype, needs_swap); 1565 /* There should never be any byte swapping here. */ 1566 assert(!TYPE_ENDIANNESS(datumtype)); 1567 if (datumtype == ('N' | TYPE_IS_SHRIEKING)) 1568 ai32 = (I32)PerlSock_ntohl((U32)ai32); 1569 if (datumtype == ('V' | TYPE_IS_SHRIEKING)) 1570 ai32 = (I32)vtohl((U32)ai32); 1571 if (!checksum) 1572 mPUSHi(ai32); 1573 else if (checksum > bits_in_uv) 1574 cdouble += (NV)ai32; 1575 else 1576 cuv += ai32; 1577 } 1578 break; 1579 case 'p': 1580 while (len-- > 0) { 1581 const char *aptr; 1582 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1583 /* newSVpv generates undef if aptr is NULL */ 1584 mPUSHs(newSVpv(aptr, 0)); 1585 } 1586 break; 1587 case 'w': 1588 { 1589 UV auv = 0; 1590 size_t bytes = 0; 1591 1592 while (len > 0 && s < strend) { 1593 U8 ch; 1594 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1595 auv = (auv << 7) | (ch & 0x7f); 1596 /* UTF8_IS_XXXXX not right here because this is a BER, not 1597 * UTF-8 format - using constant 0x80 */ 1598 if (ch < 0x80) { 1599 bytes = 0; 1600 mPUSHu(auv); 1601 len--; 1602 auv = 0; 1603 continue; 1604 } 1605 if (++bytes >= sizeof(UV)) { /* promote to string */ 1606 const char *t; 1607 1608 sv = Perl_newSVpvf(aTHX_ "%.*" UVuf, 1609 (int)TYPE_DIGITS(UV), auv); 1610 while (s < strend) { 1611 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1612 sv = mul128(sv, (U8)(ch & 0x7f)); 1613 if (!(ch & 0x80)) { 1614 bytes = 0; 1615 break; 1616 } 1617 } 1618 t = SvPV_nolen_const(sv); 1619 while (*t == '0') 1620 t++; 1621 sv_chop(sv, t); 1622 mPUSHs(sv); 1623 len--; 1624 auv = 0; 1625 } 1626 } 1627 if ((s >= strend) && bytes) 1628 Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); 1629 } 1630 break; 1631 case 'P': 1632 if (symptr->howlen == e_star) 1633 Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); 1634 EXTEND(SP, 1); 1635 if (s + sizeof(char*) <= strend) { 1636 char *aptr; 1637 SHIFT_VAR(utf8, s, strend, aptr, datumtype, needs_swap); 1638 /* newSVpvn generates undef if aptr is NULL */ 1639 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP)); 1640 } 1641 break; 1642 #if defined(HAS_QUAD) && IVSIZE >= 8 1643 case 'q': 1644 while (len-- > 0) { 1645 Quad_t aquad; 1646 SHIFT_VAR(utf8, s, strend, aquad, datumtype, needs_swap); 1647 if (!checksum) 1648 mPUSHs(newSViv((IV)aquad)); 1649 else if (checksum > bits_in_uv) 1650 cdouble += (NV)aquad; 1651 else 1652 cuv += aquad; 1653 } 1654 break; 1655 case 'Q': 1656 while (len-- > 0) { 1657 Uquad_t auquad; 1658 SHIFT_VAR(utf8, s, strend, auquad, datumtype, needs_swap); 1659 if (!checksum) 1660 mPUSHs(newSVuv((UV)auquad)); 1661 else if (checksum > bits_in_uv) 1662 cdouble += (NV)auquad; 1663 else 1664 cuv += auquad; 1665 } 1666 break; 1667 #endif 1668 /* float and double added gnb@melba.bby.oz.au 22/11/89 */ 1669 case 'f': 1670 while (len-- > 0) { 1671 float afloat; 1672 SHIFT_VAR(utf8, s, strend, afloat, datumtype, needs_swap); 1673 if (!checksum) 1674 mPUSHn(afloat); 1675 else 1676 cdouble += afloat; 1677 } 1678 break; 1679 case 'd': 1680 while (len-- > 0) { 1681 double adouble; 1682 SHIFT_VAR(utf8, s, strend, adouble, datumtype, needs_swap); 1683 if (!checksum) 1684 mPUSHn(adouble); 1685 else 1686 cdouble += adouble; 1687 } 1688 break; 1689 case 'F': 1690 while (len-- > 0) { 1691 NV_bytes anv; 1692 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), 1693 datumtype, needs_swap); 1694 if (!checksum) 1695 mPUSHn(anv.nv); 1696 else 1697 cdouble += anv.nv; 1698 } 1699 break; 1700 #if defined(HAS_LONG_DOUBLE) 1701 case 'D': 1702 while (len-- > 0) { 1703 ld_bytes aldouble; 1704 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, 1705 sizeof(aldouble.bytes), datumtype, needs_swap); 1706 /* The most common long double format, the x86 80-bit 1707 * extended precision, has either 2 or 6 unused bytes, 1708 * which may contain garbage, which may contain 1709 * unintentional data. While we do zero the bytes of 1710 * the long double data in pack(), here in unpack() we 1711 * don't, because it's really hard to envision that 1712 * reading the long double off aldouble would be 1713 * affected by the unused bytes. 1714 * 1715 * Note that trying to unpack 'long doubles' of 'long 1716 * doubles' packed in another system is in the general 1717 * case doomed without having more detail. */ 1718 if (!checksum) 1719 mPUSHn(aldouble.ld); 1720 else 1721 cdouble += aldouble.ld; 1722 } 1723 break; 1724 #endif 1725 case 'u': 1726 if (!checksum) { 1727 const STRLEN l = (STRLEN) (strend - s) * 3 / 4; 1728 sv = sv_2mortal(newSV(l)); 1729 if (l) { 1730 SvPOK_on(sv); 1731 *SvEND(sv) = '\0'; 1732 } 1733 } 1734 1735 /* Note that all legal uuencoded strings are ASCII printables, so 1736 * have the same representation under UTF-8 vs not. This means we 1737 * can ignore UTF8ness on legal input. For illegal we stop at the 1738 * first failure, and don't report where/what that is, so again we 1739 * can ignore UTF8ness */ 1740 1741 while (s < strend && *s != ' ' && ISUUCHAR(*s)) { 1742 I32 a, b, c, d; 1743 char hunk[3]; 1744 1745 len = PL_uudmap[*(U8*)s++] & 077; 1746 while (len > 0) { 1747 if (s < strend && ISUUCHAR(*s)) 1748 a = PL_uudmap[*(U8*)s++] & 077; 1749 else 1750 a = 0; 1751 if (s < strend && ISUUCHAR(*s)) 1752 b = PL_uudmap[*(U8*)s++] & 077; 1753 else 1754 b = 0; 1755 if (s < strend && ISUUCHAR(*s)) 1756 c = PL_uudmap[*(U8*)s++] & 077; 1757 else 1758 c = 0; 1759 if (s < strend && ISUUCHAR(*s)) 1760 d = PL_uudmap[*(U8*)s++] & 077; 1761 else 1762 d = 0; 1763 hunk[0] = (char)((a << 2) | (b >> 4)); 1764 hunk[1] = (char)((b << 4) | (c >> 2)); 1765 hunk[2] = (char)((c << 6) | d); 1766 if (!checksum) 1767 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 1768 len -= 3; 1769 } 1770 if (*s == '\n') 1771 s++; 1772 else /* possible checksum byte */ 1773 if (s + 1 < strend && s[1] == '\n') 1774 s += 2; 1775 } 1776 if (!checksum) 1777 XPUSHs(sv); 1778 break; 1779 } /* End of switch */ 1780 1781 if (checksum) { 1782 if (memCHRs("fFdD", TYPE_NO_MODIFIERS(datumtype)) || 1783 (checksum > bits_in_uv && 1784 memCHRs("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) { 1785 NV trouble, anv; 1786 1787 anv = (NV) (1 << (checksum & 15)); 1788 while (checksum >= 16) { 1789 checksum -= 16; 1790 anv *= 65536.0; 1791 } 1792 while (cdouble < 0.0) 1793 cdouble += anv; 1794 cdouble = Perl_modf(cdouble / anv, &trouble); 1795 #ifdef LONGDOUBLE_DOUBLEDOUBLE 1796 /* Workaround for powerpc doubledouble modfl bug: 1797 * close to 1.0L and -1.0L cdouble is 0, and trouble 1798 * is cdouble / anv. */ 1799 if (trouble != Perl_ceil(trouble)) { 1800 cdouble = trouble; 1801 if (cdouble > 1.0L) cdouble -= 1.0L; 1802 if (cdouble < -1.0L) cdouble += 1.0L; 1803 } 1804 #endif 1805 cdouble *= anv; 1806 sv = newSVnv(cdouble); 1807 } 1808 else { 1809 if (checksum < bits_in_uv) { 1810 UV mask = nBIT_MASK(checksum); 1811 cuv &= mask; 1812 } 1813 sv = newSVuv(cuv); 1814 } 1815 mXPUSHs(sv); 1816 checksum = 0; 1817 } 1818 1819 if (symptr->flags & FLAG_SLASH){ 1820 if (SP - PL_stack_base - start_sp_offset <= 0) 1821 break; 1822 if( next_symbol(symptr) ){ 1823 if( symptr->howlen == e_number ) 1824 Perl_croak(aTHX_ "Count after length/code in unpack" ); 1825 if( beyond ){ 1826 /* ...end of char buffer then no decent length available */ 1827 Perl_croak(aTHX_ "length/code after end of string in unpack" ); 1828 } else { 1829 /* take top of stack (hope it's numeric) */ 1830 len = POPi; 1831 if( len < 0 ) 1832 Perl_croak(aTHX_ "Negative '/' count in unpack" ); 1833 } 1834 } else { 1835 Perl_croak(aTHX_ "Code missing after '/' in unpack" ); 1836 } 1837 datumtype = symptr->code; 1838 explicit_length = FALSE; 1839 goto redo_switch; 1840 } 1841 } 1842 1843 if (new_s) 1844 *new_s = s; 1845 PUTBACK; 1846 return SP - PL_stack_base - start_sp_offset; 1847 } 1848 1849 PP(pp_unpack) 1850 { 1851 dSP; 1852 dPOPPOPssrl; 1853 U8 gimme = GIMME_V; 1854 STRLEN llen; 1855 STRLEN rlen; 1856 const char *pat = SvPV_const(left, llen); 1857 const char *s = SvPV_const(right, rlen); 1858 const char *strend = s + rlen; 1859 const char *patend = pat + llen; 1860 SSize_t cnt; 1861 1862 PUTBACK; 1863 cnt = unpackstring(pat, patend, s, strend, 1864 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) 1865 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0)); 1866 1867 SPAGAIN; 1868 if ( !cnt && gimme == G_SCALAR ) 1869 PUSHs(&PL_sv_undef); 1870 RETURN; 1871 } 1872 1873 STATIC U8 * 1874 doencodes(U8 *h, const U8 *s, SSize_t len) 1875 { 1876 *h++ = PL_uuemap[len]; 1877 while (len > 2) { 1878 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1879 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))]; 1880 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; 1881 *h++ = PL_uuemap[(077 & (s[2] & 077))]; 1882 s += 3; 1883 len -= 3; 1884 } 1885 if (len > 0) { 1886 const U8 r = (len > 1 ? s[1] : '\0'); 1887 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 1888 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))]; 1889 *h++ = PL_uuemap[(077 & ((r << 2) & 074))]; 1890 *h++ = PL_uuemap[0]; 1891 } 1892 *h++ = '\n'; 1893 return h; 1894 } 1895 1896 STATIC SV * 1897 S_is_an_int(pTHX_ const char *s, STRLEN l) 1898 { 1899 SV *result = newSVpvn(s, l); 1900 char *const result_c = SvPV_nolen(result); /* convenience */ 1901 char *out = result_c; 1902 bool skip = 1; 1903 bool ignore = 0; 1904 1905 PERL_ARGS_ASSERT_IS_AN_INT; 1906 1907 while (*s) { 1908 switch (*s) { 1909 case ' ': 1910 break; 1911 case '+': 1912 if (!skip) { 1913 SvREFCNT_dec(result); 1914 return (NULL); 1915 } 1916 break; 1917 case '0': 1918 case '1': 1919 case '2': 1920 case '3': 1921 case '4': 1922 case '5': 1923 case '6': 1924 case '7': 1925 case '8': 1926 case '9': 1927 skip = 0; 1928 if (!ignore) { 1929 *(out++) = *s; 1930 } 1931 break; 1932 case '.': 1933 ignore = 1; 1934 break; 1935 default: 1936 SvREFCNT_dec(result); 1937 return (NULL); 1938 } 1939 s++; 1940 } 1941 *(out++) = '\0'; 1942 SvCUR_set(result, out - result_c); 1943 return (result); 1944 } 1945 1946 /* pnum must be '\0' terminated */ 1947 STATIC int 1948 S_div128(pTHX_ SV *pnum, bool *done) 1949 { 1950 STRLEN len; 1951 char * const s = SvPV(pnum, len); 1952 char *t = s; 1953 int m = 0; 1954 1955 PERL_ARGS_ASSERT_DIV128; 1956 1957 *done = 1; 1958 while (*t) { 1959 const int i = m * 10 + (*t - '0'); 1960 const int r = (i >> 7); /* r < 10 */ 1961 m = i & 0x7F; 1962 if (r) { 1963 *done = 0; 1964 } 1965 *(t++) = '0' + r; 1966 } 1967 *(t++) = '\0'; 1968 SvCUR_set(pnum, (STRLEN) (t - s)); 1969 return (m); 1970 } 1971 1972 /* 1973 =for apidoc packlist 1974 1975 The engine implementing C<pack()> Perl function. 1976 1977 =cut 1978 */ 1979 1980 void 1981 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist ) 1982 { 1983 tempsym_t sym; 1984 1985 PERL_ARGS_ASSERT_PACKLIST; 1986 1987 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); 1988 1989 /* We're going to do changes through SvPVX(cat). Make sure it's valid. 1990 Also make sure any UTF8 flag is loaded */ 1991 SvPV_force_nolen(cat); 1992 if (DO_UTF8(cat)) 1993 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; 1994 1995 (void)pack_rec( cat, &sym, beglist, endlist ); 1996 } 1997 1998 /* like sv_utf8_upgrade, but also repoint the group start markers */ 1999 STATIC void 2000 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { 2001 STRLEN len; 2002 tempsym_t *group; 2003 const char *from_ptr, *from_start, *from_end, **marks, **m; 2004 char *to_start, *to_ptr; 2005 2006 if (SvUTF8(sv)) return; 2007 2008 from_start = SvPVX_const(sv); 2009 from_end = from_start + SvCUR(sv); 2010 for (from_ptr = from_start; from_ptr < from_end; from_ptr++) 2011 if (!NATIVE_BYTE_IS_INVARIANT(*from_ptr)) break; 2012 if (from_ptr == from_end) { 2013 /* Simple case: no character needs to be changed */ 2014 SvUTF8_on(sv); 2015 return; 2016 } 2017 2018 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; 2019 Newx(to_start, len, char); 2020 Copy(from_start, to_start, from_ptr-from_start, char); 2021 to_ptr = to_start + (from_ptr-from_start); 2022 2023 Newx(marks, sym_ptr->level+2, const char *); 2024 for (group=sym_ptr; group; group = group->previous) 2025 marks[group->level] = from_start + group->strbeg; 2026 marks[sym_ptr->level+1] = from_end+1; 2027 for (m = marks; *m < from_ptr; m++) 2028 *m = to_start + (*m-from_start); 2029 2030 for (;from_ptr < from_end; from_ptr++) { 2031 while (*m == from_ptr) *m++ = to_ptr; 2032 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); 2033 } 2034 *to_ptr = 0; 2035 2036 while (*m == from_ptr) *m++ = to_ptr; 2037 if (m != marks + sym_ptr->level+1) { 2038 Safefree(marks); 2039 Safefree(to_start); 2040 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, " 2041 "level=%d", m, marks, sym_ptr->level); 2042 } 2043 for (group=sym_ptr; group; group = group->previous) 2044 group->strbeg = marks[group->level] - to_start; 2045 Safefree(marks); 2046 2047 if (SvOOK(sv)) { 2048 if (SvIVX(sv)) { 2049 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); 2050 from_start -= SvIVX(sv); 2051 SvIV_set(sv, 0); 2052 } 2053 SvFLAGS(sv) &= ~SVf_OOK; 2054 } 2055 if (SvLEN(sv) != 0) 2056 Safefree(from_start); 2057 SvPV_set(sv, to_start); 2058 SvCUR_set(sv, to_ptr - to_start); 2059 SvLEN_set(sv, len); 2060 SvUTF8_on(sv); 2061 } 2062 2063 /* Exponential string grower. Makes string extension effectively O(n) 2064 needed says how many extra bytes we need (not counting the final '\0') 2065 Only grows the string if there is an actual lack of space 2066 */ 2067 STATIC char * 2068 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) { 2069 const STRLEN cur = SvCUR(sv); 2070 const STRLEN len = SvLEN(sv); 2071 STRLEN extend; 2072 2073 PERL_ARGS_ASSERT_SV_EXP_GROW; 2074 2075 if (len - cur > needed) return SvPVX(sv); 2076 extend = needed > len ? needed : len; 2077 return SvGROW(sv, len+extend+1); 2078 } 2079 2080 static SV * 2081 S_sv_check_infnan(pTHX_ SV *sv, I32 datumtype) 2082 { 2083 SvGETMAGIC(sv); 2084 if (UNLIKELY(SvAMAGIC(sv))) 2085 sv = sv_2num(sv); 2086 if (UNLIKELY(isinfnansv(sv))) { 2087 const I32 c = TYPE_NO_MODIFIERS(datumtype); 2088 const NV nv = SvNV_nomg(sv); 2089 if (c == 'w') 2090 Perl_croak(aTHX_ "Cannot compress %" NVgf " in pack", nv); 2091 else 2092 Perl_croak(aTHX_ "Cannot pack %" NVgf " with '%c'", nv, (int) c); 2093 } 2094 return sv; 2095 } 2096 2097 #define SvIV_no_inf(sv,d) \ 2098 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvIV_nomg(sv)) 2099 #define SvUV_no_inf(sv,d) \ 2100 ((sv) = S_sv_check_infnan(aTHX_ sv,d), SvUV_nomg(sv)) 2101 2102 STATIC 2103 SV ** 2104 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) 2105 { 2106 tempsym_t lookahead; 2107 SSize_t items = endlist - beglist; 2108 bool found = next_symbol(symptr); 2109 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 2110 bool warn_utf8 = ckWARN(WARN_UTF8); 2111 char* from; 2112 2113 PERL_ARGS_ASSERT_PACK_REC; 2114 2115 if (symptr->level == 0 && found && symptr->code == 'U') { 2116 marked_upgrade(aTHX_ cat, symptr); 2117 symptr->flags |= FLAG_DO_UTF8; 2118 utf8 = 0; 2119 } 2120 symptr->strbeg = SvCUR(cat); 2121 2122 while (found) { 2123 SV *fromstr; 2124 STRLEN fromlen; 2125 SSize_t len; 2126 SV *lengthcode = NULL; 2127 I32 datumtype = symptr->code; 2128 howlen_t howlen = symptr->howlen; 2129 char *start = SvPVX(cat); 2130 char *cur = start + SvCUR(cat); 2131 bool needs_swap; 2132 2133 #define NEXTFROM (lengthcode ? lengthcode : items > 0 ? (--items, *beglist++) : &PL_sv_no) 2134 #define PEEKFROM (lengthcode ? lengthcode : items > 0 ? *beglist : &PL_sv_no) 2135 2136 switch (howlen) { 2137 case e_star: 2138 len = memCHRs("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 2139 0 : items; 2140 break; 2141 default: 2142 /* e_no_len and e_number */ 2143 len = symptr->length; 2144 break; 2145 } 2146 2147 if (len) { 2148 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 2149 2150 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) { 2151 /* We can process this letter. */ 2152 STRLEN size = props & PACK_SIZE_MASK; 2153 GROWING2(utf8, cat, start, cur, size, (STRLEN)len); 2154 } 2155 } 2156 2157 /* Look ahead for next symbol. Do we have code/code? */ 2158 lookahead = *symptr; 2159 found = next_symbol(&lookahead); 2160 if (symptr->flags & FLAG_SLASH) { 2161 IV count; 2162 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack"); 2163 if (memCHRs("aAZ", lookahead.code)) { 2164 if (lookahead.howlen == e_number) count = lookahead.length; 2165 else { 2166 if (items > 0) { 2167 count = sv_len_utf8(*beglist); 2168 } 2169 else count = 0; 2170 if (lookahead.code == 'Z') count++; 2171 } 2172 } else { 2173 if (lookahead.howlen == e_number && lookahead.length < items) 2174 count = lookahead.length; 2175 else count = items; 2176 } 2177 lookahead.howlen = e_number; 2178 lookahead.length = count; 2179 lengthcode = sv_2mortal(newSViv(count)); 2180 } 2181 2182 needs_swap = NEEDS_SWAP(datumtype); 2183 2184 /* Code inside the switch must take care to properly update 2185 cat (CUR length and '\0' termination) if it updated *cur and 2186 doesn't simply leave using break */ 2187 switch (TYPE_NO_ENDIANNESS(datumtype)) { 2188 default: 2189 /* diag_listed_as: Invalid type '%s' in %s */ 2190 Perl_croak(aTHX_ "Invalid type '%c' in pack", 2191 (int) TYPE_NO_MODIFIERS(datumtype)); 2192 case '%': 2193 Perl_croak(aTHX_ "'%%' may not be used in pack"); 2194 2195 case '.' | TYPE_IS_SHRIEKING: 2196 case '.': 2197 if (howlen == e_star) from = start; 2198 else if (len == 0) from = cur; 2199 else { 2200 tempsym_t *group = symptr; 2201 2202 while (--len && group) group = group->previous; 2203 from = group ? start + group->strbeg : start; 2204 } 2205 fromstr = NEXTFROM; 2206 len = SvIV_no_inf(fromstr, datumtype); 2207 goto resize; 2208 case '@' | TYPE_IS_SHRIEKING: 2209 case '@': 2210 from = start + symptr->strbeg; 2211 resize: 2212 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 2213 if (len >= 0) { 2214 while (len && from < cur) { 2215 from += UTF8SKIP(from); 2216 len--; 2217 } 2218 if (from > cur) 2219 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2220 if (len) { 2221 /* Here we know from == cur */ 2222 grow: 2223 GROWING(0, cat, start, cur, len); 2224 Zero(cur, len, char); 2225 cur += len; 2226 } else if (from < cur) { 2227 len = cur - from; 2228 goto shrink; 2229 } else goto no_change; 2230 } else { 2231 cur = from; 2232 len = -len; 2233 goto utf8_shrink; 2234 } 2235 else { 2236 len -= cur - from; 2237 if (len > 0) goto grow; 2238 if (len == 0) goto no_change; 2239 len = -len; 2240 goto shrink; 2241 } 2242 break; 2243 2244 case '(': { 2245 tempsym_t savsym = *symptr; 2246 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 2247 symptr->flags |= group_modifiers; 2248 symptr->patend = savsym.grpend; 2249 symptr->level++; 2250 symptr->previous = &lookahead; 2251 while (len--) { 2252 U32 was_utf8; 2253 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 2254 else symptr->flags &= ~FLAG_PARSE_UTF8; 2255 was_utf8 = SvUTF8(cat); 2256 symptr->patptr = savsym.grpbeg; 2257 beglist = pack_rec(cat, symptr, beglist, endlist); 2258 if (SvUTF8(cat) != was_utf8) 2259 /* This had better be an upgrade while in utf8==0 mode */ 2260 utf8 = 1; 2261 2262 if (savsym.howlen == e_star && beglist == endlist) 2263 break; /* No way to continue */ 2264 } 2265 items = endlist - beglist; 2266 lookahead.flags = symptr->flags & ~group_modifiers; 2267 goto no_change; 2268 } 2269 case 'X' | TYPE_IS_SHRIEKING: 2270 if (!len) /* Avoid division by 0 */ 2271 len = 1; 2272 if (utf8) { 2273 char *hop, *last; 2274 SSize_t l = len; 2275 hop = last = start; 2276 while (hop < cur) { 2277 hop += UTF8SKIP(hop); 2278 if (--l == 0) { 2279 last = hop; 2280 l = len; 2281 } 2282 } 2283 if (last > cur) 2284 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2285 cur = last; 2286 break; 2287 } 2288 len = (cur-start) % len; 2289 /* FALLTHROUGH */ 2290 case 'X': 2291 if (utf8) { 2292 if (len < 1) goto no_change; 2293 utf8_shrink: 2294 while (len > 0) { 2295 if (cur <= start) 2296 Perl_croak(aTHX_ "'%c' outside of string in pack", 2297 (int) TYPE_NO_MODIFIERS(datumtype)); 2298 while (--cur, UTF8_IS_CONTINUATION(*cur)) { 2299 if (cur <= start) 2300 Perl_croak(aTHX_ "'%c' outside of string in pack", 2301 (int) TYPE_NO_MODIFIERS(datumtype)); 2302 } 2303 len--; 2304 } 2305 } else { 2306 shrink: 2307 if (cur - start < len) 2308 Perl_croak(aTHX_ "'%c' outside of string in pack", 2309 (int) TYPE_NO_MODIFIERS(datumtype)); 2310 cur -= len; 2311 } 2312 if (cur < start+symptr->strbeg) { 2313 /* Make sure group starts don't point into the void */ 2314 tempsym_t *group; 2315 const STRLEN length = cur-start; 2316 for (group = symptr; 2317 group && length < group->strbeg; 2318 group = group->previous) group->strbeg = length; 2319 lookahead.strbeg = length; 2320 } 2321 break; 2322 case 'x' | TYPE_IS_SHRIEKING: { 2323 SSize_t ai32; 2324 if (!len) /* Avoid division by 0 */ 2325 len = 1; 2326 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len; 2327 else ai32 = (cur - start) % len; 2328 if (ai32 == 0) goto no_change; 2329 len -= ai32; 2330 } 2331 /* FALLTHROUGH */ 2332 case 'x': 2333 goto grow; 2334 case 'A': 2335 case 'Z': 2336 case 'a': { 2337 const char *aptr; 2338 2339 fromstr = NEXTFROM; 2340 aptr = SvPV_const(fromstr, fromlen); 2341 if (DO_UTF8(fromstr)) { 2342 const char *end, *s; 2343 2344 if (!utf8 && !SvUTF8(cat)) { 2345 marked_upgrade(aTHX_ cat, symptr); 2346 lookahead.flags |= FLAG_DO_UTF8; 2347 lookahead.strbeg = symptr->strbeg; 2348 utf8 = 1; 2349 start = SvPVX(cat); 2350 cur = start + SvCUR(cat); 2351 } 2352 if (howlen == e_star) { 2353 if (utf8) goto string_copy; 2354 len = fromlen+1; 2355 } 2356 s = aptr; 2357 end = aptr + fromlen; 2358 fromlen = datumtype == 'Z' ? len-1 : len; 2359 while ((SSize_t) fromlen > 0 && s < end) { 2360 s += UTF8SKIP(s); 2361 fromlen--; 2362 } 2363 if (s > end) 2364 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2365 if (utf8) { 2366 len = fromlen; 2367 if (datumtype == 'Z') len++; 2368 fromlen = s-aptr; 2369 len += fromlen; 2370 2371 goto string_copy; 2372 } 2373 fromlen = len - fromlen; 2374 if (datumtype == 'Z') fromlen--; 2375 if (howlen == e_star) { 2376 len = fromlen; 2377 if (datumtype == 'Z') len++; 2378 } 2379 GROWING(0, cat, start, cur, len); 2380 if (!S_utf8_to_bytes(aTHX_ &aptr, end, cur, fromlen, 2381 datumtype | TYPE_IS_PACK)) 2382 Perl_croak(aTHX_ "panic: predicted utf8 length not available, " 2383 "for '%c', aptr=%p end=%p cur=%p, fromlen=%zu", 2384 (int)datumtype, aptr, end, cur, fromlen); 2385 cur += fromlen; 2386 len -= fromlen; 2387 } else if (utf8) { 2388 if (howlen == e_star) { 2389 len = fromlen; 2390 if (datumtype == 'Z') len++; 2391 } 2392 if (len <= (SSize_t) fromlen) { 2393 fromlen = len; 2394 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2395 } 2396 /* assumes a byte expands to at most UTF8_EXPAND bytes on 2397 upgrade, so: 2398 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ 2399 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); 2400 len -= fromlen; 2401 while (fromlen > 0) { 2402 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); 2403 aptr++; 2404 fromlen--; 2405 } 2406 } else { 2407 string_copy: 2408 if (howlen == e_star) { 2409 len = fromlen; 2410 if (datumtype == 'Z') len++; 2411 } 2412 if (len <= (SSize_t) fromlen) { 2413 fromlen = len; 2414 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2415 } 2416 GROWING(0, cat, start, cur, len); 2417 Copy(aptr, cur, fromlen, char); 2418 cur += fromlen; 2419 len -= fromlen; 2420 } 2421 memset(cur, datumtype == 'A' ? ' ' : '\0', len); 2422 cur += len; 2423 SvTAINT(cat); 2424 break; 2425 } 2426 case 'B': 2427 case 'b': { 2428 const char *str, *end; 2429 SSize_t l, field_len; 2430 U8 bits; 2431 bool utf8_source; 2432 U32 utf8_flags; 2433 2434 fromstr = NEXTFROM; 2435 str = SvPV_const(fromstr, fromlen); 2436 end = str + fromlen; 2437 if (DO_UTF8(fromstr)) { 2438 utf8_source = TRUE; 2439 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2440 } else { 2441 utf8_source = FALSE; 2442 utf8_flags = 0; /* Unused, but keep compilers happy */ 2443 } 2444 if (howlen == e_star) len = fromlen; 2445 field_len = (len+7)/8; 2446 GROWING(utf8, cat, start, cur, field_len); 2447 if (len > (SSize_t)fromlen) len = fromlen; 2448 bits = 0; 2449 l = 0; 2450 if (datumtype == 'B') 2451 while (l++ < len) { 2452 if (utf8_source) { 2453 UV val = 0; 2454 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2455 bits |= val & 1; 2456 } else bits |= *str++ & 1; 2457 if (l & 7) bits <<= 1; 2458 else { 2459 PUSH_BYTE(utf8, cur, bits); 2460 bits = 0; 2461 } 2462 } 2463 else 2464 /* datumtype == 'b' */ 2465 while (l++ < len) { 2466 if (utf8_source) { 2467 UV val = 0; 2468 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2469 if (val & 1) bits |= 0x80; 2470 } else if (*str++ & 1) 2471 bits |= 0x80; 2472 if (l & 7) bits >>= 1; 2473 else { 2474 PUSH_BYTE(utf8, cur, bits); 2475 bits = 0; 2476 } 2477 } 2478 l--; 2479 if (l & 7) { 2480 if (datumtype == 'B') 2481 bits <<= 7 - (l & 7); 2482 else 2483 bits >>= 7 - (l & 7); 2484 PUSH_BYTE(utf8, cur, bits); 2485 l += 7; 2486 } 2487 /* Determine how many chars are left in the requested field */ 2488 l /= 8; 2489 if (howlen == e_star) field_len = 0; 2490 else field_len -= l; 2491 Zero(cur, field_len, char); 2492 cur += field_len; 2493 break; 2494 } 2495 case 'H': 2496 case 'h': { 2497 const char *str, *end; 2498 SSize_t l, field_len; 2499 U8 bits; 2500 bool utf8_source; 2501 U32 utf8_flags; 2502 2503 fromstr = NEXTFROM; 2504 str = SvPV_const(fromstr, fromlen); 2505 end = str + fromlen; 2506 if (DO_UTF8(fromstr)) { 2507 utf8_source = TRUE; 2508 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2509 } else { 2510 utf8_source = FALSE; 2511 utf8_flags = 0; /* Unused, but keep compilers happy */ 2512 } 2513 if (howlen == e_star) len = fromlen; 2514 field_len = (len+1)/2; 2515 GROWING(utf8, cat, start, cur, field_len); 2516 if (!utf8_source && len > (SSize_t)fromlen) len = fromlen; 2517 bits = 0; 2518 l = 0; 2519 if (datumtype == 'H') 2520 while (l++ < len) { 2521 if (utf8_source) { 2522 UV val = 0; 2523 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2524 if (val < 256 && isALPHA(val)) 2525 bits |= (val + 9) & 0xf; 2526 else 2527 bits |= val & 0xf; 2528 } else if (isALPHA(*str)) 2529 bits |= (*str++ + 9) & 0xf; 2530 else 2531 bits |= *str++ & 0xf; 2532 if (l & 1) bits <<= 4; 2533 else { 2534 PUSH_BYTE(utf8, cur, bits); 2535 bits = 0; 2536 } 2537 } 2538 else 2539 while (l++ < len) { 2540 if (utf8_source) { 2541 UV val = 0; 2542 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2543 if (val < 256 && isALPHA(val)) 2544 bits |= ((val + 9) & 0xf) << 4; 2545 else 2546 bits |= (val & 0xf) << 4; 2547 } else if (isALPHA(*str)) 2548 bits |= ((*str++ + 9) & 0xf) << 4; 2549 else 2550 bits |= (*str++ & 0xf) << 4; 2551 if (l & 1) bits >>= 4; 2552 else { 2553 PUSH_BYTE(utf8, cur, bits); 2554 bits = 0; 2555 } 2556 } 2557 l--; 2558 if (l & 1) { 2559 PUSH_BYTE(utf8, cur, bits); 2560 l++; 2561 } 2562 /* Determine how many chars are left in the requested field */ 2563 l /= 2; 2564 if (howlen == e_star) field_len = 0; 2565 else field_len -= l; 2566 Zero(cur, field_len, char); 2567 cur += field_len; 2568 break; 2569 } 2570 case 'c': 2571 while (len-- > 0) { 2572 IV aiv; 2573 fromstr = NEXTFROM; 2574 aiv = SvIV_no_inf(fromstr, datumtype); 2575 if ((-128 > aiv || aiv > 127)) 2576 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2577 "Character in 'c' format wrapped in pack"); 2578 PUSH_BYTE(utf8, cur, (U8)aiv); 2579 } 2580 break; 2581 case 'C': 2582 if (len == 0) { 2583 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 2584 break; 2585 } 2586 while (len-- > 0) { 2587 IV aiv; 2588 fromstr = NEXTFROM; 2589 aiv = SvIV_no_inf(fromstr, datumtype); 2590 if ((0 > aiv || aiv > 0xff)) 2591 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2592 "Character in 'C' format wrapped in pack"); 2593 PUSH_BYTE(utf8, cur, (U8)aiv); 2594 } 2595 break; 2596 case 'W': { 2597 char *end; 2598 U8 in_bytes = (U8)IN_BYTES; 2599 2600 end = start+SvLEN(cat)-1; 2601 if (utf8) end -= UTF8_MAXLEN-1; 2602 while (len-- > 0) { 2603 UV auv; 2604 fromstr = NEXTFROM; 2605 auv = SvUV_no_inf(fromstr, datumtype); 2606 if (in_bytes) auv = auv % 0x100; 2607 if (utf8) { 2608 W_utf8: 2609 if (cur >= end) { 2610 *cur = '\0'; 2611 SvCUR_set(cat, cur - start); 2612 2613 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2614 end = start+SvLEN(cat)-UTF8_MAXLEN; 2615 } 2616 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); 2617 } else { 2618 if (auv >= 0x100) { 2619 if (!SvUTF8(cat)) { 2620 *cur = '\0'; 2621 SvCUR_set(cat, cur - start); 2622 marked_upgrade(aTHX_ cat, symptr); 2623 lookahead.flags |= FLAG_DO_UTF8; 2624 lookahead.strbeg = symptr->strbeg; 2625 utf8 = 1; 2626 start = SvPVX(cat); 2627 cur = start + SvCUR(cat); 2628 end = start+SvLEN(cat)-UTF8_MAXLEN; 2629 goto W_utf8; 2630 } 2631 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2632 "Character in 'W' format wrapped in pack"); 2633 auv = (U8) auv; 2634 } 2635 if (cur >= end) { 2636 *cur = '\0'; 2637 SvCUR_set(cat, cur - start); 2638 GROWING(0, cat, start, cur, len+1); 2639 end = start+SvLEN(cat)-1; 2640 } 2641 *(U8 *) cur++ = (U8)auv; 2642 } 2643 } 2644 break; 2645 } 2646 case 'U': { 2647 char *end; 2648 2649 if (len == 0) { 2650 if (!(symptr->flags & FLAG_DO_UTF8)) { 2651 marked_upgrade(aTHX_ cat, symptr); 2652 lookahead.flags |= FLAG_DO_UTF8; 2653 lookahead.strbeg = symptr->strbeg; 2654 } 2655 utf8 = 0; 2656 goto no_change; 2657 } 2658 2659 end = start+SvLEN(cat); 2660 if (!utf8) end -= UTF8_MAXLEN; 2661 while (len-- > 0) { 2662 UV auv; 2663 fromstr = NEXTFROM; 2664 auv = SvUV_no_inf(fromstr, datumtype); 2665 if (utf8) { 2666 U8 buffer[UTF8_MAXLEN+1], *endb; 2667 endb = uvchr_to_utf8_flags(buffer, auv, 0); 2668 if (cur+(endb-buffer)*UTF8_EXPAND >= end) { 2669 *cur = '\0'; 2670 SvCUR_set(cat, cur - start); 2671 GROWING(0, cat, start, cur, 2672 len+(endb-buffer)*UTF8_EXPAND); 2673 end = start+SvLEN(cat); 2674 } 2675 cur = my_bytes_to_utf8(buffer, endb-buffer, cur, 0); 2676 } else { 2677 if (cur >= end) { 2678 *cur = '\0'; 2679 SvCUR_set(cat, cur - start); 2680 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 2681 end = start+SvLEN(cat)-UTF8_MAXLEN; 2682 } 2683 cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0); 2684 } 2685 } 2686 break; 2687 } 2688 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ 2689 case 'f': 2690 while (len-- > 0) { 2691 float afloat; 2692 NV anv; 2693 fromstr = NEXTFROM; 2694 anv = SvNV(fromstr); 2695 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT) 2696 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2697 * on Alpha; fake it if we don't have them. 2698 */ 2699 if (anv > FLT_MAX) 2700 afloat = FLT_MAX; 2701 else if (anv < -FLT_MAX) 2702 afloat = -FLT_MAX; 2703 else afloat = (float)anv; 2704 # else 2705 # if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan) 2706 if(Perl_isnan(anv)) 2707 afloat = (float)NV_NAN; 2708 else 2709 # endif 2710 # ifdef NV_INF 2711 /* a simple cast to float is undefined if outside 2712 * the range of values that can be represented */ 2713 afloat = (float)(anv > FLT_MAX ? NV_INF : 2714 anv < -FLT_MAX ? -NV_INF : anv); 2715 # endif 2716 # endif 2717 PUSH_VAR(utf8, cur, afloat, needs_swap); 2718 } 2719 break; 2720 case 'd': 2721 while (len-- > 0) { 2722 double adouble; 2723 NV anv; 2724 fromstr = NEXTFROM; 2725 anv = SvNV(fromstr); 2726 # if (defined(VMS) && !defined(_IEEE_FP)) || defined(DOUBLE_IS_VAX_FLOAT) 2727 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 2728 * on Alpha; fake it if we don't have them. 2729 */ 2730 if (anv > DBL_MAX) 2731 adouble = DBL_MAX; 2732 else if (anv < -DBL_MAX) 2733 adouble = -DBL_MAX; 2734 else adouble = (double)anv; 2735 # else 2736 adouble = (double)anv; 2737 # endif 2738 PUSH_VAR(utf8, cur, adouble, needs_swap); 2739 } 2740 break; 2741 case 'F': { 2742 NV_bytes anv; 2743 Zero(&anv, 1, NV); /* can be long double with unused bits */ 2744 while (len-- > 0) { 2745 fromstr = NEXTFROM; 2746 #ifdef __GNUC__ 2747 /* to work round a gcc/x86 bug; don't use SvNV */ 2748 anv.nv = sv_2nv(fromstr); 2749 # if defined(LONGDOUBLE_X86_80_BIT) && defined(USE_LONG_DOUBLE) \ 2750 && LONG_DOUBLESIZE > 10 2751 /* GCC sometimes overwrites the padding in the 2752 assignment above */ 2753 Zero(anv.bytes+10, sizeof(anv.bytes) - 10, U8); 2754 # endif 2755 #else 2756 anv.nv = SvNV(fromstr); 2757 #endif 2758 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes), needs_swap); 2759 } 2760 break; 2761 } 2762 #if defined(HAS_LONG_DOUBLE) 2763 case 'D': { 2764 ld_bytes aldouble; 2765 /* long doubles can have unused bits, which may be nonzero */ 2766 Zero(&aldouble, 1, long double); 2767 while (len-- > 0) { 2768 fromstr = NEXTFROM; 2769 # ifdef __GNUC__ 2770 /* to work round a gcc/x86 bug; don't use SvNV */ 2771 aldouble.ld = (long double)sv_2nv(fromstr); 2772 # if defined(LONGDOUBLE_X86_80_BIT) && LONG_DOUBLESIZE > 10 2773 /* GCC sometimes overwrites the padding in the 2774 assignment above */ 2775 Zero(aldouble.bytes+10, sizeof(aldouble.bytes) - 10, U8); 2776 # endif 2777 # else 2778 aldouble.ld = (long double)SvNV(fromstr); 2779 # endif 2780 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes), 2781 needs_swap); 2782 } 2783 break; 2784 } 2785 #endif 2786 case 'n' | TYPE_IS_SHRIEKING: 2787 case 'n': 2788 while (len-- > 0) { 2789 I16 ai16; 2790 fromstr = NEXTFROM; 2791 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2792 ai16 = PerlSock_htons(ai16); 2793 PUSH16(utf8, cur, &ai16, FALSE); 2794 } 2795 break; 2796 case 'v' | TYPE_IS_SHRIEKING: 2797 case 'v': 2798 while (len-- > 0) { 2799 I16 ai16; 2800 fromstr = NEXTFROM; 2801 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2802 ai16 = htovs(ai16); 2803 PUSH16(utf8, cur, &ai16, FALSE); 2804 } 2805 break; 2806 case 'S' | TYPE_IS_SHRIEKING: 2807 #if SHORTSIZE != SIZE16 2808 while (len-- > 0) { 2809 unsigned short aushort; 2810 fromstr = NEXTFROM; 2811 aushort = SvUV_no_inf(fromstr, datumtype); 2812 PUSH_VAR(utf8, cur, aushort, needs_swap); 2813 } 2814 break; 2815 #else 2816 /* FALLTHROUGH */ 2817 #endif 2818 case 'S': 2819 while (len-- > 0) { 2820 U16 au16; 2821 fromstr = NEXTFROM; 2822 au16 = (U16)SvUV_no_inf(fromstr, datumtype); 2823 PUSH16(utf8, cur, &au16, needs_swap); 2824 } 2825 break; 2826 case 's' | TYPE_IS_SHRIEKING: 2827 #if SHORTSIZE != SIZE16 2828 while (len-- > 0) { 2829 short ashort; 2830 fromstr = NEXTFROM; 2831 ashort = SvIV_no_inf(fromstr, datumtype); 2832 PUSH_VAR(utf8, cur, ashort, needs_swap); 2833 } 2834 break; 2835 #else 2836 /* FALLTHROUGH */ 2837 #endif 2838 case 's': 2839 while (len-- > 0) { 2840 I16 ai16; 2841 fromstr = NEXTFROM; 2842 ai16 = (I16)SvIV_no_inf(fromstr, datumtype); 2843 PUSH16(utf8, cur, &ai16, needs_swap); 2844 } 2845 break; 2846 case 'I': 2847 case 'I' | TYPE_IS_SHRIEKING: 2848 while (len-- > 0) { 2849 unsigned int auint; 2850 fromstr = NEXTFROM; 2851 auint = SvUV_no_inf(fromstr, datumtype); 2852 PUSH_VAR(utf8, cur, auint, needs_swap); 2853 } 2854 break; 2855 case 'j': 2856 while (len-- > 0) { 2857 IV aiv; 2858 fromstr = NEXTFROM; 2859 aiv = SvIV_no_inf(fromstr, datumtype); 2860 PUSH_VAR(utf8, cur, aiv, needs_swap); 2861 } 2862 break; 2863 case 'J': 2864 while (len-- > 0) { 2865 UV auv; 2866 fromstr = NEXTFROM; 2867 auv = SvUV_no_inf(fromstr, datumtype); 2868 PUSH_VAR(utf8, cur, auv, needs_swap); 2869 } 2870 break; 2871 case 'w': 2872 while (len-- > 0) { 2873 NV anv; 2874 fromstr = NEXTFROM; 2875 S_sv_check_infnan(aTHX_ fromstr, datumtype); 2876 anv = SvNV_nomg(fromstr); 2877 2878 if (anv < 0) { 2879 *cur = '\0'; 2880 SvCUR_set(cat, cur - start); 2881 Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); 2882 } 2883 2884 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, 2885 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as 2886 any negative IVs will have already been got by the croak() 2887 above. IOK is untrue for fractions, so we test them 2888 against UV_MAX_P1. */ 2889 if (SvIOK(fromstr) || anv < UV_MAX_P1) { 2890 char buf[(sizeof(UV)*CHAR_BIT)/7+1]; 2891 char *in = buf + sizeof(buf); 2892 UV auv = SvUV_nomg(fromstr); 2893 2894 do { 2895 *--in = (char)((auv & 0x7f) | 0x80); 2896 auv >>= 7; 2897 } while (auv); 2898 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2899 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2900 in, (buf + sizeof(buf)) - in); 2901 } else if (SvPOKp(fromstr)) 2902 goto w_string; 2903 else if (SvNOKp(fromstr)) { 2904 /* 10**NV_MAX_10_EXP is the largest power of 10 2905 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable 2906 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: 2907 x = (NV_MAX_10_EXP+1) * log (10) / log (128) 2908 And with that many bytes only Inf can overflow. 2909 Some C compilers are strict about integral constant 2910 expressions so we conservatively divide by a slightly 2911 smaller integer instead of multiplying by the exact 2912 floating-point value. 2913 */ 2914 #ifdef NV_MAX_10_EXP 2915 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */ 2916 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */ 2917 #else 2918 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */ 2919 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */ 2920 #endif 2921 char *in = buf + sizeof(buf); 2922 2923 anv = Perl_floor(anv); 2924 do { 2925 const NV next = Perl_floor(anv / 128); 2926 if (in <= buf) /* this cannot happen ;-) */ 2927 Perl_croak(aTHX_ "Cannot compress integer in pack"); 2928 *--in = (unsigned char)(anv - (next * 128)) | 0x80; 2929 anv = next; 2930 } while (anv > 0); 2931 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 2932 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2933 in, (buf + sizeof(buf)) - in); 2934 } else { 2935 const char *from; 2936 char *result, *in; 2937 SV *norm; 2938 STRLEN len; 2939 bool done; 2940 2941 w_string: 2942 /* Copy string and check for compliance */ 2943 from = SvPV_nomg_const(fromstr, len); 2944 if ((norm = is_an_int(from, len)) == NULL) 2945 Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); 2946 2947 Newx(result, len, char); 2948 in = result + len; 2949 done = FALSE; 2950 while (!done) *--in = div128(norm, &done) | 0x80; 2951 result[len - 1] &= 0x7F; /* clear continue bit */ 2952 PUSH_GROWING_BYTES(utf8, cat, start, cur, 2953 in, (result + len) - in); 2954 Safefree(result); 2955 SvREFCNT_dec(norm); /* free norm */ 2956 } 2957 } 2958 break; 2959 case 'i': 2960 case 'i' | TYPE_IS_SHRIEKING: 2961 while (len-- > 0) { 2962 int aint; 2963 fromstr = NEXTFROM; 2964 aint = SvIV_no_inf(fromstr, datumtype); 2965 PUSH_VAR(utf8, cur, aint, needs_swap); 2966 } 2967 break; 2968 case 'N' | TYPE_IS_SHRIEKING: 2969 case 'N': 2970 while (len-- > 0) { 2971 U32 au32; 2972 fromstr = NEXTFROM; 2973 au32 = SvUV_no_inf(fromstr, datumtype); 2974 au32 = PerlSock_htonl(au32); 2975 PUSH32(utf8, cur, &au32, FALSE); 2976 } 2977 break; 2978 case 'V' | TYPE_IS_SHRIEKING: 2979 case 'V': 2980 while (len-- > 0) { 2981 U32 au32; 2982 fromstr = NEXTFROM; 2983 au32 = SvUV_no_inf(fromstr, datumtype); 2984 au32 = htovl(au32); 2985 PUSH32(utf8, cur, &au32, FALSE); 2986 } 2987 break; 2988 case 'L' | TYPE_IS_SHRIEKING: 2989 #if LONGSIZE != SIZE32 2990 while (len-- > 0) { 2991 unsigned long aulong; 2992 fromstr = NEXTFROM; 2993 aulong = SvUV_no_inf(fromstr, datumtype); 2994 PUSH_VAR(utf8, cur, aulong, needs_swap); 2995 } 2996 break; 2997 #else 2998 /* Fall though! */ 2999 #endif 3000 case 'L': 3001 while (len-- > 0) { 3002 U32 au32; 3003 fromstr = NEXTFROM; 3004 au32 = SvUV_no_inf(fromstr, datumtype); 3005 PUSH32(utf8, cur, &au32, needs_swap); 3006 } 3007 break; 3008 case 'l' | TYPE_IS_SHRIEKING: 3009 #if LONGSIZE != SIZE32 3010 while (len-- > 0) { 3011 long along; 3012 fromstr = NEXTFROM; 3013 along = SvIV_no_inf(fromstr, datumtype); 3014 PUSH_VAR(utf8, cur, along, needs_swap); 3015 } 3016 break; 3017 #else 3018 /* Fall though! */ 3019 #endif 3020 case 'l': 3021 while (len-- > 0) { 3022 I32 ai32; 3023 fromstr = NEXTFROM; 3024 ai32 = SvIV_no_inf(fromstr, datumtype); 3025 PUSH32(utf8, cur, &ai32, needs_swap); 3026 } 3027 break; 3028 #if defined(HAS_QUAD) && IVSIZE >= 8 3029 case 'Q': 3030 while (len-- > 0) { 3031 Uquad_t auquad; 3032 fromstr = NEXTFROM; 3033 auquad = (Uquad_t) SvUV_no_inf(fromstr, datumtype); 3034 PUSH_VAR(utf8, cur, auquad, needs_swap); 3035 } 3036 break; 3037 case 'q': 3038 while (len-- > 0) { 3039 Quad_t aquad; 3040 fromstr = NEXTFROM; 3041 aquad = (Quad_t)SvIV_no_inf(fromstr, datumtype); 3042 PUSH_VAR(utf8, cur, aquad, needs_swap); 3043 } 3044 break; 3045 #endif 3046 case 'P': 3047 len = 1; /* assume SV is correct length */ 3048 GROWING(utf8, cat, start, cur, sizeof(char *)); 3049 /* FALLTHROUGH */ 3050 case 'p': 3051 while (len-- > 0) { 3052 const char *aptr; 3053 3054 fromstr = NEXTFROM; 3055 SvGETMAGIC(fromstr); 3056 if (!SvOK(fromstr)) aptr = NULL; 3057 else { 3058 /* XXX better yet, could spirit away the string to 3059 * a safe spot and hang on to it until the result 3060 * of pack() (and all copies of the result) are 3061 * gone. 3062 */ 3063 if (((SvTEMP(fromstr) && SvREFCNT(fromstr) == 1) 3064 || (SvPADTMP(fromstr) && 3065 !SvREADONLY(fromstr)))) { 3066 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3067 "Attempt to pack pointer to temporary value"); 3068 } 3069 if (SvPOK(fromstr) || SvNIOK(fromstr)) 3070 aptr = SvPV_nomg_const_nolen(fromstr); 3071 else 3072 aptr = SvPV_force_flags_nolen(fromstr, 0); 3073 } 3074 PUSH_VAR(utf8, cur, aptr, needs_swap); 3075 } 3076 break; 3077 case 'u': { 3078 const char *aptr, *aend; 3079 bool from_utf8; 3080 3081 fromstr = NEXTFROM; 3082 if (len <= 2) len = 45; 3083 else len = len / 3 * 3; 3084 if (len >= 64) { 3085 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3086 "Field too wide in 'u' format in pack"); 3087 len = 63; 3088 } 3089 aptr = SvPV_const(fromstr, fromlen); 3090 from_utf8 = DO_UTF8(fromstr); 3091 if (from_utf8) { 3092 aend = aptr + fromlen; 3093 fromlen = sv_len_utf8_nomg(fromstr); 3094 } else aend = NULL; /* Unused, but keep compilers happy */ 3095 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2); 3096 while (fromlen > 0) { 3097 U8 *end; 3098 SSize_t todo; 3099 U8 hunk[1+63/3*4+1]; 3100 3101 if ((SSize_t)fromlen > len) 3102 todo = len; 3103 else 3104 todo = fromlen; 3105 if (from_utf8) { 3106 char buffer[64]; 3107 if (!S_utf8_to_bytes(aTHX_ &aptr, aend, buffer, todo, 3108 'u' | TYPE_IS_PACK)) { 3109 *cur = '\0'; 3110 SvCUR_set(cat, cur - start); 3111 Perl_croak(aTHX_ "panic: string is shorter than advertised, " 3112 "aptr=%p, aend=%p, buffer=%p, todo=%zd", 3113 aptr, aend, buffer, todo); 3114 } 3115 end = doencodes(hunk, (const U8 *)buffer, todo); 3116 } else { 3117 end = doencodes(hunk, (const U8 *)aptr, todo); 3118 aptr += todo; 3119 } 3120 PUSH_BYTES(utf8, cur, hunk, end-hunk, 0); 3121 fromlen -= todo; 3122 } 3123 break; 3124 } 3125 } 3126 *cur = '\0'; 3127 SvCUR_set(cat, cur - start); 3128 no_change: 3129 *symptr = lookahead; 3130 } 3131 return beglist; 3132 } 3133 #undef NEXTFROM 3134 3135 3136 PP(pp_pack) 3137 { 3138 dSP; dMARK; dORIGMARK; dTARGET; 3139 SV *cat = TARG; 3140 STRLEN fromlen; 3141 SV *pat_sv = *++MARK; 3142 const char *pat = SvPV_const(pat_sv, fromlen); 3143 const char *patend = pat + fromlen; 3144 3145 MARK++; 3146 SvPVCLEAR(cat); 3147 SvUTF8_off(cat); 3148 3149 packlist(cat, pat, patend, MARK, SP + 1); 3150 3151 if (SvUTF8(cat)) { 3152 STRLEN result_len; 3153 const char * result = SvPV_nomg(cat, result_len); 3154 const U8 * error_pos; 3155 3156 if (! is_utf8_string_loc((U8 *) result, result_len, &error_pos)) { 3157 _force_out_malformed_utf8_message(error_pos, 3158 (U8 *) result + result_len, 3159 0, /* no flags */ 3160 1 /* Die */ 3161 ); 3162 NOT_REACHED; /* NOTREACHED */ 3163 } 3164 } 3165 3166 SvSETMAGIC(cat); 3167 SP = ORIGMARK; 3168 PUSHs(cat); 3169 RETURN; 3170 } 3171 3172 /* 3173 * ex: set ts=8 sts=4 sw=4 et: 3174 */ 3175