1 /* pp_pack.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * He still hopefully carried some of his gear in his pack: a small tinder-box, 13 * two small shallow pans, the smaller fitting into the larger; inside them a 14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and 15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, 16 * some salt. 17 * 18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"] 19 */ 20 21 /* This file contains pp ("push/pop") functions that 22 * execute the opcodes that make up a perl program. A typical pp function 23 * expects to find its arguments on the stack, and usually pushes its 24 * results onto the stack, hence the 'pp' terminology. Each OP structure 25 * contains a pointer to the relevant pp_foo() function. 26 * 27 * This particular file just contains pp_pack() and pp_unpack(). See the 28 * other pp*.c files for the rest of the pp_ functions. 29 */ 30 31 #include "EXTERN.h" 32 #define PERL_IN_PP_PACK_C 33 #include "perl.h" 34 35 /* Types used by pack/unpack */ 36 typedef enum { 37 e_no_len, /* no length */ 38 e_number, /* number, [] */ 39 e_star /* asterisk */ 40 } howlen_t; 41 42 typedef struct tempsym { 43 const char* patptr; /* current template char */ 44 const char* patend; /* one after last char */ 45 const char* grpbeg; /* 1st char of ()-group */ 46 const char* grpend; /* end of ()-group */ 47 I32 code; /* template code (!<>) */ 48 I32 length; /* length/repeat count */ 49 howlen_t howlen; /* how length is given */ 50 int level; /* () nesting level */ 51 U32 flags; /* /=4, comma=2, pack=1 */ 52 /* and group modifiers */ 53 STRLEN strbeg; /* offset of group start */ 54 struct tempsym *previous; /* previous group */ 55 } tempsym_t; 56 57 #define TEMPSYM_INIT(symptr, p, e, f) \ 58 STMT_START { \ 59 (symptr)->patptr = (p); \ 60 (symptr)->patend = (e); \ 61 (symptr)->grpbeg = NULL; \ 62 (symptr)->grpend = NULL; \ 63 (symptr)->grpend = NULL; \ 64 (symptr)->code = 0; \ 65 (symptr)->length = 0; \ 66 (symptr)->howlen = e_no_len; \ 67 (symptr)->level = 0; \ 68 (symptr)->flags = (f); \ 69 (symptr)->strbeg = 0; \ 70 (symptr)->previous = NULL; \ 71 } STMT_END 72 73 typedef union { 74 NV nv; 75 U8 bytes[sizeof(NV)]; 76 } NV_bytes; 77 78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 79 typedef union { 80 long double ld; 81 U8 bytes[sizeof(long double)]; 82 } ld_bytes; 83 #endif 84 85 #if PERL_VERSION >= 9 86 # define PERL_PACK_CAN_BYTEORDER 87 # define PERL_PACK_CAN_SHRIEKSIGN 88 #endif 89 90 #ifndef CHAR_BIT 91 # define CHAR_BIT 8 92 #endif 93 /* Maximum number of bytes to which a byte can grow due to upgrade */ 94 #define UTF8_EXPAND 2 95 96 /* 97 * Offset for integer pack/unpack. 98 * 99 * On architectures where I16 and I32 aren't really 16 and 32 bits, 100 * which for now are all Crays, pack and unpack have to play games. 101 */ 102 103 /* 104 * These values are required for portability of pack() output. 105 * If they're not right on your machine, then pack() and unpack() 106 * wouldn't work right anyway; you'll need to apply the Cray hack. 107 * (I'd like to check them with #if, but you can't use sizeof() in 108 * the preprocessor.) --??? 109 */ 110 /* 111 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE 112 defines are now in config.h. --Andy Dougherty April 1998 113 */ 114 #define SIZE16 2 115 #define SIZE32 4 116 117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). 118 --jhi Feb 1999 */ 119 120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32 121 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 122 # define OFF16(p) ((char*)(p)) 123 # define OFF32(p) ((char*)(p)) 124 # else 125 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 126 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) 127 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) 128 # else 129 ++++ bad cray byte order 130 # endif 131 # endif 132 #else 133 # define OFF16(p) ((char *) (p)) 134 # define OFF32(p) ((char *) (p)) 135 #endif 136 137 /* Only to be used inside a loop (see the break) */ 138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \ 139 if (utf8) { \ 140 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \ 141 } else { \ 142 Copy(s, OFF16(p), SIZE16, char); \ 143 (s) += SIZE16; \ 144 } \ 145 } STMT_END 146 147 /* Only to be used inside a loop (see the break) */ 148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \ 149 if (utf8) { \ 150 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \ 151 } else { \ 152 Copy(s, OFF32(p), SIZE32, char); \ 153 (s) += SIZE32; \ 154 } \ 155 } STMT_END 156 157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16) 158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32) 159 160 /* Only to be used inside a loop (see the break) */ 161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype) \ 162 STMT_START { \ 163 if (utf8) { \ 164 if (!uni_to_bytes(aTHX_ &s, strend, \ 165 (char *) (buf), len, datumtype)) break; \ 166 } else { \ 167 Copy(s, (char *) (buf), len, char); \ 168 s += len; \ 169 } \ 170 } STMT_END 171 172 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \ 173 SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype) 174 175 #define PUSH_VAR(utf8, aptr, var) \ 176 PUSH_BYTES(utf8, aptr, &(var), sizeof(var)) 177 178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */ 179 #define MAX_SUB_TEMPLATE_LEVEL 100 180 181 /* flags (note that type modifiers can also be used as flags!) */ 182 #define FLAG_WAS_UTF8 0x40 183 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */ 184 #define FLAG_UNPACK_ONLY_ONE 0x10 185 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */ 186 #define FLAG_SLASH 0x04 187 #define FLAG_COMMA 0x02 188 #define FLAG_PACK 0x01 189 190 STATIC SV * 191 S_mul128(pTHX_ SV *sv, U8 m) 192 { 193 STRLEN len; 194 char *s = SvPV(sv, len); 195 char *t; 196 197 PERL_ARGS_ASSERT_MUL128; 198 199 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ 200 SV * const tmpNew = newSVpvs("0000000000"); 201 202 sv_catsv(tmpNew, sv); 203 SvREFCNT_dec(sv); /* free old sv */ 204 sv = tmpNew; 205 s = SvPV(sv, len); 206 } 207 t = s + len - 1; 208 while (!*t) /* trailing '\0'? */ 209 t--; 210 while (t > s) { 211 const U32 i = ((*t - '0') << 7) + m; 212 *(t--) = '0' + (char)(i % 10); 213 m = (char)(i / 10); 214 } 215 return (sv); 216 } 217 218 /* Explosives and implosives. */ 219 220 #if 'I' == 73 && 'J' == 74 221 /* On an ASCII/ISO kind of system */ 222 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a') 223 #else 224 /* 225 Some other sort of character set - use memchr() so we don't match 226 the null byte. 227 */ 228 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') 229 #endif 230 231 /* type modifiers */ 232 #define TYPE_IS_SHRIEKING 0x100 233 #define TYPE_IS_BIG_ENDIAN 0x200 234 #define TYPE_IS_LITTLE_ENDIAN 0x400 235 #define TYPE_IS_PACK 0x800 236 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN) 237 #define TYPE_MODIFIERS(t) ((t) & ~0xFF) 238 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF) 239 240 #ifdef PERL_PACK_CAN_SHRIEKSIGN 241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@." 242 #else 243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX" 244 #endif 245 246 #ifndef PERL_PACK_CAN_BYTEORDER 247 /* Put "can't" first because it is shorter */ 248 # define TYPE_ENDIANNESS(t) 0 249 # define TYPE_NO_ENDIANNESS(t) (t) 250 251 # define ENDIANNESS_ALLOWED_TYPES "" 252 253 # define DO_BO_UNPACK(var, type) 254 # define DO_BO_PACK(var, type) 255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) 256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) 257 # define DO_BO_UNPACK_N(var, type) 258 # define DO_BO_PACK_N(var, type) 259 # define DO_BO_UNPACK_P(var) 260 # define DO_BO_PACK_P(var) 261 # define DO_BO_UNPACK_PC(var) 262 # define DO_BO_PACK_PC(var) 263 264 #else /* PERL_PACK_CAN_BYTEORDER */ 265 266 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK) 267 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK) 268 269 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP(" 270 271 # define DO_BO_UNPACK(var, type) \ 272 STMT_START { \ 273 switch (TYPE_ENDIANNESS(datumtype)) { \ 274 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \ 275 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \ 276 default: break; \ 277 } \ 278 } STMT_END 279 280 # define DO_BO_PACK(var, type) \ 281 STMT_START { \ 282 switch (TYPE_ENDIANNESS(datumtype)) { \ 283 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \ 284 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \ 285 default: break; \ 286 } \ 287 } STMT_END 288 289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \ 290 STMT_START { \ 291 switch (TYPE_ENDIANNESS(datumtype)) { \ 292 case TYPE_IS_BIG_ENDIAN: \ 293 var = (post_cast*) my_betoh ## type ((pre_cast) var); \ 294 break; \ 295 case TYPE_IS_LITTLE_ENDIAN: \ 296 var = (post_cast *) my_letoh ## type ((pre_cast) var); \ 297 break; \ 298 default: \ 299 break; \ 300 } \ 301 } STMT_END 302 303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \ 304 STMT_START { \ 305 switch (TYPE_ENDIANNESS(datumtype)) { \ 306 case TYPE_IS_BIG_ENDIAN: \ 307 var = (post_cast *) my_htobe ## type ((pre_cast) var); \ 308 break; \ 309 case TYPE_IS_LITTLE_ENDIAN: \ 310 var = (post_cast *) my_htole ## type ((pre_cast) var); \ 311 break; \ 312 default: \ 313 break; \ 314 } \ 315 } STMT_END 316 317 # define BO_CANT_DOIT(action, type) \ 318 STMT_START { \ 319 switch (TYPE_ENDIANNESS(datumtype)) { \ 320 case TYPE_IS_BIG_ENDIAN: \ 321 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \ 322 "platform", #action, #type); \ 323 break; \ 324 case TYPE_IS_LITTLE_ENDIAN: \ 325 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \ 326 "platform", #action, #type); \ 327 break; \ 328 default: \ 329 break; \ 330 } \ 331 } STMT_END 332 333 # if PTRSIZE == INTSIZE 334 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void) 335 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void) 336 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char) 337 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char) 338 # elif PTRSIZE == LONGSIZE 339 # if LONGSIZE < IVSIZE && IVSIZE == 8 340 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, 64, IV, void) 341 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, 64, IV, void) 342 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, 64, IV, char) 343 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, 64, IV, char) 344 # else 345 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void) 346 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void) 347 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char) 348 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char) 349 # endif 350 # elif PTRSIZE == IVSIZE 351 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void) 352 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void) 353 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char) 354 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char) 355 # else 356 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer) 357 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer) 358 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer) 359 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer) 360 # endif 361 362 # if defined(my_htolen) && defined(my_letohn) && \ 363 defined(my_htoben) && defined(my_betohn) 364 # define DO_BO_UNPACK_N(var, type) \ 365 STMT_START { \ 366 switch (TYPE_ENDIANNESS(datumtype)) { \ 367 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\ 368 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\ 369 default: break; \ 370 } \ 371 } STMT_END 372 373 # define DO_BO_PACK_N(var, type) \ 374 STMT_START { \ 375 switch (TYPE_ENDIANNESS(datumtype)) { \ 376 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\ 377 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\ 378 default: break; \ 379 } \ 380 } STMT_END 381 # else 382 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type) 383 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type) 384 # endif 385 386 #endif /* PERL_PACK_CAN_BYTEORDER */ 387 388 #define PACK_SIZE_CANNOT_CSUM 0x80 389 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ 390 #define PACK_SIZE_MASK 0x3F 391 392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted 393 in). You're unlikely ever to need to regenerate them. */ 394 395 #if TYPE_IS_SHRIEKING != 0x100 396 ++++shriek offset should be 256 397 #endif 398 399 typedef U8 packprops_t; 400 #if 'J'-'I' == 1 401 /* ASCII */ 402 STATIC const packprops_t packprops[512] = { 403 /* normal */ 404 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 405 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 406 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 407 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 408 0, 0, 0, 409 /* C */ sizeof(unsigned char), 410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 411 /* D */ LONG_DOUBLESIZE, 412 #else 413 0, 414 #endif 415 0, 416 /* F */ NVSIZE, 417 0, 0, 418 /* I */ sizeof(unsigned int), 419 /* J */ UVSIZE, 420 0, 421 /* L */ SIZE32, 422 0, 423 /* N */ SIZE32, 424 0, 0, 425 #if defined(HAS_QUAD) 426 /* Q */ sizeof(Uquad_t), 427 #else 428 0, 429 #endif 430 0, 431 /* S */ SIZE16, 432 0, 433 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE, 434 /* V */ SIZE32, 435 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, 436 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 437 /* c */ sizeof(char), 438 /* d */ sizeof(double), 439 0, 440 /* f */ sizeof(float), 441 0, 0, 442 /* i */ sizeof(int), 443 /* j */ IVSIZE, 444 0, 445 /* l */ SIZE32, 446 0, 447 /* n */ SIZE16, 448 0, 449 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM, 450 #if defined(HAS_QUAD) 451 /* q */ sizeof(Quad_t), 452 #else 453 0, 454 #endif 455 0, 456 /* s */ SIZE16, 457 0, 0, 458 /* v */ SIZE16, 459 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM, 460 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 461 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 462 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 463 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 464 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 465 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 466 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 467 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 468 0, 0, 0, 0, 0, 0, 0, 0, 469 /* shrieking */ 470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 471 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 472 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 473 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 474 0, 0, 0, 0, 0, 0, 0, 0, 0, 475 /* I */ sizeof(unsigned int), 476 0, 0, 477 /* L */ sizeof(unsigned long), 478 0, 479 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 480 /* N */ SIZE32, 481 #else 482 0, 483 #endif 484 0, 0, 0, 0, 485 /* S */ sizeof(unsigned short), 486 0, 0, 487 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 488 /* V */ SIZE32, 489 #else 490 0, 491 #endif 492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 493 0, 0, 494 /* i */ sizeof(int), 495 0, 0, 496 /* l */ sizeof(long), 497 0, 498 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 499 /* n */ SIZE16, 500 #else 501 0, 502 #endif 503 0, 0, 0, 0, 504 /* s */ sizeof(short), 505 0, 0, 506 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 507 /* v */ SIZE16, 508 #else 509 0, 510 #endif 511 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 512 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 513 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 515 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 519 0, 0, 0, 0, 0, 0, 0, 0, 0 520 }; 521 #else 522 /* EBCDIC (or bust) */ 523 STATIC const packprops_t packprops[512] = { 524 /* normal */ 525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 531 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 533 0, 0, 0, 534 /* c */ sizeof(char), 535 /* d */ sizeof(double), 536 0, 537 /* f */ sizeof(float), 538 0, 0, 539 /* i */ sizeof(int), 540 0, 0, 0, 0, 0, 0, 0, 541 /* j */ IVSIZE, 542 0, 543 /* l */ SIZE32, 544 0, 545 /* n */ SIZE16, 546 0, 547 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM, 548 #if defined(HAS_QUAD) 549 /* q */ sizeof(Quad_t), 550 #else 551 0, 552 #endif 553 0, 0, 0, 0, 0, 0, 0, 0, 0, 554 /* s */ SIZE16, 555 0, 0, 556 /* v */ SIZE16, 557 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM, 558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560 /* C */ sizeof(unsigned char), 561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 562 /* D */ LONG_DOUBLESIZE, 563 #else 564 0, 565 #endif 566 0, 567 /* F */ NVSIZE, 568 0, 0, 569 /* I */ sizeof(unsigned int), 570 0, 0, 0, 0, 0, 0, 0, 571 /* J */ UVSIZE, 572 0, 573 /* L */ SIZE32, 574 0, 575 /* N */ SIZE32, 576 0, 0, 577 #if defined(HAS_QUAD) 578 /* Q */ sizeof(Uquad_t), 579 #else 580 0, 581 #endif 582 0, 0, 0, 0, 0, 0, 0, 0, 0, 583 /* S */ SIZE16, 584 0, 585 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE, 586 /* V */ SIZE32, 587 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, 588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 589 0, 0, 0, 0, 0, 0, 0, 0, 0, 590 /* shrieking */ 591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 594 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 597 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 598 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 599 0, 0, 0, 0, 0, 0, 0, 0, 0, 600 /* i */ sizeof(int), 601 0, 0, 0, 0, 0, 0, 0, 0, 0, 602 /* l */ sizeof(long), 603 0, 604 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 605 /* n */ SIZE16, 606 #else 607 0, 608 #endif 609 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 610 /* s */ sizeof(short), 611 0, 0, 612 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 613 /* v */ SIZE16, 614 #else 615 0, 616 #endif 617 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 618 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 619 0, 0, 0, 620 /* I */ sizeof(unsigned int), 621 0, 0, 0, 0, 0, 0, 0, 0, 0, 622 /* L */ sizeof(unsigned long), 623 0, 624 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 625 /* N */ SIZE32, 626 #else 627 0, 628 #endif 629 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 630 /* S */ sizeof(unsigned short), 631 0, 0, 632 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 633 /* V */ SIZE32, 634 #else 635 0, 636 #endif 637 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 639 }; 640 #endif 641 642 STATIC U8 643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) 644 { 645 STRLEN retlen; 646 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, 647 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 648 /* We try to process malformed UTF-8 as much as possible (preferably with 649 warnings), but these two mean we make no progress in the string and 650 might enter an infinite loop */ 651 if (retlen == (STRLEN) -1 || retlen == 0) 652 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", 653 (int) TYPE_NO_MODIFIERS(datumtype)); 654 if (val >= 0x100) { 655 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 656 "Character in '%c' format wrapped in unpack", 657 (int) TYPE_NO_MODIFIERS(datumtype)); 658 val &= 0xff; 659 } 660 *s += retlen; 661 return (U8)val; 662 } 663 664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ 665 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ 666 *(U8 *)(s)++) 667 668 STATIC bool 669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) 670 { 671 UV val; 672 STRLEN retlen; 673 const char *from = *s; 674 int bad = 0; 675 const U32 flags = ckWARN(WARN_UTF8) ? 676 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY); 677 for (;buf_len > 0; buf_len--) { 678 if (from >= end) return FALSE; 679 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); 680 if (retlen == (STRLEN) -1 || retlen == 0) { 681 from += UTF8SKIP(from); 682 bad |= 1; 683 } else from += retlen; 684 if (val >= 0x100) { 685 bad |= 2; 686 val &= 0xff; 687 } 688 *(U8 *)buf++ = (U8)val; 689 } 690 /* We have enough characters for the buffer. Did we have problems ? */ 691 if (bad) { 692 if (bad & 1) { 693 /* Rewalk the string fragment while warning */ 694 const char *ptr; 695 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; 696 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { 697 if (ptr >= end) break; 698 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags); 699 } 700 if (from > end) from = end; 701 } 702 if ((bad & 2)) 703 Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ? 704 WARN_PACK : WARN_UNPACK), 705 "Character(s) in '%c' format wrapped in %s", 706 (int) TYPE_NO_MODIFIERS(datumtype), 707 datumtype & TYPE_IS_PACK ? "pack" : "unpack"); 708 } 709 *s = from; 710 return TRUE; 711 } 712 713 STATIC bool 714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) 715 { 716 dVAR; 717 STRLEN retlen; 718 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY); 719 if (val >= 0x100 || !ISUUCHAR(val) || 720 retlen == (STRLEN) -1 || retlen == 0) { 721 *out = 0; 722 return FALSE; 723 } 724 *out = PL_uudmap[val] & 077; 725 *s += retlen; 726 return TRUE; 727 } 728 729 STATIC char * 730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) { 731 const U8 * const end = start + len; 732 733 PERL_ARGS_ASSERT_BYTES_TO_UNI; 734 735 while (start < end) { 736 const UV uv = NATIVE_TO_ASCII(*start); 737 if (UNI_IS_INVARIANT(uv)) 738 *dest++ = (char)(U8)UTF_TO_NATIVE(uv); 739 else { 740 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv); 741 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv); 742 } 743 start++; 744 } 745 return dest; 746 } 747 748 #define PUSH_BYTES(utf8, cur, buf, len) \ 749 STMT_START { \ 750 if (utf8) \ 751 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \ 752 else { \ 753 Copy(buf, cur, len, char); \ 754 (cur) += (len); \ 755 } \ 756 } STMT_END 757 758 #define GROWING(utf8, cat, start, cur, in_len) \ 759 STMT_START { \ 760 STRLEN glen = (in_len); \ 761 if (utf8) glen *= UTF8_EXPAND; \ 762 if ((cur) + glen >= (start) + SvLEN(cat)) { \ 763 (start) = sv_exp_grow(cat, glen); \ 764 (cur) = (start) + SvCUR(cat); \ 765 } \ 766 } STMT_END 767 768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \ 769 STMT_START { \ 770 const STRLEN glen = (in_len); \ 771 STRLEN gl = glen; \ 772 if (utf8) gl *= UTF8_EXPAND; \ 773 if ((cur) + gl >= (start) + SvLEN(cat)) { \ 774 *cur = '\0'; \ 775 SvCUR_set((cat), (cur) - (start)); \ 776 (start) = sv_exp_grow(cat, gl); \ 777 (cur) = (start) + SvCUR(cat); \ 778 } \ 779 PUSH_BYTES(utf8, cur, buf, glen); \ 780 } STMT_END 781 782 #define PUSH_BYTE(utf8, s, byte) \ 783 STMT_START { \ 784 if (utf8) { \ 785 const U8 au8 = (byte); \ 786 (s) = bytes_to_uni(&au8, 1, (s)); \ 787 } else *(U8 *)(s)++ = (byte); \ 788 } STMT_END 789 790 /* Only to be used inside a loop (see the break) */ 791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \ 792 STMT_START { \ 793 STRLEN retlen; \ 794 if (str >= end) break; \ 795 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ 796 if (retlen == (STRLEN) -1 || retlen == 0) { \ 797 *cur = '\0'; \ 798 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ 799 } \ 800 str += retlen; \ 801 } STMT_END 802 803 static const char *_action( const tempsym_t* symptr ) 804 { 805 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); 806 } 807 808 /* Returns the sizeof() struct described by pat */ 809 STATIC I32 810 S_measure_struct(pTHX_ tempsym_t* symptr) 811 { 812 I32 total = 0; 813 814 PERL_ARGS_ASSERT_MEASURE_STRUCT; 815 816 while (next_symbol(symptr)) { 817 I32 len; 818 int size; 819 820 switch (symptr->howlen) { 821 case e_star: 822 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", 823 _action( symptr ) ); 824 break; 825 default: 826 /* e_no_len and e_number */ 827 len = symptr->length; 828 break; 829 } 830 831 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK; 832 if (!size) { 833 int star; 834 /* endianness doesn't influence the size of a type */ 835 switch(TYPE_NO_ENDIANNESS(symptr->code)) { 836 default: 837 Perl_croak(aTHX_ "Invalid type '%c' in %s", 838 (int)TYPE_NO_MODIFIERS(symptr->code), 839 _action( symptr ) ); 840 #ifdef PERL_PACK_CAN_SHRIEKSIGN 841 case '.' | TYPE_IS_SHRIEKING: 842 case '@' | TYPE_IS_SHRIEKING: 843 #endif 844 case '@': 845 case '.': 846 case '/': 847 case 'U': /* XXXX Is it correct? */ 848 case 'w': 849 case 'u': 850 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", 851 (int) TYPE_NO_MODIFIERS(symptr->code), 852 _action( symptr ) ); 853 case '%': 854 size = 0; 855 break; 856 case '(': 857 { 858 tempsym_t savsym = *symptr; 859 symptr->patptr = savsym.grpbeg; 860 symptr->patend = savsym.grpend; 861 /* XXXX Theoretically, we need to measure many times at 862 different positions, since the subexpression may contain 863 alignment commands, but be not of aligned length. 864 Need to detect this and croak(). */ 865 size = measure_struct(symptr); 866 *symptr = savsym; 867 break; 868 } 869 case 'X' | TYPE_IS_SHRIEKING: 870 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. 871 */ 872 if (!len) /* Avoid division by 0 */ 873 len = 1; 874 len = total % len; /* Assumed: the start is aligned. */ 875 /* FALL THROUGH */ 876 case 'X': 877 size = -1; 878 if (total < len) 879 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) ); 880 break; 881 case 'x' | TYPE_IS_SHRIEKING: 882 if (!len) /* Avoid division by 0 */ 883 len = 1; 884 star = total % len; /* Assumed: the start is aligned. */ 885 if (star) /* Other portable ways? */ 886 len = len - star; 887 else 888 len = 0; 889 /* FALL THROUGH */ 890 case 'x': 891 case 'A': 892 case 'Z': 893 case 'a': 894 size = 1; 895 break; 896 case 'B': 897 case 'b': 898 len = (len + 7)/8; 899 size = 1; 900 break; 901 case 'H': 902 case 'h': 903 len = (len + 1)/2; 904 size = 1; 905 break; 906 907 case 'P': 908 len = 1; 909 size = sizeof(char*); 910 break; 911 } 912 } 913 total += len * size; 914 } 915 return total; 916 } 917 918 919 /* locate matching closing parenthesis or bracket 920 * returns char pointer to char after match, or NULL 921 */ 922 STATIC const char * 923 S_group_end(pTHX_ const char *patptr, const char *patend, char ender) 924 { 925 PERL_ARGS_ASSERT_GROUP_END; 926 927 while (patptr < patend) { 928 const char c = *patptr++; 929 930 if (isSPACE(c)) 931 continue; 932 else if (c == ender) 933 return patptr-1; 934 else if (c == '#') { 935 while (patptr < patend && *patptr != '\n') 936 patptr++; 937 continue; 938 } else if (c == '(') 939 patptr = group_end(patptr, patend, ')') + 1; 940 else if (c == '[') 941 patptr = group_end(patptr, patend, ']') + 1; 942 } 943 Perl_croak(aTHX_ "No group ending character '%c' found in template", 944 ender); 945 return 0; 946 } 947 948 949 /* Convert unsigned decimal number to binary. 950 * Expects a pointer to the first digit and address of length variable 951 * Advances char pointer to 1st non-digit char and returns number 952 */ 953 STATIC const char * 954 S_get_num(pTHX_ const char *patptr, I32 *lenptr ) 955 { 956 I32 len = *patptr++ - '0'; 957 958 PERL_ARGS_ASSERT_GET_NUM; 959 960 while (isDIGIT(*patptr)) { 961 if (len >= 0x7FFFFFFF/10) 962 Perl_croak(aTHX_ "pack/unpack repeat count overflow"); 963 len = (len * 10) + (*patptr++ - '0'); 964 } 965 *lenptr = len; 966 return patptr; 967 } 968 969 /* The marvellous template parsing routine: Using state stored in *symptr, 970 * locates next template code and count 971 */ 972 STATIC bool 973 S_next_symbol(pTHX_ tempsym_t* symptr ) 974 { 975 const char* patptr = symptr->patptr; 976 const char* const patend = symptr->patend; 977 978 PERL_ARGS_ASSERT_NEXT_SYMBOL; 979 980 symptr->flags &= ~FLAG_SLASH; 981 982 while (patptr < patend) { 983 if (isSPACE(*patptr)) 984 patptr++; 985 else if (*patptr == '#') { 986 patptr++; 987 while (patptr < patend && *patptr != '\n') 988 patptr++; 989 if (patptr < patend) 990 patptr++; 991 } else { 992 /* We should have found a template code */ 993 I32 code = *patptr++ & 0xFF; 994 U32 inherited_modifiers = 0; 995 996 if (code == ','){ /* grandfather in commas but with a warning */ 997 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ 998 symptr->flags |= FLAG_COMMA; 999 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 1000 "Invalid type ',' in %s", _action( symptr ) ); 1001 } 1002 continue; 1003 } 1004 1005 /* for '(', skip to ')' */ 1006 if (code == '(') { 1007 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) 1008 Perl_croak(aTHX_ "()-group starts with a count in %s", 1009 _action( symptr ) ); 1010 symptr->grpbeg = patptr; 1011 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); 1012 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) 1013 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", 1014 _action( symptr ) ); 1015 } 1016 1017 /* look for group modifiers to inherit */ 1018 if (TYPE_ENDIANNESS(symptr->flags)) { 1019 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) 1020 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); 1021 } 1022 1023 /* look for modifiers */ 1024 while (patptr < patend) { 1025 const char *allowed; 1026 I32 modifier; 1027 switch (*patptr) { 1028 case '!': 1029 modifier = TYPE_IS_SHRIEKING; 1030 allowed = SHRIEKING_ALLOWED_TYPES; 1031 break; 1032 #ifdef PERL_PACK_CAN_BYTEORDER 1033 case '>': 1034 modifier = TYPE_IS_BIG_ENDIAN; 1035 allowed = ENDIANNESS_ALLOWED_TYPES; 1036 break; 1037 case '<': 1038 modifier = TYPE_IS_LITTLE_ENDIAN; 1039 allowed = ENDIANNESS_ALLOWED_TYPES; 1040 break; 1041 #endif /* PERL_PACK_CAN_BYTEORDER */ 1042 default: 1043 allowed = ""; 1044 modifier = 0; 1045 break; 1046 } 1047 1048 if (modifier == 0) 1049 break; 1050 1051 if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) 1052 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, 1053 allowed, _action( symptr ) ); 1054 1055 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) 1056 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", 1057 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) ); 1058 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == 1059 TYPE_ENDIANNESS_MASK) 1060 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", 1061 *patptr, _action( symptr ) ); 1062 1063 if ((code & modifier)) { 1064 Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK), 1065 "Duplicate modifier '%c' after '%c' in %s", 1066 *patptr, (int) TYPE_NO_MODIFIERS(code), 1067 _action( symptr ) ); 1068 } 1069 1070 code |= modifier; 1071 patptr++; 1072 } 1073 1074 /* inherit modifiers */ 1075 code |= inherited_modifiers; 1076 1077 /* look for count and/or / */ 1078 if (patptr < patend) { 1079 if (isDIGIT(*patptr)) { 1080 patptr = get_num( patptr, &symptr->length ); 1081 symptr->howlen = e_number; 1082 1083 } else if (*patptr == '*') { 1084 patptr++; 1085 symptr->howlen = e_star; 1086 1087 } else if (*patptr == '[') { 1088 const char* lenptr = ++patptr; 1089 symptr->howlen = e_number; 1090 patptr = group_end( patptr, patend, ']' ) + 1; 1091 /* what kind of [] is it? */ 1092 if (isDIGIT(*lenptr)) { 1093 lenptr = get_num( lenptr, &symptr->length ); 1094 if( *lenptr != ']' ) 1095 Perl_croak(aTHX_ "Malformed integer in [] in %s", 1096 _action( symptr ) ); 1097 } else { 1098 tempsym_t savsym = *symptr; 1099 symptr->patend = patptr-1; 1100 symptr->patptr = lenptr; 1101 savsym.length = measure_struct(symptr); 1102 *symptr = savsym; 1103 } 1104 } else { 1105 symptr->howlen = e_no_len; 1106 symptr->length = 1; 1107 } 1108 1109 /* try to find / */ 1110 while (patptr < patend) { 1111 if (isSPACE(*patptr)) 1112 patptr++; 1113 else if (*patptr == '#') { 1114 patptr++; 1115 while (patptr < patend && *patptr != '\n') 1116 patptr++; 1117 if (patptr < patend) 1118 patptr++; 1119 } else { 1120 if (*patptr == '/') { 1121 symptr->flags |= FLAG_SLASH; 1122 patptr++; 1123 if (patptr < patend && 1124 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) 1125 Perl_croak(aTHX_ "'/' does not take a repeat count in %s", 1126 _action( symptr ) ); 1127 } 1128 break; 1129 } 1130 } 1131 } else { 1132 /* at end - no count, no / */ 1133 symptr->howlen = e_no_len; 1134 symptr->length = 1; 1135 } 1136 1137 symptr->code = code; 1138 symptr->patptr = patptr; 1139 return TRUE; 1140 } 1141 } 1142 symptr->patptr = patptr; 1143 return FALSE; 1144 } 1145 1146 /* 1147 There is no way to cleanly handle the case where we should process the 1148 string per byte in its upgraded form while it's really in downgraded form 1149 (e.g. estimates like strend-s as an upper bound for the number of 1150 characters left wouldn't work). So if we foresee the need of this 1151 (pattern starts with U or contains U0), we want to work on the encoded 1152 version of the string. Users are advised to upgrade their pack string 1153 themselves if they need to do a lot of unpacks like this on it 1154 */ 1155 STATIC bool 1156 need_utf8(const char *pat, const char *patend) 1157 { 1158 bool first = TRUE; 1159 1160 PERL_ARGS_ASSERT_NEED_UTF8; 1161 1162 while (pat < patend) { 1163 if (pat[0] == '#') { 1164 pat++; 1165 pat = (const char *) memchr(pat, '\n', patend-pat); 1166 if (!pat) return FALSE; 1167 } else if (pat[0] == 'U') { 1168 if (first || pat[1] == '0') return TRUE; 1169 } else first = FALSE; 1170 pat++; 1171 } 1172 return FALSE; 1173 } 1174 1175 STATIC char 1176 first_symbol(const char *pat, const char *patend) { 1177 PERL_ARGS_ASSERT_FIRST_SYMBOL; 1178 1179 while (pat < patend) { 1180 if (pat[0] != '#') return pat[0]; 1181 pat++; 1182 pat = (const char *) memchr(pat, '\n', patend-pat); 1183 if (!pat) return 0; 1184 pat++; 1185 } 1186 return 0; 1187 } 1188 1189 /* 1190 =for apidoc unpackstring 1191 1192 The engine implementing the unpack() Perl function. 1193 1194 Using the template pat..patend, this function unpacks the string 1195 s..strend into a number of mortal SVs, which it pushes onto the perl 1196 argument (@_) stack (so you will need to issue a C<PUTBACK> before and 1197 C<SPAGAIN> after the call to this function). It returns the number of 1198 pushed elements. 1199 1200 The strend and patend pointers should point to the byte following the last 1201 character of each string. 1202 1203 Although this function returns its values on the perl argument stack, it 1204 doesn't take any parameters from that stack (and thus in particular 1205 there's no need to do a PUSHMARK before calling it, unlike L</call_pv> for 1206 example). 1207 1208 =cut */ 1209 1210 I32 1211 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags) 1212 { 1213 tempsym_t sym; 1214 1215 PERL_ARGS_ASSERT_UNPACKSTRING; 1216 1217 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; 1218 else if (need_utf8(pat, patend)) { 1219 /* We probably should try to avoid this in case a scalar context call 1220 wouldn't get to the "U0" */ 1221 STRLEN len = strend - s; 1222 s = (char *) bytes_to_utf8((U8 *) s, &len); 1223 SAVEFREEPV(s); 1224 strend = s + len; 1225 flags |= FLAG_DO_UTF8; 1226 } 1227 1228 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) 1229 flags |= FLAG_PARSE_UTF8; 1230 1231 TEMPSYM_INIT(&sym, pat, patend, flags); 1232 1233 return unpack_rec(&sym, s, s, strend, NULL ); 1234 } 1235 1236 STATIC I32 1237 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) 1238 { 1239 dVAR; dSP; 1240 SV *sv = NULL; 1241 const I32 start_sp_offset = SP - PL_stack_base; 1242 howlen_t howlen; 1243 I32 checksum = 0; 1244 UV cuv = 0; 1245 NV cdouble = 0.0; 1246 const int bits_in_uv = CHAR_BIT * sizeof(cuv); 1247 bool beyond = FALSE; 1248 bool explicit_length; 1249 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; 1250 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 1251 1252 PERL_ARGS_ASSERT_UNPACK_REC; 1253 1254 symptr->strbeg = s - strbeg; 1255 1256 while (next_symbol(symptr)) { 1257 packprops_t props; 1258 I32 len; 1259 I32 datumtype = symptr->code; 1260 /* do first one only unless in list context 1261 / is implemented by unpacking the count, then popping it from the 1262 stack, so must check that we're not in the middle of a / */ 1263 if ( unpack_only_one 1264 && (SP - PL_stack_base == start_sp_offset + 1) 1265 && (datumtype != '/') ) /* XXX can this be omitted */ 1266 break; 1267 1268 switch (howlen = symptr->howlen) { 1269 case e_star: 1270 len = strend - strbeg; /* long enough */ 1271 break; 1272 default: 1273 /* e_no_len and e_number */ 1274 len = symptr->length; 1275 break; 1276 } 1277 1278 explicit_length = TRUE; 1279 redo_switch: 1280 beyond = s >= strend; 1281 1282 props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 1283 if (props) { 1284 /* props nonzero means we can process this letter. */ 1285 const long size = props & PACK_SIZE_MASK; 1286 const long howmany = (strend - s) / size; 1287 if (len > howmany) 1288 len = howmany; 1289 1290 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) { 1291 if (len && unpack_only_one) len = 1; 1292 EXTEND(SP, len); 1293 EXTEND_MORTAL(len); 1294 } 1295 } 1296 1297 switch(TYPE_NO_ENDIANNESS(datumtype)) { 1298 default: 1299 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) ); 1300 1301 case '%': 1302 if (howlen == e_no_len) 1303 len = 16; /* len is not specified */ 1304 checksum = len; 1305 cuv = 0; 1306 cdouble = 0; 1307 continue; 1308 break; 1309 case '(': 1310 { 1311 tempsym_t savsym = *symptr; 1312 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 1313 symptr->flags |= group_modifiers; 1314 symptr->patend = savsym.grpend; 1315 symptr->previous = &savsym; 1316 symptr->level++; 1317 PUTBACK; 1318 if (len && unpack_only_one) len = 1; 1319 while (len--) { 1320 symptr->patptr = savsym.grpbeg; 1321 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 1322 else symptr->flags &= ~FLAG_PARSE_UTF8; 1323 unpack_rec(symptr, s, strbeg, strend, &s); 1324 if (s == strend && savsym.howlen == e_star) 1325 break; /* No way to continue */ 1326 } 1327 SPAGAIN; 1328 savsym.flags = symptr->flags & ~group_modifiers; 1329 *symptr = savsym; 1330 break; 1331 } 1332 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1333 case '.' | TYPE_IS_SHRIEKING: 1334 #endif 1335 case '.': { 1336 const char *from; 1337 SV *sv; 1338 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1339 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); 1340 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 1341 const bool u8 = utf8; 1342 #endif 1343 if (howlen == e_star) from = strbeg; 1344 else if (len <= 0) from = s; 1345 else { 1346 tempsym_t *group = symptr; 1347 1348 while (--len && group) group = group->previous; 1349 from = group ? strbeg + group->strbeg : strbeg; 1350 } 1351 sv = from <= s ? 1352 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) : 1353 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s))); 1354 mXPUSHs(sv); 1355 break; 1356 } 1357 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1358 case '@' | TYPE_IS_SHRIEKING: 1359 #endif 1360 case '@': 1361 s = strbeg + symptr->strbeg; 1362 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1363 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 1364 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 1365 if (utf8) 1366 #endif 1367 { 1368 while (len > 0) { 1369 if (s >= strend) 1370 Perl_croak(aTHX_ "'@' outside of string in unpack"); 1371 s += UTF8SKIP(s); 1372 len--; 1373 } 1374 if (s > strend) 1375 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); 1376 } else { 1377 if (strend-s < len) 1378 Perl_croak(aTHX_ "'@' outside of string in unpack"); 1379 s += len; 1380 } 1381 break; 1382 case 'X' | TYPE_IS_SHRIEKING: 1383 if (!len) /* Avoid division by 0 */ 1384 len = 1; 1385 if (utf8) { 1386 const char *hop, *last; 1387 I32 l = len; 1388 hop = last = strbeg; 1389 while (hop < s) { 1390 hop += UTF8SKIP(hop); 1391 if (--l == 0) { 1392 last = hop; 1393 l = len; 1394 } 1395 } 1396 if (last > s) 1397 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1398 s = last; 1399 break; 1400 } 1401 len = (s - strbeg) % len; 1402 /* FALL THROUGH */ 1403 case 'X': 1404 if (utf8) { 1405 while (len > 0) { 1406 if (s <= strbeg) 1407 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1408 while (--s, UTF8_IS_CONTINUATION(*s)) { 1409 if (s <= strbeg) 1410 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1411 } 1412 len--; 1413 } 1414 } else { 1415 if (len > s - strbeg) 1416 Perl_croak(aTHX_ "'X' outside of string in unpack" ); 1417 s -= len; 1418 } 1419 break; 1420 case 'x' | TYPE_IS_SHRIEKING: { 1421 I32 ai32; 1422 if (!len) /* Avoid division by 0 */ 1423 len = 1; 1424 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len; 1425 else ai32 = (s - strbeg) % len; 1426 if (ai32 == 0) break; 1427 len -= ai32; 1428 } 1429 /* FALL THROUGH */ 1430 case 'x': 1431 if (utf8) { 1432 while (len>0) { 1433 if (s >= strend) 1434 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1435 s += UTF8SKIP(s); 1436 len--; 1437 } 1438 } else { 1439 if (len > strend - s) 1440 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1441 s += len; 1442 } 1443 break; 1444 case '/': 1445 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 1446 break; 1447 case 'A': 1448 case 'Z': 1449 case 'a': 1450 if (checksum) { 1451 /* Preliminary length estimate is assumed done in 'W' */ 1452 if (len > strend - s) len = strend - s; 1453 goto W_checksum; 1454 } 1455 if (utf8) { 1456 I32 l; 1457 const char *hop; 1458 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) { 1459 if (hop >= strend) { 1460 if (hop > strend) 1461 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1462 break; 1463 } 1464 } 1465 if (hop > strend) 1466 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1467 len = hop - s; 1468 } else if (len > strend - s) 1469 len = strend - s; 1470 1471 if (datumtype == 'Z') { 1472 /* 'Z' strips stuff after first null */ 1473 const char *ptr, *end; 1474 end = s + len; 1475 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break; 1476 sv = newSVpvn(s, ptr-s); 1477 if (howlen == e_star) /* exact for 'Z*' */ 1478 len = ptr-s + (ptr != strend ? 1 : 0); 1479 } else if (datumtype == 'A') { 1480 /* 'A' strips both nulls and spaces */ 1481 const char *ptr; 1482 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { 1483 for (ptr = s+len-1; ptr >= s; ptr--) 1484 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && 1485 !isSPACE_utf8(ptr)) break; 1486 if (ptr >= s) ptr += UTF8SKIP(ptr); 1487 else ptr++; 1488 if (ptr > s+len) 1489 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1490 } else { 1491 for (ptr = s+len-1; ptr >= s; ptr--) 1492 if (*ptr != 0 && !isSPACE(*ptr)) break; 1493 ptr++; 1494 } 1495 sv = newSVpvn(s, ptr-s); 1496 } else sv = newSVpvn(s, len); 1497 1498 if (utf8) { 1499 SvUTF8_on(sv); 1500 /* Undo any upgrade done due to need_utf8() */ 1501 if (!(symptr->flags & FLAG_WAS_UTF8)) 1502 sv_utf8_downgrade(sv, 0); 1503 } 1504 mXPUSHs(sv); 1505 s += len; 1506 break; 1507 case 'B': 1508 case 'b': { 1509 char *str; 1510 if (howlen == e_star || len > (strend - s) * 8) 1511 len = (strend - s) * 8; 1512 if (checksum) { 1513 if (utf8) 1514 while (len >= 8 && s < strend) { 1515 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)]; 1516 len -= 8; 1517 } 1518 else 1519 while (len >= 8) { 1520 cuv += PL_bitcount[*(U8 *)s++]; 1521 len -= 8; 1522 } 1523 if (len && s < strend) { 1524 U8 bits; 1525 bits = SHIFT_BYTE(utf8, s, strend, datumtype); 1526 if (datumtype == 'b') 1527 while (len-- > 0) { 1528 if (bits & 1) cuv++; 1529 bits >>= 1; 1530 } 1531 else 1532 while (len-- > 0) { 1533 if (bits & 0x80) cuv++; 1534 bits <<= 1; 1535 } 1536 } 1537 break; 1538 } 1539 1540 sv = sv_2mortal(newSV(len ? len : 1)); 1541 SvPOK_on(sv); 1542 str = SvPVX(sv); 1543 if (datumtype == 'b') { 1544 U8 bits = 0; 1545 const I32 ai32 = len; 1546 for (len = 0; len < ai32; len++) { 1547 if (len & 7) bits >>= 1; 1548 else if (utf8) { 1549 if (s >= strend) break; 1550 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1551 } else bits = *(U8 *) s++; 1552 *str++ = bits & 1 ? '1' : '0'; 1553 } 1554 } else { 1555 U8 bits = 0; 1556 const I32 ai32 = len; 1557 for (len = 0; len < ai32; len++) { 1558 if (len & 7) bits <<= 1; 1559 else if (utf8) { 1560 if (s >= strend) break; 1561 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1562 } else bits = *(U8 *) s++; 1563 *str++ = bits & 0x80 ? '1' : '0'; 1564 } 1565 } 1566 *str = '\0'; 1567 SvCUR_set(sv, str - SvPVX_const(sv)); 1568 XPUSHs(sv); 1569 break; 1570 } 1571 case 'H': 1572 case 'h': { 1573 char *str = NULL; 1574 /* Preliminary length estimate, acceptable for utf8 too */ 1575 if (howlen == e_star || len > (strend - s) * 2) 1576 len = (strend - s) * 2; 1577 if (!checksum) { 1578 sv = sv_2mortal(newSV(len ? len : 1)); 1579 SvPOK_on(sv); 1580 str = SvPVX(sv); 1581 } 1582 if (datumtype == 'h') { 1583 U8 bits = 0; 1584 I32 ai32 = len; 1585 for (len = 0; len < ai32; len++) { 1586 if (len & 1) bits >>= 4; 1587 else if (utf8) { 1588 if (s >= strend) break; 1589 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1590 } else bits = * (U8 *) s++; 1591 if (!checksum) 1592 *str++ = PL_hexdigit[bits & 15]; 1593 } 1594 } else { 1595 U8 bits = 0; 1596 const I32 ai32 = len; 1597 for (len = 0; len < ai32; len++) { 1598 if (len & 1) bits <<= 4; 1599 else if (utf8) { 1600 if (s >= strend) break; 1601 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1602 } else bits = *(U8 *) s++; 1603 if (!checksum) 1604 *str++ = PL_hexdigit[(bits >> 4) & 15]; 1605 } 1606 } 1607 if (!checksum) { 1608 *str = '\0'; 1609 SvCUR_set(sv, str - SvPVX_const(sv)); 1610 XPUSHs(sv); 1611 } 1612 break; 1613 } 1614 case 'C': 1615 if (len == 0) { 1616 if (explicit_length) 1617 /* Switch to "character" mode */ 1618 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 1619 break; 1620 } 1621 /* FALL THROUGH */ 1622 case 'c': 1623 while (len-- > 0 && s < strend) { 1624 int aint; 1625 if (utf8) 1626 { 1627 STRLEN retlen; 1628 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1629 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1630 if (retlen == (STRLEN) -1 || retlen == 0) 1631 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1632 s += retlen; 1633 } 1634 else 1635 aint = *(U8 *)(s)++; 1636 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ 1637 aint -= 256; 1638 if (!checksum) 1639 mPUSHi(aint); 1640 else if (checksum > bits_in_uv) 1641 cdouble += (NV)aint; 1642 else 1643 cuv += aint; 1644 } 1645 break; 1646 case 'W': 1647 W_checksum: 1648 if (utf8) { 1649 while (len-- > 0 && s < strend) { 1650 STRLEN retlen; 1651 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1652 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1653 if (retlen == (STRLEN) -1 || retlen == 0) 1654 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1655 s += retlen; 1656 if (!checksum) 1657 mPUSHu(val); 1658 else if (checksum > bits_in_uv) 1659 cdouble += (NV) val; 1660 else 1661 cuv += val; 1662 } 1663 } else if (!checksum) 1664 while (len-- > 0) { 1665 const U8 ch = *(U8 *) s++; 1666 mPUSHu(ch); 1667 } 1668 else if (checksum > bits_in_uv) 1669 while (len-- > 0) cdouble += (NV) *(U8 *) s++; 1670 else 1671 while (len-- > 0) cuv += *(U8 *) s++; 1672 break; 1673 case 'U': 1674 if (len == 0) { 1675 if (explicit_length && howlen != e_star) { 1676 /* Switch to "bytes in UTF-8" mode */ 1677 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0; 1678 else 1679 /* Should be impossible due to the need_utf8() test */ 1680 Perl_croak(aTHX_ "U0 mode on a byte string"); 1681 } 1682 break; 1683 } 1684 if (len > strend - s) len = strend - s; 1685 if (!checksum) { 1686 if (len && unpack_only_one) len = 1; 1687 EXTEND(SP, len); 1688 EXTEND_MORTAL(len); 1689 } 1690 while (len-- > 0 && s < strend) { 1691 STRLEN retlen; 1692 UV auv; 1693 if (utf8) { 1694 U8 result[UTF8_MAXLEN]; 1695 const char *ptr = s; 1696 STRLEN len; 1697 /* Bug: warns about bad utf8 even if we are short on bytes 1698 and will break out of the loop */ 1699 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 1700 'U')) 1701 break; 1702 len = UTF8SKIP(result); 1703 if (!uni_to_bytes(aTHX_ &ptr, strend, 1704 (char *) &result[1], len-1, 'U')) break; 1705 auv = utf8n_to_uvuni(result, len, &retlen, UTF8_ALLOW_DEFAULT); 1706 s = ptr; 1707 } else { 1708 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, UTF8_ALLOW_DEFAULT); 1709 if (retlen == (STRLEN) -1 || retlen == 0) 1710 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1711 s += retlen; 1712 } 1713 if (!checksum) 1714 mPUSHu(auv); 1715 else if (checksum > bits_in_uv) 1716 cdouble += (NV) auv; 1717 else 1718 cuv += auv; 1719 } 1720 break; 1721 case 's' | TYPE_IS_SHRIEKING: 1722 #if SHORTSIZE != SIZE16 1723 while (len-- > 0) { 1724 short ashort; 1725 SHIFT_VAR(utf8, s, strend, ashort, datumtype); 1726 DO_BO_UNPACK(ashort, s); 1727 if (!checksum) 1728 mPUSHi(ashort); 1729 else if (checksum > bits_in_uv) 1730 cdouble += (NV)ashort; 1731 else 1732 cuv += ashort; 1733 } 1734 break; 1735 #else 1736 /* Fallthrough! */ 1737 #endif 1738 case 's': 1739 while (len-- > 0) { 1740 I16 ai16; 1741 1742 #if U16SIZE > SIZE16 1743 ai16 = 0; 1744 #endif 1745 SHIFT16(utf8, s, strend, &ai16, datumtype); 1746 DO_BO_UNPACK(ai16, 16); 1747 #if U16SIZE > SIZE16 1748 if (ai16 > 32767) 1749 ai16 -= 65536; 1750 #endif 1751 if (!checksum) 1752 mPUSHi(ai16); 1753 else if (checksum > bits_in_uv) 1754 cdouble += (NV)ai16; 1755 else 1756 cuv += ai16; 1757 } 1758 break; 1759 case 'S' | TYPE_IS_SHRIEKING: 1760 #if SHORTSIZE != SIZE16 1761 while (len-- > 0) { 1762 unsigned short aushort; 1763 SHIFT_VAR(utf8, s, strend, aushort, datumtype); 1764 DO_BO_UNPACK(aushort, s); 1765 if (!checksum) 1766 mPUSHu(aushort); 1767 else if (checksum > bits_in_uv) 1768 cdouble += (NV)aushort; 1769 else 1770 cuv += aushort; 1771 } 1772 break; 1773 #else 1774 /* Fallthrough! */ 1775 #endif 1776 case 'v': 1777 case 'n': 1778 case 'S': 1779 while (len-- > 0) { 1780 U16 au16; 1781 #if U16SIZE > SIZE16 1782 au16 = 0; 1783 #endif 1784 SHIFT16(utf8, s, strend, &au16, datumtype); 1785 DO_BO_UNPACK(au16, 16); 1786 #ifdef HAS_NTOHS 1787 if (datumtype == 'n') 1788 au16 = PerlSock_ntohs(au16); 1789 #endif 1790 #ifdef HAS_VTOHS 1791 if (datumtype == 'v') 1792 au16 = vtohs(au16); 1793 #endif 1794 if (!checksum) 1795 mPUSHu(au16); 1796 else if (checksum > bits_in_uv) 1797 cdouble += (NV) au16; 1798 else 1799 cuv += au16; 1800 } 1801 break; 1802 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1803 case 'v' | TYPE_IS_SHRIEKING: 1804 case 'n' | TYPE_IS_SHRIEKING: 1805 while (len-- > 0) { 1806 I16 ai16; 1807 # if U16SIZE > SIZE16 1808 ai16 = 0; 1809 # endif 1810 SHIFT16(utf8, s, strend, &ai16, datumtype); 1811 # ifdef HAS_NTOHS 1812 if (datumtype == ('n' | TYPE_IS_SHRIEKING)) 1813 ai16 = (I16) PerlSock_ntohs((U16) ai16); 1814 # endif /* HAS_NTOHS */ 1815 # ifdef HAS_VTOHS 1816 if (datumtype == ('v' | TYPE_IS_SHRIEKING)) 1817 ai16 = (I16) vtohs((U16) ai16); 1818 # endif /* HAS_VTOHS */ 1819 if (!checksum) 1820 mPUSHi(ai16); 1821 else if (checksum > bits_in_uv) 1822 cdouble += (NV) ai16; 1823 else 1824 cuv += ai16; 1825 } 1826 break; 1827 #endif /* PERL_PACK_CAN_SHRIEKSIGN */ 1828 case 'i': 1829 case 'i' | TYPE_IS_SHRIEKING: 1830 while (len-- > 0) { 1831 int aint; 1832 SHIFT_VAR(utf8, s, strend, aint, datumtype); 1833 DO_BO_UNPACK(aint, i); 1834 if (!checksum) 1835 mPUSHi(aint); 1836 else if (checksum > bits_in_uv) 1837 cdouble += (NV)aint; 1838 else 1839 cuv += aint; 1840 } 1841 break; 1842 case 'I': 1843 case 'I' | TYPE_IS_SHRIEKING: 1844 while (len-- > 0) { 1845 unsigned int auint; 1846 SHIFT_VAR(utf8, s, strend, auint, datumtype); 1847 DO_BO_UNPACK(auint, i); 1848 if (!checksum) 1849 mPUSHu(auint); 1850 else if (checksum > bits_in_uv) 1851 cdouble += (NV)auint; 1852 else 1853 cuv += auint; 1854 } 1855 break; 1856 case 'j': 1857 while (len-- > 0) { 1858 IV aiv; 1859 SHIFT_VAR(utf8, s, strend, aiv, datumtype); 1860 #if IVSIZE == INTSIZE 1861 DO_BO_UNPACK(aiv, i); 1862 #elif IVSIZE == LONGSIZE 1863 DO_BO_UNPACK(aiv, l); 1864 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 1865 DO_BO_UNPACK(aiv, 64); 1866 #else 1867 Perl_croak(aTHX_ "'j' not supported on this platform"); 1868 #endif 1869 if (!checksum) 1870 mPUSHi(aiv); 1871 else if (checksum > bits_in_uv) 1872 cdouble += (NV)aiv; 1873 else 1874 cuv += aiv; 1875 } 1876 break; 1877 case 'J': 1878 while (len-- > 0) { 1879 UV auv; 1880 SHIFT_VAR(utf8, s, strend, auv, datumtype); 1881 #if IVSIZE == INTSIZE 1882 DO_BO_UNPACK(auv, i); 1883 #elif IVSIZE == LONGSIZE 1884 DO_BO_UNPACK(auv, l); 1885 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 1886 DO_BO_UNPACK(auv, 64); 1887 #else 1888 Perl_croak(aTHX_ "'J' not supported on this platform"); 1889 #endif 1890 if (!checksum) 1891 mPUSHu(auv); 1892 else if (checksum > bits_in_uv) 1893 cdouble += (NV)auv; 1894 else 1895 cuv += auv; 1896 } 1897 break; 1898 case 'l' | TYPE_IS_SHRIEKING: 1899 #if LONGSIZE != SIZE32 1900 while (len-- > 0) { 1901 long along; 1902 SHIFT_VAR(utf8, s, strend, along, datumtype); 1903 DO_BO_UNPACK(along, l); 1904 if (!checksum) 1905 mPUSHi(along); 1906 else if (checksum > bits_in_uv) 1907 cdouble += (NV)along; 1908 else 1909 cuv += along; 1910 } 1911 break; 1912 #else 1913 /* Fallthrough! */ 1914 #endif 1915 case 'l': 1916 while (len-- > 0) { 1917 I32 ai32; 1918 #if U32SIZE > SIZE32 1919 ai32 = 0; 1920 #endif 1921 SHIFT32(utf8, s, strend, &ai32, datumtype); 1922 DO_BO_UNPACK(ai32, 32); 1923 #if U32SIZE > SIZE32 1924 if (ai32 > 2147483647) ai32 -= 4294967296; 1925 #endif 1926 if (!checksum) 1927 mPUSHi(ai32); 1928 else if (checksum > bits_in_uv) 1929 cdouble += (NV)ai32; 1930 else 1931 cuv += ai32; 1932 } 1933 break; 1934 case 'L' | TYPE_IS_SHRIEKING: 1935 #if LONGSIZE != SIZE32 1936 while (len-- > 0) { 1937 unsigned long aulong; 1938 SHIFT_VAR(utf8, s, strend, aulong, datumtype); 1939 DO_BO_UNPACK(aulong, l); 1940 if (!checksum) 1941 mPUSHu(aulong); 1942 else if (checksum > bits_in_uv) 1943 cdouble += (NV)aulong; 1944 else 1945 cuv += aulong; 1946 } 1947 break; 1948 #else 1949 /* Fall through! */ 1950 #endif 1951 case 'V': 1952 case 'N': 1953 case 'L': 1954 while (len-- > 0) { 1955 U32 au32; 1956 #if U32SIZE > SIZE32 1957 au32 = 0; 1958 #endif 1959 SHIFT32(utf8, s, strend, &au32, datumtype); 1960 DO_BO_UNPACK(au32, 32); 1961 #ifdef HAS_NTOHL 1962 if (datumtype == 'N') 1963 au32 = PerlSock_ntohl(au32); 1964 #endif 1965 #ifdef HAS_VTOHL 1966 if (datumtype == 'V') 1967 au32 = vtohl(au32); 1968 #endif 1969 if (!checksum) 1970 mPUSHu(au32); 1971 else if (checksum > bits_in_uv) 1972 cdouble += (NV)au32; 1973 else 1974 cuv += au32; 1975 } 1976 break; 1977 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1978 case 'V' | TYPE_IS_SHRIEKING: 1979 case 'N' | TYPE_IS_SHRIEKING: 1980 while (len-- > 0) { 1981 I32 ai32; 1982 # if U32SIZE > SIZE32 1983 ai32 = 0; 1984 # endif 1985 SHIFT32(utf8, s, strend, &ai32, datumtype); 1986 # ifdef HAS_NTOHL 1987 if (datumtype == ('N' | TYPE_IS_SHRIEKING)) 1988 ai32 = (I32)PerlSock_ntohl((U32)ai32); 1989 # endif 1990 # ifdef HAS_VTOHL 1991 if (datumtype == ('V' | TYPE_IS_SHRIEKING)) 1992 ai32 = (I32)vtohl((U32)ai32); 1993 # endif 1994 if (!checksum) 1995 mPUSHi(ai32); 1996 else if (checksum > bits_in_uv) 1997 cdouble += (NV)ai32; 1998 else 1999 cuv += ai32; 2000 } 2001 break; 2002 #endif /* PERL_PACK_CAN_SHRIEKSIGN */ 2003 case 'p': 2004 while (len-- > 0) { 2005 const char *aptr; 2006 SHIFT_VAR(utf8, s, strend, aptr, datumtype); 2007 DO_BO_UNPACK_PC(aptr); 2008 /* newSVpv generates undef if aptr is NULL */ 2009 mPUSHs(newSVpv(aptr, 0)); 2010 } 2011 break; 2012 case 'w': 2013 { 2014 UV auv = 0; 2015 U32 bytes = 0; 2016 2017 while (len > 0 && s < strend) { 2018 U8 ch; 2019 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 2020 auv = (auv << 7) | (ch & 0x7f); 2021 /* UTF8_IS_XXXXX not right here - using constant 0x80 */ 2022 if (ch < 0x80) { 2023 bytes = 0; 2024 mPUSHu(auv); 2025 len--; 2026 auv = 0; 2027 continue; 2028 } 2029 if (++bytes >= sizeof(UV)) { /* promote to string */ 2030 const char *t; 2031 2032 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); 2033 while (s < strend) { 2034 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 2035 sv = mul128(sv, (U8)(ch & 0x7f)); 2036 if (!(ch & 0x80)) { 2037 bytes = 0; 2038 break; 2039 } 2040 } 2041 t = SvPV_nolen_const(sv); 2042 while (*t == '0') 2043 t++; 2044 sv_chop(sv, t); 2045 mPUSHs(sv); 2046 len--; 2047 auv = 0; 2048 } 2049 } 2050 if ((s >= strend) && bytes) 2051 Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); 2052 } 2053 break; 2054 case 'P': 2055 if (symptr->howlen == e_star) 2056 Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); 2057 EXTEND(SP, 1); 2058 if (s + sizeof(char*) <= strend) { 2059 char *aptr; 2060 SHIFT_VAR(utf8, s, strend, aptr, datumtype); 2061 DO_BO_UNPACK_PC(aptr); 2062 /* newSVpvn generates undef if aptr is NULL */ 2063 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP)); 2064 } 2065 break; 2066 #ifdef HAS_QUAD 2067 case 'q': 2068 while (len-- > 0) { 2069 Quad_t aquad; 2070 SHIFT_VAR(utf8, s, strend, aquad, datumtype); 2071 DO_BO_UNPACK(aquad, 64); 2072 if (!checksum) 2073 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ? 2074 newSViv((IV)aquad) : newSVnv((NV)aquad)); 2075 else if (checksum > bits_in_uv) 2076 cdouble += (NV)aquad; 2077 else 2078 cuv += aquad; 2079 } 2080 break; 2081 case 'Q': 2082 while (len-- > 0) { 2083 Uquad_t auquad; 2084 SHIFT_VAR(utf8, s, strend, auquad, datumtype); 2085 DO_BO_UNPACK(auquad, 64); 2086 if (!checksum) 2087 mPUSHs(auquad <= UV_MAX ? 2088 newSVuv((UV)auquad) : newSVnv((NV)auquad)); 2089 else if (checksum > bits_in_uv) 2090 cdouble += (NV)auquad; 2091 else 2092 cuv += auquad; 2093 } 2094 break; 2095 #endif /* HAS_QUAD */ 2096 /* float and double added gnb@melba.bby.oz.au 22/11/89 */ 2097 case 'f': 2098 while (len-- > 0) { 2099 float afloat; 2100 SHIFT_VAR(utf8, s, strend, afloat, datumtype); 2101 DO_BO_UNPACK_N(afloat, float); 2102 if (!checksum) 2103 mPUSHn(afloat); 2104 else 2105 cdouble += afloat; 2106 } 2107 break; 2108 case 'd': 2109 while (len-- > 0) { 2110 double adouble; 2111 SHIFT_VAR(utf8, s, strend, adouble, datumtype); 2112 DO_BO_UNPACK_N(adouble, double); 2113 if (!checksum) 2114 mPUSHn(adouble); 2115 else 2116 cdouble += adouble; 2117 } 2118 break; 2119 case 'F': 2120 while (len-- > 0) { 2121 NV_bytes anv; 2122 SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype); 2123 DO_BO_UNPACK_N(anv.nv, NV); 2124 if (!checksum) 2125 mPUSHn(anv.nv); 2126 else 2127 cdouble += anv.nv; 2128 } 2129 break; 2130 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 2131 case 'D': 2132 while (len-- > 0) { 2133 ld_bytes aldouble; 2134 SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype); 2135 DO_BO_UNPACK_N(aldouble.ld, long double); 2136 if (!checksum) 2137 mPUSHn(aldouble.ld); 2138 else 2139 cdouble += aldouble.ld; 2140 } 2141 break; 2142 #endif 2143 case 'u': 2144 if (!checksum) { 2145 const STRLEN l = (STRLEN) (strend - s) * 3 / 4; 2146 sv = sv_2mortal(newSV(l)); 2147 if (l) SvPOK_on(sv); 2148 } 2149 if (utf8) { 2150 while (next_uni_uu(aTHX_ &s, strend, &len)) { 2151 I32 a, b, c, d; 2152 char hunk[3]; 2153 2154 while (len > 0) { 2155 next_uni_uu(aTHX_ &s, strend, &a); 2156 next_uni_uu(aTHX_ &s, strend, &b); 2157 next_uni_uu(aTHX_ &s, strend, &c); 2158 next_uni_uu(aTHX_ &s, strend, &d); 2159 hunk[0] = (char)((a << 2) | (b >> 4)); 2160 hunk[1] = (char)((b << 4) | (c >> 2)); 2161 hunk[2] = (char)((c << 6) | d); 2162 if (!checksum) 2163 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 2164 len -= 3; 2165 } 2166 if (s < strend) { 2167 if (*s == '\n') { 2168 s++; 2169 } 2170 else { 2171 /* possible checksum byte */ 2172 const char *skip = s+UTF8SKIP(s); 2173 if (skip < strend && *skip == '\n') 2174 s = skip+1; 2175 } 2176 } 2177 } 2178 } else { 2179 while (s < strend && *s > ' ' && ISUUCHAR(*s)) { 2180 I32 a, b, c, d; 2181 char hunk[3]; 2182 2183 len = PL_uudmap[*(U8*)s++] & 077; 2184 while (len > 0) { 2185 if (s < strend && ISUUCHAR(*s)) 2186 a = PL_uudmap[*(U8*)s++] & 077; 2187 else 2188 a = 0; 2189 if (s < strend && ISUUCHAR(*s)) 2190 b = PL_uudmap[*(U8*)s++] & 077; 2191 else 2192 b = 0; 2193 if (s < strend && ISUUCHAR(*s)) 2194 c = PL_uudmap[*(U8*)s++] & 077; 2195 else 2196 c = 0; 2197 if (s < strend && ISUUCHAR(*s)) 2198 d = PL_uudmap[*(U8*)s++] & 077; 2199 else 2200 d = 0; 2201 hunk[0] = (char)((a << 2) | (b >> 4)); 2202 hunk[1] = (char)((b << 4) | (c >> 2)); 2203 hunk[2] = (char)((c << 6) | d); 2204 if (!checksum) 2205 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 2206 len -= 3; 2207 } 2208 if (*s == '\n') 2209 s++; 2210 else /* possible checksum byte */ 2211 if (s + 1 < strend && s[1] == '\n') 2212 s += 2; 2213 } 2214 } 2215 if (!checksum) 2216 XPUSHs(sv); 2217 break; 2218 } 2219 2220 if (checksum) { 2221 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) || 2222 (checksum > bits_in_uv && 2223 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) { 2224 NV trouble, anv; 2225 2226 anv = (NV) (1 << (checksum & 15)); 2227 while (checksum >= 16) { 2228 checksum -= 16; 2229 anv *= 65536.0; 2230 } 2231 while (cdouble < 0.0) 2232 cdouble += anv; 2233 cdouble = Perl_modf(cdouble / anv, &trouble) * anv; 2234 sv = newSVnv(cdouble); 2235 } 2236 else { 2237 if (checksum < bits_in_uv) { 2238 UV mask = ((UV)1 << checksum) - 1; 2239 cuv &= mask; 2240 } 2241 sv = newSVuv(cuv); 2242 } 2243 mXPUSHs(sv); 2244 checksum = 0; 2245 } 2246 2247 if (symptr->flags & FLAG_SLASH){ 2248 if (SP - PL_stack_base - start_sp_offset <= 0) 2249 break; 2250 if( next_symbol(symptr) ){ 2251 if( symptr->howlen == e_number ) 2252 Perl_croak(aTHX_ "Count after length/code in unpack" ); 2253 if( beyond ){ 2254 /* ...end of char buffer then no decent length available */ 2255 Perl_croak(aTHX_ "length/code after end of string in unpack" ); 2256 } else { 2257 /* take top of stack (hope it's numeric) */ 2258 len = POPi; 2259 if( len < 0 ) 2260 Perl_croak(aTHX_ "Negative '/' count in unpack" ); 2261 } 2262 } else { 2263 Perl_croak(aTHX_ "Code missing after '/' in unpack" ); 2264 } 2265 datumtype = symptr->code; 2266 explicit_length = FALSE; 2267 goto redo_switch; 2268 } 2269 } 2270 2271 if (new_s) 2272 *new_s = s; 2273 PUTBACK; 2274 return SP - PL_stack_base - start_sp_offset; 2275 } 2276 2277 PP(pp_unpack) 2278 { 2279 dVAR; 2280 dSP; 2281 dPOPPOPssrl; 2282 I32 gimme = GIMME_V; 2283 STRLEN llen; 2284 STRLEN rlen; 2285 const char *pat = SvPV_const(left, llen); 2286 const char *s = SvPV_const(right, rlen); 2287 const char *strend = s + rlen; 2288 const char *patend = pat + llen; 2289 I32 cnt; 2290 2291 PUTBACK; 2292 cnt = unpackstring(pat, patend, s, strend, 2293 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) 2294 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0)); 2295 2296 SPAGAIN; 2297 if ( !cnt && gimme == G_SCALAR ) 2298 PUSHs(&PL_sv_undef); 2299 RETURN; 2300 } 2301 2302 STATIC U8 * 2303 doencodes(U8 *h, const char *s, I32 len) 2304 { 2305 *h++ = PL_uuemap[len]; 2306 while (len > 2) { 2307 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 2308 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))]; 2309 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; 2310 *h++ = PL_uuemap[(077 & (s[2] & 077))]; 2311 s += 3; 2312 len -= 3; 2313 } 2314 if (len > 0) { 2315 const char r = (len > 1 ? s[1] : '\0'); 2316 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 2317 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))]; 2318 *h++ = PL_uuemap[(077 & ((r << 2) & 074))]; 2319 *h++ = PL_uuemap[0]; 2320 } 2321 *h++ = '\n'; 2322 return h; 2323 } 2324 2325 STATIC SV * 2326 S_is_an_int(pTHX_ const char *s, STRLEN l) 2327 { 2328 SV *result = newSVpvn(s, l); 2329 char *const result_c = SvPV_nolen(result); /* convenience */ 2330 char *out = result_c; 2331 bool skip = 1; 2332 bool ignore = 0; 2333 2334 PERL_ARGS_ASSERT_IS_AN_INT; 2335 2336 while (*s) { 2337 switch (*s) { 2338 case ' ': 2339 break; 2340 case '+': 2341 if (!skip) { 2342 SvREFCNT_dec(result); 2343 return (NULL); 2344 } 2345 break; 2346 case '0': 2347 case '1': 2348 case '2': 2349 case '3': 2350 case '4': 2351 case '5': 2352 case '6': 2353 case '7': 2354 case '8': 2355 case '9': 2356 skip = 0; 2357 if (!ignore) { 2358 *(out++) = *s; 2359 } 2360 break; 2361 case '.': 2362 ignore = 1; 2363 break; 2364 default: 2365 SvREFCNT_dec(result); 2366 return (NULL); 2367 } 2368 s++; 2369 } 2370 *(out++) = '\0'; 2371 SvCUR_set(result, out - result_c); 2372 return (result); 2373 } 2374 2375 /* pnum must be '\0' terminated */ 2376 STATIC int 2377 S_div128(pTHX_ SV *pnum, bool *done) 2378 { 2379 STRLEN len; 2380 char * const s = SvPV(pnum, len); 2381 char *t = s; 2382 int m = 0; 2383 2384 PERL_ARGS_ASSERT_DIV128; 2385 2386 *done = 1; 2387 while (*t) { 2388 const int i = m * 10 + (*t - '0'); 2389 const int r = (i >> 7); /* r < 10 */ 2390 m = i & 0x7F; 2391 if (r) { 2392 *done = 0; 2393 } 2394 *(t++) = '0' + r; 2395 } 2396 *(t++) = '\0'; 2397 SvCUR_set(pnum, (STRLEN) (t - s)); 2398 return (m); 2399 } 2400 2401 /* 2402 =for apidoc packlist 2403 2404 The engine implementing pack() Perl function. 2405 2406 =cut 2407 */ 2408 2409 void 2410 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, SV **beglist, SV **endlist ) 2411 { 2412 dVAR; 2413 tempsym_t sym; 2414 2415 PERL_ARGS_ASSERT_PACKLIST; 2416 2417 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); 2418 2419 /* We're going to do changes through SvPVX(cat). Make sure it's valid. 2420 Also make sure any UTF8 flag is loaded */ 2421 SvPV_force_nolen(cat); 2422 if (DO_UTF8(cat)) 2423 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; 2424 2425 (void)pack_rec( cat, &sym, beglist, endlist ); 2426 } 2427 2428 /* like sv_utf8_upgrade, but also repoint the group start markers */ 2429 STATIC void 2430 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { 2431 STRLEN len; 2432 tempsym_t *group; 2433 const char *from_ptr, *from_start, *from_end, **marks, **m; 2434 char *to_start, *to_ptr; 2435 2436 if (SvUTF8(sv)) return; 2437 2438 from_start = SvPVX_const(sv); 2439 from_end = from_start + SvCUR(sv); 2440 for (from_ptr = from_start; from_ptr < from_end; from_ptr++) 2441 if (!NATIVE_IS_INVARIANT(*from_ptr)) break; 2442 if (from_ptr == from_end) { 2443 /* Simple case: no character needs to be changed */ 2444 SvUTF8_on(sv); 2445 return; 2446 } 2447 2448 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; 2449 Newx(to_start, len, char); 2450 Copy(from_start, to_start, from_ptr-from_start, char); 2451 to_ptr = to_start + (from_ptr-from_start); 2452 2453 Newx(marks, sym_ptr->level+2, const char *); 2454 for (group=sym_ptr; group; group = group->previous) 2455 marks[group->level] = from_start + group->strbeg; 2456 marks[sym_ptr->level+1] = from_end+1; 2457 for (m = marks; *m < from_ptr; m++) 2458 *m = to_start + (*m-from_start); 2459 2460 for (;from_ptr < from_end; from_ptr++) { 2461 while (*m == from_ptr) *m++ = to_ptr; 2462 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); 2463 } 2464 *to_ptr = 0; 2465 2466 while (*m == from_ptr) *m++ = to_ptr; 2467 if (m != marks + sym_ptr->level+1) { 2468 Safefree(marks); 2469 Safefree(to_start); 2470 Perl_croak(aTHX_ "panic: marks beyond string end, m=%p, marks=%p, " 2471 "level=%d", m, marks, sym_ptr->level); 2472 } 2473 for (group=sym_ptr; group; group = group->previous) 2474 group->strbeg = marks[group->level] - to_start; 2475 Safefree(marks); 2476 2477 if (SvOOK(sv)) { 2478 if (SvIVX(sv)) { 2479 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); 2480 from_start -= SvIVX(sv); 2481 SvIV_set(sv, 0); 2482 } 2483 SvFLAGS(sv) &= ~SVf_OOK; 2484 } 2485 if (SvLEN(sv) != 0) 2486 Safefree(from_start); 2487 SvPV_set(sv, to_start); 2488 SvCUR_set(sv, to_ptr - to_start); 2489 SvLEN_set(sv, len); 2490 SvUTF8_on(sv); 2491 } 2492 2493 /* Exponential string grower. Makes string extension effectively O(n) 2494 needed says how many extra bytes we need (not counting the final '\0') 2495 Only grows the string if there is an actual lack of space 2496 */ 2497 STATIC char * 2498 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) { 2499 const STRLEN cur = SvCUR(sv); 2500 const STRLEN len = SvLEN(sv); 2501 STRLEN extend; 2502 2503 PERL_ARGS_ASSERT_SV_EXP_GROW; 2504 2505 if (len - cur > needed) return SvPVX(sv); 2506 extend = needed > len ? needed : len; 2507 return SvGROW(sv, len+extend+1); 2508 } 2509 2510 STATIC 2511 SV ** 2512 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) 2513 { 2514 dVAR; 2515 tempsym_t lookahead; 2516 I32 items = endlist - beglist; 2517 bool found = next_symbol(symptr); 2518 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 2519 bool warn_utf8 = ckWARN(WARN_UTF8); 2520 2521 PERL_ARGS_ASSERT_PACK_REC; 2522 2523 if (symptr->level == 0 && found && symptr->code == 'U') { 2524 marked_upgrade(aTHX_ cat, symptr); 2525 symptr->flags |= FLAG_DO_UTF8; 2526 utf8 = 0; 2527 } 2528 symptr->strbeg = SvCUR(cat); 2529 2530 while (found) { 2531 SV *fromstr; 2532 STRLEN fromlen; 2533 I32 len; 2534 SV *lengthcode = NULL; 2535 I32 datumtype = symptr->code; 2536 howlen_t howlen = symptr->howlen; 2537 char *start = SvPVX(cat); 2538 char *cur = start + SvCUR(cat); 2539 2540 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) 2541 2542 switch (howlen) { 2543 case e_star: 2544 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 2545 0 : items; 2546 break; 2547 default: 2548 /* e_no_len and e_number */ 2549 len = symptr->length; 2550 break; 2551 } 2552 2553 if (len) { 2554 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 2555 2556 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) { 2557 /* We can process this letter. */ 2558 STRLEN size = props & PACK_SIZE_MASK; 2559 GROWING(utf8, cat, start, cur, (STRLEN) len * size); 2560 } 2561 } 2562 2563 /* Look ahead for next symbol. Do we have code/code? */ 2564 lookahead = *symptr; 2565 found = next_symbol(&lookahead); 2566 if (symptr->flags & FLAG_SLASH) { 2567 IV count; 2568 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack"); 2569 if (strchr("aAZ", lookahead.code)) { 2570 if (lookahead.howlen == e_number) count = lookahead.length; 2571 else { 2572 if (items > 0) { 2573 count = sv_len_utf8(*beglist); 2574 } 2575 else count = 0; 2576 if (lookahead.code == 'Z') count++; 2577 } 2578 } else { 2579 if (lookahead.howlen == e_number && lookahead.length < items) 2580 count = lookahead.length; 2581 else count = items; 2582 } 2583 lookahead.howlen = e_number; 2584 lookahead.length = count; 2585 lengthcode = sv_2mortal(newSViv(count)); 2586 } 2587 2588 /* Code inside the switch must take care to properly update 2589 cat (CUR length and '\0' termination) if it updated *cur and 2590 doesn't simply leave using break */ 2591 switch(TYPE_NO_ENDIANNESS(datumtype)) { 2592 default: 2593 Perl_croak(aTHX_ "Invalid type '%c' in pack", 2594 (int) TYPE_NO_MODIFIERS(datumtype)); 2595 case '%': 2596 Perl_croak(aTHX_ "'%%' may not be used in pack"); 2597 { 2598 char *from; 2599 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2600 case '.' | TYPE_IS_SHRIEKING: 2601 #endif 2602 case '.': 2603 if (howlen == e_star) from = start; 2604 else if (len == 0) from = cur; 2605 else { 2606 tempsym_t *group = symptr; 2607 2608 while (--len && group) group = group->previous; 2609 from = group ? start + group->strbeg : start; 2610 } 2611 fromstr = NEXTFROM; 2612 len = SvIV(fromstr); 2613 goto resize; 2614 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2615 case '@' | TYPE_IS_SHRIEKING: 2616 #endif 2617 case '@': 2618 from = start + symptr->strbeg; 2619 resize: 2620 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2621 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 2622 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 2623 if (utf8) 2624 #endif 2625 if (len >= 0) { 2626 while (len && from < cur) { 2627 from += UTF8SKIP(from); 2628 len--; 2629 } 2630 if (from > cur) 2631 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2632 if (len) { 2633 /* Here we know from == cur */ 2634 grow: 2635 GROWING(0, cat, start, cur, len); 2636 Zero(cur, len, char); 2637 cur += len; 2638 } else if (from < cur) { 2639 len = cur - from; 2640 goto shrink; 2641 } else goto no_change; 2642 } else { 2643 cur = from; 2644 len = -len; 2645 goto utf8_shrink; 2646 } 2647 else { 2648 len -= cur - from; 2649 if (len > 0) goto grow; 2650 if (len == 0) goto no_change; 2651 len = -len; 2652 goto shrink; 2653 } 2654 break; 2655 } 2656 case '(': { 2657 tempsym_t savsym = *symptr; 2658 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 2659 symptr->flags |= group_modifiers; 2660 symptr->patend = savsym.grpend; 2661 symptr->level++; 2662 symptr->previous = &lookahead; 2663 while (len--) { 2664 U32 was_utf8; 2665 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 2666 else symptr->flags &= ~FLAG_PARSE_UTF8; 2667 was_utf8 = SvUTF8(cat); 2668 symptr->patptr = savsym.grpbeg; 2669 beglist = pack_rec(cat, symptr, beglist, endlist); 2670 if (SvUTF8(cat) != was_utf8) 2671 /* This had better be an upgrade while in utf8==0 mode */ 2672 utf8 = 1; 2673 2674 if (savsym.howlen == e_star && beglist == endlist) 2675 break; /* No way to continue */ 2676 } 2677 items = endlist - beglist; 2678 lookahead.flags = symptr->flags & ~group_modifiers; 2679 goto no_change; 2680 } 2681 case 'X' | TYPE_IS_SHRIEKING: 2682 if (!len) /* Avoid division by 0 */ 2683 len = 1; 2684 if (utf8) { 2685 char *hop, *last; 2686 I32 l = len; 2687 hop = last = start; 2688 while (hop < cur) { 2689 hop += UTF8SKIP(hop); 2690 if (--l == 0) { 2691 last = hop; 2692 l = len; 2693 } 2694 } 2695 if (last > cur) 2696 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2697 cur = last; 2698 break; 2699 } 2700 len = (cur-start) % len; 2701 /* FALL THROUGH */ 2702 case 'X': 2703 if (utf8) { 2704 if (len < 1) goto no_change; 2705 utf8_shrink: 2706 while (len > 0) { 2707 if (cur <= start) 2708 Perl_croak(aTHX_ "'%c' outside of string in pack", 2709 (int) TYPE_NO_MODIFIERS(datumtype)); 2710 while (--cur, UTF8_IS_CONTINUATION(*cur)) { 2711 if (cur <= start) 2712 Perl_croak(aTHX_ "'%c' outside of string in pack", 2713 (int) TYPE_NO_MODIFIERS(datumtype)); 2714 } 2715 len--; 2716 } 2717 } else { 2718 shrink: 2719 if (cur - start < len) 2720 Perl_croak(aTHX_ "'%c' outside of string in pack", 2721 (int) TYPE_NO_MODIFIERS(datumtype)); 2722 cur -= len; 2723 } 2724 if (cur < start+symptr->strbeg) { 2725 /* Make sure group starts don't point into the void */ 2726 tempsym_t *group; 2727 const STRLEN length = cur-start; 2728 for (group = symptr; 2729 group && length < group->strbeg; 2730 group = group->previous) group->strbeg = length; 2731 lookahead.strbeg = length; 2732 } 2733 break; 2734 case 'x' | TYPE_IS_SHRIEKING: { 2735 I32 ai32; 2736 if (!len) /* Avoid division by 0 */ 2737 len = 1; 2738 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len; 2739 else ai32 = (cur - start) % len; 2740 if (ai32 == 0) goto no_change; 2741 len -= ai32; 2742 } 2743 /* FALL THROUGH */ 2744 case 'x': 2745 goto grow; 2746 case 'A': 2747 case 'Z': 2748 case 'a': { 2749 const char *aptr; 2750 2751 fromstr = NEXTFROM; 2752 aptr = SvPV_const(fromstr, fromlen); 2753 if (DO_UTF8(fromstr)) { 2754 const char *end, *s; 2755 2756 if (!utf8 && !SvUTF8(cat)) { 2757 marked_upgrade(aTHX_ cat, symptr); 2758 lookahead.flags |= FLAG_DO_UTF8; 2759 lookahead.strbeg = symptr->strbeg; 2760 utf8 = 1; 2761 start = SvPVX(cat); 2762 cur = start + SvCUR(cat); 2763 } 2764 if (howlen == e_star) { 2765 if (utf8) goto string_copy; 2766 len = fromlen+1; 2767 } 2768 s = aptr; 2769 end = aptr + fromlen; 2770 fromlen = datumtype == 'Z' ? len-1 : len; 2771 while ((I32) fromlen > 0 && s < end) { 2772 s += UTF8SKIP(s); 2773 fromlen--; 2774 } 2775 if (s > end) 2776 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2777 if (utf8) { 2778 len = fromlen; 2779 if (datumtype == 'Z') len++; 2780 fromlen = s-aptr; 2781 len += fromlen; 2782 2783 goto string_copy; 2784 } 2785 fromlen = len - fromlen; 2786 if (datumtype == 'Z') fromlen--; 2787 if (howlen == e_star) { 2788 len = fromlen; 2789 if (datumtype == 'Z') len++; 2790 } 2791 GROWING(0, cat, start, cur, len); 2792 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen, 2793 datumtype | TYPE_IS_PACK)) 2794 Perl_croak(aTHX_ "panic: predicted utf8 length not available, " 2795 "for '%c', aptr=%p end=%p cur=%p, fromlen=%"UVuf, 2796 (int)datumtype, aptr, end, cur, (UV)fromlen); 2797 cur += fromlen; 2798 len -= fromlen; 2799 } else if (utf8) { 2800 if (howlen == e_star) { 2801 len = fromlen; 2802 if (datumtype == 'Z') len++; 2803 } 2804 if (len <= (I32) fromlen) { 2805 fromlen = len; 2806 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2807 } 2808 /* assumes a byte expands to at most UTF8_EXPAND bytes on 2809 upgrade, so: 2810 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ 2811 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); 2812 len -= fromlen; 2813 while (fromlen > 0) { 2814 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); 2815 aptr++; 2816 fromlen--; 2817 } 2818 } else { 2819 string_copy: 2820 if (howlen == e_star) { 2821 len = fromlen; 2822 if (datumtype == 'Z') len++; 2823 } 2824 if (len <= (I32) fromlen) { 2825 fromlen = len; 2826 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2827 } 2828 GROWING(0, cat, start, cur, len); 2829 Copy(aptr, cur, fromlen, char); 2830 cur += fromlen; 2831 len -= fromlen; 2832 } 2833 memset(cur, datumtype == 'A' ? ' ' : '\0', len); 2834 cur += len; 2835 SvTAINT(cat); 2836 break; 2837 } 2838 case 'B': 2839 case 'b': { 2840 const char *str, *end; 2841 I32 l, field_len; 2842 U8 bits; 2843 bool utf8_source; 2844 U32 utf8_flags; 2845 2846 fromstr = NEXTFROM; 2847 str = SvPV_const(fromstr, fromlen); 2848 end = str + fromlen; 2849 if (DO_UTF8(fromstr)) { 2850 utf8_source = TRUE; 2851 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2852 } else { 2853 utf8_source = FALSE; 2854 utf8_flags = 0; /* Unused, but keep compilers happy */ 2855 } 2856 if (howlen == e_star) len = fromlen; 2857 field_len = (len+7)/8; 2858 GROWING(utf8, cat, start, cur, field_len); 2859 if (len > (I32)fromlen) len = fromlen; 2860 bits = 0; 2861 l = 0; 2862 if (datumtype == 'B') 2863 while (l++ < len) { 2864 if (utf8_source) { 2865 UV val = 0; 2866 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2867 bits |= val & 1; 2868 } else bits |= *str++ & 1; 2869 if (l & 7) bits <<= 1; 2870 else { 2871 PUSH_BYTE(utf8, cur, bits); 2872 bits = 0; 2873 } 2874 } 2875 else 2876 /* datumtype == 'b' */ 2877 while (l++ < len) { 2878 if (utf8_source) { 2879 UV val = 0; 2880 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2881 if (val & 1) bits |= 0x80; 2882 } else if (*str++ & 1) 2883 bits |= 0x80; 2884 if (l & 7) bits >>= 1; 2885 else { 2886 PUSH_BYTE(utf8, cur, bits); 2887 bits = 0; 2888 } 2889 } 2890 l--; 2891 if (l & 7) { 2892 if (datumtype == 'B') 2893 bits <<= 7 - (l & 7); 2894 else 2895 bits >>= 7 - (l & 7); 2896 PUSH_BYTE(utf8, cur, bits); 2897 l += 7; 2898 } 2899 /* Determine how many chars are left in the requested field */ 2900 l /= 8; 2901 if (howlen == e_star) field_len = 0; 2902 else field_len -= l; 2903 Zero(cur, field_len, char); 2904 cur += field_len; 2905 break; 2906 } 2907 case 'H': 2908 case 'h': { 2909 const char *str, *end; 2910 I32 l, field_len; 2911 U8 bits; 2912 bool utf8_source; 2913 U32 utf8_flags; 2914 2915 fromstr = NEXTFROM; 2916 str = SvPV_const(fromstr, fromlen); 2917 end = str + fromlen; 2918 if (DO_UTF8(fromstr)) { 2919 utf8_source = TRUE; 2920 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2921 } else { 2922 utf8_source = FALSE; 2923 utf8_flags = 0; /* Unused, but keep compilers happy */ 2924 } 2925 if (howlen == e_star) len = fromlen; 2926 field_len = (len+1)/2; 2927 GROWING(utf8, cat, start, cur, field_len); 2928 if (!utf8 && len > (I32)fromlen) len = fromlen; 2929 bits = 0; 2930 l = 0; 2931 if (datumtype == 'H') 2932 while (l++ < len) { 2933 if (utf8_source) { 2934 UV val = 0; 2935 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2936 if (val < 256 && isALPHA(val)) 2937 bits |= (val + 9) & 0xf; 2938 else 2939 bits |= val & 0xf; 2940 } else if (isALPHA(*str)) 2941 bits |= (*str++ + 9) & 0xf; 2942 else 2943 bits |= *str++ & 0xf; 2944 if (l & 1) bits <<= 4; 2945 else { 2946 PUSH_BYTE(utf8, cur, bits); 2947 bits = 0; 2948 } 2949 } 2950 else 2951 while (l++ < len) { 2952 if (utf8_source) { 2953 UV val = 0; 2954 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2955 if (val < 256 && isALPHA(val)) 2956 bits |= ((val + 9) & 0xf) << 4; 2957 else 2958 bits |= (val & 0xf) << 4; 2959 } else if (isALPHA(*str)) 2960 bits |= ((*str++ + 9) & 0xf) << 4; 2961 else 2962 bits |= (*str++ & 0xf) << 4; 2963 if (l & 1) bits >>= 4; 2964 else { 2965 PUSH_BYTE(utf8, cur, bits); 2966 bits = 0; 2967 } 2968 } 2969 l--; 2970 if (l & 1) { 2971 PUSH_BYTE(utf8, cur, bits); 2972 l++; 2973 } 2974 /* Determine how many chars are left in the requested field */ 2975 l /= 2; 2976 if (howlen == e_star) field_len = 0; 2977 else field_len -= l; 2978 Zero(cur, field_len, char); 2979 cur += field_len; 2980 break; 2981 } 2982 case 'c': 2983 while (len-- > 0) { 2984 IV aiv; 2985 fromstr = NEXTFROM; 2986 aiv = SvIV(fromstr); 2987 if ((-128 > aiv || aiv > 127)) 2988 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 2989 "Character in 'c' format wrapped in pack"); 2990 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2991 } 2992 break; 2993 case 'C': 2994 if (len == 0) { 2995 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 2996 break; 2997 } 2998 while (len-- > 0) { 2999 IV aiv; 3000 fromstr = NEXTFROM; 3001 aiv = SvIV(fromstr); 3002 if ((0 > aiv || aiv > 0xff)) 3003 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3004 "Character in 'C' format wrapped in pack"); 3005 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 3006 } 3007 break; 3008 case 'W': { 3009 char *end; 3010 U8 in_bytes = (U8)IN_BYTES; 3011 3012 end = start+SvLEN(cat)-1; 3013 if (utf8) end -= UTF8_MAXLEN-1; 3014 while (len-- > 0) { 3015 UV auv; 3016 fromstr = NEXTFROM; 3017 auv = SvUV(fromstr); 3018 if (in_bytes) auv = auv % 0x100; 3019 if (utf8) { 3020 W_utf8: 3021 if (cur > end) { 3022 *cur = '\0'; 3023 SvCUR_set(cat, cur - start); 3024 3025 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 3026 end = start+SvLEN(cat)-UTF8_MAXLEN; 3027 } 3028 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, 3029 NATIVE_TO_UNI(auv), 3030 warn_utf8 ? 3031 0 : UNICODE_ALLOW_ANY); 3032 } else { 3033 if (auv >= 0x100) { 3034 if (!SvUTF8(cat)) { 3035 *cur = '\0'; 3036 SvCUR_set(cat, cur - start); 3037 marked_upgrade(aTHX_ cat, symptr); 3038 lookahead.flags |= FLAG_DO_UTF8; 3039 lookahead.strbeg = symptr->strbeg; 3040 utf8 = 1; 3041 start = SvPVX(cat); 3042 cur = start + SvCUR(cat); 3043 end = start+SvLEN(cat)-UTF8_MAXLEN; 3044 goto W_utf8; 3045 } 3046 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3047 "Character in 'W' format wrapped in pack"); 3048 auv &= 0xff; 3049 } 3050 if (cur >= end) { 3051 *cur = '\0'; 3052 SvCUR_set(cat, cur - start); 3053 GROWING(0, cat, start, cur, len+1); 3054 end = start+SvLEN(cat)-1; 3055 } 3056 *(U8 *) cur++ = (U8)auv; 3057 } 3058 } 3059 break; 3060 } 3061 case 'U': { 3062 char *end; 3063 3064 if (len == 0) { 3065 if (!(symptr->flags & FLAG_DO_UTF8)) { 3066 marked_upgrade(aTHX_ cat, symptr); 3067 lookahead.flags |= FLAG_DO_UTF8; 3068 lookahead.strbeg = symptr->strbeg; 3069 } 3070 utf8 = 0; 3071 goto no_change; 3072 } 3073 3074 end = start+SvLEN(cat); 3075 if (!utf8) end -= UTF8_MAXLEN; 3076 while (len-- > 0) { 3077 UV auv; 3078 fromstr = NEXTFROM; 3079 auv = SvUV(fromstr); 3080 if (utf8) { 3081 U8 buffer[UTF8_MAXLEN], *endb; 3082 endb = uvuni_to_utf8_flags(buffer, auv, 3083 warn_utf8 ? 3084 0 : UNICODE_ALLOW_ANY); 3085 if (cur+(endb-buffer)*UTF8_EXPAND >= end) { 3086 *cur = '\0'; 3087 SvCUR_set(cat, cur - start); 3088 GROWING(0, cat, start, cur, 3089 len+(endb-buffer)*UTF8_EXPAND); 3090 end = start+SvLEN(cat); 3091 } 3092 cur = bytes_to_uni(buffer, endb-buffer, cur); 3093 } else { 3094 if (cur >= end) { 3095 *cur = '\0'; 3096 SvCUR_set(cat, cur - start); 3097 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 3098 end = start+SvLEN(cat)-UTF8_MAXLEN; 3099 } 3100 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv, 3101 warn_utf8 ? 3102 0 : UNICODE_ALLOW_ANY); 3103 } 3104 } 3105 break; 3106 } 3107 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ 3108 case 'f': 3109 while (len-- > 0) { 3110 float afloat; 3111 NV anv; 3112 fromstr = NEXTFROM; 3113 anv = SvNV(fromstr); 3114 # if defined(VMS) && !defined(_IEEE_FP) 3115 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 3116 * on Alpha; fake it if we don't have them. 3117 */ 3118 if (anv > FLT_MAX) 3119 afloat = FLT_MAX; 3120 else if (anv < -FLT_MAX) 3121 afloat = -FLT_MAX; 3122 else afloat = (float)anv; 3123 # else 3124 afloat = (float)anv; 3125 # endif 3126 DO_BO_PACK_N(afloat, float); 3127 PUSH_VAR(utf8, cur, afloat); 3128 } 3129 break; 3130 case 'd': 3131 while (len-- > 0) { 3132 double adouble; 3133 NV anv; 3134 fromstr = NEXTFROM; 3135 anv = SvNV(fromstr); 3136 # if defined(VMS) && !defined(_IEEE_FP) 3137 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 3138 * on Alpha; fake it if we don't have them. 3139 */ 3140 if (anv > DBL_MAX) 3141 adouble = DBL_MAX; 3142 else if (anv < -DBL_MAX) 3143 adouble = -DBL_MAX; 3144 else adouble = (double)anv; 3145 # else 3146 adouble = (double)anv; 3147 # endif 3148 DO_BO_PACK_N(adouble, double); 3149 PUSH_VAR(utf8, cur, adouble); 3150 } 3151 break; 3152 case 'F': { 3153 NV_bytes anv; 3154 Zero(&anv, 1, NV); /* can be long double with unused bits */ 3155 while (len-- > 0) { 3156 fromstr = NEXTFROM; 3157 #ifdef __GNUC__ 3158 /* to work round a gcc/x86 bug; don't use SvNV */ 3159 anv.nv = sv_2nv(fromstr); 3160 #else 3161 anv.nv = SvNV(fromstr); 3162 #endif 3163 DO_BO_PACK_N(anv, NV); 3164 PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes)); 3165 } 3166 break; 3167 } 3168 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 3169 case 'D': { 3170 ld_bytes aldouble; 3171 /* long doubles can have unused bits, which may be nonzero */ 3172 Zero(&aldouble, 1, long double); 3173 while (len-- > 0) { 3174 fromstr = NEXTFROM; 3175 # ifdef __GNUC__ 3176 /* to work round a gcc/x86 bug; don't use SvNV */ 3177 aldouble.ld = (long double)sv_2nv(fromstr); 3178 # else 3179 aldouble.ld = (long double)SvNV(fromstr); 3180 # endif 3181 DO_BO_PACK_N(aldouble, long double); 3182 PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes)); 3183 } 3184 break; 3185 } 3186 #endif 3187 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3188 case 'n' | TYPE_IS_SHRIEKING: 3189 #endif 3190 case 'n': 3191 while (len-- > 0) { 3192 I16 ai16; 3193 fromstr = NEXTFROM; 3194 ai16 = (I16)SvIV(fromstr); 3195 #ifdef HAS_HTONS 3196 ai16 = PerlSock_htons(ai16); 3197 #endif 3198 PUSH16(utf8, cur, &ai16); 3199 } 3200 break; 3201 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3202 case 'v' | TYPE_IS_SHRIEKING: 3203 #endif 3204 case 'v': 3205 while (len-- > 0) { 3206 I16 ai16; 3207 fromstr = NEXTFROM; 3208 ai16 = (I16)SvIV(fromstr); 3209 #ifdef HAS_HTOVS 3210 ai16 = htovs(ai16); 3211 #endif 3212 PUSH16(utf8, cur, &ai16); 3213 } 3214 break; 3215 case 'S' | TYPE_IS_SHRIEKING: 3216 #if SHORTSIZE != SIZE16 3217 while (len-- > 0) { 3218 unsigned short aushort; 3219 fromstr = NEXTFROM; 3220 aushort = SvUV(fromstr); 3221 DO_BO_PACK(aushort, s); 3222 PUSH_VAR(utf8, cur, aushort); 3223 } 3224 break; 3225 #else 3226 /* Fall through! */ 3227 #endif 3228 case 'S': 3229 while (len-- > 0) { 3230 U16 au16; 3231 fromstr = NEXTFROM; 3232 au16 = (U16)SvUV(fromstr); 3233 DO_BO_PACK(au16, 16); 3234 PUSH16(utf8, cur, &au16); 3235 } 3236 break; 3237 case 's' | TYPE_IS_SHRIEKING: 3238 #if SHORTSIZE != SIZE16 3239 while (len-- > 0) { 3240 short ashort; 3241 fromstr = NEXTFROM; 3242 ashort = SvIV(fromstr); 3243 DO_BO_PACK(ashort, s); 3244 PUSH_VAR(utf8, cur, ashort); 3245 } 3246 break; 3247 #else 3248 /* Fall through! */ 3249 #endif 3250 case 's': 3251 while (len-- > 0) { 3252 I16 ai16; 3253 fromstr = NEXTFROM; 3254 ai16 = (I16)SvIV(fromstr); 3255 DO_BO_PACK(ai16, 16); 3256 PUSH16(utf8, cur, &ai16); 3257 } 3258 break; 3259 case 'I': 3260 case 'I' | TYPE_IS_SHRIEKING: 3261 while (len-- > 0) { 3262 unsigned int auint; 3263 fromstr = NEXTFROM; 3264 auint = SvUV(fromstr); 3265 DO_BO_PACK(auint, i); 3266 PUSH_VAR(utf8, cur, auint); 3267 } 3268 break; 3269 case 'j': 3270 while (len-- > 0) { 3271 IV aiv; 3272 fromstr = NEXTFROM; 3273 aiv = SvIV(fromstr); 3274 #if IVSIZE == INTSIZE 3275 DO_BO_PACK(aiv, i); 3276 #elif IVSIZE == LONGSIZE 3277 DO_BO_PACK(aiv, l); 3278 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 3279 DO_BO_PACK(aiv, 64); 3280 #else 3281 Perl_croak(aTHX_ "'j' not supported on this platform"); 3282 #endif 3283 PUSH_VAR(utf8, cur, aiv); 3284 } 3285 break; 3286 case 'J': 3287 while (len-- > 0) { 3288 UV auv; 3289 fromstr = NEXTFROM; 3290 auv = SvUV(fromstr); 3291 #if UVSIZE == INTSIZE 3292 DO_BO_PACK(auv, i); 3293 #elif UVSIZE == LONGSIZE 3294 DO_BO_PACK(auv, l); 3295 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE 3296 DO_BO_PACK(auv, 64); 3297 #else 3298 Perl_croak(aTHX_ "'J' not supported on this platform"); 3299 #endif 3300 PUSH_VAR(utf8, cur, auv); 3301 } 3302 break; 3303 case 'w': 3304 while (len-- > 0) { 3305 NV anv; 3306 fromstr = NEXTFROM; 3307 anv = SvNV(fromstr); 3308 3309 if (anv < 0) { 3310 *cur = '\0'; 3311 SvCUR_set(cat, cur - start); 3312 Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); 3313 } 3314 3315 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, 3316 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as 3317 any negative IVs will have already been got by the croak() 3318 above. IOK is untrue for fractions, so we test them 3319 against UV_MAX_P1. */ 3320 if (SvIOK(fromstr) || anv < UV_MAX_P1) { 3321 char buf[(sizeof(UV)*CHAR_BIT)/7+1]; 3322 char *in = buf + sizeof(buf); 3323 UV auv = SvUV(fromstr); 3324 3325 do { 3326 *--in = (char)((auv & 0x7f) | 0x80); 3327 auv >>= 7; 3328 } while (auv); 3329 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 3330 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3331 in, (buf + sizeof(buf)) - in); 3332 } else if (SvPOKp(fromstr)) 3333 goto w_string; 3334 else if (SvNOKp(fromstr)) { 3335 /* 10**NV_MAX_10_EXP is the largest power of 10 3336 so 10**(NV_MAX_10_EXP+1) is definitely unrepresentable 3337 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: 3338 x = (NV_MAX_10_EXP+1) * log (10) / log (128) 3339 And with that many bytes only Inf can overflow. 3340 Some C compilers are strict about integral constant 3341 expressions so we conservatively divide by a slightly 3342 smaller integer instead of multiplying by the exact 3343 floating-point value. 3344 */ 3345 #ifdef NV_MAX_10_EXP 3346 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */ 3347 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */ 3348 #else 3349 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */ 3350 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */ 3351 #endif 3352 char *in = buf + sizeof(buf); 3353 3354 anv = Perl_floor(anv); 3355 do { 3356 const NV next = Perl_floor(anv / 128); 3357 if (in <= buf) /* this cannot happen ;-) */ 3358 Perl_croak(aTHX_ "Cannot compress integer in pack"); 3359 *--in = (unsigned char)(anv - (next * 128)) | 0x80; 3360 anv = next; 3361 } while (anv > 0); 3362 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 3363 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3364 in, (buf + sizeof(buf)) - in); 3365 } else { 3366 const char *from; 3367 char *result, *in; 3368 SV *norm; 3369 STRLEN len; 3370 bool done; 3371 3372 w_string: 3373 /* Copy string and check for compliance */ 3374 from = SvPV_const(fromstr, len); 3375 if ((norm = is_an_int(from, len)) == NULL) 3376 Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); 3377 3378 Newx(result, len, char); 3379 in = result + len; 3380 done = FALSE; 3381 while (!done) *--in = div128(norm, &done) | 0x80; 3382 result[len - 1] &= 0x7F; /* clear continue bit */ 3383 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3384 in, (result + len) - in); 3385 Safefree(result); 3386 SvREFCNT_dec(norm); /* free norm */ 3387 } 3388 } 3389 break; 3390 case 'i': 3391 case 'i' | TYPE_IS_SHRIEKING: 3392 while (len-- > 0) { 3393 int aint; 3394 fromstr = NEXTFROM; 3395 aint = SvIV(fromstr); 3396 DO_BO_PACK(aint, i); 3397 PUSH_VAR(utf8, cur, aint); 3398 } 3399 break; 3400 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3401 case 'N' | TYPE_IS_SHRIEKING: 3402 #endif 3403 case 'N': 3404 while (len-- > 0) { 3405 U32 au32; 3406 fromstr = NEXTFROM; 3407 au32 = SvUV(fromstr); 3408 #ifdef HAS_HTONL 3409 au32 = PerlSock_htonl(au32); 3410 #endif 3411 PUSH32(utf8, cur, &au32); 3412 } 3413 break; 3414 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3415 case 'V' | TYPE_IS_SHRIEKING: 3416 #endif 3417 case 'V': 3418 while (len-- > 0) { 3419 U32 au32; 3420 fromstr = NEXTFROM; 3421 au32 = SvUV(fromstr); 3422 #ifdef HAS_HTOVL 3423 au32 = htovl(au32); 3424 #endif 3425 PUSH32(utf8, cur, &au32); 3426 } 3427 break; 3428 case 'L' | TYPE_IS_SHRIEKING: 3429 #if LONGSIZE != SIZE32 3430 while (len-- > 0) { 3431 unsigned long aulong; 3432 fromstr = NEXTFROM; 3433 aulong = SvUV(fromstr); 3434 DO_BO_PACK(aulong, l); 3435 PUSH_VAR(utf8, cur, aulong); 3436 } 3437 break; 3438 #else 3439 /* Fall though! */ 3440 #endif 3441 case 'L': 3442 while (len-- > 0) { 3443 U32 au32; 3444 fromstr = NEXTFROM; 3445 au32 = SvUV(fromstr); 3446 DO_BO_PACK(au32, 32); 3447 PUSH32(utf8, cur, &au32); 3448 } 3449 break; 3450 case 'l' | TYPE_IS_SHRIEKING: 3451 #if LONGSIZE != SIZE32 3452 while (len-- > 0) { 3453 long along; 3454 fromstr = NEXTFROM; 3455 along = SvIV(fromstr); 3456 DO_BO_PACK(along, l); 3457 PUSH_VAR(utf8, cur, along); 3458 } 3459 break; 3460 #else 3461 /* Fall though! */ 3462 #endif 3463 case 'l': 3464 while (len-- > 0) { 3465 I32 ai32; 3466 fromstr = NEXTFROM; 3467 ai32 = SvIV(fromstr); 3468 DO_BO_PACK(ai32, 32); 3469 PUSH32(utf8, cur, &ai32); 3470 } 3471 break; 3472 #ifdef HAS_QUAD 3473 case 'Q': 3474 while (len-- > 0) { 3475 Uquad_t auquad; 3476 fromstr = NEXTFROM; 3477 auquad = (Uquad_t) SvUV(fromstr); 3478 DO_BO_PACK(auquad, 64); 3479 PUSH_VAR(utf8, cur, auquad); 3480 } 3481 break; 3482 case 'q': 3483 while (len-- > 0) { 3484 Quad_t aquad; 3485 fromstr = NEXTFROM; 3486 aquad = (Quad_t)SvIV(fromstr); 3487 DO_BO_PACK(aquad, 64); 3488 PUSH_VAR(utf8, cur, aquad); 3489 } 3490 break; 3491 #endif /* HAS_QUAD */ 3492 case 'P': 3493 len = 1; /* assume SV is correct length */ 3494 GROWING(utf8, cat, start, cur, sizeof(char *)); 3495 /* Fall through! */ 3496 case 'p': 3497 while (len-- > 0) { 3498 const char *aptr; 3499 3500 fromstr = NEXTFROM; 3501 SvGETMAGIC(fromstr); 3502 if (!SvOK(fromstr)) aptr = NULL; 3503 else { 3504 /* XXX better yet, could spirit away the string to 3505 * a safe spot and hang on to it until the result 3506 * of pack() (and all copies of the result) are 3507 * gone. 3508 */ 3509 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) && 3510 !SvREADONLY(fromstr)))) { 3511 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3512 "Attempt to pack pointer to temporary value"); 3513 } 3514 if (SvPOK(fromstr) || SvNIOK(fromstr)) 3515 aptr = SvPV_nomg_const_nolen(fromstr); 3516 else 3517 aptr = SvPV_force_flags_nolen(fromstr, 0); 3518 } 3519 DO_BO_PACK_PC(aptr); 3520 PUSH_VAR(utf8, cur, aptr); 3521 } 3522 break; 3523 case 'u': { 3524 const char *aptr, *aend; 3525 bool from_utf8; 3526 3527 fromstr = NEXTFROM; 3528 if (len <= 2) len = 45; 3529 else len = len / 3 * 3; 3530 if (len >= 64) { 3531 Perl_ck_warner(aTHX_ packWARN(WARN_PACK), 3532 "Field too wide in 'u' format in pack"); 3533 len = 63; 3534 } 3535 aptr = SvPV_const(fromstr, fromlen); 3536 from_utf8 = DO_UTF8(fromstr); 3537 if (from_utf8) { 3538 aend = aptr + fromlen; 3539 fromlen = sv_len_utf8_nomg(fromstr); 3540 } else aend = NULL; /* Unused, but keep compilers happy */ 3541 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2); 3542 while (fromlen > 0) { 3543 U8 *end; 3544 I32 todo; 3545 U8 hunk[1+63/3*4+1]; 3546 3547 if ((I32)fromlen > len) 3548 todo = len; 3549 else 3550 todo = fromlen; 3551 if (from_utf8) { 3552 char buffer[64]; 3553 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo, 3554 'u' | TYPE_IS_PACK)) { 3555 *cur = '\0'; 3556 SvCUR_set(cat, cur - start); 3557 Perl_croak(aTHX_ "panic: string is shorter than advertised, " 3558 "aptr=%p, aend=%p, buffer=%p, todo=%ld", 3559 aptr, aend, buffer, (long) todo); 3560 } 3561 end = doencodes(hunk, buffer, todo); 3562 } else { 3563 end = doencodes(hunk, aptr, todo); 3564 aptr += todo; 3565 } 3566 PUSH_BYTES(utf8, cur, hunk, end-hunk); 3567 fromlen -= todo; 3568 } 3569 break; 3570 } 3571 } 3572 *cur = '\0'; 3573 SvCUR_set(cat, cur - start); 3574 no_change: 3575 *symptr = lookahead; 3576 } 3577 return beglist; 3578 } 3579 #undef NEXTFROM 3580 3581 3582 PP(pp_pack) 3583 { 3584 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 3585 SV *cat = TARG; 3586 STRLEN fromlen; 3587 SV *pat_sv = *++MARK; 3588 const char *pat = SvPV_const(pat_sv, fromlen); 3589 const char *patend = pat + fromlen; 3590 3591 MARK++; 3592 sv_setpvs(cat, ""); 3593 SvUTF8_off(cat); 3594 3595 packlist(cat, pat, patend, MARK, SP + 1); 3596 3597 SvSETMAGIC(cat); 3598 SP = ORIGMARK; 3599 PUSHs(cat); 3600 RETURN; 3601 } 3602 3603 /* 3604 * Local variables: 3605 * c-indentation-style: bsd 3606 * c-basic-offset: 4 3607 * indent-tabs-mode: nil 3608 * End: 3609 * 3610 * ex: set ts=8 sts=4 sw=4 et: 3611 */ 3612