1 /* pp_pack.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * He still hopefully carried some of his gear in his pack: a small tinder-box, 13 * two small shallow pans, the smaller fitting into the larger; inside them a 14 * wooden spoon, a short two-pronged fork and some skewers were stowed; and 15 * hidden at the bottom of the pack in a flat wooden box a dwindling treasure, 16 * some salt. 17 * 18 * [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"] 19 */ 20 21 /* This file contains pp ("push/pop") functions that 22 * execute the opcodes that make up a perl program. A typical pp function 23 * expects to find its arguments on the stack, and usually pushes its 24 * results onto the stack, hence the 'pp' terminology. Each OP structure 25 * contains a pointer to the relevant pp_foo() function. 26 * 27 * This particular file just contains pp_pack() and pp_unpack(). See the 28 * other pp*.c files for the rest of the pp_ functions. 29 */ 30 31 #include "EXTERN.h" 32 #define PERL_IN_PP_PACK_C 33 #include "perl.h" 34 35 /* Types used by pack/unpack */ 36 typedef enum { 37 e_no_len, /* no length */ 38 e_number, /* number, [] */ 39 e_star /* asterisk */ 40 } howlen_t; 41 42 typedef struct tempsym { 43 const char* patptr; /* current template char */ 44 const char* patend; /* one after last char */ 45 const char* grpbeg; /* 1st char of ()-group */ 46 const char* grpend; /* end of ()-group */ 47 I32 code; /* template code (!<>) */ 48 I32 length; /* length/repeat count */ 49 howlen_t howlen; /* how length is given */ 50 int level; /* () nesting level */ 51 U32 flags; /* /=4, comma=2, pack=1 */ 52 /* and group modifiers */ 53 STRLEN strbeg; /* offset of group start */ 54 struct tempsym *previous; /* previous group */ 55 } tempsym_t; 56 57 #define TEMPSYM_INIT(symptr, p, e, f) \ 58 STMT_START { \ 59 (symptr)->patptr = (p); \ 60 (symptr)->patend = (e); \ 61 (symptr)->grpbeg = NULL; \ 62 (symptr)->grpend = NULL; \ 63 (symptr)->grpend = NULL; \ 64 (symptr)->code = 0; \ 65 (symptr)->length = 0; \ 66 (symptr)->howlen = e_no_len; \ 67 (symptr)->level = 0; \ 68 (symptr)->flags = (f); \ 69 (symptr)->strbeg = 0; \ 70 (symptr)->previous = NULL; \ 71 } STMT_END 72 73 #if PERL_VERSION >= 9 74 # define PERL_PACK_CAN_BYTEORDER 75 # define PERL_PACK_CAN_SHRIEKSIGN 76 #endif 77 78 #ifndef CHAR_BIT 79 # define CHAR_BIT 8 80 #endif 81 /* Maximum number of bytes to which a byte can grow due to upgrade */ 82 #define UTF8_EXPAND 2 83 84 /* 85 * Offset for integer pack/unpack. 86 * 87 * On architectures where I16 and I32 aren't really 16 and 32 bits, 88 * which for now are all Crays, pack and unpack have to play games. 89 */ 90 91 /* 92 * These values are required for portability of pack() output. 93 * If they're not right on your machine, then pack() and unpack() 94 * wouldn't work right anyway; you'll need to apply the Cray hack. 95 * (I'd like to check them with #if, but you can't use sizeof() in 96 * the preprocessor.) --??? 97 */ 98 /* 99 The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE 100 defines are now in config.h. --Andy Dougherty April 1998 101 */ 102 #define SIZE16 2 103 #define SIZE32 4 104 105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack(). 106 --jhi Feb 1999 */ 107 108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32 109 # if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 /* little-endian */ 110 # define OFF16(p) ((char*)(p)) 111 # define OFF32(p) ((char*)(p)) 112 # else 113 # if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 /* big-endian */ 114 # define OFF16(p) ((char*)(p) + (sizeof(U16) - SIZE16)) 115 # define OFF32(p) ((char*)(p) + (sizeof(U32) - SIZE32)) 116 # else 117 ++++ bad cray byte order 118 # endif 119 # endif 120 #else 121 # define OFF16(p) ((char *) (p)) 122 # define OFF32(p) ((char *) (p)) 123 #endif 124 125 /* Only to be used inside a loop (see the break) */ 126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START { \ 127 if (utf8) { \ 128 if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break; \ 129 } else { \ 130 Copy(s, OFF16(p), SIZE16, char); \ 131 (s) += SIZE16; \ 132 } \ 133 } STMT_END 134 135 /* Only to be used inside a loop (see the break) */ 136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START { \ 137 if (utf8) { \ 138 if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break; \ 139 } else { \ 140 Copy(s, OFF32(p), SIZE32, char); \ 141 (s) += SIZE32; \ 142 } \ 143 } STMT_END 144 145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16) 146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32) 147 148 /* Only to be used inside a loop (see the break) */ 149 #define SHIFT_VAR(utf8, s, strend, var, datumtype) \ 150 STMT_START { \ 151 if (utf8) { \ 152 if (!uni_to_bytes(aTHX_ &s, strend, \ 153 (char *) &var, sizeof(var), datumtype)) break;\ 154 } else { \ 155 Copy(s, (char *) &var, sizeof(var), char); \ 156 s += sizeof(var); \ 157 } \ 158 } STMT_END 159 160 #define PUSH_VAR(utf8, aptr, var) \ 161 PUSH_BYTES(utf8, aptr, &(var), sizeof(var)) 162 163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */ 164 #define MAX_SUB_TEMPLATE_LEVEL 100 165 166 /* flags (note that type modifiers can also be used as flags!) */ 167 #define FLAG_WAS_UTF8 0x40 168 #define FLAG_PARSE_UTF8 0x20 /* Parse as utf8 */ 169 #define FLAG_UNPACK_ONLY_ONE 0x10 170 #define FLAG_DO_UTF8 0x08 /* The underlying string is utf8 */ 171 #define FLAG_SLASH 0x04 172 #define FLAG_COMMA 0x02 173 #define FLAG_PACK 0x01 174 175 STATIC SV * 176 S_mul128(pTHX_ SV *sv, U8 m) 177 { 178 STRLEN len; 179 char *s = SvPV(sv, len); 180 char *t; 181 182 PERL_ARGS_ASSERT_MUL128; 183 184 if (!strnEQ(s, "0000", 4)) { /* need to grow sv */ 185 SV * const tmpNew = newSVpvs("0000000000"); 186 187 sv_catsv(tmpNew, sv); 188 SvREFCNT_dec(sv); /* free old sv */ 189 sv = tmpNew; 190 s = SvPV(sv, len); 191 } 192 t = s + len - 1; 193 while (!*t) /* trailing '\0'? */ 194 t--; 195 while (t > s) { 196 const U32 i = ((*t - '0') << 7) + m; 197 *(t--) = '0' + (char)(i % 10); 198 m = (char)(i / 10); 199 } 200 return (sv); 201 } 202 203 /* Explosives and implosives. */ 204 205 #if 'I' == 73 && 'J' == 74 206 /* On an ASCII/ISO kind of system */ 207 #define ISUUCHAR(ch) ((ch) >= ' ' && (ch) < 'a') 208 #else 209 /* 210 Some other sort of character set - use memchr() so we don't match 211 the null byte. 212 */ 213 #define ISUUCHAR(ch) (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ') 214 #endif 215 216 /* type modifiers */ 217 #define TYPE_IS_SHRIEKING 0x100 218 #define TYPE_IS_BIG_ENDIAN 0x200 219 #define TYPE_IS_LITTLE_ENDIAN 0x400 220 #define TYPE_IS_PACK 0x800 221 #define TYPE_ENDIANNESS_MASK (TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN) 222 #define TYPE_MODIFIERS(t) ((t) & ~0xFF) 223 #define TYPE_NO_MODIFIERS(t) ((t) & 0xFF) 224 225 #ifdef PERL_PACK_CAN_SHRIEKSIGN 226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@." 227 #else 228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX" 229 #endif 230 231 #ifndef PERL_PACK_CAN_BYTEORDER 232 /* Put "can't" first because it is shorter */ 233 # define TYPE_ENDIANNESS(t) 0 234 # define TYPE_NO_ENDIANNESS(t) (t) 235 236 # define ENDIANNESS_ALLOWED_TYPES "" 237 238 # define DO_BO_UNPACK(var, type) 239 # define DO_BO_PACK(var, type) 240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) 241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) 242 # define DO_BO_UNPACK_N(var, type) 243 # define DO_BO_PACK_N(var, type) 244 # define DO_BO_UNPACK_P(var) 245 # define DO_BO_PACK_P(var) 246 # define DO_BO_UNPACK_PC(var) 247 # define DO_BO_PACK_PC(var) 248 249 #else /* PERL_PACK_CAN_BYTEORDER */ 250 251 # define TYPE_ENDIANNESS(t) ((t) & TYPE_ENDIANNESS_MASK) 252 # define TYPE_NO_ENDIANNESS(t) ((t) & ~TYPE_ENDIANNESS_MASK) 253 254 # define ENDIANNESS_ALLOWED_TYPES "sSiIlLqQjJfFdDpP(" 255 256 # define DO_BO_UNPACK(var, type) \ 257 STMT_START { \ 258 switch (TYPE_ENDIANNESS(datumtype)) { \ 259 case TYPE_IS_BIG_ENDIAN: var = my_betoh ## type (var); break; \ 260 case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break; \ 261 default: break; \ 262 } \ 263 } STMT_END 264 265 # define DO_BO_PACK(var, type) \ 266 STMT_START { \ 267 switch (TYPE_ENDIANNESS(datumtype)) { \ 268 case TYPE_IS_BIG_ENDIAN: var = my_htobe ## type (var); break; \ 269 case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break; \ 270 default: break; \ 271 } \ 272 } STMT_END 273 274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast) \ 275 STMT_START { \ 276 switch (TYPE_ENDIANNESS(datumtype)) { \ 277 case TYPE_IS_BIG_ENDIAN: \ 278 var = (post_cast*) my_betoh ## type ((pre_cast) var); \ 279 break; \ 280 case TYPE_IS_LITTLE_ENDIAN: \ 281 var = (post_cast *) my_letoh ## type ((pre_cast) var); \ 282 break; \ 283 default: \ 284 break; \ 285 } \ 286 } STMT_END 287 288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast) \ 289 STMT_START { \ 290 switch (TYPE_ENDIANNESS(datumtype)) { \ 291 case TYPE_IS_BIG_ENDIAN: \ 292 var = (post_cast *) my_htobe ## type ((pre_cast) var); \ 293 break; \ 294 case TYPE_IS_LITTLE_ENDIAN: \ 295 var = (post_cast *) my_htole ## type ((pre_cast) var); \ 296 break; \ 297 default: \ 298 break; \ 299 } \ 300 } STMT_END 301 302 # define BO_CANT_DOIT(action, type) \ 303 STMT_START { \ 304 switch (TYPE_ENDIANNESS(datumtype)) { \ 305 case TYPE_IS_BIG_ENDIAN: \ 306 Perl_croak(aTHX_ "Can't %s big-endian %ss on this " \ 307 "platform", #action, #type); \ 308 break; \ 309 case TYPE_IS_LITTLE_ENDIAN: \ 310 Perl_croak(aTHX_ "Can't %s little-endian %ss on this " \ 311 "platform", #action, #type); \ 312 break; \ 313 default: \ 314 break; \ 315 } \ 316 } STMT_END 317 318 # if PTRSIZE == INTSIZE 319 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, i, int, void) 320 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, i, int, void) 321 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, i, int, char) 322 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, i, int, char) 323 # elif PTRSIZE == LONGSIZE 324 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, long, void) 325 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, long, void) 326 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, long, char) 327 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, long, char) 328 # elif PTRSIZE == IVSIZE 329 # define DO_BO_UNPACK_P(var) DO_BO_UNPACK_PTR(var, l, IV, void) 330 # define DO_BO_PACK_P(var) DO_BO_PACK_PTR(var, l, IV, void) 331 # define DO_BO_UNPACK_PC(var) DO_BO_UNPACK_PTR(var, l, IV, char) 332 # define DO_BO_PACK_PC(var) DO_BO_PACK_PTR(var, l, IV, char) 333 # else 334 # define DO_BO_UNPACK_P(var) BO_CANT_DOIT(unpack, pointer) 335 # define DO_BO_PACK_P(var) BO_CANT_DOIT(pack, pointer) 336 # define DO_BO_UNPACK_PC(var) BO_CANT_DOIT(unpack, pointer) 337 # define DO_BO_PACK_PC(var) BO_CANT_DOIT(pack, pointer) 338 # endif 339 340 # if defined(my_htolen) && defined(my_letohn) && \ 341 defined(my_htoben) && defined(my_betohn) 342 # define DO_BO_UNPACK_N(var, type) \ 343 STMT_START { \ 344 switch (TYPE_ENDIANNESS(datumtype)) { \ 345 case TYPE_IS_BIG_ENDIAN: my_betohn(&var, sizeof(type)); break;\ 346 case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\ 347 default: break; \ 348 } \ 349 } STMT_END 350 351 # define DO_BO_PACK_N(var, type) \ 352 STMT_START { \ 353 switch (TYPE_ENDIANNESS(datumtype)) { \ 354 case TYPE_IS_BIG_ENDIAN: my_htoben(&var, sizeof(type)); break;\ 355 case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\ 356 default: break; \ 357 } \ 358 } STMT_END 359 # else 360 # define DO_BO_UNPACK_N(var, type) BO_CANT_DOIT(unpack, type) 361 # define DO_BO_PACK_N(var, type) BO_CANT_DOIT(pack, type) 362 # endif 363 364 #endif /* PERL_PACK_CAN_BYTEORDER */ 365 366 #define PACK_SIZE_CANNOT_CSUM 0x80 367 #define PACK_SIZE_UNPREDICTABLE 0x40 /* Not a fixed size element */ 368 #define PACK_SIZE_MASK 0x3F 369 370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted 371 in). You're unlikely ever to need to regenerate them. */ 372 373 #if TYPE_IS_SHRIEKING != 0x100 374 ++++shriek offset should be 256 375 #endif 376 377 typedef U8 packprops_t; 378 #if 'J'-'I' == 1 379 /* ASCII */ 380 STATIC const packprops_t packprops[512] = { 381 /* normal */ 382 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 383 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 384 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 385 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 386 0, 0, 0, 387 /* C */ sizeof(unsigned char), 388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 389 /* D */ LONG_DOUBLESIZE, 390 #else 391 0, 392 #endif 393 0, 394 /* F */ NVSIZE, 395 0, 0, 396 /* I */ sizeof(unsigned int), 397 /* J */ UVSIZE, 398 0, 399 /* L */ SIZE32, 400 0, 401 /* N */ SIZE32, 402 0, 0, 403 #if defined(HAS_QUAD) 404 /* Q */ sizeof(Uquad_t), 405 #else 406 0, 407 #endif 408 0, 409 /* S */ SIZE16, 410 0, 411 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE, 412 /* V */ SIZE32, 413 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, 414 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 415 /* c */ sizeof(char), 416 /* d */ sizeof(double), 417 0, 418 /* f */ sizeof(float), 419 0, 0, 420 /* i */ sizeof(int), 421 /* j */ IVSIZE, 422 0, 423 /* l */ SIZE32, 424 0, 425 /* n */ SIZE16, 426 0, 427 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM, 428 #if defined(HAS_QUAD) 429 /* q */ sizeof(Quad_t), 430 #else 431 0, 432 #endif 433 0, 434 /* s */ SIZE16, 435 0, 0, 436 /* v */ SIZE16, 437 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM, 438 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 439 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 440 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 441 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 446 0, 0, 0, 0, 0, 0, 0, 0, 447 /* shrieking */ 448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 449 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 451 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 452 0, 0, 0, 0, 0, 0, 0, 0, 0, 453 /* I */ sizeof(unsigned int), 454 0, 0, 455 /* L */ sizeof(unsigned long), 456 0, 457 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 458 /* N */ SIZE32, 459 #else 460 0, 461 #endif 462 0, 0, 0, 0, 463 /* S */ sizeof(unsigned short), 464 0, 0, 465 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 466 /* V */ SIZE32, 467 #else 468 0, 469 #endif 470 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 471 0, 0, 472 /* i */ sizeof(int), 473 0, 0, 474 /* l */ sizeof(long), 475 0, 476 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 477 /* n */ SIZE16, 478 #else 479 0, 480 #endif 481 0, 0, 0, 0, 482 /* s */ sizeof(short), 483 0, 0, 484 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 485 /* v */ SIZE16, 486 #else 487 0, 488 #endif 489 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 490 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 492 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 493 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 494 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 495 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 496 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 497 0, 0, 0, 0, 0, 0, 0, 0, 0 498 }; 499 #else 500 /* EBCDIC (or bust) */ 501 STATIC const packprops_t packprops[512] = { 502 /* normal */ 503 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 504 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 505 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 506 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 507 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 508 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 509 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 510 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 511 0, 0, 0, 512 /* c */ sizeof(char), 513 /* d */ sizeof(double), 514 0, 515 /* f */ sizeof(float), 516 0, 0, 517 /* i */ sizeof(int), 518 0, 0, 0, 0, 0, 0, 0, 519 /* j */ IVSIZE, 520 0, 521 /* l */ SIZE32, 522 0, 523 /* n */ SIZE16, 524 0, 525 /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM, 526 #if defined(HAS_QUAD) 527 /* q */ sizeof(Quad_t), 528 #else 529 0, 530 #endif 531 0, 0, 0, 0, 0, 0, 0, 0, 0, 532 /* s */ SIZE16, 533 0, 0, 534 /* v */ SIZE16, 535 /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM, 536 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 538 /* C */ sizeof(unsigned char), 539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 540 /* D */ LONG_DOUBLESIZE, 541 #else 542 0, 543 #endif 544 0, 545 /* F */ NVSIZE, 546 0, 0, 547 /* I */ sizeof(unsigned int), 548 0, 0, 0, 0, 0, 0, 0, 549 /* J */ UVSIZE, 550 0, 551 /* L */ SIZE32, 552 0, 553 /* N */ SIZE32, 554 0, 0, 555 #if defined(HAS_QUAD) 556 /* Q */ sizeof(Uquad_t), 557 #else 558 0, 559 #endif 560 0, 0, 0, 0, 0, 0, 0, 0, 0, 561 /* S */ SIZE16, 562 0, 563 /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE, 564 /* V */ SIZE32, 565 /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE, 566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 567 0, 0, 0, 0, 0, 0, 0, 0, 0, 568 /* shrieking */ 569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 571 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 573 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 574 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 575 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 577 0, 0, 0, 0, 0, 0, 0, 0, 0, 578 /* i */ sizeof(int), 579 0, 0, 0, 0, 0, 0, 0, 0, 0, 580 /* l */ sizeof(long), 581 0, 582 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 583 /* n */ SIZE16, 584 #else 585 0, 586 #endif 587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 588 /* s */ sizeof(short), 589 0, 0, 590 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 591 /* v */ SIZE16, 592 #else 593 0, 594 #endif 595 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 596 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 597 0, 0, 0, 598 /* I */ sizeof(unsigned int), 599 0, 0, 0, 0, 0, 0, 0, 0, 0, 600 /* L */ sizeof(unsigned long), 601 0, 602 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 603 /* N */ SIZE32, 604 #else 605 0, 606 #endif 607 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 608 /* S */ sizeof(unsigned short), 609 0, 0, 610 #if defined(PERL_PACK_CAN_SHRIEKSIGN) 611 /* V */ SIZE32, 612 #else 613 0, 614 #endif 615 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 616 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 617 }; 618 #endif 619 620 STATIC U8 621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype) 622 { 623 STRLEN retlen; 624 UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, 625 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 626 /* We try to process malformed UTF-8 as much as possible (preferrably with 627 warnings), but these two mean we make no progress in the string and 628 might enter an infinite loop */ 629 if (retlen == (STRLEN) -1 || retlen == 0) 630 Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack", 631 (int) TYPE_NO_MODIFIERS(datumtype)); 632 if (val >= 0x100) { 633 if (ckWARN(WARN_UNPACK)) 634 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 635 "Character in '%c' format wrapped in unpack", 636 (int) TYPE_NO_MODIFIERS(datumtype)); 637 val &= 0xff; 638 } 639 *s += retlen; 640 return (U8)val; 641 } 642 643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \ 644 uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \ 645 *(U8 *)(s)++) 646 647 STATIC bool 648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype) 649 { 650 UV val; 651 STRLEN retlen; 652 const char *from = *s; 653 int bad = 0; 654 const U32 flags = ckWARN(WARN_UTF8) ? 655 UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY); 656 for (;buf_len > 0; buf_len--) { 657 if (from >= end) return FALSE; 658 val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags); 659 if (retlen == (STRLEN) -1 || retlen == 0) { 660 from += UTF8SKIP(from); 661 bad |= 1; 662 } else from += retlen; 663 if (val >= 0x100) { 664 bad |= 2; 665 val &= 0xff; 666 } 667 *(U8 *)buf++ = (U8)val; 668 } 669 /* We have enough characters for the buffer. Did we have problems ? */ 670 if (bad) { 671 if (bad & 1) { 672 /* Rewalk the string fragment while warning */ 673 const char *ptr; 674 const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY; 675 for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) { 676 if (ptr >= end) break; 677 utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags); 678 } 679 if (from > end) from = end; 680 } 681 if ((bad & 2) && ckWARN(WARN_UNPACK)) 682 Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ? 683 WARN_PACK : WARN_UNPACK), 684 "Character(s) in '%c' format wrapped in %s", 685 (int) TYPE_NO_MODIFIERS(datumtype), 686 datumtype & TYPE_IS_PACK ? "pack" : "unpack"); 687 } 688 *s = from; 689 return TRUE; 690 } 691 692 STATIC bool 693 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out) 694 { 695 dVAR; 696 STRLEN retlen; 697 const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY); 698 if (val >= 0x100 || !ISUUCHAR(val) || 699 retlen == (STRLEN) -1 || retlen == 0) { 700 *out = 0; 701 return FALSE; 702 } 703 *out = PL_uudmap[val] & 077; 704 *s += retlen; 705 return TRUE; 706 } 707 708 STATIC char * 709 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) { 710 const U8 * const end = start + len; 711 712 PERL_ARGS_ASSERT_BYTES_TO_UNI; 713 714 while (start < end) { 715 const UV uv = NATIVE_TO_ASCII(*start); 716 if (UNI_IS_INVARIANT(uv)) 717 *dest++ = (char)(U8)UTF_TO_NATIVE(uv); 718 else { 719 *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv); 720 *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv); 721 } 722 start++; 723 } 724 return dest; 725 } 726 727 #define PUSH_BYTES(utf8, cur, buf, len) \ 728 STMT_START { \ 729 if (utf8) \ 730 (cur) = bytes_to_uni((U8 *) buf, len, (cur)); \ 731 else { \ 732 Copy(buf, cur, len, char); \ 733 (cur) += (len); \ 734 } \ 735 } STMT_END 736 737 #define GROWING(utf8, cat, start, cur, in_len) \ 738 STMT_START { \ 739 STRLEN glen = (in_len); \ 740 if (utf8) glen *= UTF8_EXPAND; \ 741 if ((cur) + glen >= (start) + SvLEN(cat)) { \ 742 (start) = sv_exp_grow(cat, glen); \ 743 (cur) = (start) + SvCUR(cat); \ 744 } \ 745 } STMT_END 746 747 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \ 748 STMT_START { \ 749 const STRLEN glen = (in_len); \ 750 STRLEN gl = glen; \ 751 if (utf8) gl *= UTF8_EXPAND; \ 752 if ((cur) + gl >= (start) + SvLEN(cat)) { \ 753 *cur = '\0'; \ 754 SvCUR_set((cat), (cur) - (start)); \ 755 (start) = sv_exp_grow(cat, gl); \ 756 (cur) = (start) + SvCUR(cat); \ 757 } \ 758 PUSH_BYTES(utf8, cur, buf, glen); \ 759 } STMT_END 760 761 #define PUSH_BYTE(utf8, s, byte) \ 762 STMT_START { \ 763 if (utf8) { \ 764 const U8 au8 = (byte); \ 765 (s) = bytes_to_uni(&au8, 1, (s)); \ 766 } else *(U8 *)(s)++ = (byte); \ 767 } STMT_END 768 769 /* Only to be used inside a loop (see the break) */ 770 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags) \ 771 STMT_START { \ 772 STRLEN retlen; \ 773 if (str >= end) break; \ 774 val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags); \ 775 if (retlen == (STRLEN) -1 || retlen == 0) { \ 776 *cur = '\0'; \ 777 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); \ 778 } \ 779 str += retlen; \ 780 } STMT_END 781 782 static const char *_action( const tempsym_t* symptr ) 783 { 784 return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack"); 785 } 786 787 /* Returns the sizeof() struct described by pat */ 788 STATIC I32 789 S_measure_struct(pTHX_ tempsym_t* symptr) 790 { 791 I32 total = 0; 792 793 PERL_ARGS_ASSERT_MEASURE_STRUCT; 794 795 while (next_symbol(symptr)) { 796 I32 len; 797 int size; 798 799 switch (symptr->howlen) { 800 case e_star: 801 Perl_croak(aTHX_ "Within []-length '*' not allowed in %s", 802 _action( symptr ) ); 803 break; 804 default: 805 /* e_no_len and e_number */ 806 len = symptr->length; 807 break; 808 } 809 810 size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK; 811 if (!size) { 812 int star; 813 /* endianness doesn't influence the size of a type */ 814 switch(TYPE_NO_ENDIANNESS(symptr->code)) { 815 default: 816 Perl_croak(aTHX_ "Invalid type '%c' in %s", 817 (int)TYPE_NO_MODIFIERS(symptr->code), 818 _action( symptr ) ); 819 #ifdef PERL_PACK_CAN_SHRIEKSIGN 820 case '.' | TYPE_IS_SHRIEKING: 821 case '@' | TYPE_IS_SHRIEKING: 822 #endif 823 case '@': 824 case '.': 825 case '/': 826 case 'U': /* XXXX Is it correct? */ 827 case 'w': 828 case 'u': 829 Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s", 830 (int) TYPE_NO_MODIFIERS(symptr->code), 831 _action( symptr ) ); 832 case '%': 833 size = 0; 834 break; 835 case '(': 836 { 837 tempsym_t savsym = *symptr; 838 symptr->patptr = savsym.grpbeg; 839 symptr->patend = savsym.grpend; 840 /* XXXX Theoretically, we need to measure many times at 841 different positions, since the subexpression may contain 842 alignment commands, but be not of aligned length. 843 Need to detect this and croak(). */ 844 size = measure_struct(symptr); 845 *symptr = savsym; 846 break; 847 } 848 case 'X' | TYPE_IS_SHRIEKING: 849 /* XXXX Is this useful? Then need to treat MEASURE_BACKWARDS. 850 */ 851 if (!len) /* Avoid division by 0 */ 852 len = 1; 853 len = total % len; /* Assumed: the start is aligned. */ 854 /* FALL THROUGH */ 855 case 'X': 856 size = -1; 857 if (total < len) 858 Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) ); 859 break; 860 case 'x' | TYPE_IS_SHRIEKING: 861 if (!len) /* Avoid division by 0 */ 862 len = 1; 863 star = total % len; /* Assumed: the start is aligned. */ 864 if (star) /* Other portable ways? */ 865 len = len - star; 866 else 867 len = 0; 868 /* FALL THROUGH */ 869 case 'x': 870 case 'A': 871 case 'Z': 872 case 'a': 873 size = 1; 874 break; 875 case 'B': 876 case 'b': 877 len = (len + 7)/8; 878 size = 1; 879 break; 880 case 'H': 881 case 'h': 882 len = (len + 1)/2; 883 size = 1; 884 break; 885 886 case 'P': 887 len = 1; 888 size = sizeof(char*); 889 break; 890 } 891 } 892 total += len * size; 893 } 894 return total; 895 } 896 897 898 /* locate matching closing parenthesis or bracket 899 * returns char pointer to char after match, or NULL 900 */ 901 STATIC const char * 902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender) 903 { 904 PERL_ARGS_ASSERT_GROUP_END; 905 906 while (patptr < patend) { 907 const char c = *patptr++; 908 909 if (isSPACE(c)) 910 continue; 911 else if (c == ender) 912 return patptr-1; 913 else if (c == '#') { 914 while (patptr < patend && *patptr != '\n') 915 patptr++; 916 continue; 917 } else if (c == '(') 918 patptr = group_end(patptr, patend, ')') + 1; 919 else if (c == '[') 920 patptr = group_end(patptr, patend, ']') + 1; 921 } 922 Perl_croak(aTHX_ "No group ending character '%c' found in template", 923 ender); 924 return 0; 925 } 926 927 928 /* Convert unsigned decimal number to binary. 929 * Expects a pointer to the first digit and address of length variable 930 * Advances char pointer to 1st non-digit char and returns number 931 */ 932 STATIC const char * 933 S_get_num(pTHX_ register const char *patptr, I32 *lenptr ) 934 { 935 I32 len = *patptr++ - '0'; 936 937 PERL_ARGS_ASSERT_GET_NUM; 938 939 while (isDIGIT(*patptr)) { 940 if (len >= 0x7FFFFFFF/10) 941 Perl_croak(aTHX_ "pack/unpack repeat count overflow"); 942 len = (len * 10) + (*patptr++ - '0'); 943 } 944 *lenptr = len; 945 return patptr; 946 } 947 948 /* The marvellous template parsing routine: Using state stored in *symptr, 949 * locates next template code and count 950 */ 951 STATIC bool 952 S_next_symbol(pTHX_ tempsym_t* symptr ) 953 { 954 const char* patptr = symptr->patptr; 955 const char* const patend = symptr->patend; 956 957 PERL_ARGS_ASSERT_NEXT_SYMBOL; 958 959 symptr->flags &= ~FLAG_SLASH; 960 961 while (patptr < patend) { 962 if (isSPACE(*patptr)) 963 patptr++; 964 else if (*patptr == '#') { 965 patptr++; 966 while (patptr < patend && *patptr != '\n') 967 patptr++; 968 if (patptr < patend) 969 patptr++; 970 } else { 971 /* We should have found a template code */ 972 I32 code = *patptr++ & 0xFF; 973 U32 inherited_modifiers = 0; 974 975 if (code == ','){ /* grandfather in commas but with a warning */ 976 if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){ 977 symptr->flags |= FLAG_COMMA; 978 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 979 "Invalid type ',' in %s", _action( symptr ) ); 980 } 981 continue; 982 } 983 984 /* for '(', skip to ')' */ 985 if (code == '(') { 986 if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' ) 987 Perl_croak(aTHX_ "()-group starts with a count in %s", 988 _action( symptr ) ); 989 symptr->grpbeg = patptr; 990 patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') ); 991 if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL ) 992 Perl_croak(aTHX_ "Too deeply nested ()-groups in %s", 993 _action( symptr ) ); 994 } 995 996 /* look for group modifiers to inherit */ 997 if (TYPE_ENDIANNESS(symptr->flags)) { 998 if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code))) 999 inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags); 1000 } 1001 1002 /* look for modifiers */ 1003 while (patptr < patend) { 1004 const char *allowed; 1005 I32 modifier; 1006 switch (*patptr) { 1007 case '!': 1008 modifier = TYPE_IS_SHRIEKING; 1009 allowed = SHRIEKING_ALLOWED_TYPES; 1010 break; 1011 #ifdef PERL_PACK_CAN_BYTEORDER 1012 case '>': 1013 modifier = TYPE_IS_BIG_ENDIAN; 1014 allowed = ENDIANNESS_ALLOWED_TYPES; 1015 break; 1016 case '<': 1017 modifier = TYPE_IS_LITTLE_ENDIAN; 1018 allowed = ENDIANNESS_ALLOWED_TYPES; 1019 break; 1020 #endif /* PERL_PACK_CAN_BYTEORDER */ 1021 default: 1022 allowed = ""; 1023 modifier = 0; 1024 break; 1025 } 1026 1027 if (modifier == 0) 1028 break; 1029 1030 if (!strchr(allowed, TYPE_NO_MODIFIERS(code))) 1031 Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr, 1032 allowed, _action( symptr ) ); 1033 1034 if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK) 1035 Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s", 1036 (int) TYPE_NO_MODIFIERS(code), _action( symptr ) ); 1037 else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) == 1038 TYPE_ENDIANNESS_MASK) 1039 Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s", 1040 *patptr, _action( symptr ) ); 1041 1042 if ((code & modifier) && ckWARN(WARN_UNPACK)) { 1043 Perl_warner(aTHX_ packWARN(WARN_UNPACK), 1044 "Duplicate modifier '%c' after '%c' in %s", 1045 *patptr, (int) TYPE_NO_MODIFIERS(code), 1046 _action( symptr ) ); 1047 } 1048 1049 code |= modifier; 1050 patptr++; 1051 } 1052 1053 /* inherit modifiers */ 1054 code |= inherited_modifiers; 1055 1056 /* look for count and/or / */ 1057 if (patptr < patend) { 1058 if (isDIGIT(*patptr)) { 1059 patptr = get_num( patptr, &symptr->length ); 1060 symptr->howlen = e_number; 1061 1062 } else if (*patptr == '*') { 1063 patptr++; 1064 symptr->howlen = e_star; 1065 1066 } else if (*patptr == '[') { 1067 const char* lenptr = ++patptr; 1068 symptr->howlen = e_number; 1069 patptr = group_end( patptr, patend, ']' ) + 1; 1070 /* what kind of [] is it? */ 1071 if (isDIGIT(*lenptr)) { 1072 lenptr = get_num( lenptr, &symptr->length ); 1073 if( *lenptr != ']' ) 1074 Perl_croak(aTHX_ "Malformed integer in [] in %s", 1075 _action( symptr ) ); 1076 } else { 1077 tempsym_t savsym = *symptr; 1078 symptr->patend = patptr-1; 1079 symptr->patptr = lenptr; 1080 savsym.length = measure_struct(symptr); 1081 *symptr = savsym; 1082 } 1083 } else { 1084 symptr->howlen = e_no_len; 1085 symptr->length = 1; 1086 } 1087 1088 /* try to find / */ 1089 while (patptr < patend) { 1090 if (isSPACE(*patptr)) 1091 patptr++; 1092 else if (*patptr == '#') { 1093 patptr++; 1094 while (patptr < patend && *patptr != '\n') 1095 patptr++; 1096 if (patptr < patend) 1097 patptr++; 1098 } else { 1099 if (*patptr == '/') { 1100 symptr->flags |= FLAG_SLASH; 1101 patptr++; 1102 if (patptr < patend && 1103 (isDIGIT(*patptr) || *patptr == '*' || *patptr == '[')) 1104 Perl_croak(aTHX_ "'/' does not take a repeat count in %s", 1105 _action( symptr ) ); 1106 } 1107 break; 1108 } 1109 } 1110 } else { 1111 /* at end - no count, no / */ 1112 symptr->howlen = e_no_len; 1113 symptr->length = 1; 1114 } 1115 1116 symptr->code = code; 1117 symptr->patptr = patptr; 1118 return TRUE; 1119 } 1120 } 1121 symptr->patptr = patptr; 1122 return FALSE; 1123 } 1124 1125 /* 1126 There is no way to cleanly handle the case where we should process the 1127 string per byte in its upgraded form while it's really in downgraded form 1128 (e.g. estimates like strend-s as an upper bound for the number of 1129 characters left wouldn't work). So if we foresee the need of this 1130 (pattern starts with U or contains U0), we want to work on the encoded 1131 version of the string. Users are advised to upgrade their pack string 1132 themselves if they need to do a lot of unpacks like this on it 1133 */ 1134 STATIC bool 1135 need_utf8(const char *pat, const char *patend) 1136 { 1137 bool first = TRUE; 1138 1139 PERL_ARGS_ASSERT_NEED_UTF8; 1140 1141 while (pat < patend) { 1142 if (pat[0] == '#') { 1143 pat++; 1144 pat = (const char *) memchr(pat, '\n', patend-pat); 1145 if (!pat) return FALSE; 1146 } else if (pat[0] == 'U') { 1147 if (first || pat[1] == '0') return TRUE; 1148 } else first = FALSE; 1149 pat++; 1150 } 1151 return FALSE; 1152 } 1153 1154 STATIC char 1155 first_symbol(const char *pat, const char *patend) { 1156 PERL_ARGS_ASSERT_FIRST_SYMBOL; 1157 1158 while (pat < patend) { 1159 if (pat[0] != '#') return pat[0]; 1160 pat++; 1161 pat = (const char *) memchr(pat, '\n', patend-pat); 1162 if (!pat) return 0; 1163 pat++; 1164 } 1165 return 0; 1166 } 1167 1168 /* 1169 =for apidoc unpackstring 1170 1171 The engine implementing unpack() Perl function. C<unpackstring> puts the 1172 extracted list items on the stack and returns the number of elements. 1173 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function. 1174 1175 =cut */ 1176 1177 I32 1178 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags) 1179 { 1180 tempsym_t sym; 1181 1182 PERL_ARGS_ASSERT_UNPACKSTRING; 1183 1184 if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8; 1185 else if (need_utf8(pat, patend)) { 1186 /* We probably should try to avoid this in case a scalar context call 1187 wouldn't get to the "U0" */ 1188 STRLEN len = strend - s; 1189 s = (char *) bytes_to_utf8((U8 *) s, &len); 1190 SAVEFREEPV(s); 1191 strend = s + len; 1192 flags |= FLAG_DO_UTF8; 1193 } 1194 1195 if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8)) 1196 flags |= FLAG_PARSE_UTF8; 1197 1198 TEMPSYM_INIT(&sym, pat, patend, flags); 1199 1200 return unpack_rec(&sym, s, s, strend, NULL ); 1201 } 1202 1203 STATIC I32 1204 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s ) 1205 { 1206 dVAR; dSP; 1207 SV *sv; 1208 const I32 start_sp_offset = SP - PL_stack_base; 1209 howlen_t howlen; 1210 I32 checksum = 0; 1211 UV cuv = 0; 1212 NV cdouble = 0.0; 1213 const int bits_in_uv = CHAR_BIT * sizeof(cuv); 1214 bool beyond = FALSE; 1215 bool explicit_length; 1216 const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0; 1217 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 1218 1219 PERL_ARGS_ASSERT_UNPACK_REC; 1220 1221 symptr->strbeg = s - strbeg; 1222 1223 while (next_symbol(symptr)) { 1224 packprops_t props; 1225 I32 len; 1226 I32 datumtype = symptr->code; 1227 /* do first one only unless in list context 1228 / is implemented by unpacking the count, then popping it from the 1229 stack, so must check that we're not in the middle of a / */ 1230 if ( unpack_only_one 1231 && (SP - PL_stack_base == start_sp_offset + 1) 1232 && (datumtype != '/') ) /* XXX can this be omitted */ 1233 break; 1234 1235 switch (howlen = symptr->howlen) { 1236 case e_star: 1237 len = strend - strbeg; /* long enough */ 1238 break; 1239 default: 1240 /* e_no_len and e_number */ 1241 len = symptr->length; 1242 break; 1243 } 1244 1245 explicit_length = TRUE; 1246 redo_switch: 1247 beyond = s >= strend; 1248 1249 props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 1250 if (props) { 1251 /* props nonzero means we can process this letter. */ 1252 const long size = props & PACK_SIZE_MASK; 1253 const long howmany = (strend - s) / size; 1254 if (len > howmany) 1255 len = howmany; 1256 1257 if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) { 1258 if (len && unpack_only_one) len = 1; 1259 EXTEND(SP, len); 1260 EXTEND_MORTAL(len); 1261 } 1262 } 1263 1264 switch(TYPE_NO_ENDIANNESS(datumtype)) { 1265 default: 1266 Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) ); 1267 1268 case '%': 1269 if (howlen == e_no_len) 1270 len = 16; /* len is not specified */ 1271 checksum = len; 1272 cuv = 0; 1273 cdouble = 0; 1274 continue; 1275 break; 1276 case '(': 1277 { 1278 tempsym_t savsym = *symptr; 1279 const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 1280 symptr->flags |= group_modifiers; 1281 symptr->patend = savsym.grpend; 1282 symptr->previous = &savsym; 1283 symptr->level++; 1284 PUTBACK; 1285 if (len && unpack_only_one) len = 1; 1286 while (len--) { 1287 symptr->patptr = savsym.grpbeg; 1288 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 1289 else symptr->flags &= ~FLAG_PARSE_UTF8; 1290 unpack_rec(symptr, s, strbeg, strend, &s); 1291 if (s == strend && savsym.howlen == e_star) 1292 break; /* No way to continue */ 1293 } 1294 SPAGAIN; 1295 savsym.flags = symptr->flags & ~group_modifiers; 1296 *symptr = savsym; 1297 break; 1298 } 1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1300 case '.' | TYPE_IS_SHRIEKING: 1301 #endif 1302 case '.': { 1303 const char *from; 1304 SV *sv; 1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1306 const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING); 1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 1308 const bool u8 = utf8; 1309 #endif 1310 if (howlen == e_star) from = strbeg; 1311 else if (len <= 0) from = s; 1312 else { 1313 tempsym_t *group = symptr; 1314 1315 while (--len && group) group = group->previous; 1316 from = group ? strbeg + group->strbeg : strbeg; 1317 } 1318 sv = from <= s ? 1319 newSVuv( u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) : 1320 newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s))); 1321 mXPUSHs(sv); 1322 break; 1323 } 1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1325 case '@' | TYPE_IS_SHRIEKING: 1326 #endif 1327 case '@': 1328 s = strbeg + symptr->strbeg; 1329 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1330 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 1331 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 1332 if (utf8) 1333 #endif 1334 { 1335 while (len > 0) { 1336 if (s >= strend) 1337 Perl_croak(aTHX_ "'@' outside of string in unpack"); 1338 s += UTF8SKIP(s); 1339 len--; 1340 } 1341 if (s > strend) 1342 Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack"); 1343 } else { 1344 if (strend-s < len) 1345 Perl_croak(aTHX_ "'@' outside of string in unpack"); 1346 s += len; 1347 } 1348 break; 1349 case 'X' | TYPE_IS_SHRIEKING: 1350 if (!len) /* Avoid division by 0 */ 1351 len = 1; 1352 if (utf8) { 1353 const char *hop, *last; 1354 I32 l = len; 1355 hop = last = strbeg; 1356 while (hop < s) { 1357 hop += UTF8SKIP(hop); 1358 if (--l == 0) { 1359 last = hop; 1360 l = len; 1361 } 1362 } 1363 if (last > s) 1364 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1365 s = last; 1366 break; 1367 } 1368 len = (s - strbeg) % len; 1369 /* FALL THROUGH */ 1370 case 'X': 1371 if (utf8) { 1372 while (len > 0) { 1373 if (s <= strbeg) 1374 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1375 while (--s, UTF8_IS_CONTINUATION(*s)) { 1376 if (s <= strbeg) 1377 Perl_croak(aTHX_ "'X' outside of string in unpack"); 1378 } 1379 len--; 1380 } 1381 } else { 1382 if (len > s - strbeg) 1383 Perl_croak(aTHX_ "'X' outside of string in unpack" ); 1384 s -= len; 1385 } 1386 break; 1387 case 'x' | TYPE_IS_SHRIEKING: { 1388 I32 ai32; 1389 if (!len) /* Avoid division by 0 */ 1390 len = 1; 1391 if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len; 1392 else ai32 = (s - strbeg) % len; 1393 if (ai32 == 0) break; 1394 len -= ai32; 1395 } 1396 /* FALL THROUGH */ 1397 case 'x': 1398 if (utf8) { 1399 while (len>0) { 1400 if (s >= strend) 1401 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1402 s += UTF8SKIP(s); 1403 len--; 1404 } 1405 } else { 1406 if (len > strend - s) 1407 Perl_croak(aTHX_ "'x' outside of string in unpack"); 1408 s += len; 1409 } 1410 break; 1411 case '/': 1412 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 1413 break; 1414 case 'A': 1415 case 'Z': 1416 case 'a': 1417 if (checksum) { 1418 /* Preliminary length estimate is assumed done in 'W' */ 1419 if (len > strend - s) len = strend - s; 1420 goto W_checksum; 1421 } 1422 if (utf8) { 1423 I32 l; 1424 const char *hop; 1425 for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) { 1426 if (hop >= strend) { 1427 if (hop > strend) 1428 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1429 break; 1430 } 1431 } 1432 if (hop > strend) 1433 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1434 len = hop - s; 1435 } else if (len > strend - s) 1436 len = strend - s; 1437 1438 if (datumtype == 'Z') { 1439 /* 'Z' strips stuff after first null */ 1440 const char *ptr, *end; 1441 end = s + len; 1442 for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break; 1443 sv = newSVpvn(s, ptr-s); 1444 if (howlen == e_star) /* exact for 'Z*' */ 1445 len = ptr-s + (ptr != strend ? 1 : 0); 1446 } else if (datumtype == 'A') { 1447 /* 'A' strips both nulls and spaces */ 1448 const char *ptr; 1449 if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) { 1450 for (ptr = s+len-1; ptr >= s; ptr--) 1451 if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) && 1452 !is_utf8_space((U8 *) ptr)) break; 1453 if (ptr >= s) ptr += UTF8SKIP(ptr); 1454 else ptr++; 1455 if (ptr > s+len) 1456 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1457 } else { 1458 for (ptr = s+len-1; ptr >= s; ptr--) 1459 if (*ptr != 0 && !isSPACE(*ptr)) break; 1460 ptr++; 1461 } 1462 sv = newSVpvn(s, ptr-s); 1463 } else sv = newSVpvn(s, len); 1464 1465 if (utf8) { 1466 SvUTF8_on(sv); 1467 /* Undo any upgrade done due to need_utf8() */ 1468 if (!(symptr->flags & FLAG_WAS_UTF8)) 1469 sv_utf8_downgrade(sv, 0); 1470 } 1471 mXPUSHs(sv); 1472 s += len; 1473 break; 1474 case 'B': 1475 case 'b': { 1476 char *str; 1477 if (howlen == e_star || len > (strend - s) * 8) 1478 len = (strend - s) * 8; 1479 if (checksum) { 1480 if (!PL_bitcount) { 1481 int bits; 1482 Newxz(PL_bitcount, 256, char); 1483 for (bits = 1; bits < 256; bits++) { 1484 if (bits & 1) PL_bitcount[bits]++; 1485 if (bits & 2) PL_bitcount[bits]++; 1486 if (bits & 4) PL_bitcount[bits]++; 1487 if (bits & 8) PL_bitcount[bits]++; 1488 if (bits & 16) PL_bitcount[bits]++; 1489 if (bits & 32) PL_bitcount[bits]++; 1490 if (bits & 64) PL_bitcount[bits]++; 1491 if (bits & 128) PL_bitcount[bits]++; 1492 } 1493 } 1494 if (utf8) 1495 while (len >= 8 && s < strend) { 1496 cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)]; 1497 len -= 8; 1498 } 1499 else 1500 while (len >= 8) { 1501 cuv += PL_bitcount[*(U8 *)s++]; 1502 len -= 8; 1503 } 1504 if (len && s < strend) { 1505 U8 bits; 1506 bits = SHIFT_BYTE(utf8, s, strend, datumtype); 1507 if (datumtype == 'b') 1508 while (len-- > 0) { 1509 if (bits & 1) cuv++; 1510 bits >>= 1; 1511 } 1512 else 1513 while (len-- > 0) { 1514 if (bits & 0x80) cuv++; 1515 bits <<= 1; 1516 } 1517 } 1518 break; 1519 } 1520 1521 sv = sv_2mortal(newSV(len ? len : 1)); 1522 SvPOK_on(sv); 1523 str = SvPVX(sv); 1524 if (datumtype == 'b') { 1525 U8 bits = 0; 1526 const I32 ai32 = len; 1527 for (len = 0; len < ai32; len++) { 1528 if (len & 7) bits >>= 1; 1529 else if (utf8) { 1530 if (s >= strend) break; 1531 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1532 } else bits = *(U8 *) s++; 1533 *str++ = bits & 1 ? '1' : '0'; 1534 } 1535 } else { 1536 U8 bits = 0; 1537 const I32 ai32 = len; 1538 for (len = 0; len < ai32; len++) { 1539 if (len & 7) bits <<= 1; 1540 else if (utf8) { 1541 if (s >= strend) break; 1542 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1543 } else bits = *(U8 *) s++; 1544 *str++ = bits & 0x80 ? '1' : '0'; 1545 } 1546 } 1547 *str = '\0'; 1548 SvCUR_set(sv, str - SvPVX_const(sv)); 1549 XPUSHs(sv); 1550 break; 1551 } 1552 case 'H': 1553 case 'h': { 1554 char *str; 1555 /* Preliminary length estimate, acceptable for utf8 too */ 1556 if (howlen == e_star || len > (strend - s) * 2) 1557 len = (strend - s) * 2; 1558 sv = sv_2mortal(newSV(len ? len : 1)); 1559 SvPOK_on(sv); 1560 str = SvPVX(sv); 1561 if (datumtype == 'h') { 1562 U8 bits = 0; 1563 I32 ai32 = len; 1564 for (len = 0; len < ai32; len++) { 1565 if (len & 1) bits >>= 4; 1566 else if (utf8) { 1567 if (s >= strend) break; 1568 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1569 } else bits = * (U8 *) s++; 1570 *str++ = PL_hexdigit[bits & 15]; 1571 } 1572 } else { 1573 U8 bits = 0; 1574 const I32 ai32 = len; 1575 for (len = 0; len < ai32; len++) { 1576 if (len & 1) bits <<= 4; 1577 else if (utf8) { 1578 if (s >= strend) break; 1579 bits = uni_to_byte(aTHX_ &s, strend, datumtype); 1580 } else bits = *(U8 *) s++; 1581 *str++ = PL_hexdigit[(bits >> 4) & 15]; 1582 } 1583 } 1584 *str = '\0'; 1585 SvCUR_set(sv, str - SvPVX_const(sv)); 1586 XPUSHs(sv); 1587 break; 1588 } 1589 case 'C': 1590 if (len == 0) { 1591 if (explicit_length) 1592 /* Switch to "character" mode */ 1593 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 1594 break; 1595 } 1596 /* FALL THROUGH */ 1597 case 'c': 1598 while (len-- > 0 && s < strend) { 1599 int aint; 1600 if (utf8) 1601 { 1602 STRLEN retlen; 1603 aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1604 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1605 if (retlen == (STRLEN) -1 || retlen == 0) 1606 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1607 s += retlen; 1608 } 1609 else 1610 aint = *(U8 *)(s)++; 1611 if (aint >= 128 && datumtype != 'C') /* fake up signed chars */ 1612 aint -= 256; 1613 if (!checksum) 1614 mPUSHi(aint); 1615 else if (checksum > bits_in_uv) 1616 cdouble += (NV)aint; 1617 else 1618 cuv += aint; 1619 } 1620 break; 1621 case 'W': 1622 W_checksum: 1623 if (utf8) { 1624 while (len-- > 0 && s < strend) { 1625 STRLEN retlen; 1626 const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen, 1627 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY); 1628 if (retlen == (STRLEN) -1 || retlen == 0) 1629 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1630 s += retlen; 1631 if (!checksum) 1632 mPUSHu(val); 1633 else if (checksum > bits_in_uv) 1634 cdouble += (NV) val; 1635 else 1636 cuv += val; 1637 } 1638 } else if (!checksum) 1639 while (len-- > 0) { 1640 const U8 ch = *(U8 *) s++; 1641 mPUSHu(ch); 1642 } 1643 else if (checksum > bits_in_uv) 1644 while (len-- > 0) cdouble += (NV) *(U8 *) s++; 1645 else 1646 while (len-- > 0) cuv += *(U8 *) s++; 1647 break; 1648 case 'U': 1649 if (len == 0) { 1650 if (explicit_length) { 1651 /* Switch to "bytes in UTF-8" mode */ 1652 if (symptr->flags & FLAG_DO_UTF8) utf8 = 0; 1653 else 1654 /* Should be impossible due to the need_utf8() test */ 1655 Perl_croak(aTHX_ "U0 mode on a byte string"); 1656 } 1657 break; 1658 } 1659 if (len > strend - s) len = strend - s; 1660 if (!checksum) { 1661 if (len && unpack_only_one) len = 1; 1662 EXTEND(SP, len); 1663 EXTEND_MORTAL(len); 1664 } 1665 while (len-- > 0 && s < strend) { 1666 STRLEN retlen; 1667 UV auv; 1668 if (utf8) { 1669 U8 result[UTF8_MAXLEN]; 1670 const char *ptr = s; 1671 STRLEN len; 1672 /* Bug: warns about bad utf8 even if we are short on bytes 1673 and will break out of the loop */ 1674 if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1, 1675 'U')) 1676 break; 1677 len = UTF8SKIP(result); 1678 if (!uni_to_bytes(aTHX_ &ptr, strend, 1679 (char *) &result[1], len-1, 'U')) break; 1680 auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV); 1681 s = ptr; 1682 } else { 1683 auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV); 1684 if (retlen == (STRLEN) -1 || retlen == 0) 1685 Perl_croak(aTHX_ "Malformed UTF-8 string in unpack"); 1686 s += retlen; 1687 } 1688 if (!checksum) 1689 mPUSHu(auv); 1690 else if (checksum > bits_in_uv) 1691 cdouble += (NV) auv; 1692 else 1693 cuv += auv; 1694 } 1695 break; 1696 case 's' | TYPE_IS_SHRIEKING: 1697 #if SHORTSIZE != SIZE16 1698 while (len-- > 0) { 1699 short ashort; 1700 SHIFT_VAR(utf8, s, strend, ashort, datumtype); 1701 DO_BO_UNPACK(ashort, s); 1702 if (!checksum) 1703 mPUSHi(ashort); 1704 else if (checksum > bits_in_uv) 1705 cdouble += (NV)ashort; 1706 else 1707 cuv += ashort; 1708 } 1709 break; 1710 #else 1711 /* Fallthrough! */ 1712 #endif 1713 case 's': 1714 while (len-- > 0) { 1715 I16 ai16; 1716 1717 #if U16SIZE > SIZE16 1718 ai16 = 0; 1719 #endif 1720 SHIFT16(utf8, s, strend, &ai16, datumtype); 1721 DO_BO_UNPACK(ai16, 16); 1722 #if U16SIZE > SIZE16 1723 if (ai16 > 32767) 1724 ai16 -= 65536; 1725 #endif 1726 if (!checksum) 1727 mPUSHi(ai16); 1728 else if (checksum > bits_in_uv) 1729 cdouble += (NV)ai16; 1730 else 1731 cuv += ai16; 1732 } 1733 break; 1734 case 'S' | TYPE_IS_SHRIEKING: 1735 #if SHORTSIZE != SIZE16 1736 while (len-- > 0) { 1737 unsigned short aushort; 1738 SHIFT_VAR(utf8, s, strend, aushort, datumtype); 1739 DO_BO_UNPACK(aushort, s); 1740 if (!checksum) 1741 mPUSHu(aushort); 1742 else if (checksum > bits_in_uv) 1743 cdouble += (NV)aushort; 1744 else 1745 cuv += aushort; 1746 } 1747 break; 1748 #else 1749 /* Fallhrough! */ 1750 #endif 1751 case 'v': 1752 case 'n': 1753 case 'S': 1754 while (len-- > 0) { 1755 U16 au16; 1756 #if U16SIZE > SIZE16 1757 au16 = 0; 1758 #endif 1759 SHIFT16(utf8, s, strend, &au16, datumtype); 1760 DO_BO_UNPACK(au16, 16); 1761 #ifdef HAS_NTOHS 1762 if (datumtype == 'n') 1763 au16 = PerlSock_ntohs(au16); 1764 #endif 1765 #ifdef HAS_VTOHS 1766 if (datumtype == 'v') 1767 au16 = vtohs(au16); 1768 #endif 1769 if (!checksum) 1770 mPUSHu(au16); 1771 else if (checksum > bits_in_uv) 1772 cdouble += (NV) au16; 1773 else 1774 cuv += au16; 1775 } 1776 break; 1777 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1778 case 'v' | TYPE_IS_SHRIEKING: 1779 case 'n' | TYPE_IS_SHRIEKING: 1780 while (len-- > 0) { 1781 I16 ai16; 1782 # if U16SIZE > SIZE16 1783 ai16 = 0; 1784 # endif 1785 SHIFT16(utf8, s, strend, &ai16, datumtype); 1786 # ifdef HAS_NTOHS 1787 if (datumtype == ('n' | TYPE_IS_SHRIEKING)) 1788 ai16 = (I16) PerlSock_ntohs((U16) ai16); 1789 # endif /* HAS_NTOHS */ 1790 # ifdef HAS_VTOHS 1791 if (datumtype == ('v' | TYPE_IS_SHRIEKING)) 1792 ai16 = (I16) vtohs((U16) ai16); 1793 # endif /* HAS_VTOHS */ 1794 if (!checksum) 1795 mPUSHi(ai16); 1796 else if (checksum > bits_in_uv) 1797 cdouble += (NV) ai16; 1798 else 1799 cuv += ai16; 1800 } 1801 break; 1802 #endif /* PERL_PACK_CAN_SHRIEKSIGN */ 1803 case 'i': 1804 case 'i' | TYPE_IS_SHRIEKING: 1805 while (len-- > 0) { 1806 int aint; 1807 SHIFT_VAR(utf8, s, strend, aint, datumtype); 1808 DO_BO_UNPACK(aint, i); 1809 if (!checksum) 1810 mPUSHi(aint); 1811 else if (checksum > bits_in_uv) 1812 cdouble += (NV)aint; 1813 else 1814 cuv += aint; 1815 } 1816 break; 1817 case 'I': 1818 case 'I' | TYPE_IS_SHRIEKING: 1819 while (len-- > 0) { 1820 unsigned int auint; 1821 SHIFT_VAR(utf8, s, strend, auint, datumtype); 1822 DO_BO_UNPACK(auint, i); 1823 if (!checksum) 1824 mPUSHu(auint); 1825 else if (checksum > bits_in_uv) 1826 cdouble += (NV)auint; 1827 else 1828 cuv += auint; 1829 } 1830 break; 1831 case 'j': 1832 while (len-- > 0) { 1833 IV aiv; 1834 SHIFT_VAR(utf8, s, strend, aiv, datumtype); 1835 #if IVSIZE == INTSIZE 1836 DO_BO_UNPACK(aiv, i); 1837 #elif IVSIZE == LONGSIZE 1838 DO_BO_UNPACK(aiv, l); 1839 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 1840 DO_BO_UNPACK(aiv, 64); 1841 #else 1842 Perl_croak(aTHX_ "'j' not supported on this platform"); 1843 #endif 1844 if (!checksum) 1845 mPUSHi(aiv); 1846 else if (checksum > bits_in_uv) 1847 cdouble += (NV)aiv; 1848 else 1849 cuv += aiv; 1850 } 1851 break; 1852 case 'J': 1853 while (len-- > 0) { 1854 UV auv; 1855 SHIFT_VAR(utf8, s, strend, auv, datumtype); 1856 #if IVSIZE == INTSIZE 1857 DO_BO_UNPACK(auv, i); 1858 #elif IVSIZE == LONGSIZE 1859 DO_BO_UNPACK(auv, l); 1860 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 1861 DO_BO_UNPACK(auv, 64); 1862 #else 1863 Perl_croak(aTHX_ "'J' not supported on this platform"); 1864 #endif 1865 if (!checksum) 1866 mPUSHu(auv); 1867 else if (checksum > bits_in_uv) 1868 cdouble += (NV)auv; 1869 else 1870 cuv += auv; 1871 } 1872 break; 1873 case 'l' | TYPE_IS_SHRIEKING: 1874 #if LONGSIZE != SIZE32 1875 while (len-- > 0) { 1876 long along; 1877 SHIFT_VAR(utf8, s, strend, along, datumtype); 1878 DO_BO_UNPACK(along, l); 1879 if (!checksum) 1880 mPUSHi(along); 1881 else if (checksum > bits_in_uv) 1882 cdouble += (NV)along; 1883 else 1884 cuv += along; 1885 } 1886 break; 1887 #else 1888 /* Fallthrough! */ 1889 #endif 1890 case 'l': 1891 while (len-- > 0) { 1892 I32 ai32; 1893 #if U32SIZE > SIZE32 1894 ai32 = 0; 1895 #endif 1896 SHIFT32(utf8, s, strend, &ai32, datumtype); 1897 DO_BO_UNPACK(ai32, 32); 1898 #if U32SIZE > SIZE32 1899 if (ai32 > 2147483647) ai32 -= 4294967296; 1900 #endif 1901 if (!checksum) 1902 mPUSHi(ai32); 1903 else if (checksum > bits_in_uv) 1904 cdouble += (NV)ai32; 1905 else 1906 cuv += ai32; 1907 } 1908 break; 1909 case 'L' | TYPE_IS_SHRIEKING: 1910 #if LONGSIZE != SIZE32 1911 while (len-- > 0) { 1912 unsigned long aulong; 1913 SHIFT_VAR(utf8, s, strend, aulong, datumtype); 1914 DO_BO_UNPACK(aulong, l); 1915 if (!checksum) 1916 mPUSHu(aulong); 1917 else if (checksum > bits_in_uv) 1918 cdouble += (NV)aulong; 1919 else 1920 cuv += aulong; 1921 } 1922 break; 1923 #else 1924 /* Fall through! */ 1925 #endif 1926 case 'V': 1927 case 'N': 1928 case 'L': 1929 while (len-- > 0) { 1930 U32 au32; 1931 #if U32SIZE > SIZE32 1932 au32 = 0; 1933 #endif 1934 SHIFT32(utf8, s, strend, &au32, datumtype); 1935 DO_BO_UNPACK(au32, 32); 1936 #ifdef HAS_NTOHL 1937 if (datumtype == 'N') 1938 au32 = PerlSock_ntohl(au32); 1939 #endif 1940 #ifdef HAS_VTOHL 1941 if (datumtype == 'V') 1942 au32 = vtohl(au32); 1943 #endif 1944 if (!checksum) 1945 mPUSHu(au32); 1946 else if (checksum > bits_in_uv) 1947 cdouble += (NV)au32; 1948 else 1949 cuv += au32; 1950 } 1951 break; 1952 #ifdef PERL_PACK_CAN_SHRIEKSIGN 1953 case 'V' | TYPE_IS_SHRIEKING: 1954 case 'N' | TYPE_IS_SHRIEKING: 1955 while (len-- > 0) { 1956 I32 ai32; 1957 # if U32SIZE > SIZE32 1958 ai32 = 0; 1959 # endif 1960 SHIFT32(utf8, s, strend, &ai32, datumtype); 1961 # ifdef HAS_NTOHL 1962 if (datumtype == ('N' | TYPE_IS_SHRIEKING)) 1963 ai32 = (I32)PerlSock_ntohl((U32)ai32); 1964 # endif 1965 # ifdef HAS_VTOHL 1966 if (datumtype == ('V' | TYPE_IS_SHRIEKING)) 1967 ai32 = (I32)vtohl((U32)ai32); 1968 # endif 1969 if (!checksum) 1970 mPUSHi(ai32); 1971 else if (checksum > bits_in_uv) 1972 cdouble += (NV)ai32; 1973 else 1974 cuv += ai32; 1975 } 1976 break; 1977 #endif /* PERL_PACK_CAN_SHRIEKSIGN */ 1978 case 'p': 1979 while (len-- > 0) { 1980 const char *aptr; 1981 SHIFT_VAR(utf8, s, strend, aptr, datumtype); 1982 DO_BO_UNPACK_PC(aptr); 1983 /* newSVpv generates undef if aptr is NULL */ 1984 mPUSHs(newSVpv(aptr, 0)); 1985 } 1986 break; 1987 case 'w': 1988 { 1989 UV auv = 0; 1990 U32 bytes = 0; 1991 1992 while (len > 0 && s < strend) { 1993 U8 ch; 1994 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 1995 auv = (auv << 7) | (ch & 0x7f); 1996 /* UTF8_IS_XXXXX not right here - using constant 0x80 */ 1997 if (ch < 0x80) { 1998 bytes = 0; 1999 mPUSHu(auv); 2000 len--; 2001 auv = 0; 2002 continue; 2003 } 2004 if (++bytes >= sizeof(UV)) { /* promote to string */ 2005 const char *t; 2006 2007 sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv); 2008 while (s < strend) { 2009 ch = SHIFT_BYTE(utf8, s, strend, datumtype); 2010 sv = mul128(sv, (U8)(ch & 0x7f)); 2011 if (!(ch & 0x80)) { 2012 bytes = 0; 2013 break; 2014 } 2015 } 2016 t = SvPV_nolen_const(sv); 2017 while (*t == '0') 2018 t++; 2019 sv_chop(sv, t); 2020 mPUSHs(sv); 2021 len--; 2022 auv = 0; 2023 } 2024 } 2025 if ((s >= strend) && bytes) 2026 Perl_croak(aTHX_ "Unterminated compressed integer in unpack"); 2027 } 2028 break; 2029 case 'P': 2030 if (symptr->howlen == e_star) 2031 Perl_croak(aTHX_ "'P' must have an explicit size in unpack"); 2032 EXTEND(SP, 1); 2033 if (s + sizeof(char*) <= strend) { 2034 char *aptr; 2035 SHIFT_VAR(utf8, s, strend, aptr, datumtype); 2036 DO_BO_UNPACK_PC(aptr); 2037 /* newSVpvn generates undef if aptr is NULL */ 2038 PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP)); 2039 } 2040 break; 2041 #ifdef HAS_QUAD 2042 case 'q': 2043 while (len-- > 0) { 2044 Quad_t aquad; 2045 SHIFT_VAR(utf8, s, strend, aquad, datumtype); 2046 DO_BO_UNPACK(aquad, 64); 2047 if (!checksum) 2048 mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ? 2049 newSViv((IV)aquad) : newSVnv((NV)aquad)); 2050 else if (checksum > bits_in_uv) 2051 cdouble += (NV)aquad; 2052 else 2053 cuv += aquad; 2054 } 2055 break; 2056 case 'Q': 2057 while (len-- > 0) { 2058 Uquad_t auquad; 2059 SHIFT_VAR(utf8, s, strend, auquad, datumtype); 2060 DO_BO_UNPACK(auquad, 64); 2061 if (!checksum) 2062 mPUSHs(auquad <= UV_MAX ? 2063 newSVuv((UV)auquad) : newSVnv((NV)auquad)); 2064 else if (checksum > bits_in_uv) 2065 cdouble += (NV)auquad; 2066 else 2067 cuv += auquad; 2068 } 2069 break; 2070 #endif /* HAS_QUAD */ 2071 /* float and double added gnb@melba.bby.oz.au 22/11/89 */ 2072 case 'f': 2073 while (len-- > 0) { 2074 float afloat; 2075 SHIFT_VAR(utf8, s, strend, afloat, datumtype); 2076 DO_BO_UNPACK_N(afloat, float); 2077 if (!checksum) 2078 mPUSHn(afloat); 2079 else 2080 cdouble += afloat; 2081 } 2082 break; 2083 case 'd': 2084 while (len-- > 0) { 2085 double adouble; 2086 SHIFT_VAR(utf8, s, strend, adouble, datumtype); 2087 DO_BO_UNPACK_N(adouble, double); 2088 if (!checksum) 2089 mPUSHn(adouble); 2090 else 2091 cdouble += adouble; 2092 } 2093 break; 2094 case 'F': 2095 while (len-- > 0) { 2096 NV anv; 2097 SHIFT_VAR(utf8, s, strend, anv, datumtype); 2098 DO_BO_UNPACK_N(anv, NV); 2099 if (!checksum) 2100 mPUSHn(anv); 2101 else 2102 cdouble += anv; 2103 } 2104 break; 2105 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 2106 case 'D': 2107 while (len-- > 0) { 2108 long double aldouble; 2109 SHIFT_VAR(utf8, s, strend, aldouble, datumtype); 2110 DO_BO_UNPACK_N(aldouble, long double); 2111 if (!checksum) 2112 mPUSHn(aldouble); 2113 else 2114 cdouble += aldouble; 2115 } 2116 break; 2117 #endif 2118 case 'u': 2119 { 2120 const STRLEN l = (STRLEN) (strend - s) * 3 / 4; 2121 sv = sv_2mortal(newSV(l)); 2122 if (l) SvPOK_on(sv); 2123 } 2124 if (utf8) { 2125 while (next_uni_uu(aTHX_ &s, strend, &len)) { 2126 I32 a, b, c, d; 2127 char hunk[3]; 2128 2129 while (len > 0) { 2130 next_uni_uu(aTHX_ &s, strend, &a); 2131 next_uni_uu(aTHX_ &s, strend, &b); 2132 next_uni_uu(aTHX_ &s, strend, &c); 2133 next_uni_uu(aTHX_ &s, strend, &d); 2134 hunk[0] = (char)((a << 2) | (b >> 4)); 2135 hunk[1] = (char)((b << 4) | (c >> 2)); 2136 hunk[2] = (char)((c << 6) | d); 2137 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 2138 len -= 3; 2139 } 2140 if (s < strend) { 2141 if (*s == '\n') { 2142 s++; 2143 } 2144 else { 2145 /* possible checksum byte */ 2146 const char *skip = s+UTF8SKIP(s); 2147 if (skip < strend && *skip == '\n') 2148 s = skip+1; 2149 } 2150 } 2151 } 2152 } else { 2153 while (s < strend && *s > ' ' && ISUUCHAR(*s)) { 2154 I32 a, b, c, d; 2155 char hunk[3]; 2156 2157 len = PL_uudmap[*(U8*)s++] & 077; 2158 while (len > 0) { 2159 if (s < strend && ISUUCHAR(*s)) 2160 a = PL_uudmap[*(U8*)s++] & 077; 2161 else 2162 a = 0; 2163 if (s < strend && ISUUCHAR(*s)) 2164 b = PL_uudmap[*(U8*)s++] & 077; 2165 else 2166 b = 0; 2167 if (s < strend && ISUUCHAR(*s)) 2168 c = PL_uudmap[*(U8*)s++] & 077; 2169 else 2170 c = 0; 2171 if (s < strend && ISUUCHAR(*s)) 2172 d = PL_uudmap[*(U8*)s++] & 077; 2173 else 2174 d = 0; 2175 hunk[0] = (char)((a << 2) | (b >> 4)); 2176 hunk[1] = (char)((b << 4) | (c >> 2)); 2177 hunk[2] = (char)((c << 6) | d); 2178 sv_catpvn(sv, hunk, (len > 3) ? 3 : len); 2179 len -= 3; 2180 } 2181 if (*s == '\n') 2182 s++; 2183 else /* possible checksum byte */ 2184 if (s + 1 < strend && s[1] == '\n') 2185 s += 2; 2186 } 2187 } 2188 XPUSHs(sv); 2189 break; 2190 } 2191 2192 if (checksum) { 2193 if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) || 2194 (checksum > bits_in_uv && 2195 strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) { 2196 NV trouble, anv; 2197 2198 anv = (NV) (1 << (checksum & 15)); 2199 while (checksum >= 16) { 2200 checksum -= 16; 2201 anv *= 65536.0; 2202 } 2203 while (cdouble < 0.0) 2204 cdouble += anv; 2205 cdouble = Perl_modf(cdouble / anv, &trouble) * anv; 2206 sv = newSVnv(cdouble); 2207 } 2208 else { 2209 if (checksum < bits_in_uv) { 2210 UV mask = ((UV)1 << checksum) - 1; 2211 cuv &= mask; 2212 } 2213 sv = newSVuv(cuv); 2214 } 2215 mXPUSHs(sv); 2216 checksum = 0; 2217 } 2218 2219 if (symptr->flags & FLAG_SLASH){ 2220 if (SP - PL_stack_base - start_sp_offset <= 0) 2221 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack"); 2222 if( next_symbol(symptr) ){ 2223 if( symptr->howlen == e_number ) 2224 Perl_croak(aTHX_ "Count after length/code in unpack" ); 2225 if( beyond ){ 2226 /* ...end of char buffer then no decent length available */ 2227 Perl_croak(aTHX_ "length/code after end of string in unpack" ); 2228 } else { 2229 /* take top of stack (hope it's numeric) */ 2230 len = POPi; 2231 if( len < 0 ) 2232 Perl_croak(aTHX_ "Negative '/' count in unpack" ); 2233 } 2234 } else { 2235 Perl_croak(aTHX_ "Code missing after '/' in unpack" ); 2236 } 2237 datumtype = symptr->code; 2238 explicit_length = FALSE; 2239 goto redo_switch; 2240 } 2241 } 2242 2243 if (new_s) 2244 *new_s = s; 2245 PUTBACK; 2246 return SP - PL_stack_base - start_sp_offset; 2247 } 2248 2249 PP(pp_unpack) 2250 { 2251 dVAR; 2252 dSP; 2253 dPOPPOPssrl; 2254 I32 gimme = GIMME_V; 2255 STRLEN llen; 2256 STRLEN rlen; 2257 const char *pat = SvPV_const(left, llen); 2258 const char *s = SvPV_const(right, rlen); 2259 const char *strend = s + rlen; 2260 const char *patend = pat + llen; 2261 I32 cnt; 2262 2263 PUTBACK; 2264 cnt = unpackstring(pat, patend, s, strend, 2265 ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0) 2266 | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0)); 2267 2268 SPAGAIN; 2269 if ( !cnt && gimme == G_SCALAR ) 2270 PUSHs(&PL_sv_undef); 2271 RETURN; 2272 } 2273 2274 STATIC U8 * 2275 doencodes(U8 *h, const char *s, I32 len) 2276 { 2277 *h++ = PL_uuemap[len]; 2278 while (len > 2) { 2279 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 2280 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))]; 2281 *h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))]; 2282 *h++ = PL_uuemap[(077 & (s[2] & 077))]; 2283 s += 3; 2284 len -= 3; 2285 } 2286 if (len > 0) { 2287 const char r = (len > 1 ? s[1] : '\0'); 2288 *h++ = PL_uuemap[(077 & (s[0] >> 2))]; 2289 *h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))]; 2290 *h++ = PL_uuemap[(077 & ((r << 2) & 074))]; 2291 *h++ = PL_uuemap[0]; 2292 } 2293 *h++ = '\n'; 2294 return h; 2295 } 2296 2297 STATIC SV * 2298 S_is_an_int(pTHX_ const char *s, STRLEN l) 2299 { 2300 SV *result = newSVpvn(s, l); 2301 char *const result_c = SvPV_nolen(result); /* convenience */ 2302 char *out = result_c; 2303 bool skip = 1; 2304 bool ignore = 0; 2305 2306 PERL_ARGS_ASSERT_IS_AN_INT; 2307 2308 while (*s) { 2309 switch (*s) { 2310 case ' ': 2311 break; 2312 case '+': 2313 if (!skip) { 2314 SvREFCNT_dec(result); 2315 return (NULL); 2316 } 2317 break; 2318 case '0': 2319 case '1': 2320 case '2': 2321 case '3': 2322 case '4': 2323 case '5': 2324 case '6': 2325 case '7': 2326 case '8': 2327 case '9': 2328 skip = 0; 2329 if (!ignore) { 2330 *(out++) = *s; 2331 } 2332 break; 2333 case '.': 2334 ignore = 1; 2335 break; 2336 default: 2337 SvREFCNT_dec(result); 2338 return (NULL); 2339 } 2340 s++; 2341 } 2342 *(out++) = '\0'; 2343 SvCUR_set(result, out - result_c); 2344 return (result); 2345 } 2346 2347 /* pnum must be '\0' terminated */ 2348 STATIC int 2349 S_div128(pTHX_ SV *pnum, bool *done) 2350 { 2351 STRLEN len; 2352 char * const s = SvPV(pnum, len); 2353 char *t = s; 2354 int m = 0; 2355 2356 PERL_ARGS_ASSERT_DIV128; 2357 2358 *done = 1; 2359 while (*t) { 2360 const int i = m * 10 + (*t - '0'); 2361 const int r = (i >> 7); /* r < 10 */ 2362 m = i & 0x7F; 2363 if (r) { 2364 *done = 0; 2365 } 2366 *(t++) = '0' + r; 2367 } 2368 *(t++) = '\0'; 2369 SvCUR_set(pnum, (STRLEN) (t - s)); 2370 return (m); 2371 } 2372 2373 /* 2374 =for apidoc packlist 2375 2376 The engine implementing pack() Perl function. 2377 2378 =cut 2379 */ 2380 2381 void 2382 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist ) 2383 { 2384 dVAR; 2385 tempsym_t sym; 2386 2387 PERL_ARGS_ASSERT_PACKLIST; 2388 2389 TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK); 2390 2391 /* We're going to do changes through SvPVX(cat). Make sure it's valid. 2392 Also make sure any UTF8 flag is loaded */ 2393 SvPV_force_nolen(cat); 2394 if (DO_UTF8(cat)) 2395 sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8; 2396 2397 (void)pack_rec( cat, &sym, beglist, endlist ); 2398 } 2399 2400 /* like sv_utf8_upgrade, but also repoint the group start markers */ 2401 STATIC void 2402 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) { 2403 STRLEN len; 2404 tempsym_t *group; 2405 const char *from_ptr, *from_start, *from_end, **marks, **m; 2406 char *to_start, *to_ptr; 2407 2408 if (SvUTF8(sv)) return; 2409 2410 from_start = SvPVX_const(sv); 2411 from_end = from_start + SvCUR(sv); 2412 for (from_ptr = from_start; from_ptr < from_end; from_ptr++) 2413 if (!NATIVE_IS_INVARIANT(*from_ptr)) break; 2414 if (from_ptr == from_end) { 2415 /* Simple case: no character needs to be changed */ 2416 SvUTF8_on(sv); 2417 return; 2418 } 2419 2420 len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1; 2421 Newx(to_start, len, char); 2422 Copy(from_start, to_start, from_ptr-from_start, char); 2423 to_ptr = to_start + (from_ptr-from_start); 2424 2425 Newx(marks, sym_ptr->level+2, const char *); 2426 for (group=sym_ptr; group; group = group->previous) 2427 marks[group->level] = from_start + group->strbeg; 2428 marks[sym_ptr->level+1] = from_end+1; 2429 for (m = marks; *m < from_ptr; m++) 2430 *m = to_start + (*m-from_start); 2431 2432 for (;from_ptr < from_end; from_ptr++) { 2433 while (*m == from_ptr) *m++ = to_ptr; 2434 to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr); 2435 } 2436 *to_ptr = 0; 2437 2438 while (*m == from_ptr) *m++ = to_ptr; 2439 if (m != marks + sym_ptr->level+1) { 2440 Safefree(marks); 2441 Safefree(to_start); 2442 Perl_croak(aTHX_ "Assertion: marks beyond string end"); 2443 } 2444 for (group=sym_ptr; group; group = group->previous) 2445 group->strbeg = marks[group->level] - to_start; 2446 Safefree(marks); 2447 2448 if (SvOOK(sv)) { 2449 if (SvIVX(sv)) { 2450 SvLEN_set(sv, SvLEN(sv) + SvIVX(sv)); 2451 from_start -= SvIVX(sv); 2452 SvIV_set(sv, 0); 2453 } 2454 SvFLAGS(sv) &= ~SVf_OOK; 2455 } 2456 if (SvLEN(sv) != 0) 2457 Safefree(from_start); 2458 SvPV_set(sv, to_start); 2459 SvCUR_set(sv, to_ptr - to_start); 2460 SvLEN_set(sv, len); 2461 SvUTF8_on(sv); 2462 } 2463 2464 /* Exponential string grower. Makes string extension effectively O(n) 2465 needed says how many extra bytes we need (not counting the final '\0') 2466 Only grows the string if there is an actual lack of space 2467 */ 2468 STATIC char * 2469 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) { 2470 const STRLEN cur = SvCUR(sv); 2471 const STRLEN len = SvLEN(sv); 2472 STRLEN extend; 2473 2474 PERL_ARGS_ASSERT_SV_EXP_GROW; 2475 2476 if (len - cur > needed) return SvPVX(sv); 2477 extend = needed > len ? needed : len; 2478 return SvGROW(sv, len+extend+1); 2479 } 2480 2481 STATIC 2482 SV ** 2483 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) 2484 { 2485 dVAR; 2486 tempsym_t lookahead; 2487 I32 items = endlist - beglist; 2488 bool found = next_symbol(symptr); 2489 bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0; 2490 bool warn_utf8 = ckWARN(WARN_UTF8); 2491 2492 PERL_ARGS_ASSERT_PACK_REC; 2493 2494 if (symptr->level == 0 && found && symptr->code == 'U') { 2495 marked_upgrade(aTHX_ cat, symptr); 2496 symptr->flags |= FLAG_DO_UTF8; 2497 utf8 = 0; 2498 } 2499 symptr->strbeg = SvCUR(cat); 2500 2501 while (found) { 2502 SV *fromstr; 2503 STRLEN fromlen; 2504 I32 len; 2505 SV *lengthcode = NULL; 2506 I32 datumtype = symptr->code; 2507 howlen_t howlen = symptr->howlen; 2508 char *start = SvPVX(cat); 2509 char *cur = start + SvCUR(cat); 2510 2511 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no) 2512 2513 switch (howlen) { 2514 case e_star: 2515 len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ? 2516 0 : items; 2517 break; 2518 default: 2519 /* e_no_len and e_number */ 2520 len = symptr->length; 2521 break; 2522 } 2523 2524 if (len) { 2525 packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)]; 2526 2527 if (props && !(props & PACK_SIZE_UNPREDICTABLE)) { 2528 /* We can process this letter. */ 2529 STRLEN size = props & PACK_SIZE_MASK; 2530 GROWING(utf8, cat, start, cur, (STRLEN) len * size); 2531 } 2532 } 2533 2534 /* Look ahead for next symbol. Do we have code/code? */ 2535 lookahead = *symptr; 2536 found = next_symbol(&lookahead); 2537 if (symptr->flags & FLAG_SLASH) { 2538 IV count; 2539 if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack"); 2540 if (strchr("aAZ", lookahead.code)) { 2541 if (lookahead.howlen == e_number) count = lookahead.length; 2542 else { 2543 if (items > 0) { 2544 if (SvGAMAGIC(*beglist)) { 2545 /* Avoid reading the active data more than once 2546 by copying it to a temporary. */ 2547 STRLEN len; 2548 const char *const pv = SvPV_const(*beglist, len); 2549 SV *const temp 2550 = newSVpvn_flags(pv, len, 2551 SVs_TEMP | SvUTF8(*beglist)); 2552 *beglist = temp; 2553 } 2554 count = DO_UTF8(*beglist) ? 2555 sv_len_utf8(*beglist) : sv_len(*beglist); 2556 } 2557 else count = 0; 2558 if (lookahead.code == 'Z') count++; 2559 } 2560 } else { 2561 if (lookahead.howlen == e_number && lookahead.length < items) 2562 count = lookahead.length; 2563 else count = items; 2564 } 2565 lookahead.howlen = e_number; 2566 lookahead.length = count; 2567 lengthcode = sv_2mortal(newSViv(count)); 2568 } 2569 2570 /* Code inside the switch must take care to properly update 2571 cat (CUR length and '\0' termination) if it updated *cur and 2572 doesn't simply leave using break */ 2573 switch(TYPE_NO_ENDIANNESS(datumtype)) { 2574 default: 2575 Perl_croak(aTHX_ "Invalid type '%c' in pack", 2576 (int) TYPE_NO_MODIFIERS(datumtype)); 2577 case '%': 2578 Perl_croak(aTHX_ "'%%' may not be used in pack"); 2579 { 2580 char *from; 2581 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2582 case '.' | TYPE_IS_SHRIEKING: 2583 #endif 2584 case '.': 2585 if (howlen == e_star) from = start; 2586 else if (len == 0) from = cur; 2587 else { 2588 tempsym_t *group = symptr; 2589 2590 while (--len && group) group = group->previous; 2591 from = group ? start + group->strbeg : start; 2592 } 2593 fromstr = NEXTFROM; 2594 len = SvIV(fromstr); 2595 goto resize; 2596 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2597 case '@' | TYPE_IS_SHRIEKING: 2598 #endif 2599 case '@': 2600 from = start + symptr->strbeg; 2601 resize: 2602 #ifdef PERL_PACK_CAN_SHRIEKSIGN 2603 if (utf8 && !(datumtype & TYPE_IS_SHRIEKING)) 2604 #else /* PERL_PACK_CAN_SHRIEKSIGN */ 2605 if (utf8) 2606 #endif 2607 if (len >= 0) { 2608 while (len && from < cur) { 2609 from += UTF8SKIP(from); 2610 len--; 2611 } 2612 if (from > cur) 2613 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2614 if (len) { 2615 /* Here we know from == cur */ 2616 grow: 2617 GROWING(0, cat, start, cur, len); 2618 Zero(cur, len, char); 2619 cur += len; 2620 } else if (from < cur) { 2621 len = cur - from; 2622 goto shrink; 2623 } else goto no_change; 2624 } else { 2625 cur = from; 2626 len = -len; 2627 goto utf8_shrink; 2628 } 2629 else { 2630 len -= cur - from; 2631 if (len > 0) goto grow; 2632 if (len == 0) goto no_change; 2633 len = -len; 2634 goto shrink; 2635 } 2636 break; 2637 } 2638 case '(': { 2639 tempsym_t savsym = *symptr; 2640 U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags); 2641 symptr->flags |= group_modifiers; 2642 symptr->patend = savsym.grpend; 2643 symptr->level++; 2644 symptr->previous = &lookahead; 2645 while (len--) { 2646 U32 was_utf8; 2647 if (utf8) symptr->flags |= FLAG_PARSE_UTF8; 2648 else symptr->flags &= ~FLAG_PARSE_UTF8; 2649 was_utf8 = SvUTF8(cat); 2650 symptr->patptr = savsym.grpbeg; 2651 beglist = pack_rec(cat, symptr, beglist, endlist); 2652 if (SvUTF8(cat) != was_utf8) 2653 /* This had better be an upgrade while in utf8==0 mode */ 2654 utf8 = 1; 2655 2656 if (savsym.howlen == e_star && beglist == endlist) 2657 break; /* No way to continue */ 2658 } 2659 items = endlist - beglist; 2660 lookahead.flags = symptr->flags & ~group_modifiers; 2661 goto no_change; 2662 } 2663 case 'X' | TYPE_IS_SHRIEKING: 2664 if (!len) /* Avoid division by 0 */ 2665 len = 1; 2666 if (utf8) { 2667 char *hop, *last; 2668 I32 l = len; 2669 hop = last = start; 2670 while (hop < cur) { 2671 hop += UTF8SKIP(hop); 2672 if (--l == 0) { 2673 last = hop; 2674 l = len; 2675 } 2676 } 2677 if (last > cur) 2678 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2679 cur = last; 2680 break; 2681 } 2682 len = (cur-start) % len; 2683 /* FALL THROUGH */ 2684 case 'X': 2685 if (utf8) { 2686 if (len < 1) goto no_change; 2687 utf8_shrink: 2688 while (len > 0) { 2689 if (cur <= start) 2690 Perl_croak(aTHX_ "'%c' outside of string in pack", 2691 (int) TYPE_NO_MODIFIERS(datumtype)); 2692 while (--cur, UTF8_IS_CONTINUATION(*cur)) { 2693 if (cur <= start) 2694 Perl_croak(aTHX_ "'%c' outside of string in pack", 2695 (int) TYPE_NO_MODIFIERS(datumtype)); 2696 } 2697 len--; 2698 } 2699 } else { 2700 shrink: 2701 if (cur - start < len) 2702 Perl_croak(aTHX_ "'%c' outside of string in pack", 2703 (int) TYPE_NO_MODIFIERS(datumtype)); 2704 cur -= len; 2705 } 2706 if (cur < start+symptr->strbeg) { 2707 /* Make sure group starts don't point into the void */ 2708 tempsym_t *group; 2709 const STRLEN length = cur-start; 2710 for (group = symptr; 2711 group && length < group->strbeg; 2712 group = group->previous) group->strbeg = length; 2713 lookahead.strbeg = length; 2714 } 2715 break; 2716 case 'x' | TYPE_IS_SHRIEKING: { 2717 I32 ai32; 2718 if (!len) /* Avoid division by 0 */ 2719 len = 1; 2720 if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len; 2721 else ai32 = (cur - start) % len; 2722 if (ai32 == 0) goto no_change; 2723 len -= ai32; 2724 } 2725 /* FALL THROUGH */ 2726 case 'x': 2727 goto grow; 2728 case 'A': 2729 case 'Z': 2730 case 'a': { 2731 const char *aptr; 2732 2733 fromstr = NEXTFROM; 2734 aptr = SvPV_const(fromstr, fromlen); 2735 if (DO_UTF8(fromstr)) { 2736 const char *end, *s; 2737 2738 if (!utf8 && !SvUTF8(cat)) { 2739 marked_upgrade(aTHX_ cat, symptr); 2740 lookahead.flags |= FLAG_DO_UTF8; 2741 lookahead.strbeg = symptr->strbeg; 2742 utf8 = 1; 2743 start = SvPVX(cat); 2744 cur = start + SvCUR(cat); 2745 } 2746 if (howlen == e_star) { 2747 if (utf8) goto string_copy; 2748 len = fromlen+1; 2749 } 2750 s = aptr; 2751 end = aptr + fromlen; 2752 fromlen = datumtype == 'Z' ? len-1 : len; 2753 while ((I32) fromlen > 0 && s < end) { 2754 s += UTF8SKIP(s); 2755 fromlen--; 2756 } 2757 if (s > end) 2758 Perl_croak(aTHX_ "Malformed UTF-8 string in pack"); 2759 if (utf8) { 2760 len = fromlen; 2761 if (datumtype == 'Z') len++; 2762 fromlen = s-aptr; 2763 len += fromlen; 2764 2765 goto string_copy; 2766 } 2767 fromlen = len - fromlen; 2768 if (datumtype == 'Z') fromlen--; 2769 if (howlen == e_star) { 2770 len = fromlen; 2771 if (datumtype == 'Z') len++; 2772 } 2773 GROWING(0, cat, start, cur, len); 2774 if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen, 2775 datumtype | TYPE_IS_PACK)) 2776 Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available"); 2777 cur += fromlen; 2778 len -= fromlen; 2779 } else if (utf8) { 2780 if (howlen == e_star) { 2781 len = fromlen; 2782 if (datumtype == 'Z') len++; 2783 } 2784 if (len <= (I32) fromlen) { 2785 fromlen = len; 2786 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2787 } 2788 /* assumes a byte expands to at most UTF8_EXPAND bytes on 2789 upgrade, so: 2790 expected_length <= from_len*UTF8_EXPAND + (len-from_len) */ 2791 GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len); 2792 len -= fromlen; 2793 while (fromlen > 0) { 2794 cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr); 2795 aptr++; 2796 fromlen--; 2797 } 2798 } else { 2799 string_copy: 2800 if (howlen == e_star) { 2801 len = fromlen; 2802 if (datumtype == 'Z') len++; 2803 } 2804 if (len <= (I32) fromlen) { 2805 fromlen = len; 2806 if (datumtype == 'Z' && fromlen > 0) fromlen--; 2807 } 2808 GROWING(0, cat, start, cur, len); 2809 Copy(aptr, cur, fromlen, char); 2810 cur += fromlen; 2811 len -= fromlen; 2812 } 2813 memset(cur, datumtype == 'A' ? ' ' : '\0', len); 2814 cur += len; 2815 SvTAINT(cat); 2816 break; 2817 } 2818 case 'B': 2819 case 'b': { 2820 const char *str, *end; 2821 I32 l, field_len; 2822 U8 bits; 2823 bool utf8_source; 2824 U32 utf8_flags; 2825 2826 fromstr = NEXTFROM; 2827 str = SvPV_const(fromstr, fromlen); 2828 end = str + fromlen; 2829 if (DO_UTF8(fromstr)) { 2830 utf8_source = TRUE; 2831 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2832 } else { 2833 utf8_source = FALSE; 2834 utf8_flags = 0; /* Unused, but keep compilers happy */ 2835 } 2836 if (howlen == e_star) len = fromlen; 2837 field_len = (len+7)/8; 2838 GROWING(utf8, cat, start, cur, field_len); 2839 if (len > (I32)fromlen) len = fromlen; 2840 bits = 0; 2841 l = 0; 2842 if (datumtype == 'B') 2843 while (l++ < len) { 2844 if (utf8_source) { 2845 UV val = 0; 2846 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2847 bits |= val & 1; 2848 } else bits |= *str++ & 1; 2849 if (l & 7) bits <<= 1; 2850 else { 2851 PUSH_BYTE(utf8, cur, bits); 2852 bits = 0; 2853 } 2854 } 2855 else 2856 /* datumtype == 'b' */ 2857 while (l++ < len) { 2858 if (utf8_source) { 2859 UV val = 0; 2860 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2861 if (val & 1) bits |= 0x80; 2862 } else if (*str++ & 1) 2863 bits |= 0x80; 2864 if (l & 7) bits >>= 1; 2865 else { 2866 PUSH_BYTE(utf8, cur, bits); 2867 bits = 0; 2868 } 2869 } 2870 l--; 2871 if (l & 7) { 2872 if (datumtype == 'B') 2873 bits <<= 7 - (l & 7); 2874 else 2875 bits >>= 7 - (l & 7); 2876 PUSH_BYTE(utf8, cur, bits); 2877 l += 7; 2878 } 2879 /* Determine how many chars are left in the requested field */ 2880 l /= 8; 2881 if (howlen == e_star) field_len = 0; 2882 else field_len -= l; 2883 Zero(cur, field_len, char); 2884 cur += field_len; 2885 break; 2886 } 2887 case 'H': 2888 case 'h': { 2889 const char *str, *end; 2890 I32 l, field_len; 2891 U8 bits; 2892 bool utf8_source; 2893 U32 utf8_flags; 2894 2895 fromstr = NEXTFROM; 2896 str = SvPV_const(fromstr, fromlen); 2897 end = str + fromlen; 2898 if (DO_UTF8(fromstr)) { 2899 utf8_source = TRUE; 2900 utf8_flags = warn_utf8 ? 0 : UTF8_ALLOW_ANY; 2901 } else { 2902 utf8_source = FALSE; 2903 utf8_flags = 0; /* Unused, but keep compilers happy */ 2904 } 2905 if (howlen == e_star) len = fromlen; 2906 field_len = (len+1)/2; 2907 GROWING(utf8, cat, start, cur, field_len); 2908 if (!utf8 && len > (I32)fromlen) len = fromlen; 2909 bits = 0; 2910 l = 0; 2911 if (datumtype == 'H') 2912 while (l++ < len) { 2913 if (utf8_source) { 2914 UV val = 0; 2915 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2916 if (val < 256 && isALPHA(val)) 2917 bits |= (val + 9) & 0xf; 2918 else 2919 bits |= val & 0xf; 2920 } else if (isALPHA(*str)) 2921 bits |= (*str++ + 9) & 0xf; 2922 else 2923 bits |= *str++ & 0xf; 2924 if (l & 1) bits <<= 4; 2925 else { 2926 PUSH_BYTE(utf8, cur, bits); 2927 bits = 0; 2928 } 2929 } 2930 else 2931 while (l++ < len) { 2932 if (utf8_source) { 2933 UV val = 0; 2934 NEXT_UNI_VAL(val, cur, str, end, utf8_flags); 2935 if (val < 256 && isALPHA(val)) 2936 bits |= ((val + 9) & 0xf) << 4; 2937 else 2938 bits |= (val & 0xf) << 4; 2939 } else if (isALPHA(*str)) 2940 bits |= ((*str++ + 9) & 0xf) << 4; 2941 else 2942 bits |= (*str++ & 0xf) << 4; 2943 if (l & 1) bits >>= 4; 2944 else { 2945 PUSH_BYTE(utf8, cur, bits); 2946 bits = 0; 2947 } 2948 } 2949 l--; 2950 if (l & 1) { 2951 PUSH_BYTE(utf8, cur, bits); 2952 l++; 2953 } 2954 /* Determine how many chars are left in the requested field */ 2955 l /= 2; 2956 if (howlen == e_star) field_len = 0; 2957 else field_len -= l; 2958 Zero(cur, field_len, char); 2959 cur += field_len; 2960 break; 2961 } 2962 case 'c': 2963 while (len-- > 0) { 2964 IV aiv; 2965 fromstr = NEXTFROM; 2966 aiv = SvIV(fromstr); 2967 if ((-128 > aiv || aiv > 127) && 2968 ckWARN(WARN_PACK)) 2969 Perl_warner(aTHX_ packWARN(WARN_PACK), 2970 "Character in 'c' format wrapped in pack"); 2971 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2972 } 2973 break; 2974 case 'C': 2975 if (len == 0) { 2976 utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0; 2977 break; 2978 } 2979 while (len-- > 0) { 2980 IV aiv; 2981 fromstr = NEXTFROM; 2982 aiv = SvIV(fromstr); 2983 if ((0 > aiv || aiv > 0xff) && 2984 ckWARN(WARN_PACK)) 2985 Perl_warner(aTHX_ packWARN(WARN_PACK), 2986 "Character in 'C' format wrapped in pack"); 2987 PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff)); 2988 } 2989 break; 2990 case 'W': { 2991 char *end; 2992 U8 in_bytes = (U8)IN_BYTES; 2993 2994 end = start+SvLEN(cat)-1; 2995 if (utf8) end -= UTF8_MAXLEN-1; 2996 while (len-- > 0) { 2997 UV auv; 2998 fromstr = NEXTFROM; 2999 auv = SvUV(fromstr); 3000 if (in_bytes) auv = auv % 0x100; 3001 if (utf8) { 3002 W_utf8: 3003 if (cur > end) { 3004 *cur = '\0'; 3005 SvCUR_set(cat, cur - start); 3006 3007 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 3008 end = start+SvLEN(cat)-UTF8_MAXLEN; 3009 } 3010 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, 3011 NATIVE_TO_UNI(auv), 3012 warn_utf8 ? 3013 0 : UNICODE_ALLOW_ANY); 3014 } else { 3015 if (auv >= 0x100) { 3016 if (!SvUTF8(cat)) { 3017 *cur = '\0'; 3018 SvCUR_set(cat, cur - start); 3019 marked_upgrade(aTHX_ cat, symptr); 3020 lookahead.flags |= FLAG_DO_UTF8; 3021 lookahead.strbeg = symptr->strbeg; 3022 utf8 = 1; 3023 start = SvPVX(cat); 3024 cur = start + SvCUR(cat); 3025 end = start+SvLEN(cat)-UTF8_MAXLEN; 3026 goto W_utf8; 3027 } 3028 if (ckWARN(WARN_PACK)) 3029 Perl_warner(aTHX_ packWARN(WARN_PACK), 3030 "Character in 'W' format wrapped in pack"); 3031 auv &= 0xff; 3032 } 3033 if (cur >= end) { 3034 *cur = '\0'; 3035 SvCUR_set(cat, cur - start); 3036 GROWING(0, cat, start, cur, len+1); 3037 end = start+SvLEN(cat)-1; 3038 } 3039 *(U8 *) cur++ = (U8)auv; 3040 } 3041 } 3042 break; 3043 } 3044 case 'U': { 3045 char *end; 3046 3047 if (len == 0) { 3048 if (!(symptr->flags & FLAG_DO_UTF8)) { 3049 marked_upgrade(aTHX_ cat, symptr); 3050 lookahead.flags |= FLAG_DO_UTF8; 3051 lookahead.strbeg = symptr->strbeg; 3052 } 3053 utf8 = 0; 3054 goto no_change; 3055 } 3056 3057 end = start+SvLEN(cat); 3058 if (!utf8) end -= UTF8_MAXLEN; 3059 while (len-- > 0) { 3060 UV auv; 3061 fromstr = NEXTFROM; 3062 auv = SvUV(fromstr); 3063 if (utf8) { 3064 U8 buffer[UTF8_MAXLEN], *endb; 3065 endb = uvuni_to_utf8_flags(buffer, auv, 3066 warn_utf8 ? 3067 0 : UNICODE_ALLOW_ANY); 3068 if (cur+(endb-buffer)*UTF8_EXPAND >= end) { 3069 *cur = '\0'; 3070 SvCUR_set(cat, cur - start); 3071 GROWING(0, cat, start, cur, 3072 len+(endb-buffer)*UTF8_EXPAND); 3073 end = start+SvLEN(cat); 3074 } 3075 cur = bytes_to_uni(buffer, endb-buffer, cur); 3076 } else { 3077 if (cur >= end) { 3078 *cur = '\0'; 3079 SvCUR_set(cat, cur - start); 3080 GROWING(0, cat, start, cur, len+UTF8_MAXLEN); 3081 end = start+SvLEN(cat)-UTF8_MAXLEN; 3082 } 3083 cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv, 3084 warn_utf8 ? 3085 0 : UNICODE_ALLOW_ANY); 3086 } 3087 } 3088 break; 3089 } 3090 /* Float and double added by gnb@melba.bby.oz.au 22/11/89 */ 3091 case 'f': 3092 while (len-- > 0) { 3093 float afloat; 3094 NV anv; 3095 fromstr = NEXTFROM; 3096 anv = SvNV(fromstr); 3097 #ifdef __VOS__ 3098 /* VOS does not automatically map a floating-point overflow 3099 during conversion from double to float into infinity, so we 3100 do it by hand. This code should either be generalized for 3101 any OS that needs it, or removed if and when VOS implements 3102 posix-976 (suggestion to support mapping to infinity). 3103 Paul.Green@stratus.com 02-04-02. */ 3104 { 3105 extern const float _float_constants[]; 3106 if (anv > FLT_MAX) 3107 afloat = _float_constants[0]; /* single prec. inf. */ 3108 else if (anv < -FLT_MAX) 3109 afloat = _float_constants[0]; /* single prec. inf. */ 3110 else afloat = (float) anv; 3111 } 3112 #else /* __VOS__ */ 3113 # if defined(VMS) && !defined(__IEEE_FP) 3114 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 3115 * on Alpha; fake it if we don't have them. 3116 */ 3117 if (anv > FLT_MAX) 3118 afloat = FLT_MAX; 3119 else if (anv < -FLT_MAX) 3120 afloat = -FLT_MAX; 3121 else afloat = (float)anv; 3122 # else 3123 afloat = (float)anv; 3124 # endif 3125 #endif /* __VOS__ */ 3126 DO_BO_PACK_N(afloat, float); 3127 PUSH_VAR(utf8, cur, afloat); 3128 } 3129 break; 3130 case 'd': 3131 while (len-- > 0) { 3132 double adouble; 3133 NV anv; 3134 fromstr = NEXTFROM; 3135 anv = SvNV(fromstr); 3136 #ifdef __VOS__ 3137 /* VOS does not automatically map a floating-point overflow 3138 during conversion from long double to double into infinity, 3139 so we do it by hand. This code should either be generalized 3140 for any OS that needs it, or removed if and when VOS 3141 implements posix-976 (suggestion to support mapping to 3142 infinity). Paul.Green@stratus.com 02-04-02. */ 3143 { 3144 extern const double _double_constants[]; 3145 if (anv > DBL_MAX) 3146 adouble = _double_constants[0]; /* double prec. inf. */ 3147 else if (anv < -DBL_MAX) 3148 adouble = _double_constants[0]; /* double prec. inf. */ 3149 else adouble = (double) anv; 3150 } 3151 #else /* __VOS__ */ 3152 # if defined(VMS) && !defined(__IEEE_FP) 3153 /* IEEE fp overflow shenanigans are unavailable on VAX and optional 3154 * on Alpha; fake it if we don't have them. 3155 */ 3156 if (anv > DBL_MAX) 3157 adouble = DBL_MAX; 3158 else if (anv < -DBL_MAX) 3159 adouble = -DBL_MAX; 3160 else adouble = (double)anv; 3161 # else 3162 adouble = (double)anv; 3163 # endif 3164 #endif /* __VOS__ */ 3165 DO_BO_PACK_N(adouble, double); 3166 PUSH_VAR(utf8, cur, adouble); 3167 } 3168 break; 3169 case 'F': { 3170 NV anv; 3171 Zero(&anv, 1, NV); /* can be long double with unused bits */ 3172 while (len-- > 0) { 3173 fromstr = NEXTFROM; 3174 anv = SvNV(fromstr); 3175 DO_BO_PACK_N(anv, NV); 3176 PUSH_VAR(utf8, cur, anv); 3177 } 3178 break; 3179 } 3180 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) 3181 case 'D': { 3182 long double aldouble; 3183 /* long doubles can have unused bits, which may be nonzero */ 3184 Zero(&aldouble, 1, long double); 3185 while (len-- > 0) { 3186 fromstr = NEXTFROM; 3187 aldouble = (long double)SvNV(fromstr); 3188 DO_BO_PACK_N(aldouble, long double); 3189 PUSH_VAR(utf8, cur, aldouble); 3190 } 3191 break; 3192 } 3193 #endif 3194 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3195 case 'n' | TYPE_IS_SHRIEKING: 3196 #endif 3197 case 'n': 3198 while (len-- > 0) { 3199 I16 ai16; 3200 fromstr = NEXTFROM; 3201 ai16 = (I16)SvIV(fromstr); 3202 #ifdef HAS_HTONS 3203 ai16 = PerlSock_htons(ai16); 3204 #endif 3205 PUSH16(utf8, cur, &ai16); 3206 } 3207 break; 3208 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3209 case 'v' | TYPE_IS_SHRIEKING: 3210 #endif 3211 case 'v': 3212 while (len-- > 0) { 3213 I16 ai16; 3214 fromstr = NEXTFROM; 3215 ai16 = (I16)SvIV(fromstr); 3216 #ifdef HAS_HTOVS 3217 ai16 = htovs(ai16); 3218 #endif 3219 PUSH16(utf8, cur, &ai16); 3220 } 3221 break; 3222 case 'S' | TYPE_IS_SHRIEKING: 3223 #if SHORTSIZE != SIZE16 3224 while (len-- > 0) { 3225 unsigned short aushort; 3226 fromstr = NEXTFROM; 3227 aushort = SvUV(fromstr); 3228 DO_BO_PACK(aushort, s); 3229 PUSH_VAR(utf8, cur, aushort); 3230 } 3231 break; 3232 #else 3233 /* Fall through! */ 3234 #endif 3235 case 'S': 3236 while (len-- > 0) { 3237 U16 au16; 3238 fromstr = NEXTFROM; 3239 au16 = (U16)SvUV(fromstr); 3240 DO_BO_PACK(au16, 16); 3241 PUSH16(utf8, cur, &au16); 3242 } 3243 break; 3244 case 's' | TYPE_IS_SHRIEKING: 3245 #if SHORTSIZE != SIZE16 3246 while (len-- > 0) { 3247 short ashort; 3248 fromstr = NEXTFROM; 3249 ashort = SvIV(fromstr); 3250 DO_BO_PACK(ashort, s); 3251 PUSH_VAR(utf8, cur, ashort); 3252 } 3253 break; 3254 #else 3255 /* Fall through! */ 3256 #endif 3257 case 's': 3258 while (len-- > 0) { 3259 I16 ai16; 3260 fromstr = NEXTFROM; 3261 ai16 = (I16)SvIV(fromstr); 3262 DO_BO_PACK(ai16, 16); 3263 PUSH16(utf8, cur, &ai16); 3264 } 3265 break; 3266 case 'I': 3267 case 'I' | TYPE_IS_SHRIEKING: 3268 while (len-- > 0) { 3269 unsigned int auint; 3270 fromstr = NEXTFROM; 3271 auint = SvUV(fromstr); 3272 DO_BO_PACK(auint, i); 3273 PUSH_VAR(utf8, cur, auint); 3274 } 3275 break; 3276 case 'j': 3277 while (len-- > 0) { 3278 IV aiv; 3279 fromstr = NEXTFROM; 3280 aiv = SvIV(fromstr); 3281 #if IVSIZE == INTSIZE 3282 DO_BO_PACK(aiv, i); 3283 #elif IVSIZE == LONGSIZE 3284 DO_BO_PACK(aiv, l); 3285 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE 3286 DO_BO_PACK(aiv, 64); 3287 #else 3288 Perl_croak(aTHX_ "'j' not supported on this platform"); 3289 #endif 3290 PUSH_VAR(utf8, cur, aiv); 3291 } 3292 break; 3293 case 'J': 3294 while (len-- > 0) { 3295 UV auv; 3296 fromstr = NEXTFROM; 3297 auv = SvUV(fromstr); 3298 #if UVSIZE == INTSIZE 3299 DO_BO_PACK(auv, i); 3300 #elif UVSIZE == LONGSIZE 3301 DO_BO_PACK(auv, l); 3302 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE 3303 DO_BO_PACK(auv, 64); 3304 #else 3305 Perl_croak(aTHX_ "'J' not supported on this platform"); 3306 #endif 3307 PUSH_VAR(utf8, cur, auv); 3308 } 3309 break; 3310 case 'w': 3311 while (len-- > 0) { 3312 NV anv; 3313 fromstr = NEXTFROM; 3314 anv = SvNV(fromstr); 3315 3316 if (anv < 0) { 3317 *cur = '\0'; 3318 SvCUR_set(cat, cur - start); 3319 Perl_croak(aTHX_ "Cannot compress negative numbers in pack"); 3320 } 3321 3322 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0, 3323 which is == UV_MAX_P1. IOK is fine (instead of UV_only), as 3324 any negative IVs will have already been got by the croak() 3325 above. IOK is untrue for fractions, so we test them 3326 against UV_MAX_P1. */ 3327 if (SvIOK(fromstr) || anv < UV_MAX_P1) { 3328 char buf[(sizeof(UV)*CHAR_BIT)/7+1]; 3329 char *in = buf + sizeof(buf); 3330 UV auv = SvUV(fromstr); 3331 3332 do { 3333 *--in = (char)((auv & 0x7f) | 0x80); 3334 auv >>= 7; 3335 } while (auv); 3336 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 3337 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3338 in, (buf + sizeof(buf)) - in); 3339 } else if (SvPOKp(fromstr)) 3340 goto w_string; 3341 else if (SvNOKp(fromstr)) { 3342 /* 10**NV_MAX_10_EXP is the largest power of 10 3343 so 10**(NV_MAX_10_EXP+1) is definately unrepresentable 3344 given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x: 3345 x = (NV_MAX_10_EXP+1) * log (10) / log (128) 3346 And with that many bytes only Inf can overflow. 3347 Some C compilers are strict about integral constant 3348 expressions so we conservatively divide by a slightly 3349 smaller integer instead of multiplying by the exact 3350 floating-point value. 3351 */ 3352 #ifdef NV_MAX_10_EXP 3353 /* char buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */ 3354 char buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */ 3355 #else 3356 /* char buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */ 3357 char buf[1 + (int)((308 + 1) / 2)]; /* valid C */ 3358 #endif 3359 char *in = buf + sizeof(buf); 3360 3361 anv = Perl_floor(anv); 3362 do { 3363 const NV next = Perl_floor(anv / 128); 3364 if (in <= buf) /* this cannot happen ;-) */ 3365 Perl_croak(aTHX_ "Cannot compress integer in pack"); 3366 *--in = (unsigned char)(anv - (next * 128)) | 0x80; 3367 anv = next; 3368 } while (anv > 0); 3369 buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */ 3370 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3371 in, (buf + sizeof(buf)) - in); 3372 } else { 3373 const char *from; 3374 char *result, *in; 3375 SV *norm; 3376 STRLEN len; 3377 bool done; 3378 3379 w_string: 3380 /* Copy string and check for compliance */ 3381 from = SvPV_const(fromstr, len); 3382 if ((norm = is_an_int(from, len)) == NULL) 3383 Perl_croak(aTHX_ "Can only compress unsigned integers in pack"); 3384 3385 Newx(result, len, char); 3386 in = result + len; 3387 done = FALSE; 3388 while (!done) *--in = div128(norm, &done) | 0x80; 3389 result[len - 1] &= 0x7F; /* clear continue bit */ 3390 PUSH_GROWING_BYTES(utf8, cat, start, cur, 3391 in, (result + len) - in); 3392 Safefree(result); 3393 SvREFCNT_dec(norm); /* free norm */ 3394 } 3395 } 3396 break; 3397 case 'i': 3398 case 'i' | TYPE_IS_SHRIEKING: 3399 while (len-- > 0) { 3400 int aint; 3401 fromstr = NEXTFROM; 3402 aint = SvIV(fromstr); 3403 DO_BO_PACK(aint, i); 3404 PUSH_VAR(utf8, cur, aint); 3405 } 3406 break; 3407 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3408 case 'N' | TYPE_IS_SHRIEKING: 3409 #endif 3410 case 'N': 3411 while (len-- > 0) { 3412 U32 au32; 3413 fromstr = NEXTFROM; 3414 au32 = SvUV(fromstr); 3415 #ifdef HAS_HTONL 3416 au32 = PerlSock_htonl(au32); 3417 #endif 3418 PUSH32(utf8, cur, &au32); 3419 } 3420 break; 3421 #ifdef PERL_PACK_CAN_SHRIEKSIGN 3422 case 'V' | TYPE_IS_SHRIEKING: 3423 #endif 3424 case 'V': 3425 while (len-- > 0) { 3426 U32 au32; 3427 fromstr = NEXTFROM; 3428 au32 = SvUV(fromstr); 3429 #ifdef HAS_HTOVL 3430 au32 = htovl(au32); 3431 #endif 3432 PUSH32(utf8, cur, &au32); 3433 } 3434 break; 3435 case 'L' | TYPE_IS_SHRIEKING: 3436 #if LONGSIZE != SIZE32 3437 while (len-- > 0) { 3438 unsigned long aulong; 3439 fromstr = NEXTFROM; 3440 aulong = SvUV(fromstr); 3441 DO_BO_PACK(aulong, l); 3442 PUSH_VAR(utf8, cur, aulong); 3443 } 3444 break; 3445 #else 3446 /* Fall though! */ 3447 #endif 3448 case 'L': 3449 while (len-- > 0) { 3450 U32 au32; 3451 fromstr = NEXTFROM; 3452 au32 = SvUV(fromstr); 3453 DO_BO_PACK(au32, 32); 3454 PUSH32(utf8, cur, &au32); 3455 } 3456 break; 3457 case 'l' | TYPE_IS_SHRIEKING: 3458 #if LONGSIZE != SIZE32 3459 while (len-- > 0) { 3460 long along; 3461 fromstr = NEXTFROM; 3462 along = SvIV(fromstr); 3463 DO_BO_PACK(along, l); 3464 PUSH_VAR(utf8, cur, along); 3465 } 3466 break; 3467 #else 3468 /* Fall though! */ 3469 #endif 3470 case 'l': 3471 while (len-- > 0) { 3472 I32 ai32; 3473 fromstr = NEXTFROM; 3474 ai32 = SvIV(fromstr); 3475 DO_BO_PACK(ai32, 32); 3476 PUSH32(utf8, cur, &ai32); 3477 } 3478 break; 3479 #ifdef HAS_QUAD 3480 case 'Q': 3481 while (len-- > 0) { 3482 Uquad_t auquad; 3483 fromstr = NEXTFROM; 3484 auquad = (Uquad_t) SvUV(fromstr); 3485 DO_BO_PACK(auquad, 64); 3486 PUSH_VAR(utf8, cur, auquad); 3487 } 3488 break; 3489 case 'q': 3490 while (len-- > 0) { 3491 Quad_t aquad; 3492 fromstr = NEXTFROM; 3493 aquad = (Quad_t)SvIV(fromstr); 3494 DO_BO_PACK(aquad, 64); 3495 PUSH_VAR(utf8, cur, aquad); 3496 } 3497 break; 3498 #endif /* HAS_QUAD */ 3499 case 'P': 3500 len = 1; /* assume SV is correct length */ 3501 GROWING(utf8, cat, start, cur, sizeof(char *)); 3502 /* Fall through! */ 3503 case 'p': 3504 while (len-- > 0) { 3505 const char *aptr; 3506 3507 fromstr = NEXTFROM; 3508 SvGETMAGIC(fromstr); 3509 if (!SvOK(fromstr)) aptr = NULL; 3510 else { 3511 /* XXX better yet, could spirit away the string to 3512 * a safe spot and hang on to it until the result 3513 * of pack() (and all copies of the result) are 3514 * gone. 3515 */ 3516 if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) && 3517 !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) { 3518 Perl_warner(aTHX_ packWARN(WARN_PACK), 3519 "Attempt to pack pointer to temporary value"); 3520 } 3521 if (SvPOK(fromstr) || SvNIOK(fromstr)) 3522 aptr = SvPV_nomg_const_nolen(fromstr); 3523 else 3524 aptr = SvPV_force_flags_nolen(fromstr, 0); 3525 } 3526 DO_BO_PACK_PC(aptr); 3527 PUSH_VAR(utf8, cur, aptr); 3528 } 3529 break; 3530 case 'u': { 3531 const char *aptr, *aend; 3532 bool from_utf8; 3533 3534 fromstr = NEXTFROM; 3535 if (len <= 2) len = 45; 3536 else len = len / 3 * 3; 3537 if (len >= 64) { 3538 if (ckWARN(WARN_PACK)) 3539 Perl_warner(aTHX_ packWARN(WARN_PACK), 3540 "Field too wide in 'u' format in pack"); 3541 len = 63; 3542 } 3543 aptr = SvPV_const(fromstr, fromlen); 3544 from_utf8 = DO_UTF8(fromstr); 3545 if (from_utf8) { 3546 aend = aptr + fromlen; 3547 fromlen = sv_len_utf8(fromstr); 3548 } else aend = NULL; /* Unused, but keep compilers happy */ 3549 GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2); 3550 while (fromlen > 0) { 3551 U8 *end; 3552 I32 todo; 3553 U8 hunk[1+63/3*4+1]; 3554 3555 if ((I32)fromlen > len) 3556 todo = len; 3557 else 3558 todo = fromlen; 3559 if (from_utf8) { 3560 char buffer[64]; 3561 if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo, 3562 'u' | TYPE_IS_PACK)) { 3563 *cur = '\0'; 3564 SvCUR_set(cat, cur - start); 3565 Perl_croak(aTHX_ "Assertion: string is shorter than advertised"); 3566 } 3567 end = doencodes(hunk, buffer, todo); 3568 } else { 3569 end = doencodes(hunk, aptr, todo); 3570 aptr += todo; 3571 } 3572 PUSH_BYTES(utf8, cur, hunk, end-hunk); 3573 fromlen -= todo; 3574 } 3575 break; 3576 } 3577 } 3578 *cur = '\0'; 3579 SvCUR_set(cat, cur - start); 3580 no_change: 3581 *symptr = lookahead; 3582 } 3583 return beglist; 3584 } 3585 #undef NEXTFROM 3586 3587 3588 PP(pp_pack) 3589 { 3590 dVAR; dSP; dMARK; dORIGMARK; dTARGET; 3591 register SV *cat = TARG; 3592 STRLEN fromlen; 3593 SV *pat_sv = *++MARK; 3594 register const char *pat = SvPV_const(pat_sv, fromlen); 3595 register const char *patend = pat + fromlen; 3596 3597 MARK++; 3598 sv_setpvs(cat, ""); 3599 SvUTF8_off(cat); 3600 3601 packlist(cat, pat, patend, MARK, SP + 1); 3602 3603 SvSETMAGIC(cat); 3604 SP = ORIGMARK; 3605 PUSHs(cat); 3606 RETURN; 3607 } 3608 3609 /* 3610 * Local variables: 3611 * c-indentation-style: bsd 3612 * c-basic-offset: 4 3613 * indent-tabs-mode: t 3614 * End: 3615 * 3616 * ex: set ts=8 sts=4 sw=4 noet: 3617 */ 3618