xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   I32      length;   /* length/repeat count   */
49   howlen_t howlen;   /* how length is given   */
50   int      level;    /* () nesting level      */
51   U32      flags;    /* /=4, comma=2, pack=1  */
52                      /*   and group modifiers */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 typedef union {
74     NV nv;
75     U8 bytes[sizeof(NV)];
76 } NV_bytes;
77 
78 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
79 typedef union {
80     long double ld;
81     U8 bytes[sizeof(long double)];
82 } ld_bytes;
83 #endif
84 
85 #if PERL_VERSION >= 9
86 # define PERL_PACK_CAN_BYTEORDER
87 # define PERL_PACK_CAN_SHRIEKSIGN
88 #endif
89 
90 #ifndef CHAR_BIT
91 # define CHAR_BIT	8
92 #endif
93 /* Maximum number of bytes to which a byte can grow due to upgrade */
94 #define UTF8_EXPAND	2
95 
96 /*
97  * Offset for integer pack/unpack.
98  *
99  * On architectures where I16 and I32 aren't really 16 and 32 bits,
100  * which for now are all Crays, pack and unpack have to play games.
101  */
102 
103 /*
104  * These values are required for portability of pack() output.
105  * If they're not right on your machine, then pack() and unpack()
106  * wouldn't work right anyway; you'll need to apply the Cray hack.
107  * (I'd like to check them with #if, but you can't use sizeof() in
108  * the preprocessor.)  --???
109  */
110 /*
111     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
112     defines are now in config.h.  --Andy Dougherty  April 1998
113  */
114 #define SIZE16 2
115 #define SIZE32 4
116 
117 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
118    --jhi Feb 1999 */
119 
120 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
121 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
122 #    define OFF16(p)	((char*)(p))
123 #    define OFF32(p)	((char*)(p))
124 #  else
125 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
126 #      define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
127 #      define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
128 #    else
129        ++++ bad cray byte order
130 #    endif
131 #  endif
132 #else
133 #  define OFF16(p)     ((char *) (p))
134 #  define OFF32(p)     ((char *) (p))
135 #endif
136 
137 /* Only to be used inside a loop (see the break) */
138 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START {		\
139     if (utf8) {								\
140 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break;	\
141     } else {								\
142 	Copy(s, OFF16(p), SIZE16, char);				\
143 	(s) += SIZE16;							\
144     }									\
145 } STMT_END
146 
147 /* Only to be used inside a loop (see the break) */
148 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START {		\
149     if (utf8) {								\
150 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break;	\
151     } else {								\
152 	Copy(s, OFF32(p), SIZE32, char);				\
153 	(s) += SIZE32;							\
154     }									\
155 } STMT_END
156 
157 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
158 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
159 
160 /* Only to be used inside a loop (see the break) */
161 #define SHIFT_BYTES(utf8, s, strend, buf, len, datumtype)	\
162 STMT_START {						\
163     if (utf8) {						\
164         if (!uni_to_bytes(aTHX_ &s, strend,		\
165 	  (char *) (buf), len, datumtype)) break;	\
166     } else {						\
167         Copy(s, (char *) (buf), len, char);		\
168         s += len;					\
169     }							\
170 } STMT_END
171 
172 #define SHIFT_VAR(utf8, s, strend, var, datumtype)	\
173        SHIFT_BYTES(utf8, s, strend, &(var), sizeof(var), datumtype)
174 
175 #define PUSH_VAR(utf8, aptr, var)	\
176 	PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
177 
178 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
179 #define MAX_SUB_TEMPLATE_LEVEL 100
180 
181 /* flags (note that type modifiers can also be used as flags!) */
182 #define FLAG_WAS_UTF8	      0x40
183 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
184 #define FLAG_UNPACK_ONLY_ONE  0x10
185 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
186 #define FLAG_SLASH            0x04
187 #define FLAG_COMMA            0x02
188 #define FLAG_PACK             0x01
189 
190 STATIC SV *
191 S_mul128(pTHX_ SV *sv, U8 m)
192 {
193   STRLEN          len;
194   char           *s = SvPV(sv, len);
195   char           *t;
196 
197   PERL_ARGS_ASSERT_MUL128;
198 
199   if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
200     SV * const tmpNew = newSVpvs("0000000000");
201 
202     sv_catsv(tmpNew, sv);
203     SvREFCNT_dec(sv);		/* free old sv */
204     sv = tmpNew;
205     s = SvPV(sv, len);
206   }
207   t = s + len - 1;
208   while (!*t)                   /* trailing '\0'? */
209     t--;
210   while (t > s) {
211     const U32 i = ((*t - '0') << 7) + m;
212     *(t--) = '0' + (char)(i % 10);
213     m = (char)(i / 10);
214   }
215   return (sv);
216 }
217 
218 /* Explosives and implosives. */
219 
220 #if 'I' == 73 && 'J' == 74
221 /* On an ASCII/ISO kind of system */
222 #define ISUUCHAR(ch)    ((ch) >= ' ' && (ch) < 'a')
223 #else
224 /*
225   Some other sort of character set - use memchr() so we don't match
226   the null byte.
227  */
228 #define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
229 #endif
230 
231 /* type modifiers */
232 #define TYPE_IS_SHRIEKING	0x100
233 #define TYPE_IS_BIG_ENDIAN	0x200
234 #define TYPE_IS_LITTLE_ENDIAN	0x400
235 #define TYPE_IS_PACK		0x800
236 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
237 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
238 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
239 
240 #ifdef PERL_PACK_CAN_SHRIEKSIGN
241 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
242 #else
243 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
244 #endif
245 
246 #ifndef PERL_PACK_CAN_BYTEORDER
247 /* Put "can't" first because it is shorter  */
248 # define TYPE_ENDIANNESS(t)	0
249 # define TYPE_NO_ENDIANNESS(t)	(t)
250 
251 # define ENDIANNESS_ALLOWED_TYPES   ""
252 
253 # define DO_BO_UNPACK(var, type)
254 # define DO_BO_PACK(var, type)
255 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
256 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
257 # define DO_BO_UNPACK_N(var, type)
258 # define DO_BO_PACK_N(var, type)
259 # define DO_BO_UNPACK_P(var)
260 # define DO_BO_PACK_P(var)
261 # define DO_BO_UNPACK_PC(var)
262 # define DO_BO_PACK_PC(var)
263 
264 #else /* PERL_PACK_CAN_BYTEORDER */
265 
266 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
267 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
268 
269 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
270 
271 # define DO_BO_UNPACK(var, type)                                              \
272         STMT_START {                                                          \
273           switch (TYPE_ENDIANNESS(datumtype)) {                               \
274             case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
275             case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break;  \
276             default: break;                                                   \
277           }                                                                   \
278         } STMT_END
279 
280 # define DO_BO_PACK(var, type)                                                \
281         STMT_START {                                                          \
282           switch (TYPE_ENDIANNESS(datumtype)) {                               \
283             case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
284             case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break;  \
285             default: break;                                                   \
286           }                                                                   \
287         } STMT_END
288 
289 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
290         STMT_START {                                                          \
291           switch (TYPE_ENDIANNESS(datumtype)) {                               \
292             case TYPE_IS_BIG_ENDIAN:                                          \
293               var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
294               break;                                                          \
295             case TYPE_IS_LITTLE_ENDIAN:                                       \
296               var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
297               break;                                                          \
298             default:                                                          \
299               break;                                                          \
300           }                                                                   \
301         } STMT_END
302 
303 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
304         STMT_START {                                                          \
305           switch (TYPE_ENDIANNESS(datumtype)) {                               \
306             case TYPE_IS_BIG_ENDIAN:                                          \
307               var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
308               break;                                                          \
309             case TYPE_IS_LITTLE_ENDIAN:                                       \
310               var = (post_cast *) my_htole ## type ((pre_cast) var);          \
311               break;                                                          \
312             default:                                                          \
313               break;                                                          \
314           }                                                                   \
315         } STMT_END
316 
317 # define BO_CANT_DOIT(action, type)                                           \
318         STMT_START {                                                          \
319           switch (TYPE_ENDIANNESS(datumtype)) {                               \
320              case TYPE_IS_BIG_ENDIAN:                                         \
321                Perl_croak(aTHX_ "Can't %s big-endian %ss on this "            \
322                                 "platform", #action, #type);                  \
323                break;                                                         \
324              case TYPE_IS_LITTLE_ENDIAN:                                      \
325                Perl_croak(aTHX_ "Can't %s little-endian %ss on this "         \
326                                 "platform", #action, #type);                  \
327                break;                                                         \
328              default:                                                         \
329                break;                                                         \
330            }                                                                  \
331          } STMT_END
332 
333 # if PTRSIZE == INTSIZE
334 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, i, int, void)
335 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, i, int, void)
336 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, i, int, char)
337 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, i, int, char)
338 # elif PTRSIZE == LONGSIZE
339 #  if LONGSIZE < IVSIZE && IVSIZE == 8
340 #   define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, 64, IV, void)
341 #   define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, 64, IV, void)
342 #   define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, 64, IV, char)
343 #   define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, 64, IV, char)
344 #  else
345 #   define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, IV, void)
346 #   define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, IV, void)
347 #   define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, IV, char)
348 #   define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, IV, char)
349 #  endif
350 # elif PTRSIZE == IVSIZE
351 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, IV, void)
352 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, IV, void)
353 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, IV, char)
354 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, IV, char)
355 # else
356 #  define DO_BO_UNPACK_P(var)	BO_CANT_DOIT(unpack, pointer)
357 #  define DO_BO_PACK_P(var)	BO_CANT_DOIT(pack, pointer)
358 #  define DO_BO_UNPACK_PC(var)	BO_CANT_DOIT(unpack, pointer)
359 #  define DO_BO_PACK_PC(var)	BO_CANT_DOIT(pack, pointer)
360 # endif
361 
362 # if defined(my_htolen) && defined(my_letohn) && \
363     defined(my_htoben) && defined(my_betohn)
364 #  define DO_BO_UNPACK_N(var, type)                                           \
365          STMT_START {                                                         \
366            switch (TYPE_ENDIANNESS(datumtype)) {                              \
367              case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
368              case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
369              default: break;                                                  \
370            }                                                                  \
371          } STMT_END
372 
373 #  define DO_BO_PACK_N(var, type)                                             \
374          STMT_START {                                                         \
375            switch (TYPE_ENDIANNESS(datumtype)) {                              \
376              case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
377              case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
378              default: break;                                                  \
379            }                                                                  \
380          } STMT_END
381 # else
382 #  define DO_BO_UNPACK_N(var, type)	BO_CANT_DOIT(unpack, type)
383 #  define DO_BO_PACK_N(var, type)	BO_CANT_DOIT(pack, type)
384 # endif
385 
386 #endif /* PERL_PACK_CAN_BYTEORDER */
387 
388 #define PACK_SIZE_CANNOT_CSUM		0x80
389 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
390 #define PACK_SIZE_MASK			0x3F
391 
392 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
393    in).  You're unlikely ever to need to regenerate them.  */
394 
395 #if TYPE_IS_SHRIEKING != 0x100
396    ++++shriek offset should be 256
397 #endif
398 
399 typedef U8 packprops_t;
400 #if 'J'-'I' == 1
401 /* ASCII */
402 STATIC const packprops_t packprops[512] = {
403     /* normal */
404     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
405     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
406     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
407     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
408     0, 0, 0,
409     /* C */ sizeof(unsigned char),
410 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
411     /* D */ LONG_DOUBLESIZE,
412 #else
413     0,
414 #endif
415     0,
416     /* F */ NVSIZE,
417     0, 0,
418     /* I */ sizeof(unsigned int),
419     /* J */ UVSIZE,
420     0,
421     /* L */ SIZE32,
422     0,
423     /* N */ SIZE32,
424     0, 0,
425 #if defined(HAS_QUAD)
426     /* Q */ sizeof(Uquad_t),
427 #else
428     0,
429 #endif
430     0,
431     /* S */ SIZE16,
432     0,
433     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
434     /* V */ SIZE32,
435     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
436     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
437     /* c */ sizeof(char),
438     /* d */ sizeof(double),
439     0,
440     /* f */ sizeof(float),
441     0, 0,
442     /* i */ sizeof(int),
443     /* j */ IVSIZE,
444     0,
445     /* l */ SIZE32,
446     0,
447     /* n */ SIZE16,
448     0,
449     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
450 #if defined(HAS_QUAD)
451     /* q */ sizeof(Quad_t),
452 #else
453     0,
454 #endif
455     0,
456     /* s */ SIZE16,
457     0, 0,
458     /* v */ SIZE16,
459     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
460     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
462     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
463     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
464     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468     0, 0, 0, 0, 0, 0, 0, 0,
469     /* shrieking */
470     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
472     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
473     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
474     0, 0, 0, 0, 0, 0, 0, 0, 0,
475     /* I */ sizeof(unsigned int),
476     0, 0,
477     /* L */ sizeof(unsigned long),
478     0,
479 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
480     /* N */ SIZE32,
481 #else
482     0,
483 #endif
484     0, 0, 0, 0,
485     /* S */ sizeof(unsigned short),
486     0, 0,
487 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
488     /* V */ SIZE32,
489 #else
490     0,
491 #endif
492     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493     0, 0,
494     /* i */ sizeof(int),
495     0, 0,
496     /* l */ sizeof(long),
497     0,
498 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
499     /* n */ SIZE16,
500 #else
501     0,
502 #endif
503     0, 0, 0, 0,
504     /* s */ sizeof(short),
505     0, 0,
506 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
507     /* v */ SIZE16,
508 #else
509     0,
510 #endif
511     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
512     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
515     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
516     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519     0, 0, 0, 0, 0, 0, 0, 0, 0
520 };
521 #else
522 /* EBCDIC (or bust) */
523 STATIC const packprops_t packprops[512] = {
524     /* normal */
525     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
533     0, 0, 0,
534     /* c */ sizeof(char),
535     /* d */ sizeof(double),
536     0,
537     /* f */ sizeof(float),
538     0, 0,
539     /* i */ sizeof(int),
540     0, 0, 0, 0, 0, 0, 0,
541     /* j */ IVSIZE,
542     0,
543     /* l */ SIZE32,
544     0,
545     /* n */ SIZE16,
546     0,
547     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
548 #if defined(HAS_QUAD)
549     /* q */ sizeof(Quad_t),
550 #else
551     0,
552 #endif
553     0, 0, 0, 0, 0, 0, 0, 0, 0,
554     /* s */ SIZE16,
555     0, 0,
556     /* v */ SIZE16,
557     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
558     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
559     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
560     /* C */ sizeof(unsigned char),
561 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
562     /* D */ LONG_DOUBLESIZE,
563 #else
564     0,
565 #endif
566     0,
567     /* F */ NVSIZE,
568     0, 0,
569     /* I */ sizeof(unsigned int),
570     0, 0, 0, 0, 0, 0, 0,
571     /* J */ UVSIZE,
572     0,
573     /* L */ SIZE32,
574     0,
575     /* N */ SIZE32,
576     0, 0,
577 #if defined(HAS_QUAD)
578     /* Q */ sizeof(Uquad_t),
579 #else
580     0,
581 #endif
582     0, 0, 0, 0, 0, 0, 0, 0, 0,
583     /* S */ SIZE16,
584     0,
585     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
586     /* V */ SIZE32,
587     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
588     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
589     0, 0, 0, 0, 0, 0, 0, 0, 0,
590     /* shrieking */
591     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
592     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
593     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
594     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
595     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
598     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
599     0, 0, 0, 0, 0, 0, 0, 0, 0,
600     /* i */ sizeof(int),
601     0, 0, 0, 0, 0, 0, 0, 0, 0,
602     /* l */ sizeof(long),
603     0,
604 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
605     /* n */ SIZE16,
606 #else
607     0,
608 #endif
609     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
610     /* s */ sizeof(short),
611     0, 0,
612 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
613     /* v */ SIZE16,
614 #else
615     0,
616 #endif
617     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
618     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
619     0, 0, 0,
620     /* I */ sizeof(unsigned int),
621     0, 0, 0, 0, 0, 0, 0, 0, 0,
622     /* L */ sizeof(unsigned long),
623     0,
624 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
625     /* N */ SIZE32,
626 #else
627     0,
628 #endif
629     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
630     /* S */ sizeof(unsigned short),
631     0, 0,
632 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
633     /* V */ SIZE32,
634 #else
635     0,
636 #endif
637     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
638     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
639 };
640 #endif
641 
642 STATIC U8
643 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
644 {
645     STRLEN retlen;
646     UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
647 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
648     /* We try to process malformed UTF-8 as much as possible (preferrably with
649        warnings), but these two mean we make no progress in the string and
650        might enter an infinite loop */
651     if (retlen == (STRLEN) -1 || retlen == 0)
652 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
653 		   (int) TYPE_NO_MODIFIERS(datumtype));
654     if (val >= 0x100) {
655 	Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
656 		       "Character in '%c' format wrapped in unpack",
657 		       (int) TYPE_NO_MODIFIERS(datumtype));
658 	val &= 0xff;
659     }
660     *s += retlen;
661     return (U8)val;
662 }
663 
664 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
665 	uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
666 	*(U8 *)(s)++)
667 
668 STATIC bool
669 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
670 {
671     UV val;
672     STRLEN retlen;
673     const char *from = *s;
674     int bad = 0;
675     const U32 flags = ckWARN(WARN_UTF8) ?
676 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
677     for (;buf_len > 0; buf_len--) {
678 	if (from >= end) return FALSE;
679 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
680 	if (retlen == (STRLEN) -1 || retlen == 0) {
681 	    from += UTF8SKIP(from);
682 	    bad |= 1;
683 	} else from += retlen;
684 	if (val >= 0x100) {
685 	    bad |= 2;
686 	    val &= 0xff;
687 	}
688 	*(U8 *)buf++ = (U8)val;
689     }
690     /* We have enough characters for the buffer. Did we have problems ? */
691     if (bad) {
692 	if (bad & 1) {
693 	    /* Rewalk the string fragment while warning */
694 	    const char *ptr;
695 	    const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
696 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
697 		if (ptr >= end) break;
698 		utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
699 	    }
700 	    if (from > end) from = end;
701 	}
702 	if ((bad & 2))
703 	    Perl_ck_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
704 				       WARN_PACK : WARN_UNPACK),
705 			   "Character(s) in '%c' format wrapped in %s",
706 			   (int) TYPE_NO_MODIFIERS(datumtype),
707 			   datumtype & TYPE_IS_PACK ? "pack" : "unpack");
708     }
709     *s = from;
710     return TRUE;
711 }
712 
713 STATIC bool
714 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
715 {
716     dVAR;
717     STRLEN retlen;
718     const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
719     if (val >= 0x100 || !ISUUCHAR(val) ||
720 	retlen == (STRLEN) -1 || retlen == 0) {
721 	*out = 0;
722 	return FALSE;
723     }
724     *out = PL_uudmap[val] & 077;
725     *s += retlen;
726     return TRUE;
727 }
728 
729 STATIC char *
730 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
731     const U8 * const end = start + len;
732 
733     PERL_ARGS_ASSERT_BYTES_TO_UNI;
734 
735     while (start < end) {
736 	const UV uv = NATIVE_TO_ASCII(*start);
737 	if (UNI_IS_INVARIANT(uv))
738 	    *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
739 	else {
740 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
741 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
742 	}
743 	start++;
744     }
745     return dest;
746 }
747 
748 #define PUSH_BYTES(utf8, cur, buf, len)				\
749 STMT_START {							\
750     if (utf8)							\
751 	(cur) = bytes_to_uni((U8 *) buf, len, (cur));		\
752     else {							\
753 	Copy(buf, cur, len, char);				\
754 	(cur) += (len);						\
755     }								\
756 } STMT_END
757 
758 #define GROWING(utf8, cat, start, cur, in_len)	\
759 STMT_START {					\
760     STRLEN glen = (in_len);			\
761     if (utf8) glen *= UTF8_EXPAND;		\
762     if ((cur) + glen >= (start) + SvLEN(cat)) {	\
763 	(start) = sv_exp_grow(cat, glen);	\
764 	(cur) = (start) + SvCUR(cat);		\
765     }						\
766 } STMT_END
767 
768 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
769 STMT_START {					\
770     const STRLEN glen = (in_len);		\
771     STRLEN gl = glen;				\
772     if (utf8) gl *= UTF8_EXPAND;		\
773     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
774         *cur = '\0';				\
775         SvCUR_set((cat), (cur) - (start));	\
776 	(start) = sv_exp_grow(cat, gl);		\
777 	(cur) = (start) + SvCUR(cat);		\
778     }						\
779     PUSH_BYTES(utf8, cur, buf, glen);		\
780 } STMT_END
781 
782 #define PUSH_BYTE(utf8, s, byte)		\
783 STMT_START {					\
784     if (utf8) {					\
785 	const U8 au8 = (byte);			\
786 	(s) = bytes_to_uni(&au8, 1, (s));	\
787     } else *(U8 *)(s)++ = (byte);		\
788 } STMT_END
789 
790 /* Only to be used inside a loop (see the break) */
791 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
792 STMT_START {							\
793     STRLEN retlen;						\
794     if (str >= end) break;					\
795     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
796     if (retlen == (STRLEN) -1 || retlen == 0) {			\
797 	*cur = '\0';						\
798 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
799     }								\
800     str += retlen;						\
801 } STMT_END
802 
803 static const char *_action( const tempsym_t* symptr )
804 {
805     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
806 }
807 
808 /* Returns the sizeof() struct described by pat */
809 STATIC I32
810 S_measure_struct(pTHX_ tempsym_t* symptr)
811 {
812     I32 total = 0;
813 
814     PERL_ARGS_ASSERT_MEASURE_STRUCT;
815 
816     while (next_symbol(symptr)) {
817 	I32 len;
818 	int size;
819 
820         switch (symptr->howlen) {
821 	  case e_star:
822    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
823                         _action( symptr ) );
824             break;
825 	  default:
826 	    /* e_no_len and e_number */
827 	    len = symptr->length;
828 	    break;
829         }
830 
831 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
832 	if (!size) {
833             int star;
834 	    /* endianness doesn't influence the size of a type */
835 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
836 	    default:
837 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
838 			   (int)TYPE_NO_MODIFIERS(symptr->code),
839                            _action( symptr ) );
840 #ifdef PERL_PACK_CAN_SHRIEKSIGN
841 	    case '.' | TYPE_IS_SHRIEKING:
842 	    case '@' | TYPE_IS_SHRIEKING:
843 #endif
844 	    case '@':
845 	    case '.':
846 	    case '/':
847 	    case 'U':			/* XXXX Is it correct? */
848 	    case 'w':
849 	    case 'u':
850 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
851 			   (int) TYPE_NO_MODIFIERS(symptr->code),
852                            _action( symptr ) );
853 	    case '%':
854 		size = 0;
855 		break;
856 	    case '(':
857 	    {
858 		tempsym_t savsym = *symptr;
859 		symptr->patptr = savsym.grpbeg;
860 		symptr->patend = savsym.grpend;
861 		/* XXXX Theoretically, we need to measure many times at
862 		   different positions, since the subexpression may contain
863 		   alignment commands, but be not of aligned length.
864 		   Need to detect this and croak().  */
865 		size = measure_struct(symptr);
866 		*symptr = savsym;
867 		break;
868 	    }
869 	    case 'X' | TYPE_IS_SHRIEKING:
870 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
871 		 */
872 		if (!len)		/* Avoid division by 0 */
873 		    len = 1;
874 		len = total % len;	/* Assumed: the start is aligned. */
875 		/* FALL THROUGH */
876 	    case 'X':
877 		size = -1;
878 		if (total < len)
879                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
880 		break;
881 	    case 'x' | TYPE_IS_SHRIEKING:
882 		if (!len)		/* Avoid division by 0 */
883 		    len = 1;
884 		star = total % len;	/* Assumed: the start is aligned. */
885 		if (star)		/* Other portable ways? */
886 		    len = len - star;
887 		else
888 		    len = 0;
889 		/* FALL THROUGH */
890 	    case 'x':
891 	    case 'A':
892 	    case 'Z':
893 	    case 'a':
894 		size = 1;
895 		break;
896 	    case 'B':
897 	    case 'b':
898 		len = (len + 7)/8;
899 		size = 1;
900 		break;
901 	    case 'H':
902 	    case 'h':
903 		len = (len + 1)/2;
904 		size = 1;
905 		break;
906 
907 	    case 'P':
908 		len = 1;
909 		size = sizeof(char*);
910 		break;
911 	    }
912 	}
913 	total += len * size;
914     }
915     return total;
916 }
917 
918 
919 /* locate matching closing parenthesis or bracket
920  * returns char pointer to char after match, or NULL
921  */
922 STATIC const char *
923 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
924 {
925     PERL_ARGS_ASSERT_GROUP_END;
926 
927     while (patptr < patend) {
928 	const char c = *patptr++;
929 
930 	if (isSPACE(c))
931 	    continue;
932 	else if (c == ender)
933 	    return patptr-1;
934 	else if (c == '#') {
935 	    while (patptr < patend && *patptr != '\n')
936 		patptr++;
937 	    continue;
938 	} else if (c == '(')
939 	    patptr = group_end(patptr, patend, ')') + 1;
940 	else if (c == '[')
941 	    patptr = group_end(patptr, patend, ']') + 1;
942     }
943     Perl_croak(aTHX_ "No group ending character '%c' found in template",
944                ender);
945     return 0;
946 }
947 
948 
949 /* Convert unsigned decimal number to binary.
950  * Expects a pointer to the first digit and address of length variable
951  * Advances char pointer to 1st non-digit char and returns number
952  */
953 STATIC const char *
954 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
955 {
956   I32 len = *patptr++ - '0';
957 
958   PERL_ARGS_ASSERT_GET_NUM;
959 
960   while (isDIGIT(*patptr)) {
961     if (len >= 0x7FFFFFFF/10)
962       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
963     len = (len * 10) + (*patptr++ - '0');
964   }
965   *lenptr = len;
966   return patptr;
967 }
968 
969 /* The marvellous template parsing routine: Using state stored in *symptr,
970  * locates next template code and count
971  */
972 STATIC bool
973 S_next_symbol(pTHX_ tempsym_t* symptr )
974 {
975   const char* patptr = symptr->patptr;
976   const char* const patend = symptr->patend;
977 
978   PERL_ARGS_ASSERT_NEXT_SYMBOL;
979 
980   symptr->flags &= ~FLAG_SLASH;
981 
982   while (patptr < patend) {
983     if (isSPACE(*patptr))
984       patptr++;
985     else if (*patptr == '#') {
986       patptr++;
987       while (patptr < patend && *patptr != '\n')
988 	patptr++;
989       if (patptr < patend)
990 	patptr++;
991     } else {
992       /* We should have found a template code */
993       I32 code = *patptr++ & 0xFF;
994       U32 inherited_modifiers = 0;
995 
996       if (code == ','){ /* grandfather in commas but with a warning */
997 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
998           symptr->flags |= FLAG_COMMA;
999 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1000 	 	      "Invalid type ',' in %s", _action( symptr ) );
1001         }
1002 	continue;
1003       }
1004 
1005       /* for '(', skip to ')' */
1006       if (code == '(') {
1007         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
1008           Perl_croak(aTHX_ "()-group starts with a count in %s",
1009                         _action( symptr ) );
1010         symptr->grpbeg = patptr;
1011         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
1012         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
1013 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
1014                         _action( symptr ) );
1015       }
1016 
1017       /* look for group modifiers to inherit */
1018       if (TYPE_ENDIANNESS(symptr->flags)) {
1019         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
1020           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1021       }
1022 
1023       /* look for modifiers */
1024       while (patptr < patend) {
1025         const char *allowed;
1026         I32 modifier;
1027         switch (*patptr) {
1028           case '!':
1029             modifier = TYPE_IS_SHRIEKING;
1030             allowed = SHRIEKING_ALLOWED_TYPES;
1031             break;
1032 #ifdef PERL_PACK_CAN_BYTEORDER
1033           case '>':
1034             modifier = TYPE_IS_BIG_ENDIAN;
1035             allowed = ENDIANNESS_ALLOWED_TYPES;
1036             break;
1037           case '<':
1038             modifier = TYPE_IS_LITTLE_ENDIAN;
1039             allowed = ENDIANNESS_ALLOWED_TYPES;
1040             break;
1041 #endif /* PERL_PACK_CAN_BYTEORDER */
1042           default:
1043             allowed = "";
1044             modifier = 0;
1045             break;
1046         }
1047 
1048         if (modifier == 0)
1049           break;
1050 
1051         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1052           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1053                         allowed, _action( symptr ) );
1054 
1055         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1056           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1057                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1058         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1059                  TYPE_ENDIANNESS_MASK)
1060           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1061                      *patptr, _action( symptr ) );
1062 
1063         if ((code & modifier)) {
1064 	    Perl_ck_warner(aTHX_ packWARN(WARN_UNPACK),
1065 			   "Duplicate modifier '%c' after '%c' in %s",
1066 			   *patptr, (int) TYPE_NO_MODIFIERS(code),
1067 			   _action( symptr ) );
1068         }
1069 
1070         code |= modifier;
1071         patptr++;
1072       }
1073 
1074       /* inherit modifiers */
1075       code |= inherited_modifiers;
1076 
1077       /* look for count and/or / */
1078       if (patptr < patend) {
1079 	if (isDIGIT(*patptr)) {
1080  	  patptr = get_num( patptr, &symptr->length );
1081           symptr->howlen = e_number;
1082 
1083         } else if (*patptr == '*') {
1084           patptr++;
1085           symptr->howlen = e_star;
1086 
1087         } else if (*patptr == '[') {
1088           const char* lenptr = ++patptr;
1089           symptr->howlen = e_number;
1090           patptr = group_end( patptr, patend, ']' ) + 1;
1091           /* what kind of [] is it? */
1092           if (isDIGIT(*lenptr)) {
1093             lenptr = get_num( lenptr, &symptr->length );
1094             if( *lenptr != ']' )
1095               Perl_croak(aTHX_ "Malformed integer in [] in %s",
1096                             _action( symptr ) );
1097           } else {
1098             tempsym_t savsym = *symptr;
1099             symptr->patend = patptr-1;
1100             symptr->patptr = lenptr;
1101             savsym.length = measure_struct(symptr);
1102             *symptr = savsym;
1103           }
1104         } else {
1105           symptr->howlen = e_no_len;
1106           symptr->length = 1;
1107         }
1108 
1109         /* try to find / */
1110         while (patptr < patend) {
1111           if (isSPACE(*patptr))
1112             patptr++;
1113           else if (*patptr == '#') {
1114             patptr++;
1115             while (patptr < patend && *patptr != '\n')
1116 	      patptr++;
1117             if (patptr < patend)
1118 	      patptr++;
1119           } else {
1120             if (*patptr == '/') {
1121               symptr->flags |= FLAG_SLASH;
1122               patptr++;
1123               if (patptr < patend &&
1124                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1125                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1126                             _action( symptr ) );
1127             }
1128             break;
1129 	  }
1130 	}
1131       } else {
1132         /* at end - no count, no / */
1133         symptr->howlen = e_no_len;
1134         symptr->length = 1;
1135       }
1136 
1137       symptr->code = code;
1138       symptr->patptr = patptr;
1139       return TRUE;
1140     }
1141   }
1142   symptr->patptr = patptr;
1143   return FALSE;
1144 }
1145 
1146 /*
1147    There is no way to cleanly handle the case where we should process the
1148    string per byte in its upgraded form while it's really in downgraded form
1149    (e.g. estimates like strend-s as an upper bound for the number of
1150    characters left wouldn't work). So if we foresee the need of this
1151    (pattern starts with U or contains U0), we want to work on the encoded
1152    version of the string. Users are advised to upgrade their pack string
1153    themselves if they need to do a lot of unpacks like this on it
1154 */
1155 STATIC bool
1156 need_utf8(const char *pat, const char *patend)
1157 {
1158     bool first = TRUE;
1159 
1160     PERL_ARGS_ASSERT_NEED_UTF8;
1161 
1162     while (pat < patend) {
1163 	if (pat[0] == '#') {
1164 	    pat++;
1165 	    pat = (const char *) memchr(pat, '\n', patend-pat);
1166 	    if (!pat) return FALSE;
1167 	} else if (pat[0] == 'U') {
1168 	    if (first || pat[1] == '0') return TRUE;
1169 	} else first = FALSE;
1170 	pat++;
1171     }
1172     return FALSE;
1173 }
1174 
1175 STATIC char
1176 first_symbol(const char *pat, const char *patend) {
1177     PERL_ARGS_ASSERT_FIRST_SYMBOL;
1178 
1179     while (pat < patend) {
1180 	if (pat[0] != '#') return pat[0];
1181 	pat++;
1182 	pat = (const char *) memchr(pat, '\n', patend-pat);
1183 	if (!pat) return 0;
1184 	pat++;
1185     }
1186     return 0;
1187 }
1188 
1189 /*
1190 =for apidoc unpackstring
1191 
1192 The engine implementing unpack() Perl function. C<unpackstring> puts the
1193 extracted list items on the stack and returns the number of elements.
1194 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1195 
1196 =cut */
1197 
1198 I32
1199 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1200 {
1201     tempsym_t sym;
1202 
1203     PERL_ARGS_ASSERT_UNPACKSTRING;
1204 
1205     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1206     else if (need_utf8(pat, patend)) {
1207 	/* We probably should try to avoid this in case a scalar context call
1208 	   wouldn't get to the "U0" */
1209 	STRLEN len = strend - s;
1210 	s = (char *) bytes_to_utf8((U8 *) s, &len);
1211 	SAVEFREEPV(s);
1212 	strend = s + len;
1213 	flags |= FLAG_DO_UTF8;
1214     }
1215 
1216     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1217 	flags |= FLAG_PARSE_UTF8;
1218 
1219     TEMPSYM_INIT(&sym, pat, patend, flags);
1220 
1221     return unpack_rec(&sym, s, s, strend, NULL );
1222 }
1223 
1224 STATIC I32
1225 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1226 {
1227     dVAR; dSP;
1228     SV *sv;
1229     const I32 start_sp_offset = SP - PL_stack_base;
1230     howlen_t howlen;
1231     I32 checksum = 0;
1232     UV cuv = 0;
1233     NV cdouble = 0.0;
1234     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1235     bool beyond = FALSE;
1236     bool explicit_length;
1237     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1238     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1239 
1240     PERL_ARGS_ASSERT_UNPACK_REC;
1241 
1242     symptr->strbeg = s - strbeg;
1243 
1244     while (next_symbol(symptr)) {
1245 	packprops_t props;
1246 	I32 len;
1247         I32 datumtype = symptr->code;
1248 	/* do first one only unless in list context
1249 	   / is implemented by unpacking the count, then popping it from the
1250 	   stack, so must check that we're not in the middle of a /  */
1251         if ( unpack_only_one
1252 	     && (SP - PL_stack_base == start_sp_offset + 1)
1253 	     && (datumtype != '/') )   /* XXX can this be omitted */
1254             break;
1255 
1256         switch (howlen = symptr->howlen) {
1257 	  case e_star:
1258 	    len = strend - strbeg;	/* long enough */
1259 	    break;
1260 	  default:
1261 	    /* e_no_len and e_number */
1262 	    len = symptr->length;
1263 	    break;
1264         }
1265 
1266         explicit_length = TRUE;
1267       redo_switch:
1268         beyond = s >= strend;
1269 
1270 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1271 	if (props) {
1272 	    /* props nonzero means we can process this letter. */
1273             const long size = props & PACK_SIZE_MASK;
1274             const long howmany = (strend - s) / size;
1275 	    if (len > howmany)
1276 		len = howmany;
1277 
1278 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1279 		if (len && unpack_only_one) len = 1;
1280 		EXTEND(SP, len);
1281 		EXTEND_MORTAL(len);
1282 	    }
1283 	}
1284 
1285 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
1286 	default:
1287 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1288 
1289 	case '%':
1290 	    if (howlen == e_no_len)
1291 		len = 16;		/* len is not specified */
1292 	    checksum = len;
1293 	    cuv = 0;
1294 	    cdouble = 0;
1295 	    continue;
1296 	    break;
1297 	case '(':
1298 	{
1299             tempsym_t savsym = *symptr;
1300             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1301 	    symptr->flags |= group_modifiers;
1302             symptr->patend = savsym.grpend;
1303 	    symptr->previous = &savsym;
1304             symptr->level++;
1305 	    PUTBACK;
1306 	    if (len && unpack_only_one) len = 1;
1307 	    while (len--) {
1308   	        symptr->patptr = savsym.grpbeg;
1309 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
1310 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
1311  	        unpack_rec(symptr, s, strbeg, strend, &s);
1312                 if (s == strend && savsym.howlen == e_star)
1313 		    break; /* No way to continue */
1314 	    }
1315 	    SPAGAIN;
1316             savsym.flags = symptr->flags & ~group_modifiers;
1317             *symptr = savsym;
1318 	    break;
1319 	}
1320 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1321 	case '.' | TYPE_IS_SHRIEKING:
1322 #endif
1323 	case '.': {
1324 	    const char *from;
1325 	    SV *sv;
1326 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1327 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1328 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1329 	    const bool u8 = utf8;
1330 #endif
1331 	    if (howlen == e_star) from = strbeg;
1332 	    else if (len <= 0) from = s;
1333 	    else {
1334 		tempsym_t *group = symptr;
1335 
1336 		while (--len && group) group = group->previous;
1337 		from = group ? strbeg + group->strbeg : strbeg;
1338 	    }
1339 	    sv = from <= s ?
1340 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1341 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1342 	    mXPUSHs(sv);
1343 	    break;
1344 	}
1345 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1346 	case '@' | TYPE_IS_SHRIEKING:
1347 #endif
1348 	case '@':
1349 	    s = strbeg + symptr->strbeg;
1350 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1351 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
1352 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1353 	    if (utf8)
1354 #endif
1355 	    {
1356 		while (len > 0) {
1357 		    if (s >= strend)
1358 			Perl_croak(aTHX_ "'@' outside of string in unpack");
1359 		    s += UTF8SKIP(s);
1360 		    len--;
1361 		}
1362 		if (s > strend)
1363 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1364 	    } else {
1365 		if (strend-s < len)
1366 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
1367 		s += len;
1368 	    }
1369 	    break;
1370  	case 'X' | TYPE_IS_SHRIEKING:
1371  	    if (!len)			/* Avoid division by 0 */
1372  		len = 1;
1373 	    if (utf8) {
1374 		const char *hop, *last;
1375 		I32 l = len;
1376 		hop = last = strbeg;
1377 		while (hop < s) {
1378 		    hop += UTF8SKIP(hop);
1379 		    if (--l == 0) {
1380 			last = hop;
1381 			l = len;
1382 		    }
1383 		}
1384 		if (last > s)
1385 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1386 		s = last;
1387 		break;
1388 	    }
1389 	    len = (s - strbeg) % len;
1390  	    /* FALL THROUGH */
1391 	case 'X':
1392 	    if (utf8) {
1393 		while (len > 0) {
1394 		    if (s <= strbeg)
1395 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1396 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1397 			if (s <= strbeg)
1398 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1399 		    }
1400 		    len--;
1401 		}
1402 	    } else {
1403 		if (len > s - strbeg)
1404 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1405 		s -= len;
1406 	    }
1407 	    break;
1408  	case 'x' | TYPE_IS_SHRIEKING: {
1409             I32 ai32;
1410  	    if (!len)			/* Avoid division by 0 */
1411  		len = 1;
1412 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1413 	    else      ai32 = (s - strbeg)                         % len;
1414 	    if (ai32 == 0) break;
1415 	    len -= ai32;
1416             }
1417  	    /* FALL THROUGH */
1418 	case 'x':
1419 	    if (utf8) {
1420 		while (len>0) {
1421 		    if (s >= strend)
1422 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1423 		    s += UTF8SKIP(s);
1424 		    len--;
1425 		}
1426 	    } else {
1427 		if (len > strend - s)
1428 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1429 		s += len;
1430 	    }
1431 	    break;
1432 	case '/':
1433 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1434             break;
1435 	case 'A':
1436 	case 'Z':
1437 	case 'a':
1438 	    if (checksum) {
1439 		/* Preliminary length estimate is assumed done in 'W' */
1440 		if (len > strend - s) len = strend - s;
1441 		goto W_checksum;
1442 	    }
1443 	    if (utf8) {
1444 		I32 l;
1445 		const char *hop;
1446 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1447 		    if (hop >= strend) {
1448 			if (hop > strend)
1449 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1450 			break;
1451 		    }
1452 		}
1453 		if (hop > strend)
1454 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1455 		len = hop - s;
1456 	    } else if (len > strend - s)
1457 		len = strend - s;
1458 
1459 	    if (datumtype == 'Z') {
1460 		/* 'Z' strips stuff after first null */
1461 		const char *ptr, *end;
1462 		end = s + len;
1463 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1464 		sv = newSVpvn(s, ptr-s);
1465 		if (howlen == e_star) /* exact for 'Z*' */
1466 		    len = ptr-s + (ptr != strend ? 1 : 0);
1467 	    } else if (datumtype == 'A') {
1468 		/* 'A' strips both nulls and spaces */
1469 		const char *ptr;
1470 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1471 		    for (ptr = s+len-1; ptr >= s; ptr--)
1472 			if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1473 			    !is_utf8_space((U8 *) ptr)) break;
1474 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1475 		    else ptr++;
1476 		    if (ptr > s+len)
1477 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1478 		} else {
1479 		    for (ptr = s+len-1; ptr >= s; ptr--)
1480 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1481 		    ptr++;
1482 		}
1483 		sv = newSVpvn(s, ptr-s);
1484 	    } else sv = newSVpvn(s, len);
1485 
1486 	    if (utf8) {
1487 		SvUTF8_on(sv);
1488 		/* Undo any upgrade done due to need_utf8() */
1489 		if (!(symptr->flags & FLAG_WAS_UTF8))
1490 		    sv_utf8_downgrade(sv, 0);
1491 	    }
1492 	    mXPUSHs(sv);
1493 	    s += len;
1494 	    break;
1495 	case 'B':
1496 	case 'b': {
1497 	    char *str;
1498 	    if (howlen == e_star || len > (strend - s) * 8)
1499 		len = (strend - s) * 8;
1500 	    if (checksum) {
1501 		if (utf8)
1502 		    while (len >= 8 && s < strend) {
1503 			cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1504 			len -= 8;
1505 		    }
1506 		else
1507 		    while (len >= 8) {
1508 			cuv += PL_bitcount[*(U8 *)s++];
1509 			len -= 8;
1510 		    }
1511 		if (len && s < strend) {
1512 		    U8 bits;
1513 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1514 		    if (datumtype == 'b')
1515 			while (len-- > 0) {
1516 			    if (bits & 1) cuv++;
1517 			    bits >>= 1;
1518 			}
1519 		    else
1520 			while (len-- > 0) {
1521 			    if (bits & 0x80) cuv++;
1522 			    bits <<= 1;
1523 			}
1524 		}
1525 		break;
1526 	    }
1527 
1528 	    sv = sv_2mortal(newSV(len ? len : 1));
1529 	    SvPOK_on(sv);
1530 	    str = SvPVX(sv);
1531 	    if (datumtype == 'b') {
1532 		U8 bits = 0;
1533 		const I32 ai32 = len;
1534 		for (len = 0; len < ai32; len++) {
1535 		    if (len & 7) bits >>= 1;
1536 		    else if (utf8) {
1537 			if (s >= strend) break;
1538 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1539 		    } else bits = *(U8 *) s++;
1540 		    *str++ = bits & 1 ? '1' : '0';
1541 		}
1542 	    } else {
1543 		U8 bits = 0;
1544 		const I32 ai32 = len;
1545 		for (len = 0; len < ai32; len++) {
1546 		    if (len & 7) bits <<= 1;
1547 		    else if (utf8) {
1548 			if (s >= strend) break;
1549 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1550 		    } else bits = *(U8 *) s++;
1551 		    *str++ = bits & 0x80 ? '1' : '0';
1552 		}
1553 	    }
1554 	    *str = '\0';
1555 	    SvCUR_set(sv, str - SvPVX_const(sv));
1556 	    XPUSHs(sv);
1557 	    break;
1558 	}
1559 	case 'H':
1560 	case 'h': {
1561 	    char *str;
1562 	    /* Preliminary length estimate, acceptable for utf8 too */
1563 	    if (howlen == e_star || len > (strend - s) * 2)
1564 		len = (strend - s) * 2;
1565 	    if (!checksum) {
1566 		sv = sv_2mortal(newSV(len ? len : 1));
1567 		SvPOK_on(sv);
1568 		str = SvPVX(sv);
1569 	    }
1570 	    if (datumtype == 'h') {
1571 		U8 bits = 0;
1572 		I32 ai32 = len;
1573 		for (len = 0; len < ai32; len++) {
1574 		    if (len & 1) bits >>= 4;
1575 		    else if (utf8) {
1576 			if (s >= strend) break;
1577 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1578 		    } else bits = * (U8 *) s++;
1579 		    if (!checksum)
1580 			*str++ = PL_hexdigit[bits & 15];
1581 		}
1582 	    } else {
1583 		U8 bits = 0;
1584 		const I32 ai32 = len;
1585 		for (len = 0; len < ai32; len++) {
1586 		    if (len & 1) bits <<= 4;
1587 		    else if (utf8) {
1588 			if (s >= strend) break;
1589 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1590 		    } else bits = *(U8 *) s++;
1591 		    if (!checksum)
1592 			*str++ = PL_hexdigit[(bits >> 4) & 15];
1593 		}
1594 	    }
1595 	    if (!checksum) {
1596 		*str = '\0';
1597 		SvCUR_set(sv, str - SvPVX_const(sv));
1598 		XPUSHs(sv);
1599 	    }
1600 	    break;
1601 	}
1602 	case 'C':
1603             if (len == 0) {
1604                 if (explicit_length)
1605 		    /* Switch to "character" mode */
1606 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1607 		break;
1608 	    }
1609 	    /* FALL THROUGH */
1610 	case 'c':
1611 	    while (len-- > 0 && s < strend) {
1612 		int aint;
1613 		if (utf8)
1614 		  {
1615 		    STRLEN retlen;
1616 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1617 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1618 		    if (retlen == (STRLEN) -1 || retlen == 0)
1619 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1620 		    s += retlen;
1621 		  }
1622 		else
1623 		  aint = *(U8 *)(s)++;
1624 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1625 		    aint -= 256;
1626 		if (!checksum)
1627 		    mPUSHi(aint);
1628 		else if (checksum > bits_in_uv)
1629 		    cdouble += (NV)aint;
1630 		else
1631 		    cuv += aint;
1632 	    }
1633 	    break;
1634 	case 'W':
1635 	  W_checksum:
1636 	    if (utf8) {
1637 		while (len-- > 0 && s < strend) {
1638 		    STRLEN retlen;
1639 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1640 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1641 		    if (retlen == (STRLEN) -1 || retlen == 0)
1642 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1643 		    s += retlen;
1644 		    if (!checksum)
1645 			mPUSHu(val);
1646 		    else if (checksum > bits_in_uv)
1647 			cdouble += (NV) val;
1648 		    else
1649 			cuv += val;
1650 		}
1651 	    } else if (!checksum)
1652 		while (len-- > 0) {
1653 		    const U8 ch = *(U8 *) s++;
1654 		    mPUSHu(ch);
1655 	    }
1656 	    else if (checksum > bits_in_uv)
1657 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1658 	    else
1659 		while (len-- > 0) cuv += *(U8 *) s++;
1660 	    break;
1661 	case 'U':
1662 	    if (len == 0) {
1663                 if (explicit_length) {
1664 		    /* Switch to "bytes in UTF-8" mode */
1665 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1666 		    else
1667 			/* Should be impossible due to the need_utf8() test */
1668 			Perl_croak(aTHX_ "U0 mode on a byte string");
1669 		}
1670 		break;
1671 	    }
1672 	    if (len > strend - s) len = strend - s;
1673 	    if (!checksum) {
1674 		if (len && unpack_only_one) len = 1;
1675 		EXTEND(SP, len);
1676 		EXTEND_MORTAL(len);
1677 	    }
1678 	    while (len-- > 0 && s < strend) {
1679 		STRLEN retlen;
1680 		UV auv;
1681 		if (utf8) {
1682 		    U8 result[UTF8_MAXLEN];
1683 		    const char *ptr = s;
1684 		    STRLEN len;
1685 		    /* Bug: warns about bad utf8 even if we are short on bytes
1686 		       and will break out of the loop */
1687 		    if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1688 				      'U'))
1689 			break;
1690 		    len = UTF8SKIP(result);
1691 		    if (!uni_to_bytes(aTHX_ &ptr, strend,
1692 				      (char *) &result[1], len-1, 'U')) break;
1693 		    auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1694 		    s = ptr;
1695 		} else {
1696 		    auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1697 		    if (retlen == (STRLEN) -1 || retlen == 0)
1698 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1699 		    s += retlen;
1700 		}
1701 		if (!checksum)
1702 		    mPUSHu(auv);
1703 		else if (checksum > bits_in_uv)
1704 		    cdouble += (NV) auv;
1705 		else
1706 		    cuv += auv;
1707 	    }
1708 	    break;
1709 	case 's' | TYPE_IS_SHRIEKING:
1710 #if SHORTSIZE != SIZE16
1711 	    while (len-- > 0) {
1712 		short ashort;
1713 		SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1714 		DO_BO_UNPACK(ashort, s);
1715 		if (!checksum)
1716 		    mPUSHi(ashort);
1717 		else if (checksum > bits_in_uv)
1718 		    cdouble += (NV)ashort;
1719 		else
1720 		    cuv += ashort;
1721 	    }
1722 	    break;
1723 #else
1724 	    /* Fallthrough! */
1725 #endif
1726 	case 's':
1727 	    while (len-- > 0) {
1728 		I16 ai16;
1729 
1730 #if U16SIZE > SIZE16
1731 		ai16 = 0;
1732 #endif
1733 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1734 		DO_BO_UNPACK(ai16, 16);
1735 #if U16SIZE > SIZE16
1736 		if (ai16 > 32767)
1737 		    ai16 -= 65536;
1738 #endif
1739 		if (!checksum)
1740 		    mPUSHi(ai16);
1741 		else if (checksum > bits_in_uv)
1742 		    cdouble += (NV)ai16;
1743 		else
1744 		    cuv += ai16;
1745 	    }
1746 	    break;
1747 	case 'S' | TYPE_IS_SHRIEKING:
1748 #if SHORTSIZE != SIZE16
1749 	    while (len-- > 0) {
1750 		unsigned short aushort;
1751 		SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1752 		DO_BO_UNPACK(aushort, s);
1753 		if (!checksum)
1754 		    mPUSHu(aushort);
1755 		else if (checksum > bits_in_uv)
1756 		    cdouble += (NV)aushort;
1757 		else
1758 		    cuv += aushort;
1759 	    }
1760 	    break;
1761 #else
1762             /* Fallhrough! */
1763 #endif
1764 	case 'v':
1765 	case 'n':
1766 	case 'S':
1767 	    while (len-- > 0) {
1768 		U16 au16;
1769 #if U16SIZE > SIZE16
1770 		au16 = 0;
1771 #endif
1772 		SHIFT16(utf8, s, strend, &au16, datumtype);
1773 		DO_BO_UNPACK(au16, 16);
1774 #ifdef HAS_NTOHS
1775 		if (datumtype == 'n')
1776 		    au16 = PerlSock_ntohs(au16);
1777 #endif
1778 #ifdef HAS_VTOHS
1779 		if (datumtype == 'v')
1780 		    au16 = vtohs(au16);
1781 #endif
1782 		if (!checksum)
1783 		    mPUSHu(au16);
1784 		else if (checksum > bits_in_uv)
1785 		    cdouble += (NV) au16;
1786 		else
1787 		    cuv += au16;
1788 	    }
1789 	    break;
1790 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1791 	case 'v' | TYPE_IS_SHRIEKING:
1792 	case 'n' | TYPE_IS_SHRIEKING:
1793 	    while (len-- > 0) {
1794 		I16 ai16;
1795 # if U16SIZE > SIZE16
1796 		ai16 = 0;
1797 # endif
1798 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1799 # ifdef HAS_NTOHS
1800 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1801 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1802 # endif /* HAS_NTOHS */
1803 # ifdef HAS_VTOHS
1804 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1805 		    ai16 = (I16) vtohs((U16) ai16);
1806 # endif /* HAS_VTOHS */
1807 		if (!checksum)
1808 		    mPUSHi(ai16);
1809 		else if (checksum > bits_in_uv)
1810 		    cdouble += (NV) ai16;
1811 		else
1812 		    cuv += ai16;
1813 	    }
1814 	    break;
1815 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1816 	case 'i':
1817 	case 'i' | TYPE_IS_SHRIEKING:
1818 	    while (len-- > 0) {
1819 		int aint;
1820 		SHIFT_VAR(utf8, s, strend, aint, datumtype);
1821 		DO_BO_UNPACK(aint, i);
1822 		if (!checksum)
1823 		    mPUSHi(aint);
1824 		else if (checksum > bits_in_uv)
1825 		    cdouble += (NV)aint;
1826 		else
1827 		    cuv += aint;
1828 	    }
1829 	    break;
1830 	case 'I':
1831 	case 'I' | TYPE_IS_SHRIEKING:
1832 	    while (len-- > 0) {
1833 		unsigned int auint;
1834 		SHIFT_VAR(utf8, s, strend, auint, datumtype);
1835 		DO_BO_UNPACK(auint, i);
1836 		if (!checksum)
1837 		    mPUSHu(auint);
1838 		else if (checksum > bits_in_uv)
1839 		    cdouble += (NV)auint;
1840 		else
1841 		    cuv += auint;
1842 	    }
1843 	    break;
1844 	case 'j':
1845 	    while (len-- > 0) {
1846 		IV aiv;
1847 		SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1848 #if IVSIZE == INTSIZE
1849 		DO_BO_UNPACK(aiv, i);
1850 #elif IVSIZE == LONGSIZE
1851 		DO_BO_UNPACK(aiv, l);
1852 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1853 		DO_BO_UNPACK(aiv, 64);
1854 #else
1855 		Perl_croak(aTHX_ "'j' not supported on this platform");
1856 #endif
1857 		if (!checksum)
1858 		    mPUSHi(aiv);
1859 		else if (checksum > bits_in_uv)
1860 		    cdouble += (NV)aiv;
1861 		else
1862 		    cuv += aiv;
1863 	    }
1864 	    break;
1865 	case 'J':
1866 	    while (len-- > 0) {
1867 		UV auv;
1868 		SHIFT_VAR(utf8, s, strend, auv, datumtype);
1869 #if IVSIZE == INTSIZE
1870 		DO_BO_UNPACK(auv, i);
1871 #elif IVSIZE == LONGSIZE
1872 		DO_BO_UNPACK(auv, l);
1873 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1874 		DO_BO_UNPACK(auv, 64);
1875 #else
1876 		Perl_croak(aTHX_ "'J' not supported on this platform");
1877 #endif
1878 		if (!checksum)
1879 		    mPUSHu(auv);
1880 		else if (checksum > bits_in_uv)
1881 		    cdouble += (NV)auv;
1882 		else
1883 		    cuv += auv;
1884 	    }
1885 	    break;
1886 	case 'l' | TYPE_IS_SHRIEKING:
1887 #if LONGSIZE != SIZE32
1888 	    while (len-- > 0) {
1889 		long along;
1890 		SHIFT_VAR(utf8, s, strend, along, datumtype);
1891 		DO_BO_UNPACK(along, l);
1892 		if (!checksum)
1893 		    mPUSHi(along);
1894 		else if (checksum > bits_in_uv)
1895 		    cdouble += (NV)along;
1896 		else
1897 		    cuv += along;
1898 	    }
1899 	    break;
1900 #else
1901 	    /* Fallthrough! */
1902 #endif
1903 	case 'l':
1904 	    while (len-- > 0) {
1905 		I32 ai32;
1906 #if U32SIZE > SIZE32
1907 		ai32 = 0;
1908 #endif
1909 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1910 		DO_BO_UNPACK(ai32, 32);
1911 #if U32SIZE > SIZE32
1912 		if (ai32 > 2147483647) ai32 -= 4294967296;
1913 #endif
1914 		if (!checksum)
1915 		    mPUSHi(ai32);
1916 		else if (checksum > bits_in_uv)
1917 		    cdouble += (NV)ai32;
1918 		else
1919 		    cuv += ai32;
1920 	    }
1921 	    break;
1922 	case 'L' | TYPE_IS_SHRIEKING:
1923 #if LONGSIZE != SIZE32
1924 	    while (len-- > 0) {
1925 		unsigned long aulong;
1926 		SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1927 		DO_BO_UNPACK(aulong, l);
1928 		if (!checksum)
1929 		    mPUSHu(aulong);
1930 		else if (checksum > bits_in_uv)
1931 		    cdouble += (NV)aulong;
1932 		else
1933 		    cuv += aulong;
1934 	    }
1935 	    break;
1936 #else
1937             /* Fall through! */
1938 #endif
1939 	case 'V':
1940 	case 'N':
1941 	case 'L':
1942 	    while (len-- > 0) {
1943 		U32 au32;
1944 #if U32SIZE > SIZE32
1945 		au32 = 0;
1946 #endif
1947 		SHIFT32(utf8, s, strend, &au32, datumtype);
1948 		DO_BO_UNPACK(au32, 32);
1949 #ifdef HAS_NTOHL
1950 		if (datumtype == 'N')
1951 		    au32 = PerlSock_ntohl(au32);
1952 #endif
1953 #ifdef HAS_VTOHL
1954 		if (datumtype == 'V')
1955 		    au32 = vtohl(au32);
1956 #endif
1957 		if (!checksum)
1958 		    mPUSHu(au32);
1959 		else if (checksum > bits_in_uv)
1960 		    cdouble += (NV)au32;
1961 		else
1962 		    cuv += au32;
1963 	    }
1964 	    break;
1965 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1966 	case 'V' | TYPE_IS_SHRIEKING:
1967 	case 'N' | TYPE_IS_SHRIEKING:
1968 	    while (len-- > 0) {
1969 		I32 ai32;
1970 # if U32SIZE > SIZE32
1971 		ai32 = 0;
1972 # endif
1973 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1974 # ifdef HAS_NTOHL
1975 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1976 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1977 # endif
1978 # ifdef HAS_VTOHL
1979 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1980 		    ai32 = (I32)vtohl((U32)ai32);
1981 # endif
1982 		if (!checksum)
1983 		    mPUSHi(ai32);
1984 		else if (checksum > bits_in_uv)
1985 		    cdouble += (NV)ai32;
1986 		else
1987 		    cuv += ai32;
1988 	    }
1989 	    break;
1990 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1991 	case 'p':
1992 	    while (len-- > 0) {
1993 		const char *aptr;
1994 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1995 		DO_BO_UNPACK_PC(aptr);
1996 		/* newSVpv generates undef if aptr is NULL */
1997 		mPUSHs(newSVpv(aptr, 0));
1998 	    }
1999 	    break;
2000 	case 'w':
2001 	    {
2002 		UV auv = 0;
2003 		U32 bytes = 0;
2004 
2005 		while (len > 0 && s < strend) {
2006 		    U8 ch;
2007 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2008 		    auv = (auv << 7) | (ch & 0x7f);
2009 		    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
2010 		    if (ch < 0x80) {
2011 			bytes = 0;
2012 			mPUSHu(auv);
2013 			len--;
2014 			auv = 0;
2015 			continue;
2016 		    }
2017 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
2018 			const char *t;
2019 
2020 			sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2021 			while (s < strend) {
2022 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2023 			    sv = mul128(sv, (U8)(ch & 0x7f));
2024 			    if (!(ch & 0x80)) {
2025 				bytes = 0;
2026 				break;
2027 			    }
2028 			}
2029 			t = SvPV_nolen_const(sv);
2030 			while (*t == '0')
2031 			    t++;
2032 			sv_chop(sv, t);
2033 			mPUSHs(sv);
2034 			len--;
2035 			auv = 0;
2036 		    }
2037 		}
2038 		if ((s >= strend) && bytes)
2039 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2040 	    }
2041 	    break;
2042 	case 'P':
2043 	    if (symptr->howlen == e_star)
2044 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2045 	    EXTEND(SP, 1);
2046 	    if (s + sizeof(char*) <= strend) {
2047 		char *aptr;
2048 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2049 		DO_BO_UNPACK_PC(aptr);
2050 		/* newSVpvn generates undef if aptr is NULL */
2051 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2052 	    }
2053 	    break;
2054 #ifdef HAS_QUAD
2055 	case 'q':
2056 	    while (len-- > 0) {
2057 		Quad_t aquad;
2058 		SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2059 		DO_BO_UNPACK(aquad, 64);
2060 		if (!checksum)
2061                     mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2062 			   newSViv((IV)aquad) : newSVnv((NV)aquad));
2063 		else if (checksum > bits_in_uv)
2064 		    cdouble += (NV)aquad;
2065 		else
2066 		    cuv += aquad;
2067 	    }
2068 	    break;
2069 	case 'Q':
2070 	    while (len-- > 0) {
2071 		Uquad_t auquad;
2072 		SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2073 		DO_BO_UNPACK(auquad, 64);
2074 		if (!checksum)
2075 		    mPUSHs(auquad <= UV_MAX ?
2076 			   newSVuv((UV)auquad) : newSVnv((NV)auquad));
2077 		else if (checksum > bits_in_uv)
2078 		    cdouble += (NV)auquad;
2079 		else
2080 		    cuv += auquad;
2081 	    }
2082 	    break;
2083 #endif /* HAS_QUAD */
2084 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
2085 	case 'f':
2086 	    while (len-- > 0) {
2087 		float afloat;
2088 		SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2089 		DO_BO_UNPACK_N(afloat, float);
2090 		if (!checksum)
2091 		    mPUSHn(afloat);
2092 		else
2093 		    cdouble += afloat;
2094 	    }
2095 	    break;
2096 	case 'd':
2097 	    while (len-- > 0) {
2098 		double adouble;
2099 		SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2100 		DO_BO_UNPACK_N(adouble, double);
2101 		if (!checksum)
2102 		    mPUSHn(adouble);
2103 		else
2104 		    cdouble += adouble;
2105 	    }
2106 	    break;
2107 	case 'F':
2108 	    while (len-- > 0) {
2109 		NV_bytes anv;
2110 		SHIFT_BYTES(utf8, s, strend, anv.bytes, sizeof(anv.bytes), datumtype);
2111 		DO_BO_UNPACK_N(anv.nv, NV);
2112 		if (!checksum)
2113 		    mPUSHn(anv.nv);
2114 		else
2115 		    cdouble += anv.nv;
2116 	    }
2117 	    break;
2118 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2119 	case 'D':
2120 	    while (len-- > 0) {
2121 		ld_bytes aldouble;
2122 		SHIFT_BYTES(utf8, s, strend, aldouble.bytes, sizeof(aldouble.bytes), datumtype);
2123 		DO_BO_UNPACK_N(aldouble.ld, long double);
2124 		if (!checksum)
2125 		    mPUSHn(aldouble.ld);
2126 		else
2127 		    cdouble += aldouble.ld;
2128 	    }
2129 	    break;
2130 #endif
2131 	case 'u':
2132 	    if (!checksum) {
2133                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2134 		sv = sv_2mortal(newSV(l));
2135 		if (l) SvPOK_on(sv);
2136 	    }
2137 	    if (utf8) {
2138 		while (next_uni_uu(aTHX_ &s, strend, &len)) {
2139 		    I32 a, b, c, d;
2140 		    char hunk[3];
2141 
2142 		    while (len > 0) {
2143 			next_uni_uu(aTHX_ &s, strend, &a);
2144 			next_uni_uu(aTHX_ &s, strend, &b);
2145 			next_uni_uu(aTHX_ &s, strend, &c);
2146 			next_uni_uu(aTHX_ &s, strend, &d);
2147 			hunk[0] = (char)((a << 2) | (b >> 4));
2148 			hunk[1] = (char)((b << 4) | (c >> 2));
2149 			hunk[2] = (char)((c << 6) | d);
2150 			if (!checksum)
2151 			    sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2152 			len -= 3;
2153 		    }
2154 		    if (s < strend) {
2155 			if (*s == '\n') {
2156                             s++;
2157                         }
2158 			else {
2159 			    /* possible checksum byte */
2160 			    const char *skip = s+UTF8SKIP(s);
2161 			    if (skip < strend && *skip == '\n')
2162                                 s = skip+1;
2163 			}
2164 		    }
2165 		}
2166 	    } else {
2167 		while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2168 		    I32 a, b, c, d;
2169 		    char hunk[3];
2170 
2171 		    len = PL_uudmap[*(U8*)s++] & 077;
2172 		    while (len > 0) {
2173 			if (s < strend && ISUUCHAR(*s))
2174 			    a = PL_uudmap[*(U8*)s++] & 077;
2175 			else
2176 			    a = 0;
2177 			if (s < strend && ISUUCHAR(*s))
2178 			    b = PL_uudmap[*(U8*)s++] & 077;
2179 			else
2180 			    b = 0;
2181 			if (s < strend && ISUUCHAR(*s))
2182 			    c = PL_uudmap[*(U8*)s++] & 077;
2183 			else
2184 			    c = 0;
2185 			if (s < strend && ISUUCHAR(*s))
2186 			    d = PL_uudmap[*(U8*)s++] & 077;
2187 			else
2188 			    d = 0;
2189 			hunk[0] = (char)((a << 2) | (b >> 4));
2190 			hunk[1] = (char)((b << 4) | (c >> 2));
2191 			hunk[2] = (char)((c << 6) | d);
2192 			if (!checksum)
2193 			    sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2194 			len -= 3;
2195 		    }
2196 		    if (*s == '\n')
2197 			s++;
2198 		    else	/* possible checksum byte */
2199 			if (s + 1 < strend && s[1] == '\n')
2200 			    s += 2;
2201 		}
2202 	    }
2203 	    if (!checksum)
2204 		XPUSHs(sv);
2205 	    break;
2206 	}
2207 
2208 	if (checksum) {
2209 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2210 	      (checksum > bits_in_uv &&
2211 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2212 		NV trouble, anv;
2213 
2214                 anv = (NV) (1 << (checksum & 15));
2215 		while (checksum >= 16) {
2216 		    checksum -= 16;
2217 		    anv *= 65536.0;
2218 		}
2219 		while (cdouble < 0.0)
2220 		    cdouble += anv;
2221 		cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2222 		sv = newSVnv(cdouble);
2223 	    }
2224 	    else {
2225 		if (checksum < bits_in_uv) {
2226 		    UV mask = ((UV)1 << checksum) - 1;
2227 		    cuv &= mask;
2228 		}
2229 		sv = newSVuv(cuv);
2230 	    }
2231 	    mXPUSHs(sv);
2232 	    checksum = 0;
2233 	}
2234 
2235         if (symptr->flags & FLAG_SLASH){
2236             if (SP - PL_stack_base - start_sp_offset <= 0)
2237                 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2238             if( next_symbol(symptr) ){
2239               if( symptr->howlen == e_number )
2240 		Perl_croak(aTHX_ "Count after length/code in unpack" );
2241               if( beyond ){
2242          	/* ...end of char buffer then no decent length available */
2243 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
2244               } else {
2245          	/* take top of stack (hope it's numeric) */
2246                 len = POPi;
2247                 if( len < 0 )
2248                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
2249               }
2250             } else {
2251 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2252             }
2253             datumtype = symptr->code;
2254             explicit_length = FALSE;
2255 	    goto redo_switch;
2256         }
2257     }
2258 
2259     if (new_s)
2260 	*new_s = s;
2261     PUTBACK;
2262     return SP - PL_stack_base - start_sp_offset;
2263 }
2264 
2265 PP(pp_unpack)
2266 {
2267     dVAR;
2268     dSP;
2269     dPOPPOPssrl;
2270     I32 gimme = GIMME_V;
2271     STRLEN llen;
2272     STRLEN rlen;
2273     const char *pat = SvPV_const(left,  llen);
2274     const char *s   = SvPV_const(right, rlen);
2275     const char *strend = s + rlen;
2276     const char *patend = pat + llen;
2277     I32 cnt;
2278 
2279     PUTBACK;
2280     cnt = unpackstring(pat, patend, s, strend,
2281 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2282 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2283 
2284     SPAGAIN;
2285     if ( !cnt && gimme == G_SCALAR )
2286        PUSHs(&PL_sv_undef);
2287     RETURN;
2288 }
2289 
2290 STATIC U8 *
2291 doencodes(U8 *h, const char *s, I32 len)
2292 {
2293     *h++ = PL_uuemap[len];
2294     while (len > 2) {
2295 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2296 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2297 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2298 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
2299 	s += 3;
2300 	len -= 3;
2301     }
2302     if (len > 0) {
2303         const char r = (len > 1 ? s[1] : '\0');
2304 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2305 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2306 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2307 	*h++ = PL_uuemap[0];
2308     }
2309     *h++ = '\n';
2310     return h;
2311 }
2312 
2313 STATIC SV *
2314 S_is_an_int(pTHX_ const char *s, STRLEN l)
2315 {
2316   SV *result = newSVpvn(s, l);
2317   char *const result_c = SvPV_nolen(result);	/* convenience */
2318   char *out = result_c;
2319   bool skip = 1;
2320   bool ignore = 0;
2321 
2322   PERL_ARGS_ASSERT_IS_AN_INT;
2323 
2324   while (*s) {
2325     switch (*s) {
2326     case ' ':
2327       break;
2328     case '+':
2329       if (!skip) {
2330 	SvREFCNT_dec(result);
2331 	return (NULL);
2332       }
2333       break;
2334     case '0':
2335     case '1':
2336     case '2':
2337     case '3':
2338     case '4':
2339     case '5':
2340     case '6':
2341     case '7':
2342     case '8':
2343     case '9':
2344       skip = 0;
2345       if (!ignore) {
2346 	*(out++) = *s;
2347       }
2348       break;
2349     case '.':
2350       ignore = 1;
2351       break;
2352     default:
2353       SvREFCNT_dec(result);
2354       return (NULL);
2355     }
2356     s++;
2357   }
2358   *(out++) = '\0';
2359   SvCUR_set(result, out - result_c);
2360   return (result);
2361 }
2362 
2363 /* pnum must be '\0' terminated */
2364 STATIC int
2365 S_div128(pTHX_ SV *pnum, bool *done)
2366 {
2367     STRLEN len;
2368     char * const s = SvPV(pnum, len);
2369     char *t = s;
2370     int m = 0;
2371 
2372     PERL_ARGS_ASSERT_DIV128;
2373 
2374     *done = 1;
2375     while (*t) {
2376 	const int i = m * 10 + (*t - '0');
2377 	const int r = (i >> 7); /* r < 10 */
2378 	m = i & 0x7F;
2379 	if (r) {
2380 	    *done = 0;
2381 	}
2382 	*(t++) = '0' + r;
2383     }
2384     *(t++) = '\0';
2385     SvCUR_set(pnum, (STRLEN) (t - s));
2386     return (m);
2387 }
2388 
2389 /*
2390 =for apidoc packlist
2391 
2392 The engine implementing pack() Perl function.
2393 
2394 =cut
2395 */
2396 
2397 void
2398 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2399 {
2400     dVAR;
2401     tempsym_t sym;
2402 
2403     PERL_ARGS_ASSERT_PACKLIST;
2404 
2405     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2406 
2407     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2408        Also make sure any UTF8 flag is loaded */
2409     SvPV_force_nolen(cat);
2410     if (DO_UTF8(cat))
2411 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2412 
2413     (void)pack_rec( cat, &sym, beglist, endlist );
2414 }
2415 
2416 /* like sv_utf8_upgrade, but also repoint the group start markers */
2417 STATIC void
2418 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2419     STRLEN len;
2420     tempsym_t *group;
2421     const char *from_ptr, *from_start, *from_end, **marks, **m;
2422     char *to_start, *to_ptr;
2423 
2424     if (SvUTF8(sv)) return;
2425 
2426     from_start = SvPVX_const(sv);
2427     from_end = from_start + SvCUR(sv);
2428     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2429 	if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2430     if (from_ptr == from_end) {
2431 	/* Simple case: no character needs to be changed */
2432 	SvUTF8_on(sv);
2433 	return;
2434     }
2435 
2436     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2437     Newx(to_start, len, char);
2438     Copy(from_start, to_start, from_ptr-from_start, char);
2439     to_ptr = to_start + (from_ptr-from_start);
2440 
2441     Newx(marks, sym_ptr->level+2, const char *);
2442     for (group=sym_ptr; group; group = group->previous)
2443 	marks[group->level] = from_start + group->strbeg;
2444     marks[sym_ptr->level+1] = from_end+1;
2445     for (m = marks; *m < from_ptr; m++)
2446 	*m = to_start + (*m-from_start);
2447 
2448     for (;from_ptr < from_end; from_ptr++) {
2449 	while (*m == from_ptr) *m++ = to_ptr;
2450 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2451     }
2452     *to_ptr = 0;
2453 
2454     while (*m == from_ptr) *m++ = to_ptr;
2455     if (m != marks + sym_ptr->level+1) {
2456 	Safefree(marks);
2457 	Safefree(to_start);
2458 	Perl_croak(aTHX_ "panic: marks beyond string end");
2459     }
2460     for (group=sym_ptr; group; group = group->previous)
2461 	group->strbeg = marks[group->level] - to_start;
2462     Safefree(marks);
2463 
2464     if (SvOOK(sv)) {
2465 	if (SvIVX(sv)) {
2466 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2467 	    from_start -= SvIVX(sv);
2468 	    SvIV_set(sv, 0);
2469 	}
2470 	SvFLAGS(sv) &= ~SVf_OOK;
2471     }
2472     if (SvLEN(sv) != 0)
2473 	Safefree(from_start);
2474     SvPV_set(sv, to_start);
2475     SvCUR_set(sv, to_ptr - to_start);
2476     SvLEN_set(sv, len);
2477     SvUTF8_on(sv);
2478 }
2479 
2480 /* Exponential string grower. Makes string extension effectively O(n)
2481    needed says how many extra bytes we need (not counting the final '\0')
2482    Only grows the string if there is an actual lack of space
2483 */
2484 STATIC char *
2485 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2486     const STRLEN cur = SvCUR(sv);
2487     const STRLEN len = SvLEN(sv);
2488     STRLEN extend;
2489 
2490     PERL_ARGS_ASSERT_SV_EXP_GROW;
2491 
2492     if (len - cur > needed) return SvPVX(sv);
2493     extend = needed > len ? needed : len;
2494     return SvGROW(sv, len+extend+1);
2495 }
2496 
2497 STATIC
2498 SV **
2499 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2500 {
2501     dVAR;
2502     tempsym_t lookahead;
2503     I32 items  = endlist - beglist;
2504     bool found = next_symbol(symptr);
2505     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2506     bool warn_utf8 = ckWARN(WARN_UTF8);
2507 
2508     PERL_ARGS_ASSERT_PACK_REC;
2509 
2510     if (symptr->level == 0 && found && symptr->code == 'U') {
2511 	marked_upgrade(aTHX_ cat, symptr);
2512 	symptr->flags |= FLAG_DO_UTF8;
2513 	utf8 = 0;
2514     }
2515     symptr->strbeg = SvCUR(cat);
2516 
2517     while (found) {
2518 	SV *fromstr;
2519 	STRLEN fromlen;
2520 	I32 len;
2521 	SV *lengthcode = NULL;
2522         I32 datumtype = symptr->code;
2523         howlen_t howlen = symptr->howlen;
2524 	char *start = SvPVX(cat);
2525 	char *cur   = start + SvCUR(cat);
2526 
2527 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2528 
2529         switch (howlen) {
2530 	  case e_star:
2531 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2532 		0 : items;
2533 	    break;
2534 	  default:
2535 	    /* e_no_len and e_number */
2536 	    len = symptr->length;
2537 	    break;
2538         }
2539 
2540 	if (len) {
2541 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2542 
2543 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2544 		/* We can process this letter. */
2545 		STRLEN size = props & PACK_SIZE_MASK;
2546 		GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2547 	    }
2548         }
2549 
2550         /* Look ahead for next symbol. Do we have code/code? */
2551         lookahead = *symptr;
2552         found = next_symbol(&lookahead);
2553 	if (symptr->flags & FLAG_SLASH) {
2554 	    IV count;
2555 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2556 	    if (strchr("aAZ", lookahead.code)) {
2557 		if (lookahead.howlen == e_number) count = lookahead.length;
2558 		else {
2559 		    if (items > 0) {
2560 			if (SvGAMAGIC(*beglist)) {
2561 			    /* Avoid reading the active data more than once
2562 			       by copying it to a temporary.  */
2563 			    STRLEN len;
2564 			    const char *const pv = SvPV_const(*beglist, len);
2565 			    SV *const temp
2566 				= newSVpvn_flags(pv, len,
2567 						 SVs_TEMP | SvUTF8(*beglist));
2568 			    *beglist = temp;
2569 			}
2570 			count = DO_UTF8(*beglist) ?
2571 			    sv_len_utf8(*beglist) : sv_len(*beglist);
2572 		    }
2573 		    else count = 0;
2574 		    if (lookahead.code == 'Z') count++;
2575 		}
2576 	    } else {
2577 		if (lookahead.howlen == e_number && lookahead.length < items)
2578 		    count = lookahead.length;
2579 		else count = items;
2580 	    }
2581 	    lookahead.howlen = e_number;
2582 	    lookahead.length = count;
2583 	    lengthcode = sv_2mortal(newSViv(count));
2584 	}
2585 
2586 	/* Code inside the switch must take care to properly update
2587 	   cat (CUR length and '\0' termination) if it updated *cur and
2588 	   doesn't simply leave using break */
2589 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
2590 	default:
2591 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2592 		       (int) TYPE_NO_MODIFIERS(datumtype));
2593 	case '%':
2594 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2595 	{
2596 	    char *from;
2597 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2598 	case '.' | TYPE_IS_SHRIEKING:
2599 #endif
2600 	case '.':
2601 	    if (howlen == e_star) from = start;
2602 	    else if (len == 0) from = cur;
2603 	    else {
2604 		tempsym_t *group = symptr;
2605 
2606 		while (--len && group) group = group->previous;
2607 		from = group ? start + group->strbeg : start;
2608 	    }
2609 	    fromstr = NEXTFROM;
2610 	    len = SvIV(fromstr);
2611 	    goto resize;
2612 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2613 	case '@' | TYPE_IS_SHRIEKING:
2614 #endif
2615 	case '@':
2616 	    from = start + symptr->strbeg;
2617 	  resize:
2618 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2619 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2620 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2621 	    if (utf8)
2622 #endif
2623 		if (len >= 0) {
2624 		    while (len && from < cur) {
2625 			from += UTF8SKIP(from);
2626 			len--;
2627 		    }
2628 		    if (from > cur)
2629 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2630 		    if (len) {
2631 			/* Here we know from == cur */
2632 		      grow:
2633 			GROWING(0, cat, start, cur, len);
2634 			Zero(cur, len, char);
2635 			cur += len;
2636 		    } else if (from < cur) {
2637 			len = cur - from;
2638 			goto shrink;
2639 		    } else goto no_change;
2640 		} else {
2641 		    cur = from;
2642 		    len = -len;
2643 		    goto utf8_shrink;
2644 		}
2645 	    else {
2646 		len -= cur - from;
2647 		if (len > 0) goto grow;
2648 		if (len == 0) goto no_change;
2649 		len = -len;
2650 		goto shrink;
2651 	    }
2652 	    break;
2653 	}
2654 	case '(': {
2655             tempsym_t savsym = *symptr;
2656 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2657 	    symptr->flags |= group_modifiers;
2658             symptr->patend = savsym.grpend;
2659             symptr->level++;
2660 	    symptr->previous = &lookahead;
2661 	    while (len--) {
2662 		U32 was_utf8;
2663 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2664 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2665 		was_utf8 = SvUTF8(cat);
2666   	        symptr->patptr = savsym.grpbeg;
2667 		beglist = pack_rec(cat, symptr, beglist, endlist);
2668 		if (SvUTF8(cat) != was_utf8)
2669 		    /* This had better be an upgrade while in utf8==0 mode */
2670 		    utf8 = 1;
2671 
2672 		if (savsym.howlen == e_star && beglist == endlist)
2673 		    break;		/* No way to continue */
2674 	    }
2675 	    items = endlist - beglist;
2676 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2677 	    goto no_change;
2678 	}
2679 	case 'X' | TYPE_IS_SHRIEKING:
2680 	    if (!len)			/* Avoid division by 0 */
2681 		len = 1;
2682 	    if (utf8) {
2683 		char *hop, *last;
2684 		I32 l = len;
2685 		hop = last = start;
2686 		while (hop < cur) {
2687 		    hop += UTF8SKIP(hop);
2688 		    if (--l == 0) {
2689 			last = hop;
2690 			l = len;
2691 		    }
2692 		}
2693 		if (last > cur)
2694 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2695 		cur = last;
2696 		break;
2697 	    }
2698 	    len = (cur-start) % len;
2699 	    /* FALL THROUGH */
2700 	case 'X':
2701 	    if (utf8) {
2702 		if (len < 1) goto no_change;
2703 	      utf8_shrink:
2704 		while (len > 0) {
2705 		    if (cur <= start)
2706 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2707 				   (int) TYPE_NO_MODIFIERS(datumtype));
2708 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2709 			if (cur <= start)
2710 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2711 				       (int) TYPE_NO_MODIFIERS(datumtype));
2712 		    }
2713 		    len--;
2714 		}
2715 	    } else {
2716 	      shrink:
2717 		if (cur - start < len)
2718 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2719 			       (int) TYPE_NO_MODIFIERS(datumtype));
2720 		cur -= len;
2721 	    }
2722 	    if (cur < start+symptr->strbeg) {
2723 		/* Make sure group starts don't point into the void */
2724 		tempsym_t *group;
2725 		const STRLEN length = cur-start;
2726 		for (group = symptr;
2727 		     group && length < group->strbeg;
2728 		     group = group->previous) group->strbeg = length;
2729 		lookahead.strbeg = length;
2730 	    }
2731 	    break;
2732 	case 'x' | TYPE_IS_SHRIEKING: {
2733 	    I32 ai32;
2734 	    if (!len)			/* Avoid division by 0 */
2735 		len = 1;
2736 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2737 	    else      ai32 = (cur - start) % len;
2738 	    if (ai32 == 0) goto no_change;
2739 	    len -= ai32;
2740 	}
2741 	/* FALL THROUGH */
2742 	case 'x':
2743 	    goto grow;
2744 	case 'A':
2745 	case 'Z':
2746 	case 'a': {
2747 	    const char *aptr;
2748 
2749 	    fromstr = NEXTFROM;
2750 	    aptr = SvPV_const(fromstr, fromlen);
2751 	    if (DO_UTF8(fromstr)) {
2752                 const char *end, *s;
2753 
2754 		if (!utf8 && !SvUTF8(cat)) {
2755 		    marked_upgrade(aTHX_ cat, symptr);
2756 		    lookahead.flags |= FLAG_DO_UTF8;
2757 		    lookahead.strbeg = symptr->strbeg;
2758 		    utf8 = 1;
2759 		    start = SvPVX(cat);
2760 		    cur = start + SvCUR(cat);
2761 		}
2762 		if (howlen == e_star) {
2763 		    if (utf8) goto string_copy;
2764 		    len = fromlen+1;
2765 		}
2766 		s = aptr;
2767 		end = aptr + fromlen;
2768 		fromlen = datumtype == 'Z' ? len-1 : len;
2769 		while ((I32) fromlen > 0 && s < end) {
2770 		    s += UTF8SKIP(s);
2771 		    fromlen--;
2772 		}
2773 		if (s > end)
2774 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2775 		if (utf8) {
2776 		    len = fromlen;
2777 		    if (datumtype == 'Z') len++;
2778 		    fromlen = s-aptr;
2779 		    len += fromlen;
2780 
2781 		    goto string_copy;
2782 		}
2783 		fromlen = len - fromlen;
2784 		if (datumtype == 'Z') fromlen--;
2785 		if (howlen == e_star) {
2786 		    len = fromlen;
2787 		    if (datumtype == 'Z') len++;
2788 		}
2789 		GROWING(0, cat, start, cur, len);
2790 		if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2791 				  datumtype | TYPE_IS_PACK))
2792 		    Perl_croak(aTHX_ "panic: predicted utf8 length not available");
2793 		cur += fromlen;
2794 		len -= fromlen;
2795 	    } else if (utf8) {
2796 		if (howlen == e_star) {
2797 		    len = fromlen;
2798 		    if (datumtype == 'Z') len++;
2799 		}
2800 		if (len <= (I32) fromlen) {
2801 		    fromlen = len;
2802 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2803 		}
2804 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2805 		   upgrade, so:
2806 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2807 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2808 		len -= fromlen;
2809 		while (fromlen > 0) {
2810 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2811 		    aptr++;
2812 		    fromlen--;
2813 		}
2814 	    } else {
2815 	      string_copy:
2816 		if (howlen == e_star) {
2817 		    len = fromlen;
2818 		    if (datumtype == 'Z') len++;
2819 		}
2820 		if (len <= (I32) fromlen) {
2821 		    fromlen = len;
2822 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2823 		}
2824 		GROWING(0, cat, start, cur, len);
2825 		Copy(aptr, cur, fromlen, char);
2826 		cur += fromlen;
2827 		len -= fromlen;
2828 	    }
2829 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2830 	    cur += len;
2831 	    SvTAINT(cat);
2832 	    break;
2833 	}
2834 	case 'B':
2835 	case 'b': {
2836 	    const char *str, *end;
2837 	    I32 l, field_len;
2838 	    U8 bits;
2839 	    bool utf8_source;
2840 	    U32 utf8_flags;
2841 
2842 	    fromstr = NEXTFROM;
2843 	    str = SvPV_const(fromstr, fromlen);
2844 	    end = str + fromlen;
2845 	    if (DO_UTF8(fromstr)) {
2846 		utf8_source = TRUE;
2847 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2848 	    } else {
2849 		utf8_source = FALSE;
2850 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2851 	    }
2852 	    if (howlen == e_star) len = fromlen;
2853 	    field_len = (len+7)/8;
2854 	    GROWING(utf8, cat, start, cur, field_len);
2855 	    if (len > (I32)fromlen) len = fromlen;
2856 	    bits = 0;
2857 	    l = 0;
2858 	    if (datumtype == 'B')
2859 		while (l++ < len) {
2860 		    if (utf8_source) {
2861 			UV val = 0;
2862 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2863 			bits |= val & 1;
2864 		    } else bits |= *str++ & 1;
2865 		    if (l & 7) bits <<= 1;
2866 		    else {
2867 			PUSH_BYTE(utf8, cur, bits);
2868 			bits = 0;
2869 		    }
2870 		}
2871 	    else
2872 		/* datumtype == 'b' */
2873 		while (l++ < len) {
2874 		    if (utf8_source) {
2875 			UV val = 0;
2876 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2877 			if (val & 1) bits |= 0x80;
2878 		    } else if (*str++ & 1)
2879 			bits |= 0x80;
2880 		    if (l & 7) bits >>= 1;
2881 		    else {
2882 			PUSH_BYTE(utf8, cur, bits);
2883 			bits = 0;
2884 		    }
2885 		}
2886 	    l--;
2887 	    if (l & 7) {
2888 		if (datumtype == 'B')
2889 		    bits <<= 7 - (l & 7);
2890 		else
2891 		    bits >>= 7 - (l & 7);
2892 		PUSH_BYTE(utf8, cur, bits);
2893 		l += 7;
2894 	    }
2895 	    /* Determine how many chars are left in the requested field */
2896 	    l /= 8;
2897 	    if (howlen == e_star) field_len = 0;
2898 	    else field_len -= l;
2899 	    Zero(cur, field_len, char);
2900 	    cur += field_len;
2901 	    break;
2902 	}
2903 	case 'H':
2904 	case 'h': {
2905 	    const char *str, *end;
2906 	    I32 l, field_len;
2907 	    U8 bits;
2908 	    bool utf8_source;
2909 	    U32 utf8_flags;
2910 
2911 	    fromstr = NEXTFROM;
2912 	    str = SvPV_const(fromstr, fromlen);
2913 	    end = str + fromlen;
2914 	    if (DO_UTF8(fromstr)) {
2915 		utf8_source = TRUE;
2916 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2917 	    } else {
2918 		utf8_source = FALSE;
2919 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2920 	    }
2921 	    if (howlen == e_star) len = fromlen;
2922 	    field_len = (len+1)/2;
2923 	    GROWING(utf8, cat, start, cur, field_len);
2924 	    if (!utf8 && len > (I32)fromlen) len = fromlen;
2925 	    bits = 0;
2926 	    l = 0;
2927 	    if (datumtype == 'H')
2928 		while (l++ < len) {
2929 		    if (utf8_source) {
2930 			UV val = 0;
2931 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2932 			if (val < 256 && isALPHA(val))
2933 			    bits |= (val + 9) & 0xf;
2934 			else
2935 			    bits |= val & 0xf;
2936 		    } else if (isALPHA(*str))
2937 			bits |= (*str++ + 9) & 0xf;
2938 		    else
2939 			bits |= *str++ & 0xf;
2940 		    if (l & 1) bits <<= 4;
2941 		    else {
2942 			PUSH_BYTE(utf8, cur, bits);
2943 			bits = 0;
2944 		    }
2945 		}
2946 	    else
2947 		while (l++ < len) {
2948 		    if (utf8_source) {
2949 			UV val = 0;
2950 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2951 			if (val < 256 && isALPHA(val))
2952 			    bits |= ((val + 9) & 0xf) << 4;
2953 			else
2954 			    bits |= (val & 0xf) << 4;
2955 		    } else if (isALPHA(*str))
2956 			bits |= ((*str++ + 9) & 0xf) << 4;
2957 		    else
2958 			bits |= (*str++ & 0xf) << 4;
2959 		    if (l & 1) bits >>= 4;
2960 		    else {
2961 			PUSH_BYTE(utf8, cur, bits);
2962 			bits = 0;
2963 		    }
2964 		}
2965 	    l--;
2966 	    if (l & 1) {
2967 		PUSH_BYTE(utf8, cur, bits);
2968 		l++;
2969 	    }
2970 	    /* Determine how many chars are left in the requested field */
2971 	    l /= 2;
2972 	    if (howlen == e_star) field_len = 0;
2973 	    else field_len -= l;
2974 	    Zero(cur, field_len, char);
2975 	    cur += field_len;
2976 	    break;
2977 	}
2978 	case 'c':
2979 	    while (len-- > 0) {
2980 		IV aiv;
2981 		fromstr = NEXTFROM;
2982 		aiv = SvIV(fromstr);
2983 		if ((-128 > aiv || aiv > 127))
2984 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
2985 				   "Character in 'c' format wrapped in pack");
2986 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2987 	    }
2988 	    break;
2989 	case 'C':
2990 	    if (len == 0) {
2991 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2992 		break;
2993 	    }
2994 	    while (len-- > 0) {
2995 		IV aiv;
2996 		fromstr = NEXTFROM;
2997 		aiv = SvIV(fromstr);
2998 		if ((0 > aiv || aiv > 0xff))
2999 		    Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3000 				   "Character in 'C' format wrapped in pack");
3001 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
3002 	    }
3003 	    break;
3004 	case 'W': {
3005 	    char *end;
3006 	    U8 in_bytes = (U8)IN_BYTES;
3007 
3008 	    end = start+SvLEN(cat)-1;
3009 	    if (utf8) end -= UTF8_MAXLEN-1;
3010 	    while (len-- > 0) {
3011 		UV auv;
3012 		fromstr = NEXTFROM;
3013 		auv = SvUV(fromstr);
3014 		if (in_bytes) auv = auv % 0x100;
3015 		if (utf8) {
3016 		  W_utf8:
3017 		    if (cur > end) {
3018 			*cur = '\0';
3019 			SvCUR_set(cat, cur - start);
3020 
3021 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3022 			end = start+SvLEN(cat)-UTF8_MAXLEN;
3023 		    }
3024 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3025 						       NATIVE_TO_UNI(auv),
3026 						       warn_utf8 ?
3027 						       0 : UNICODE_ALLOW_ANY);
3028 		} else {
3029 		    if (auv >= 0x100) {
3030 			if (!SvUTF8(cat)) {
3031 			    *cur = '\0';
3032 			    SvCUR_set(cat, cur - start);
3033 			    marked_upgrade(aTHX_ cat, symptr);
3034 			    lookahead.flags |= FLAG_DO_UTF8;
3035 			    lookahead.strbeg = symptr->strbeg;
3036 			    utf8 = 1;
3037 			    start = SvPVX(cat);
3038 			    cur = start + SvCUR(cat);
3039 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
3040 			    goto W_utf8;
3041 			}
3042 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3043 				       "Character in 'W' format wrapped in pack");
3044 			auv &= 0xff;
3045 		    }
3046 		    if (cur >= end) {
3047 			*cur = '\0';
3048 			SvCUR_set(cat, cur - start);
3049 			GROWING(0, cat, start, cur, len+1);
3050 			end = start+SvLEN(cat)-1;
3051 		    }
3052 		    *(U8 *) cur++ = (U8)auv;
3053 		}
3054 	    }
3055 	    break;
3056 	}
3057 	case 'U': {
3058 	    char *end;
3059 
3060 	    if (len == 0) {
3061 		if (!(symptr->flags & FLAG_DO_UTF8)) {
3062 		    marked_upgrade(aTHX_ cat, symptr);
3063 		    lookahead.flags |= FLAG_DO_UTF8;
3064 		    lookahead.strbeg = symptr->strbeg;
3065 		}
3066 		utf8 = 0;
3067 		goto no_change;
3068 	    }
3069 
3070 	    end = start+SvLEN(cat);
3071 	    if (!utf8) end -= UTF8_MAXLEN;
3072 	    while (len-- > 0) {
3073 		UV auv;
3074 		fromstr = NEXTFROM;
3075 		auv = SvUV(fromstr);
3076 		if (utf8) {
3077 		    U8 buffer[UTF8_MAXLEN], *endb;
3078 		    endb = uvuni_to_utf8_flags(buffer, auv,
3079 					       warn_utf8 ?
3080 					       0 : UNICODE_ALLOW_ANY);
3081 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3082 			*cur = '\0';
3083 			SvCUR_set(cat, cur - start);
3084 			GROWING(0, cat, start, cur,
3085 				len+(endb-buffer)*UTF8_EXPAND);
3086 			end = start+SvLEN(cat);
3087 		    }
3088 		    cur = bytes_to_uni(buffer, endb-buffer, cur);
3089 		} else {
3090 		    if (cur >= end) {
3091 			*cur = '\0';
3092 			SvCUR_set(cat, cur - start);
3093 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3094 			end = start+SvLEN(cat)-UTF8_MAXLEN;
3095 		    }
3096 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3097 						       warn_utf8 ?
3098 						       0 : UNICODE_ALLOW_ANY);
3099 		}
3100 	    }
3101 	    break;
3102 	}
3103 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
3104 	case 'f':
3105 	    while (len-- > 0) {
3106 		float afloat;
3107 		NV anv;
3108 		fromstr = NEXTFROM;
3109 		anv = SvNV(fromstr);
3110 #ifdef __VOS__
3111 		/* VOS does not automatically map a floating-point overflow
3112 		   during conversion from double to float into infinity, so we
3113 		   do it by hand.  This code should either be generalized for
3114 		   any OS that needs it, or removed if and when VOS implements
3115 		   posix-976 (suggestion to support mapping to infinity).
3116 		   Paul.Green@stratus.com 02-04-02.  */
3117 {
3118 extern const float _float_constants[];
3119 		if (anv > FLT_MAX)
3120 		    afloat = _float_constants[0];   /* single prec. inf. */
3121 		else if (anv < -FLT_MAX)
3122 		    afloat = _float_constants[0];   /* single prec. inf. */
3123 		else afloat = (float) anv;
3124 }
3125 #else /* __VOS__ */
3126 # if defined(VMS) && !defined(__IEEE_FP)
3127 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3128 		 * on Alpha; fake it if we don't have them.
3129 		 */
3130 		if (anv > FLT_MAX)
3131 		    afloat = FLT_MAX;
3132 		else if (anv < -FLT_MAX)
3133 		    afloat = -FLT_MAX;
3134 		else afloat = (float)anv;
3135 # else
3136 		afloat = (float)anv;
3137 # endif
3138 #endif /* __VOS__ */
3139 		DO_BO_PACK_N(afloat, float);
3140 		PUSH_VAR(utf8, cur, afloat);
3141 	    }
3142 	    break;
3143 	case 'd':
3144 	    while (len-- > 0) {
3145 		double adouble;
3146 		NV anv;
3147 		fromstr = NEXTFROM;
3148 		anv = SvNV(fromstr);
3149 #ifdef __VOS__
3150 		/* VOS does not automatically map a floating-point overflow
3151 		   during conversion from long double to double into infinity,
3152 		   so we do it by hand.  This code should either be generalized
3153 		   for any OS that needs it, or removed if and when VOS
3154 		   implements posix-976 (suggestion to support mapping to
3155 		   infinity).  Paul.Green@stratus.com 02-04-02.  */
3156 {
3157 extern const double _double_constants[];
3158 		if (anv > DBL_MAX)
3159 		    adouble = _double_constants[0];   /* double prec. inf. */
3160 		else if (anv < -DBL_MAX)
3161 		    adouble = _double_constants[0];   /* double prec. inf. */
3162 		else adouble = (double) anv;
3163 }
3164 #else /* __VOS__ */
3165 # if defined(VMS) && !defined(__IEEE_FP)
3166 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3167 		 * on Alpha; fake it if we don't have them.
3168 		 */
3169 		if (anv > DBL_MAX)
3170 		    adouble = DBL_MAX;
3171 		else if (anv < -DBL_MAX)
3172 		    adouble = -DBL_MAX;
3173 		else adouble = (double)anv;
3174 # else
3175 		adouble = (double)anv;
3176 # endif
3177 #endif /* __VOS__ */
3178 		DO_BO_PACK_N(adouble, double);
3179 		PUSH_VAR(utf8, cur, adouble);
3180 	    }
3181 	    break;
3182 	case 'F': {
3183 	    NV_bytes anv;
3184 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
3185 	    while (len-- > 0) {
3186 		fromstr = NEXTFROM;
3187 		anv.nv = SvNV(fromstr);
3188 		DO_BO_PACK_N(anv, NV);
3189 		PUSH_BYTES(utf8, cur, anv.bytes, sizeof(anv.bytes));
3190 	    }
3191 	    break;
3192 	}
3193 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3194 	case 'D': {
3195 	    ld_bytes aldouble;
3196 	    /* long doubles can have unused bits, which may be nonzero */
3197 	    Zero(&aldouble, 1, long double);
3198 	    while (len-- > 0) {
3199 		fromstr = NEXTFROM;
3200 		aldouble.ld = (long double)SvNV(fromstr);
3201 		DO_BO_PACK_N(aldouble, long double);
3202 		PUSH_BYTES(utf8, cur, aldouble.bytes, sizeof(aldouble.bytes));
3203 	    }
3204 	    break;
3205 	}
3206 #endif
3207 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3208 	case 'n' | TYPE_IS_SHRIEKING:
3209 #endif
3210 	case 'n':
3211 	    while (len-- > 0) {
3212 		I16 ai16;
3213 		fromstr = NEXTFROM;
3214 		ai16 = (I16)SvIV(fromstr);
3215 #ifdef HAS_HTONS
3216 		ai16 = PerlSock_htons(ai16);
3217 #endif
3218 		PUSH16(utf8, cur, &ai16);
3219 	    }
3220 	    break;
3221 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3222 	case 'v' | TYPE_IS_SHRIEKING:
3223 #endif
3224 	case 'v':
3225 	    while (len-- > 0) {
3226 		I16 ai16;
3227 		fromstr = NEXTFROM;
3228 		ai16 = (I16)SvIV(fromstr);
3229 #ifdef HAS_HTOVS
3230 		ai16 = htovs(ai16);
3231 #endif
3232 		PUSH16(utf8, cur, &ai16);
3233 	    }
3234 	    break;
3235         case 'S' | TYPE_IS_SHRIEKING:
3236 #if SHORTSIZE != SIZE16
3237 	    while (len-- > 0) {
3238 		unsigned short aushort;
3239 		fromstr = NEXTFROM;
3240 		aushort = SvUV(fromstr);
3241 		DO_BO_PACK(aushort, s);
3242 		PUSH_VAR(utf8, cur, aushort);
3243 	    }
3244             break;
3245 #else
3246             /* Fall through! */
3247 #endif
3248 	case 'S':
3249 	    while (len-- > 0) {
3250 		U16 au16;
3251 		fromstr = NEXTFROM;
3252 		au16 = (U16)SvUV(fromstr);
3253 		DO_BO_PACK(au16, 16);
3254 		PUSH16(utf8, cur, &au16);
3255 	    }
3256 	    break;
3257 	case 's' | TYPE_IS_SHRIEKING:
3258 #if SHORTSIZE != SIZE16
3259 	    while (len-- > 0) {
3260 		short ashort;
3261 		fromstr = NEXTFROM;
3262 		ashort = SvIV(fromstr);
3263 		DO_BO_PACK(ashort, s);
3264 		PUSH_VAR(utf8, cur, ashort);
3265 	    }
3266             break;
3267 #else
3268             /* Fall through! */
3269 #endif
3270 	case 's':
3271 	    while (len-- > 0) {
3272 		I16 ai16;
3273 		fromstr = NEXTFROM;
3274 		ai16 = (I16)SvIV(fromstr);
3275 		DO_BO_PACK(ai16, 16);
3276 		PUSH16(utf8, cur, &ai16);
3277 	    }
3278 	    break;
3279 	case 'I':
3280 	case 'I' | TYPE_IS_SHRIEKING:
3281 	    while (len-- > 0) {
3282 		unsigned int auint;
3283 		fromstr = NEXTFROM;
3284 		auint = SvUV(fromstr);
3285 		DO_BO_PACK(auint, i);
3286 		PUSH_VAR(utf8, cur, auint);
3287 	    }
3288 	    break;
3289 	case 'j':
3290 	    while (len-- > 0) {
3291 		IV aiv;
3292 		fromstr = NEXTFROM;
3293 		aiv = SvIV(fromstr);
3294 #if IVSIZE == INTSIZE
3295 		DO_BO_PACK(aiv, i);
3296 #elif IVSIZE == LONGSIZE
3297 		DO_BO_PACK(aiv, l);
3298 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3299 		DO_BO_PACK(aiv, 64);
3300 #else
3301 		Perl_croak(aTHX_ "'j' not supported on this platform");
3302 #endif
3303 		PUSH_VAR(utf8, cur, aiv);
3304 	    }
3305 	    break;
3306 	case 'J':
3307 	    while (len-- > 0) {
3308 		UV auv;
3309 		fromstr = NEXTFROM;
3310 		auv = SvUV(fromstr);
3311 #if UVSIZE == INTSIZE
3312 		DO_BO_PACK(auv, i);
3313 #elif UVSIZE == LONGSIZE
3314 		DO_BO_PACK(auv, l);
3315 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3316 		DO_BO_PACK(auv, 64);
3317 #else
3318 		Perl_croak(aTHX_ "'J' not supported on this platform");
3319 #endif
3320 		PUSH_VAR(utf8, cur, auv);
3321 	    }
3322 	    break;
3323 	case 'w':
3324             while (len-- > 0) {
3325 		NV anv;
3326 		fromstr = NEXTFROM;
3327 		anv = SvNV(fromstr);
3328 
3329 		if (anv < 0) {
3330 		    *cur = '\0';
3331 		    SvCUR_set(cat, cur - start);
3332 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3333 		}
3334 
3335                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3336                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3337                    any negative IVs will have already been got by the croak()
3338                    above. IOK is untrue for fractions, so we test them
3339                    against UV_MAX_P1.  */
3340 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3341 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
3342 		    char  *in = buf + sizeof(buf);
3343 		    UV     auv = SvUV(fromstr);
3344 
3345 		    do {
3346 			*--in = (char)((auv & 0x7f) | 0x80);
3347 			auv >>= 7;
3348 		    } while (auv);
3349 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3350 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3351 				       in, (buf + sizeof(buf)) - in);
3352 		} else if (SvPOKp(fromstr))
3353 		    goto w_string;
3354 		else if (SvNOKp(fromstr)) {
3355 		    /* 10**NV_MAX_10_EXP is the largest power of 10
3356 		       so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3357 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3358 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3359 		       And with that many bytes only Inf can overflow.
3360 		       Some C compilers are strict about integral constant
3361 		       expressions so we conservatively divide by a slightly
3362 		       smaller integer instead of multiplying by the exact
3363 		       floating-point value.
3364 		    */
3365 #ifdef NV_MAX_10_EXP
3366 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3367 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3368 #else
3369 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3370 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3371 #endif
3372 		    char  *in = buf + sizeof(buf);
3373 
3374 		    anv = Perl_floor(anv);
3375 		    do {
3376 			const NV next = Perl_floor(anv / 128);
3377 			if (in <= buf)  /* this cannot happen ;-) */
3378 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
3379 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
3380 			anv = next;
3381 		    } while (anv > 0);
3382 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3383 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3384 				       in, (buf + sizeof(buf)) - in);
3385 		} else {
3386 		    const char     *from;
3387 		    char           *result, *in;
3388 		    SV             *norm;
3389 		    STRLEN          len;
3390 		    bool            done;
3391 
3392 		  w_string:
3393 		    /* Copy string and check for compliance */
3394 		    from = SvPV_const(fromstr, len);
3395 		    if ((norm = is_an_int(from, len)) == NULL)
3396 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3397 
3398 		    Newx(result, len, char);
3399 		    in = result + len;
3400 		    done = FALSE;
3401 		    while (!done) *--in = div128(norm, &done) | 0x80;
3402 		    result[len - 1] &= 0x7F; /* clear continue bit */
3403 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3404 				       in, (result + len) - in);
3405 		    Safefree(result);
3406 		    SvREFCNT_dec(norm);	/* free norm */
3407 		}
3408 	    }
3409             break;
3410 	case 'i':
3411 	case 'i' | TYPE_IS_SHRIEKING:
3412 	    while (len-- > 0) {
3413 		int aint;
3414 		fromstr = NEXTFROM;
3415 		aint = SvIV(fromstr);
3416 		DO_BO_PACK(aint, i);
3417 		PUSH_VAR(utf8, cur, aint);
3418 	    }
3419 	    break;
3420 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3421 	case 'N' | TYPE_IS_SHRIEKING:
3422 #endif
3423 	case 'N':
3424 	    while (len-- > 0) {
3425 		U32 au32;
3426 		fromstr = NEXTFROM;
3427 		au32 = SvUV(fromstr);
3428 #ifdef HAS_HTONL
3429 		au32 = PerlSock_htonl(au32);
3430 #endif
3431 		PUSH32(utf8, cur, &au32);
3432 	    }
3433 	    break;
3434 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3435 	case 'V' | TYPE_IS_SHRIEKING:
3436 #endif
3437 	case 'V':
3438 	    while (len-- > 0) {
3439 		U32 au32;
3440 		fromstr = NEXTFROM;
3441 		au32 = SvUV(fromstr);
3442 #ifdef HAS_HTOVL
3443 		au32 = htovl(au32);
3444 #endif
3445 		PUSH32(utf8, cur, &au32);
3446 	    }
3447 	    break;
3448 	case 'L' | TYPE_IS_SHRIEKING:
3449 #if LONGSIZE != SIZE32
3450 	    while (len-- > 0) {
3451 		unsigned long aulong;
3452 		fromstr = NEXTFROM;
3453 		aulong = SvUV(fromstr);
3454 		DO_BO_PACK(aulong, l);
3455 		PUSH_VAR(utf8, cur, aulong);
3456 	    }
3457 	    break;
3458 #else
3459             /* Fall though! */
3460 #endif
3461 	case 'L':
3462 	    while (len-- > 0) {
3463 		U32 au32;
3464 		fromstr = NEXTFROM;
3465 		au32 = SvUV(fromstr);
3466 		DO_BO_PACK(au32, 32);
3467 		PUSH32(utf8, cur, &au32);
3468 	    }
3469 	    break;
3470 	case 'l' | TYPE_IS_SHRIEKING:
3471 #if LONGSIZE != SIZE32
3472 	    while (len-- > 0) {
3473 		long along;
3474 		fromstr = NEXTFROM;
3475 		along = SvIV(fromstr);
3476 		DO_BO_PACK(along, l);
3477 		PUSH_VAR(utf8, cur, along);
3478 	    }
3479 	    break;
3480 #else
3481             /* Fall though! */
3482 #endif
3483 	case 'l':
3484             while (len-- > 0) {
3485 		I32 ai32;
3486 		fromstr = NEXTFROM;
3487 		ai32 = SvIV(fromstr);
3488 		DO_BO_PACK(ai32, 32);
3489 		PUSH32(utf8, cur, &ai32);
3490 	    }
3491 	    break;
3492 #ifdef HAS_QUAD
3493 	case 'Q':
3494 	    while (len-- > 0) {
3495 		Uquad_t auquad;
3496 		fromstr = NEXTFROM;
3497 		auquad = (Uquad_t) SvUV(fromstr);
3498 		DO_BO_PACK(auquad, 64);
3499 		PUSH_VAR(utf8, cur, auquad);
3500 	    }
3501 	    break;
3502 	case 'q':
3503 	    while (len-- > 0) {
3504 		Quad_t aquad;
3505 		fromstr = NEXTFROM;
3506 		aquad = (Quad_t)SvIV(fromstr);
3507 		DO_BO_PACK(aquad, 64);
3508 		PUSH_VAR(utf8, cur, aquad);
3509 	    }
3510 	    break;
3511 #endif /* HAS_QUAD */
3512 	case 'P':
3513 	    len = 1;		/* assume SV is correct length */
3514 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3515 	    /* Fall through! */
3516 	case 'p':
3517 	    while (len-- > 0) {
3518 		const char *aptr;
3519 
3520 		fromstr = NEXTFROM;
3521 		SvGETMAGIC(fromstr);
3522 		if (!SvOK(fromstr)) aptr = NULL;
3523 		else {
3524 		    /* XXX better yet, could spirit away the string to
3525 		     * a safe spot and hang on to it until the result
3526 		     * of pack() (and all copies of the result) are
3527 		     * gone.
3528 		     */
3529 		    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3530 			     !SvREADONLY(fromstr)))) {
3531 			Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3532 				       "Attempt to pack pointer to temporary value");
3533 		    }
3534 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3535 			aptr = SvPV_nomg_const_nolen(fromstr);
3536 		    else
3537 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3538 		}
3539 		DO_BO_PACK_PC(aptr);
3540 		PUSH_VAR(utf8, cur, aptr);
3541 	    }
3542 	    break;
3543 	case 'u': {
3544 	    const char *aptr, *aend;
3545 	    bool from_utf8;
3546 
3547 	    fromstr = NEXTFROM;
3548 	    if (len <= 2) len = 45;
3549 	    else len = len / 3 * 3;
3550 	    if (len >= 64) {
3551 		Perl_ck_warner(aTHX_ packWARN(WARN_PACK),
3552 			       "Field too wide in 'u' format in pack");
3553 		len = 63;
3554 	    }
3555 	    aptr = SvPV_const(fromstr, fromlen);
3556 	    from_utf8 = DO_UTF8(fromstr);
3557 	    if (from_utf8) {
3558 		aend = aptr + fromlen;
3559 		fromlen = sv_len_utf8(fromstr);
3560 	    } else aend = NULL; /* Unused, but keep compilers happy */
3561 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3562 	    while (fromlen > 0) {
3563 		U8 *end;
3564 		I32 todo;
3565 		U8 hunk[1+63/3*4+1];
3566 
3567 		if ((I32)fromlen > len)
3568 		    todo = len;
3569 		else
3570 		    todo = fromlen;
3571 		if (from_utf8) {
3572 		    char buffer[64];
3573 		    if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3574 				      'u' | TYPE_IS_PACK)) {
3575 			*cur = '\0';
3576 			SvCUR_set(cat, cur - start);
3577 			Perl_croak(aTHX_ "panic: string is shorter than advertised");
3578 		    }
3579 		    end = doencodes(hunk, buffer, todo);
3580 		} else {
3581 		    end = doencodes(hunk, aptr, todo);
3582 		    aptr += todo;
3583 		}
3584 		PUSH_BYTES(utf8, cur, hunk, end-hunk);
3585 		fromlen -= todo;
3586 	    }
3587 	    break;
3588 	}
3589 	}
3590 	*cur = '\0';
3591 	SvCUR_set(cat, cur - start);
3592       no_change:
3593 	*symptr = lookahead;
3594     }
3595     return beglist;
3596 }
3597 #undef NEXTFROM
3598 
3599 
3600 PP(pp_pack)
3601 {
3602     dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3603     register SV *cat = TARG;
3604     STRLEN fromlen;
3605     SV *pat_sv = *++MARK;
3606     register const char *pat = SvPV_const(pat_sv, fromlen);
3607     register const char *patend = pat + fromlen;
3608 
3609     MARK++;
3610     sv_setpvs(cat, "");
3611     SvUTF8_off(cat);
3612 
3613     packlist(cat, pat, patend, MARK, SP + 1);
3614 
3615     SvSETMAGIC(cat);
3616     SP = ORIGMARK;
3617     PUSHs(cat);
3618     RETURN;
3619 }
3620 
3621 /*
3622  * Local variables:
3623  * c-indentation-style: bsd
3624  * c-basic-offset: 4
3625  * indent-tabs-mode: t
3626  * End:
3627  *
3628  * ex: set ts=8 sts=4 sw=4 noet:
3629  */
3630