xref: /openbsd-src/gnu/usr.bin/perl/pp_pack.c (revision 43003dfe3ad45d1698bed8a37f2b0f5b14f20d4f)
1 /*    pp_pack.c
2  *
3  *    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4  *    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5  *
6  *    You may distribute under the terms of either the GNU General Public
7  *    License or the Artistic License, as specified in the README file.
8  *
9  */
10 
11 /*
12  * He still hopefully carried some of his gear in his pack: a small tinder-box,
13  * two small shallow pans, the smaller fitting into the larger; inside them a
14  * wooden spoon, a short two-pronged fork and some skewers were stowed; and
15  * hidden at the bottom of the pack in a flat wooden box a dwindling treasure,
16  * some salt.
17  *
18  *     [p.653 of _The Lord of the Rings_, IV/iv: "Of Herbs and Stewed Rabbit"]
19  */
20 
21 /* This file contains pp ("push/pop") functions that
22  * execute the opcodes that make up a perl program. A typical pp function
23  * expects to find its arguments on the stack, and usually pushes its
24  * results onto the stack, hence the 'pp' terminology. Each OP structure
25  * contains a pointer to the relevant pp_foo() function.
26  *
27  * This particular file just contains pp_pack() and pp_unpack(). See the
28  * other pp*.c files for the rest of the pp_ functions.
29  */
30 
31 #include "EXTERN.h"
32 #define PERL_IN_PP_PACK_C
33 #include "perl.h"
34 
35 /* Types used by pack/unpack */
36 typedef enum {
37   e_no_len,     /* no length  */
38   e_number,     /* number, [] */
39   e_star        /* asterisk   */
40 } howlen_t;
41 
42 typedef struct tempsym {
43   const char*    patptr;   /* current template char */
44   const char*    patend;   /* one after last char   */
45   const char*    grpbeg;   /* 1st char of ()-group  */
46   const char*    grpend;   /* end of ()-group       */
47   I32      code;     /* template code (!<>)   */
48   I32      length;   /* length/repeat count   */
49   howlen_t howlen;   /* how length is given   */
50   int      level;    /* () nesting level      */
51   U32      flags;    /* /=4, comma=2, pack=1  */
52                      /*   and group modifiers */
53   STRLEN   strbeg;   /* offset of group start */
54   struct tempsym *previous; /* previous group */
55 } tempsym_t;
56 
57 #define TEMPSYM_INIT(symptr, p, e, f) \
58     STMT_START {	\
59 	(symptr)->patptr   = (p);	\
60 	(symptr)->patend   = (e);	\
61 	(symptr)->grpbeg   = NULL;	\
62 	(symptr)->grpend   = NULL;	\
63 	(symptr)->grpend   = NULL;	\
64 	(symptr)->code     = 0;		\
65 	(symptr)->length   = 0;		\
66 	(symptr)->howlen   = e_no_len;	\
67 	(symptr)->level    = 0;		\
68 	(symptr)->flags    = (f);	\
69 	(symptr)->strbeg   = 0;		\
70 	(symptr)->previous = NULL;	\
71    } STMT_END
72 
73 #if PERL_VERSION >= 9
74 # define PERL_PACK_CAN_BYTEORDER
75 # define PERL_PACK_CAN_SHRIEKSIGN
76 #endif
77 
78 #ifndef CHAR_BIT
79 # define CHAR_BIT	8
80 #endif
81 /* Maximum number of bytes to which a byte can grow due to upgrade */
82 #define UTF8_EXPAND	2
83 
84 /*
85  * Offset for integer pack/unpack.
86  *
87  * On architectures where I16 and I32 aren't really 16 and 32 bits,
88  * which for now are all Crays, pack and unpack have to play games.
89  */
90 
91 /*
92  * These values are required for portability of pack() output.
93  * If they're not right on your machine, then pack() and unpack()
94  * wouldn't work right anyway; you'll need to apply the Cray hack.
95  * (I'd like to check them with #if, but you can't use sizeof() in
96  * the preprocessor.)  --???
97  */
98 /*
99     The appropriate SHORTSIZE, INTSIZE, LONGSIZE, and LONGLONGSIZE
100     defines are now in config.h.  --Andy Dougherty  April 1998
101  */
102 #define SIZE16 2
103 #define SIZE32 4
104 
105 /* CROSSCOMPILE and MULTIARCH are going to affect pp_pack() and pp_unpack().
106    --jhi Feb 1999 */
107 
108 #if U16SIZE > SIZE16 || U32SIZE > SIZE32
109 #  if BYTEORDER == 0x1234 || BYTEORDER == 0x12345678    /* little-endian */
110 #    define OFF16(p)	((char*)(p))
111 #    define OFF32(p)	((char*)(p))
112 #  else
113 #    if BYTEORDER == 0x4321 || BYTEORDER == 0x87654321  /* big-endian */
114 #      define OFF16(p)	((char*)(p) + (sizeof(U16) - SIZE16))
115 #      define OFF32(p)	((char*)(p) + (sizeof(U32) - SIZE32))
116 #    else
117        ++++ bad cray byte order
118 #    endif
119 #  endif
120 #else
121 #  define OFF16(p)     ((char *) (p))
122 #  define OFF32(p)     ((char *) (p))
123 #endif
124 
125 /* Only to be used inside a loop (see the break) */
126 #define SHIFT16(utf8, s, strend, p, datumtype) STMT_START {		\
127     if (utf8) {								\
128 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF16(p), SIZE16, datumtype)) break;	\
129     } else {								\
130 	Copy(s, OFF16(p), SIZE16, char);				\
131 	(s) += SIZE16;							\
132     }									\
133 } STMT_END
134 
135 /* Only to be used inside a loop (see the break) */
136 #define SHIFT32(utf8, s, strend, p, datumtype) STMT_START {		\
137     if (utf8) {								\
138 	if (!uni_to_bytes(aTHX_ &(s), strend, OFF32(p), SIZE32, datumtype)) break;	\
139     } else {								\
140 	Copy(s, OFF32(p), SIZE32, char);				\
141 	(s) += SIZE32;							\
142     }									\
143 } STMT_END
144 
145 #define PUSH16(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF16(p), SIZE16)
146 #define PUSH32(utf8, cur, p) PUSH_BYTES(utf8, cur, OFF32(p), SIZE32)
147 
148 /* Only to be used inside a loop (see the break) */
149 #define SHIFT_VAR(utf8, s, strend, var, datumtype)	\
150 STMT_START {						\
151     if (utf8) {						\
152         if (!uni_to_bytes(aTHX_ &s, strend,		\
153             (char *) &var, sizeof(var), datumtype)) break;\
154     } else {						\
155         Copy(s, (char *) &var, sizeof(var), char);	\
156         s += sizeof(var);				\
157     }							\
158 } STMT_END
159 
160 #define PUSH_VAR(utf8, aptr, var)	\
161 	PUSH_BYTES(utf8, aptr, &(var), sizeof(var))
162 
163 /* Avoid stack overflow due to pathological templates. 100 should be plenty. */
164 #define MAX_SUB_TEMPLATE_LEVEL 100
165 
166 /* flags (note that type modifiers can also be used as flags!) */
167 #define FLAG_WAS_UTF8	      0x40
168 #define FLAG_PARSE_UTF8       0x20	/* Parse as utf8 */
169 #define FLAG_UNPACK_ONLY_ONE  0x10
170 #define FLAG_DO_UTF8          0x08	/* The underlying string is utf8 */
171 #define FLAG_SLASH            0x04
172 #define FLAG_COMMA            0x02
173 #define FLAG_PACK             0x01
174 
175 STATIC SV *
176 S_mul128(pTHX_ SV *sv, U8 m)
177 {
178   STRLEN          len;
179   char           *s = SvPV(sv, len);
180   char           *t;
181 
182   PERL_ARGS_ASSERT_MUL128;
183 
184   if (!strnEQ(s, "0000", 4)) {  /* need to grow sv */
185     SV * const tmpNew = newSVpvs("0000000000");
186 
187     sv_catsv(tmpNew, sv);
188     SvREFCNT_dec(sv);		/* free old sv */
189     sv = tmpNew;
190     s = SvPV(sv, len);
191   }
192   t = s + len - 1;
193   while (!*t)                   /* trailing '\0'? */
194     t--;
195   while (t > s) {
196     const U32 i = ((*t - '0') << 7) + m;
197     *(t--) = '0' + (char)(i % 10);
198     m = (char)(i / 10);
199   }
200   return (sv);
201 }
202 
203 /* Explosives and implosives. */
204 
205 #if 'I' == 73 && 'J' == 74
206 /* On an ASCII/ISO kind of system */
207 #define ISUUCHAR(ch)    ((ch) >= ' ' && (ch) < 'a')
208 #else
209 /*
210   Some other sort of character set - use memchr() so we don't match
211   the null byte.
212  */
213 #define ISUUCHAR(ch)    (memchr(PL_uuemap, (ch), sizeof(PL_uuemap)-1) || (ch) == ' ')
214 #endif
215 
216 /* type modifiers */
217 #define TYPE_IS_SHRIEKING	0x100
218 #define TYPE_IS_BIG_ENDIAN	0x200
219 #define TYPE_IS_LITTLE_ENDIAN	0x400
220 #define TYPE_IS_PACK		0x800
221 #define TYPE_ENDIANNESS_MASK	(TYPE_IS_BIG_ENDIAN|TYPE_IS_LITTLE_ENDIAN)
222 #define TYPE_MODIFIERS(t)	((t) & ~0xFF)
223 #define TYPE_NO_MODIFIERS(t)	((t) & 0xFF)
224 
225 #ifdef PERL_PACK_CAN_SHRIEKSIGN
226 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxXnNvV@."
227 #else
228 # define SHRIEKING_ALLOWED_TYPES "sSiIlLxX"
229 #endif
230 
231 #ifndef PERL_PACK_CAN_BYTEORDER
232 /* Put "can't" first because it is shorter  */
233 # define TYPE_ENDIANNESS(t)	0
234 # define TYPE_NO_ENDIANNESS(t)	(t)
235 
236 # define ENDIANNESS_ALLOWED_TYPES   ""
237 
238 # define DO_BO_UNPACK(var, type)
239 # define DO_BO_PACK(var, type)
240 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)
241 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)
242 # define DO_BO_UNPACK_N(var, type)
243 # define DO_BO_PACK_N(var, type)
244 # define DO_BO_UNPACK_P(var)
245 # define DO_BO_PACK_P(var)
246 # define DO_BO_UNPACK_PC(var)
247 # define DO_BO_PACK_PC(var)
248 
249 #else /* PERL_PACK_CAN_BYTEORDER */
250 
251 # define TYPE_ENDIANNESS(t)	((t) & TYPE_ENDIANNESS_MASK)
252 # define TYPE_NO_ENDIANNESS(t)	((t) & ~TYPE_ENDIANNESS_MASK)
253 
254 # define ENDIANNESS_ALLOWED_TYPES   "sSiIlLqQjJfFdDpP("
255 
256 # define DO_BO_UNPACK(var, type)                                              \
257         STMT_START {                                                          \
258           switch (TYPE_ENDIANNESS(datumtype)) {                               \
259             case TYPE_IS_BIG_ENDIAN:    var = my_betoh ## type (var); break;  \
260             case TYPE_IS_LITTLE_ENDIAN: var = my_letoh ## type (var); break;  \
261             default: break;                                                   \
262           }                                                                   \
263         } STMT_END
264 
265 # define DO_BO_PACK(var, type)                                                \
266         STMT_START {                                                          \
267           switch (TYPE_ENDIANNESS(datumtype)) {                               \
268             case TYPE_IS_BIG_ENDIAN:    var = my_htobe ## type (var); break;  \
269             case TYPE_IS_LITTLE_ENDIAN: var = my_htole ## type (var); break;  \
270             default: break;                                                   \
271           }                                                                   \
272         } STMT_END
273 
274 # define DO_BO_UNPACK_PTR(var, type, pre_cast, post_cast)                     \
275         STMT_START {                                                          \
276           switch (TYPE_ENDIANNESS(datumtype)) {                               \
277             case TYPE_IS_BIG_ENDIAN:                                          \
278               var = (post_cast*) my_betoh ## type ((pre_cast) var);           \
279               break;                                                          \
280             case TYPE_IS_LITTLE_ENDIAN:                                       \
281               var = (post_cast *) my_letoh ## type ((pre_cast) var);          \
282               break;                                                          \
283             default:                                                          \
284               break;                                                          \
285           }                                                                   \
286         } STMT_END
287 
288 # define DO_BO_PACK_PTR(var, type, pre_cast, post_cast)                       \
289         STMT_START {                                                          \
290           switch (TYPE_ENDIANNESS(datumtype)) {                               \
291             case TYPE_IS_BIG_ENDIAN:                                          \
292               var = (post_cast *) my_htobe ## type ((pre_cast) var);          \
293               break;                                                          \
294             case TYPE_IS_LITTLE_ENDIAN:                                       \
295               var = (post_cast *) my_htole ## type ((pre_cast) var);          \
296               break;                                                          \
297             default:                                                          \
298               break;                                                          \
299           }                                                                   \
300         } STMT_END
301 
302 # define BO_CANT_DOIT(action, type)                                           \
303         STMT_START {                                                          \
304           switch (TYPE_ENDIANNESS(datumtype)) {                               \
305              case TYPE_IS_BIG_ENDIAN:                                         \
306                Perl_croak(aTHX_ "Can't %s big-endian %ss on this "            \
307                                 "platform", #action, #type);                  \
308                break;                                                         \
309              case TYPE_IS_LITTLE_ENDIAN:                                      \
310                Perl_croak(aTHX_ "Can't %s little-endian %ss on this "         \
311                                 "platform", #action, #type);                  \
312                break;                                                         \
313              default:                                                         \
314                break;                                                         \
315            }                                                                  \
316          } STMT_END
317 
318 # if PTRSIZE == INTSIZE
319 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, i, int, void)
320 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, i, int, void)
321 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, i, int, char)
322 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, i, int, char)
323 # elif PTRSIZE == LONGSIZE
324 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, long, void)
325 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, long, void)
326 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, long, char)
327 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, long, char)
328 # elif PTRSIZE == IVSIZE
329 #  define DO_BO_UNPACK_P(var)	DO_BO_UNPACK_PTR(var, l, IV, void)
330 #  define DO_BO_PACK_P(var)	DO_BO_PACK_PTR(var, l, IV, void)
331 #  define DO_BO_UNPACK_PC(var)	DO_BO_UNPACK_PTR(var, l, IV, char)
332 #  define DO_BO_PACK_PC(var)	DO_BO_PACK_PTR(var, l, IV, char)
333 # else
334 #  define DO_BO_UNPACK_P(var)	BO_CANT_DOIT(unpack, pointer)
335 #  define DO_BO_PACK_P(var)	BO_CANT_DOIT(pack, pointer)
336 #  define DO_BO_UNPACK_PC(var)	BO_CANT_DOIT(unpack, pointer)
337 #  define DO_BO_PACK_PC(var)	BO_CANT_DOIT(pack, pointer)
338 # endif
339 
340 # if defined(my_htolen) && defined(my_letohn) && \
341     defined(my_htoben) && defined(my_betohn)
342 #  define DO_BO_UNPACK_N(var, type)                                           \
343          STMT_START {                                                         \
344            switch (TYPE_ENDIANNESS(datumtype)) {                              \
345              case TYPE_IS_BIG_ENDIAN:    my_betohn(&var, sizeof(type)); break;\
346              case TYPE_IS_LITTLE_ENDIAN: my_letohn(&var, sizeof(type)); break;\
347              default: break;                                                  \
348            }                                                                  \
349          } STMT_END
350 
351 #  define DO_BO_PACK_N(var, type)                                             \
352          STMT_START {                                                         \
353            switch (TYPE_ENDIANNESS(datumtype)) {                              \
354              case TYPE_IS_BIG_ENDIAN:    my_htoben(&var, sizeof(type)); break;\
355              case TYPE_IS_LITTLE_ENDIAN: my_htolen(&var, sizeof(type)); break;\
356              default: break;                                                  \
357            }                                                                  \
358          } STMT_END
359 # else
360 #  define DO_BO_UNPACK_N(var, type)	BO_CANT_DOIT(unpack, type)
361 #  define DO_BO_PACK_N(var, type)	BO_CANT_DOIT(pack, type)
362 # endif
363 
364 #endif /* PERL_PACK_CAN_BYTEORDER */
365 
366 #define PACK_SIZE_CANNOT_CSUM		0x80
367 #define PACK_SIZE_UNPREDICTABLE		0x40	/* Not a fixed size element */
368 #define PACK_SIZE_MASK			0x3F
369 
370 /* These tables are regenerated by genpacksizetables.pl (and then hand pasted
371    in).  You're unlikely ever to need to regenerate them.  */
372 
373 #if TYPE_IS_SHRIEKING != 0x100
374    ++++shriek offset should be 256
375 #endif
376 
377 typedef U8 packprops_t;
378 #if 'J'-'I' == 1
379 /* ASCII */
380 STATIC const packprops_t packprops[512] = {
381     /* normal */
382     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
383     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
384     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
385     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
386     0, 0, 0,
387     /* C */ sizeof(unsigned char),
388 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
389     /* D */ LONG_DOUBLESIZE,
390 #else
391     0,
392 #endif
393     0,
394     /* F */ NVSIZE,
395     0, 0,
396     /* I */ sizeof(unsigned int),
397     /* J */ UVSIZE,
398     0,
399     /* L */ SIZE32,
400     0,
401     /* N */ SIZE32,
402     0, 0,
403 #if defined(HAS_QUAD)
404     /* Q */ sizeof(Uquad_t),
405 #else
406     0,
407 #endif
408     0,
409     /* S */ SIZE16,
410     0,
411     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
412     /* V */ SIZE32,
413     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
414     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
415     /* c */ sizeof(char),
416     /* d */ sizeof(double),
417     0,
418     /* f */ sizeof(float),
419     0, 0,
420     /* i */ sizeof(int),
421     /* j */ IVSIZE,
422     0,
423     /* l */ SIZE32,
424     0,
425     /* n */ SIZE16,
426     0,
427     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
428 #if defined(HAS_QUAD)
429     /* q */ sizeof(Quad_t),
430 #else
431     0,
432 #endif
433     0,
434     /* s */ SIZE16,
435     0, 0,
436     /* v */ SIZE16,
437     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
438     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
439     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
440     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
441     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
442     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
443     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
444     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
445     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446     0, 0, 0, 0, 0, 0, 0, 0,
447     /* shrieking */
448     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
449     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
450     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
451     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452     0, 0, 0, 0, 0, 0, 0, 0, 0,
453     /* I */ sizeof(unsigned int),
454     0, 0,
455     /* L */ sizeof(unsigned long),
456     0,
457 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
458     /* N */ SIZE32,
459 #else
460     0,
461 #endif
462     0, 0, 0, 0,
463     /* S */ sizeof(unsigned short),
464     0, 0,
465 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
466     /* V */ SIZE32,
467 #else
468     0,
469 #endif
470     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
471     0, 0,
472     /* i */ sizeof(int),
473     0, 0,
474     /* l */ sizeof(long),
475     0,
476 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
477     /* n */ SIZE16,
478 #else
479     0,
480 #endif
481     0, 0, 0, 0,
482     /* s */ sizeof(short),
483     0, 0,
484 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
485     /* v */ SIZE16,
486 #else
487     0,
488 #endif
489     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
490     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
494     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
497     0, 0, 0, 0, 0, 0, 0, 0, 0
498 };
499 #else
500 /* EBCDIC (or bust) */
501 STATIC const packprops_t packprops[512] = {
502     /* normal */
503     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
506     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
507     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
508     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
510     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511     0, 0, 0,
512     /* c */ sizeof(char),
513     /* d */ sizeof(double),
514     0,
515     /* f */ sizeof(float),
516     0, 0,
517     /* i */ sizeof(int),
518     0, 0, 0, 0, 0, 0, 0,
519     /* j */ IVSIZE,
520     0,
521     /* l */ SIZE32,
522     0,
523     /* n */ SIZE16,
524     0,
525     /* p */ sizeof(char *) | PACK_SIZE_CANNOT_CSUM,
526 #if defined(HAS_QUAD)
527     /* q */ sizeof(Quad_t),
528 #else
529     0,
530 #endif
531     0, 0, 0, 0, 0, 0, 0, 0, 0,
532     /* s */ SIZE16,
533     0, 0,
534     /* v */ SIZE16,
535     /* w */ sizeof(char) | PACK_SIZE_UNPREDICTABLE | PACK_SIZE_CANNOT_CSUM,
536     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
537     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538     /* C */ sizeof(unsigned char),
539 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
540     /* D */ LONG_DOUBLESIZE,
541 #else
542     0,
543 #endif
544     0,
545     /* F */ NVSIZE,
546     0, 0,
547     /* I */ sizeof(unsigned int),
548     0, 0, 0, 0, 0, 0, 0,
549     /* J */ UVSIZE,
550     0,
551     /* L */ SIZE32,
552     0,
553     /* N */ SIZE32,
554     0, 0,
555 #if defined(HAS_QUAD)
556     /* Q */ sizeof(Uquad_t),
557 #else
558     0,
559 #endif
560     0, 0, 0, 0, 0, 0, 0, 0, 0,
561     /* S */ SIZE16,
562     0,
563     /* U */ sizeof(char) | PACK_SIZE_UNPREDICTABLE,
564     /* V */ SIZE32,
565     /* W */ sizeof(unsigned char) | PACK_SIZE_UNPREDICTABLE,
566     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
567     0, 0, 0, 0, 0, 0, 0, 0, 0,
568     /* shrieking */
569     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
570     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
571     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
572     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
573     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
574     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
575     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
576     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577     0, 0, 0, 0, 0, 0, 0, 0, 0,
578     /* i */ sizeof(int),
579     0, 0, 0, 0, 0, 0, 0, 0, 0,
580     /* l */ sizeof(long),
581     0,
582 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
583     /* n */ SIZE16,
584 #else
585     0,
586 #endif
587     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
588     /* s */ sizeof(short),
589     0, 0,
590 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
591     /* v */ SIZE16,
592 #else
593     0,
594 #endif
595     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
596     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
597     0, 0, 0,
598     /* I */ sizeof(unsigned int),
599     0, 0, 0, 0, 0, 0, 0, 0, 0,
600     /* L */ sizeof(unsigned long),
601     0,
602 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
603     /* N */ SIZE32,
604 #else
605     0,
606 #endif
607     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
608     /* S */ sizeof(unsigned short),
609     0, 0,
610 #if defined(PERL_PACK_CAN_SHRIEKSIGN)
611     /* V */ SIZE32,
612 #else
613     0,
614 #endif
615     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
616     0, 0, 0, 0, 0, 0, 0, 0, 0, 0
617 };
618 #endif
619 
620 STATIC U8
621 uni_to_byte(pTHX_ const char **s, const char *end, I32 datumtype)
622 {
623     STRLEN retlen;
624     UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen,
625 			 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
626     /* We try to process malformed UTF-8 as much as possible (preferrably with
627        warnings), but these two mean we make no progress in the string and
628        might enter an infinite loop */
629     if (retlen == (STRLEN) -1 || retlen == 0)
630 	Perl_croak(aTHX_ "Malformed UTF-8 string in '%c' format in unpack",
631 		   (int) TYPE_NO_MODIFIERS(datumtype));
632     if (val >= 0x100) {
633 	if (ckWARN(WARN_UNPACK))
634 	Perl_warner(aTHX_ packWARN(WARN_UNPACK),
635 		    "Character in '%c' format wrapped in unpack",
636 		    (int) TYPE_NO_MODIFIERS(datumtype));
637 	val &= 0xff;
638     }
639     *s += retlen;
640     return (U8)val;
641 }
642 
643 #define SHIFT_BYTE(utf8, s, strend, datumtype) ((utf8) ? \
644 	uni_to_byte(aTHX_ &(s), (strend), (datumtype)) : \
645 	*(U8 *)(s)++)
646 
647 STATIC bool
648 uni_to_bytes(pTHX_ const char **s, const char *end, const char *buf, int buf_len, I32 datumtype)
649 {
650     UV val;
651     STRLEN retlen;
652     const char *from = *s;
653     int bad = 0;
654     const U32 flags = ckWARN(WARN_UTF8) ?
655 	UTF8_CHECK_ONLY : (UTF8_CHECK_ONLY | UTF8_ALLOW_ANY);
656     for (;buf_len > 0; buf_len--) {
657 	if (from >= end) return FALSE;
658 	val = utf8n_to_uvchr((U8 *) from, end-from, &retlen, flags);
659 	if (retlen == (STRLEN) -1 || retlen == 0) {
660 	    from += UTF8SKIP(from);
661 	    bad |= 1;
662 	} else from += retlen;
663 	if (val >= 0x100) {
664 	    bad |= 2;
665 	    val &= 0xff;
666 	}
667 	*(U8 *)buf++ = (U8)val;
668     }
669     /* We have enough characters for the buffer. Did we have problems ? */
670     if (bad) {
671 	if (bad & 1) {
672 	    /* Rewalk the string fragment while warning */
673 	    const char *ptr;
674 	    const int flags = ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY;
675 	    for (ptr = *s; ptr < from; ptr += UTF8SKIP(ptr)) {
676 		if (ptr >= end) break;
677 		utf8n_to_uvuni((U8 *) ptr, end-ptr, &retlen, flags);
678 	    }
679 	    if (from > end) from = end;
680 	}
681 	if ((bad & 2) && ckWARN(WARN_UNPACK))
682 	    Perl_warner(aTHX_ packWARN(datumtype & TYPE_IS_PACK ?
683 				       WARN_PACK : WARN_UNPACK),
684 			"Character(s) in '%c' format wrapped in %s",
685 			(int) TYPE_NO_MODIFIERS(datumtype),
686 			datumtype & TYPE_IS_PACK ? "pack" : "unpack");
687     }
688     *s = from;
689     return TRUE;
690 }
691 
692 STATIC bool
693 next_uni_uu(pTHX_ const char **s, const char *end, I32 *out)
694 {
695     dVAR;
696     STRLEN retlen;
697     const UV val = utf8n_to_uvchr((U8 *) *s, end-*s, &retlen, UTF8_CHECK_ONLY);
698     if (val >= 0x100 || !ISUUCHAR(val) ||
699 	retlen == (STRLEN) -1 || retlen == 0) {
700 	*out = 0;
701 	return FALSE;
702     }
703     *out = PL_uudmap[val] & 077;
704     *s += retlen;
705     return TRUE;
706 }
707 
708 STATIC char *
709 S_bytes_to_uni(const U8 *start, STRLEN len, char *dest) {
710     const U8 * const end = start + len;
711 
712     PERL_ARGS_ASSERT_BYTES_TO_UNI;
713 
714     while (start < end) {
715 	const UV uv = NATIVE_TO_ASCII(*start);
716 	if (UNI_IS_INVARIANT(uv))
717 	    *dest++ = (char)(U8)UTF_TO_NATIVE(uv);
718 	else {
719 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_HI(uv);
720 	    *dest++ = (char)(U8)UTF8_EIGHT_BIT_LO(uv);
721 	}
722 	start++;
723     }
724     return dest;
725 }
726 
727 #define PUSH_BYTES(utf8, cur, buf, len)				\
728 STMT_START {							\
729     if (utf8)							\
730 	(cur) = bytes_to_uni((U8 *) buf, len, (cur));		\
731     else {							\
732 	Copy(buf, cur, len, char);				\
733 	(cur) += (len);						\
734     }								\
735 } STMT_END
736 
737 #define GROWING(utf8, cat, start, cur, in_len)	\
738 STMT_START {					\
739     STRLEN glen = (in_len);			\
740     if (utf8) glen *= UTF8_EXPAND;		\
741     if ((cur) + glen >= (start) + SvLEN(cat)) {	\
742 	(start) = sv_exp_grow(cat, glen);	\
743 	(cur) = (start) + SvCUR(cat);		\
744     }						\
745 } STMT_END
746 
747 #define PUSH_GROWING_BYTES(utf8, cat, start, cur, buf, in_len) \
748 STMT_START {					\
749     const STRLEN glen = (in_len);		\
750     STRLEN gl = glen;				\
751     if (utf8) gl *= UTF8_EXPAND;		\
752     if ((cur) + gl >= (start) + SvLEN(cat)) {	\
753         *cur = '\0';				\
754         SvCUR_set((cat), (cur) - (start));	\
755 	(start) = sv_exp_grow(cat, gl);		\
756 	(cur) = (start) + SvCUR(cat);		\
757     }						\
758     PUSH_BYTES(utf8, cur, buf, glen);		\
759 } STMT_END
760 
761 #define PUSH_BYTE(utf8, s, byte)		\
762 STMT_START {					\
763     if (utf8) {					\
764 	const U8 au8 = (byte);			\
765 	(s) = bytes_to_uni(&au8, 1, (s));	\
766     } else *(U8 *)(s)++ = (byte);		\
767 } STMT_END
768 
769 /* Only to be used inside a loop (see the break) */
770 #define NEXT_UNI_VAL(val, cur, str, end, utf8_flags)		\
771 STMT_START {							\
772     STRLEN retlen;						\
773     if (str >= end) break;					\
774     val = utf8n_to_uvchr((U8 *) str, end-str, &retlen, utf8_flags);	\
775     if (retlen == (STRLEN) -1 || retlen == 0) {			\
776 	*cur = '\0';						\
777 	Perl_croak(aTHX_ "Malformed UTF-8 string in pack");	\
778     }								\
779     str += retlen;						\
780 } STMT_END
781 
782 static const char *_action( const tempsym_t* symptr )
783 {
784     return (const char *)(( symptr->flags & FLAG_PACK ) ? "pack" : "unpack");
785 }
786 
787 /* Returns the sizeof() struct described by pat */
788 STATIC I32
789 S_measure_struct(pTHX_ tempsym_t* symptr)
790 {
791     I32 total = 0;
792 
793     PERL_ARGS_ASSERT_MEASURE_STRUCT;
794 
795     while (next_symbol(symptr)) {
796 	I32 len;
797 	int size;
798 
799         switch (symptr->howlen) {
800 	  case e_star:
801    	    Perl_croak(aTHX_ "Within []-length '*' not allowed in %s",
802                         _action( symptr ) );
803             break;
804 	  default:
805 	    /* e_no_len and e_number */
806 	    len = symptr->length;
807 	    break;
808         }
809 
810 	size = packprops[TYPE_NO_ENDIANNESS(symptr->code)] & PACK_SIZE_MASK;
811 	if (!size) {
812             int star;
813 	    /* endianness doesn't influence the size of a type */
814 	    switch(TYPE_NO_ENDIANNESS(symptr->code)) {
815 	    default:
816 		Perl_croak(aTHX_ "Invalid type '%c' in %s",
817 			   (int)TYPE_NO_MODIFIERS(symptr->code),
818                            _action( symptr ) );
819 #ifdef PERL_PACK_CAN_SHRIEKSIGN
820 	    case '.' | TYPE_IS_SHRIEKING:
821 	    case '@' | TYPE_IS_SHRIEKING:
822 #endif
823 	    case '@':
824 	    case '.':
825 	    case '/':
826 	    case 'U':			/* XXXX Is it correct? */
827 	    case 'w':
828 	    case 'u':
829 		Perl_croak(aTHX_ "Within []-length '%c' not allowed in %s",
830 			   (int) TYPE_NO_MODIFIERS(symptr->code),
831                            _action( symptr ) );
832 	    case '%':
833 		size = 0;
834 		break;
835 	    case '(':
836 	    {
837 		tempsym_t savsym = *symptr;
838 		symptr->patptr = savsym.grpbeg;
839 		symptr->patend = savsym.grpend;
840 		/* XXXX Theoretically, we need to measure many times at
841 		   different positions, since the subexpression may contain
842 		   alignment commands, but be not of aligned length.
843 		   Need to detect this and croak().  */
844 		size = measure_struct(symptr);
845 		*symptr = savsym;
846 		break;
847 	    }
848 	    case 'X' | TYPE_IS_SHRIEKING:
849 		/* XXXX Is this useful?  Then need to treat MEASURE_BACKWARDS.
850 		 */
851 		if (!len)		/* Avoid division by 0 */
852 		    len = 1;
853 		len = total % len;	/* Assumed: the start is aligned. */
854 		/* FALL THROUGH */
855 	    case 'X':
856 		size = -1;
857 		if (total < len)
858                     Perl_croak(aTHX_ "'X' outside of string in %s", _action( symptr ) );
859 		break;
860 	    case 'x' | TYPE_IS_SHRIEKING:
861 		if (!len)		/* Avoid division by 0 */
862 		    len = 1;
863 		star = total % len;	/* Assumed: the start is aligned. */
864 		if (star)		/* Other portable ways? */
865 		    len = len - star;
866 		else
867 		    len = 0;
868 		/* FALL THROUGH */
869 	    case 'x':
870 	    case 'A':
871 	    case 'Z':
872 	    case 'a':
873 		size = 1;
874 		break;
875 	    case 'B':
876 	    case 'b':
877 		len = (len + 7)/8;
878 		size = 1;
879 		break;
880 	    case 'H':
881 	    case 'h':
882 		len = (len + 1)/2;
883 		size = 1;
884 		break;
885 
886 	    case 'P':
887 		len = 1;
888 		size = sizeof(char*);
889 		break;
890 	    }
891 	}
892 	total += len * size;
893     }
894     return total;
895 }
896 
897 
898 /* locate matching closing parenthesis or bracket
899  * returns char pointer to char after match, or NULL
900  */
901 STATIC const char *
902 S_group_end(pTHX_ register const char *patptr, register const char *patend, char ender)
903 {
904     PERL_ARGS_ASSERT_GROUP_END;
905 
906     while (patptr < patend) {
907 	const char c = *patptr++;
908 
909 	if (isSPACE(c))
910 	    continue;
911 	else if (c == ender)
912 	    return patptr-1;
913 	else if (c == '#') {
914 	    while (patptr < patend && *patptr != '\n')
915 		patptr++;
916 	    continue;
917 	} else if (c == '(')
918 	    patptr = group_end(patptr, patend, ')') + 1;
919 	else if (c == '[')
920 	    patptr = group_end(patptr, patend, ']') + 1;
921     }
922     Perl_croak(aTHX_ "No group ending character '%c' found in template",
923                ender);
924     return 0;
925 }
926 
927 
928 /* Convert unsigned decimal number to binary.
929  * Expects a pointer to the first digit and address of length variable
930  * Advances char pointer to 1st non-digit char and returns number
931  */
932 STATIC const char *
933 S_get_num(pTHX_ register const char *patptr, I32 *lenptr )
934 {
935   I32 len = *patptr++ - '0';
936 
937   PERL_ARGS_ASSERT_GET_NUM;
938 
939   while (isDIGIT(*patptr)) {
940     if (len >= 0x7FFFFFFF/10)
941       Perl_croak(aTHX_ "pack/unpack repeat count overflow");
942     len = (len * 10) + (*patptr++ - '0');
943   }
944   *lenptr = len;
945   return patptr;
946 }
947 
948 /* The marvellous template parsing routine: Using state stored in *symptr,
949  * locates next template code and count
950  */
951 STATIC bool
952 S_next_symbol(pTHX_ tempsym_t* symptr )
953 {
954   const char* patptr = symptr->patptr;
955   const char* const patend = symptr->patend;
956 
957   PERL_ARGS_ASSERT_NEXT_SYMBOL;
958 
959   symptr->flags &= ~FLAG_SLASH;
960 
961   while (patptr < patend) {
962     if (isSPACE(*patptr))
963       patptr++;
964     else if (*patptr == '#') {
965       patptr++;
966       while (patptr < patend && *patptr != '\n')
967 	patptr++;
968       if (patptr < patend)
969 	patptr++;
970     } else {
971       /* We should have found a template code */
972       I32 code = *patptr++ & 0xFF;
973       U32 inherited_modifiers = 0;
974 
975       if (code == ','){ /* grandfather in commas but with a warning */
976 	if (((symptr->flags & FLAG_COMMA) == 0) && ckWARN(WARN_UNPACK)){
977           symptr->flags |= FLAG_COMMA;
978 	  Perl_warner(aTHX_ packWARN(WARN_UNPACK),
979 	 	      "Invalid type ',' in %s", _action( symptr ) );
980         }
981 	continue;
982       }
983 
984       /* for '(', skip to ')' */
985       if (code == '(') {
986         if( isDIGIT(*patptr) || *patptr == '*' || *patptr == '[' )
987           Perl_croak(aTHX_ "()-group starts with a count in %s",
988                         _action( symptr ) );
989         symptr->grpbeg = patptr;
990         patptr = 1 + ( symptr->grpend = group_end(patptr, patend, ')') );
991         if( symptr->level >= MAX_SUB_TEMPLATE_LEVEL )
992 	  Perl_croak(aTHX_ "Too deeply nested ()-groups in %s",
993                         _action( symptr ) );
994       }
995 
996       /* look for group modifiers to inherit */
997       if (TYPE_ENDIANNESS(symptr->flags)) {
998         if (strchr(ENDIANNESS_ALLOWED_TYPES, TYPE_NO_MODIFIERS(code)))
999           inherited_modifiers |= TYPE_ENDIANNESS(symptr->flags);
1000       }
1001 
1002       /* look for modifiers */
1003       while (patptr < patend) {
1004         const char *allowed;
1005         I32 modifier;
1006         switch (*patptr) {
1007           case '!':
1008             modifier = TYPE_IS_SHRIEKING;
1009             allowed = SHRIEKING_ALLOWED_TYPES;
1010             break;
1011 #ifdef PERL_PACK_CAN_BYTEORDER
1012           case '>':
1013             modifier = TYPE_IS_BIG_ENDIAN;
1014             allowed = ENDIANNESS_ALLOWED_TYPES;
1015             break;
1016           case '<':
1017             modifier = TYPE_IS_LITTLE_ENDIAN;
1018             allowed = ENDIANNESS_ALLOWED_TYPES;
1019             break;
1020 #endif /* PERL_PACK_CAN_BYTEORDER */
1021           default:
1022             allowed = "";
1023             modifier = 0;
1024             break;
1025         }
1026 
1027         if (modifier == 0)
1028           break;
1029 
1030         if (!strchr(allowed, TYPE_NO_MODIFIERS(code)))
1031           Perl_croak(aTHX_ "'%c' allowed only after types %s in %s", *patptr,
1032                         allowed, _action( symptr ) );
1033 
1034         if (TYPE_ENDIANNESS(code | modifier) == TYPE_ENDIANNESS_MASK)
1035           Perl_croak(aTHX_ "Can't use both '<' and '>' after type '%c' in %s",
1036                      (int) TYPE_NO_MODIFIERS(code), _action( symptr ) );
1037         else if (TYPE_ENDIANNESS(code | modifier | inherited_modifiers) ==
1038                  TYPE_ENDIANNESS_MASK)
1039           Perl_croak(aTHX_ "Can't use '%c' in a group with different byte-order in %s",
1040                      *patptr, _action( symptr ) );
1041 
1042         if ((code & modifier) && ckWARN(WARN_UNPACK)) {
1043 	    Perl_warner(aTHX_ packWARN(WARN_UNPACK),
1044                         "Duplicate modifier '%c' after '%c' in %s",
1045                         *patptr, (int) TYPE_NO_MODIFIERS(code),
1046                         _action( symptr ) );
1047         }
1048 
1049         code |= modifier;
1050         patptr++;
1051       }
1052 
1053       /* inherit modifiers */
1054       code |= inherited_modifiers;
1055 
1056       /* look for count and/or / */
1057       if (patptr < patend) {
1058 	if (isDIGIT(*patptr)) {
1059  	  patptr = get_num( patptr, &symptr->length );
1060           symptr->howlen = e_number;
1061 
1062         } else if (*patptr == '*') {
1063           patptr++;
1064           symptr->howlen = e_star;
1065 
1066         } else if (*patptr == '[') {
1067           const char* lenptr = ++patptr;
1068           symptr->howlen = e_number;
1069           patptr = group_end( patptr, patend, ']' ) + 1;
1070           /* what kind of [] is it? */
1071           if (isDIGIT(*lenptr)) {
1072             lenptr = get_num( lenptr, &symptr->length );
1073             if( *lenptr != ']' )
1074               Perl_croak(aTHX_ "Malformed integer in [] in %s",
1075                             _action( symptr ) );
1076           } else {
1077             tempsym_t savsym = *symptr;
1078             symptr->patend = patptr-1;
1079             symptr->patptr = lenptr;
1080             savsym.length = measure_struct(symptr);
1081             *symptr = savsym;
1082           }
1083         } else {
1084           symptr->howlen = e_no_len;
1085           symptr->length = 1;
1086         }
1087 
1088         /* try to find / */
1089         while (patptr < patend) {
1090           if (isSPACE(*patptr))
1091             patptr++;
1092           else if (*patptr == '#') {
1093             patptr++;
1094             while (patptr < patend && *patptr != '\n')
1095 	      patptr++;
1096             if (patptr < patend)
1097 	      patptr++;
1098           } else {
1099             if (*patptr == '/') {
1100               symptr->flags |= FLAG_SLASH;
1101               patptr++;
1102               if (patptr < patend &&
1103                   (isDIGIT(*patptr) || *patptr == '*' || *patptr == '['))
1104                 Perl_croak(aTHX_ "'/' does not take a repeat count in %s",
1105                             _action( symptr ) );
1106             }
1107             break;
1108 	  }
1109 	}
1110       } else {
1111         /* at end - no count, no / */
1112         symptr->howlen = e_no_len;
1113         symptr->length = 1;
1114       }
1115 
1116       symptr->code = code;
1117       symptr->patptr = patptr;
1118       return TRUE;
1119     }
1120   }
1121   symptr->patptr = patptr;
1122   return FALSE;
1123 }
1124 
1125 /*
1126    There is no way to cleanly handle the case where we should process the
1127    string per byte in its upgraded form while it's really in downgraded form
1128    (e.g. estimates like strend-s as an upper bound for the number of
1129    characters left wouldn't work). So if we foresee the need of this
1130    (pattern starts with U or contains U0), we want to work on the encoded
1131    version of the string. Users are advised to upgrade their pack string
1132    themselves if they need to do a lot of unpacks like this on it
1133 */
1134 STATIC bool
1135 need_utf8(const char *pat, const char *patend)
1136 {
1137     bool first = TRUE;
1138 
1139     PERL_ARGS_ASSERT_NEED_UTF8;
1140 
1141     while (pat < patend) {
1142 	if (pat[0] == '#') {
1143 	    pat++;
1144 	    pat = (const char *) memchr(pat, '\n', patend-pat);
1145 	    if (!pat) return FALSE;
1146 	} else if (pat[0] == 'U') {
1147 	    if (first || pat[1] == '0') return TRUE;
1148 	} else first = FALSE;
1149 	pat++;
1150     }
1151     return FALSE;
1152 }
1153 
1154 STATIC char
1155 first_symbol(const char *pat, const char *patend) {
1156     PERL_ARGS_ASSERT_FIRST_SYMBOL;
1157 
1158     while (pat < patend) {
1159 	if (pat[0] != '#') return pat[0];
1160 	pat++;
1161 	pat = (const char *) memchr(pat, '\n', patend-pat);
1162 	if (!pat) return 0;
1163 	pat++;
1164     }
1165     return 0;
1166 }
1167 
1168 /*
1169 =for apidoc unpackstring
1170 
1171 The engine implementing unpack() Perl function. C<unpackstring> puts the
1172 extracted list items on the stack and returns the number of elements.
1173 Issue C<PUTBACK> before and C<SPAGAIN> after the call to this function.
1174 
1175 =cut */
1176 
1177 I32
1178 Perl_unpackstring(pTHX_ const char *pat, const char *patend, const char *s, const char *strend, U32 flags)
1179 {
1180     tempsym_t sym;
1181 
1182     PERL_ARGS_ASSERT_UNPACKSTRING;
1183 
1184     if (flags & FLAG_DO_UTF8) flags |= FLAG_WAS_UTF8;
1185     else if (need_utf8(pat, patend)) {
1186 	/* We probably should try to avoid this in case a scalar context call
1187 	   wouldn't get to the "U0" */
1188 	STRLEN len = strend - s;
1189 	s = (char *) bytes_to_utf8((U8 *) s, &len);
1190 	SAVEFREEPV(s);
1191 	strend = s + len;
1192 	flags |= FLAG_DO_UTF8;
1193     }
1194 
1195     if (first_symbol(pat, patend) != 'U' && (flags & FLAG_DO_UTF8))
1196 	flags |= FLAG_PARSE_UTF8;
1197 
1198     TEMPSYM_INIT(&sym, pat, patend, flags);
1199 
1200     return unpack_rec(&sym, s, s, strend, NULL );
1201 }
1202 
1203 STATIC I32
1204 S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const char *strend, const char **new_s )
1205 {
1206     dVAR; dSP;
1207     SV *sv;
1208     const I32 start_sp_offset = SP - PL_stack_base;
1209     howlen_t howlen;
1210     I32 checksum = 0;
1211     UV cuv = 0;
1212     NV cdouble = 0.0;
1213     const int bits_in_uv = CHAR_BIT * sizeof(cuv);
1214     bool beyond = FALSE;
1215     bool explicit_length;
1216     const bool unpack_only_one = (symptr->flags & FLAG_UNPACK_ONLY_ONE) != 0;
1217     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
1218 
1219     PERL_ARGS_ASSERT_UNPACK_REC;
1220 
1221     symptr->strbeg = s - strbeg;
1222 
1223     while (next_symbol(symptr)) {
1224 	packprops_t props;
1225 	I32 len;
1226         I32 datumtype = symptr->code;
1227 	/* do first one only unless in list context
1228 	   / is implemented by unpacking the count, then popping it from the
1229 	   stack, so must check that we're not in the middle of a /  */
1230         if ( unpack_only_one
1231 	     && (SP - PL_stack_base == start_sp_offset + 1)
1232 	     && (datumtype != '/') )   /* XXX can this be omitted */
1233             break;
1234 
1235         switch (howlen = symptr->howlen) {
1236 	  case e_star:
1237 	    len = strend - strbeg;	/* long enough */
1238 	    break;
1239 	  default:
1240 	    /* e_no_len and e_number */
1241 	    len = symptr->length;
1242 	    break;
1243         }
1244 
1245         explicit_length = TRUE;
1246       redo_switch:
1247         beyond = s >= strend;
1248 
1249 	props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
1250 	if (props) {
1251 	    /* props nonzero means we can process this letter. */
1252             const long size = props & PACK_SIZE_MASK;
1253             const long howmany = (strend - s) / size;
1254 	    if (len > howmany)
1255 		len = howmany;
1256 
1257 	    if (!checksum || (props & PACK_SIZE_CANNOT_CSUM)) {
1258 		if (len && unpack_only_one) len = 1;
1259 		EXTEND(SP, len);
1260 		EXTEND_MORTAL(len);
1261 	    }
1262 	}
1263 
1264 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
1265 	default:
1266 	    Perl_croak(aTHX_ "Invalid type '%c' in unpack", (int)TYPE_NO_MODIFIERS(datumtype) );
1267 
1268 	case '%':
1269 	    if (howlen == e_no_len)
1270 		len = 16;		/* len is not specified */
1271 	    checksum = len;
1272 	    cuv = 0;
1273 	    cdouble = 0;
1274 	    continue;
1275 	    break;
1276 	case '(':
1277 	{
1278             tempsym_t savsym = *symptr;
1279             const U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
1280 	    symptr->flags |= group_modifiers;
1281             symptr->patend = savsym.grpend;
1282 	    symptr->previous = &savsym;
1283             symptr->level++;
1284 	    PUTBACK;
1285 	    if (len && unpack_only_one) len = 1;
1286 	    while (len--) {
1287   	        symptr->patptr = savsym.grpbeg;
1288 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
1289 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
1290  	        unpack_rec(symptr, s, strbeg, strend, &s);
1291                 if (s == strend && savsym.howlen == e_star)
1292 		    break; /* No way to continue */
1293 	    }
1294 	    SPAGAIN;
1295             savsym.flags = symptr->flags & ~group_modifiers;
1296             *symptr = savsym;
1297 	    break;
1298 	}
1299 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1300 	case '.' | TYPE_IS_SHRIEKING:
1301 #endif
1302 	case '.': {
1303 	    const char *from;
1304 	    SV *sv;
1305 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1306 	    const bool u8 = utf8 && !(datumtype & TYPE_IS_SHRIEKING);
1307 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1308 	    const bool u8 = utf8;
1309 #endif
1310 	    if (howlen == e_star) from = strbeg;
1311 	    else if (len <= 0) from = s;
1312 	    else {
1313 		tempsym_t *group = symptr;
1314 
1315 		while (--len && group) group = group->previous;
1316 		from = group ? strbeg + group->strbeg : strbeg;
1317 	    }
1318 	    sv = from <= s ?
1319 		newSVuv(  u8 ? (UV) utf8_length((const U8*)from, (const U8*)s) : (UV) (s-from)) :
1320 		newSViv(-(u8 ? (IV) utf8_length((const U8*)s, (const U8*)from) : (IV) (from-s)));
1321 	    mXPUSHs(sv);
1322 	    break;
1323 	}
1324 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1325 	case '@' | TYPE_IS_SHRIEKING:
1326 #endif
1327 	case '@':
1328 	    s = strbeg + symptr->strbeg;
1329 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1330 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
1331 #else /* PERL_PACK_CAN_SHRIEKSIGN */
1332 	    if (utf8)
1333 #endif
1334 	    {
1335 		while (len > 0) {
1336 		    if (s >= strend)
1337 			Perl_croak(aTHX_ "'@' outside of string in unpack");
1338 		    s += UTF8SKIP(s);
1339 		    len--;
1340 		}
1341 		if (s > strend)
1342 		    Perl_croak(aTHX_ "'@' outside of string with malformed UTF-8 in unpack");
1343 	    } else {
1344 		if (strend-s < len)
1345 		    Perl_croak(aTHX_ "'@' outside of string in unpack");
1346 		s += len;
1347 	    }
1348 	    break;
1349  	case 'X' | TYPE_IS_SHRIEKING:
1350  	    if (!len)			/* Avoid division by 0 */
1351  		len = 1;
1352 	    if (utf8) {
1353 		const char *hop, *last;
1354 		I32 l = len;
1355 		hop = last = strbeg;
1356 		while (hop < s) {
1357 		    hop += UTF8SKIP(hop);
1358 		    if (--l == 0) {
1359 			last = hop;
1360 			l = len;
1361 		    }
1362 		}
1363 		if (last > s)
1364 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1365 		s = last;
1366 		break;
1367 	    }
1368 	    len = (s - strbeg) % len;
1369  	    /* FALL THROUGH */
1370 	case 'X':
1371 	    if (utf8) {
1372 		while (len > 0) {
1373 		    if (s <= strbeg)
1374 			Perl_croak(aTHX_ "'X' outside of string in unpack");
1375 		    while (--s, UTF8_IS_CONTINUATION(*s)) {
1376 			if (s <= strbeg)
1377 			    Perl_croak(aTHX_ "'X' outside of string in unpack");
1378 		    }
1379 		    len--;
1380 		}
1381 	    } else {
1382 		if (len > s - strbeg)
1383 		    Perl_croak(aTHX_ "'X' outside of string in unpack" );
1384 		s -= len;
1385 	    }
1386 	    break;
1387  	case 'x' | TYPE_IS_SHRIEKING: {
1388             I32 ai32;
1389  	    if (!len)			/* Avoid division by 0 */
1390  		len = 1;
1391 	    if (utf8) ai32 = utf8_length((U8 *) strbeg, (U8 *) s) % len;
1392 	    else      ai32 = (s - strbeg)                         % len;
1393 	    if (ai32 == 0) break;
1394 	    len -= ai32;
1395             }
1396  	    /* FALL THROUGH */
1397 	case 'x':
1398 	    if (utf8) {
1399 		while (len>0) {
1400 		    if (s >= strend)
1401 			Perl_croak(aTHX_ "'x' outside of string in unpack");
1402 		    s += UTF8SKIP(s);
1403 		    len--;
1404 		}
1405 	    } else {
1406 		if (len > strend - s)
1407 		    Perl_croak(aTHX_ "'x' outside of string in unpack");
1408 		s += len;
1409 	    }
1410 	    break;
1411 	case '/':
1412 	    Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
1413             break;
1414 	case 'A':
1415 	case 'Z':
1416 	case 'a':
1417 	    if (checksum) {
1418 		/* Preliminary length estimate is assumed done in 'W' */
1419 		if (len > strend - s) len = strend - s;
1420 		goto W_checksum;
1421 	    }
1422 	    if (utf8) {
1423 		I32 l;
1424 		const char *hop;
1425 		for (l=len, hop=s; l>0; l--, hop += UTF8SKIP(hop)) {
1426 		    if (hop >= strend) {
1427 			if (hop > strend)
1428 			    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1429 			break;
1430 		    }
1431 		}
1432 		if (hop > strend)
1433 		    Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1434 		len = hop - s;
1435 	    } else if (len > strend - s)
1436 		len = strend - s;
1437 
1438 	    if (datumtype == 'Z') {
1439 		/* 'Z' strips stuff after first null */
1440 		const char *ptr, *end;
1441 		end = s + len;
1442 		for (ptr = s; ptr < end; ptr++) if (*ptr == 0) break;
1443 		sv = newSVpvn(s, ptr-s);
1444 		if (howlen == e_star) /* exact for 'Z*' */
1445 		    len = ptr-s + (ptr != strend ? 1 : 0);
1446 	    } else if (datumtype == 'A') {
1447 		/* 'A' strips both nulls and spaces */
1448 		const char *ptr;
1449 		if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
1450 		    for (ptr = s+len-1; ptr >= s; ptr--)
1451 			if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
1452 			    !is_utf8_space((U8 *) ptr)) break;
1453 		    if (ptr >= s) ptr += UTF8SKIP(ptr);
1454 		    else ptr++;
1455 		    if (ptr > s+len)
1456 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1457 		} else {
1458 		    for (ptr = s+len-1; ptr >= s; ptr--)
1459 			if (*ptr != 0 && !isSPACE(*ptr)) break;
1460 		    ptr++;
1461 		}
1462 		sv = newSVpvn(s, ptr-s);
1463 	    } else sv = newSVpvn(s, len);
1464 
1465 	    if (utf8) {
1466 		SvUTF8_on(sv);
1467 		/* Undo any upgrade done due to need_utf8() */
1468 		if (!(symptr->flags & FLAG_WAS_UTF8))
1469 		    sv_utf8_downgrade(sv, 0);
1470 	    }
1471 	    mXPUSHs(sv);
1472 	    s += len;
1473 	    break;
1474 	case 'B':
1475 	case 'b': {
1476 	    char *str;
1477 	    if (howlen == e_star || len > (strend - s) * 8)
1478 		len = (strend - s) * 8;
1479 	    if (checksum) {
1480 		if (!PL_bitcount) {
1481 		    int bits;
1482 		    Newxz(PL_bitcount, 256, char);
1483 		    for (bits = 1; bits < 256; bits++) {
1484 			if (bits & 1)	PL_bitcount[bits]++;
1485 			if (bits & 2)	PL_bitcount[bits]++;
1486 			if (bits & 4)	PL_bitcount[bits]++;
1487 			if (bits & 8)	PL_bitcount[bits]++;
1488 			if (bits & 16)	PL_bitcount[bits]++;
1489 			if (bits & 32)	PL_bitcount[bits]++;
1490 			if (bits & 64)	PL_bitcount[bits]++;
1491 			if (bits & 128)	PL_bitcount[bits]++;
1492 		    }
1493 		}
1494 		if (utf8)
1495 		    while (len >= 8 && s < strend) {
1496 			cuv += PL_bitcount[uni_to_byte(aTHX_ &s, strend, datumtype)];
1497 			len -= 8;
1498 		    }
1499 		else
1500 		    while (len >= 8) {
1501 			cuv += PL_bitcount[*(U8 *)s++];
1502 			len -= 8;
1503 		    }
1504 		if (len && s < strend) {
1505 		    U8 bits;
1506 		    bits = SHIFT_BYTE(utf8, s, strend, datumtype);
1507 		    if (datumtype == 'b')
1508 			while (len-- > 0) {
1509 			    if (bits & 1) cuv++;
1510 			    bits >>= 1;
1511 			}
1512 		    else
1513 			while (len-- > 0) {
1514 			    if (bits & 0x80) cuv++;
1515 			    bits <<= 1;
1516 			}
1517 		}
1518 		break;
1519 	    }
1520 
1521 	    sv = sv_2mortal(newSV(len ? len : 1));
1522 	    SvPOK_on(sv);
1523 	    str = SvPVX(sv);
1524 	    if (datumtype == 'b') {
1525 		U8 bits = 0;
1526 		const I32 ai32 = len;
1527 		for (len = 0; len < ai32; len++) {
1528 		    if (len & 7) bits >>= 1;
1529 		    else if (utf8) {
1530 			if (s >= strend) break;
1531 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1532 		    } else bits = *(U8 *) s++;
1533 		    *str++ = bits & 1 ? '1' : '0';
1534 		}
1535 	    } else {
1536 		U8 bits = 0;
1537 		const I32 ai32 = len;
1538 		for (len = 0; len < ai32; len++) {
1539 		    if (len & 7) bits <<= 1;
1540 		    else if (utf8) {
1541 			if (s >= strend) break;
1542 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1543 		    } else bits = *(U8 *) s++;
1544 		    *str++ = bits & 0x80 ? '1' : '0';
1545 		}
1546 	    }
1547 	    *str = '\0';
1548 	    SvCUR_set(sv, str - SvPVX_const(sv));
1549 	    XPUSHs(sv);
1550 	    break;
1551 	}
1552 	case 'H':
1553 	case 'h': {
1554 	    char *str;
1555 	    /* Preliminary length estimate, acceptable for utf8 too */
1556 	    if (howlen == e_star || len > (strend - s) * 2)
1557 		len = (strend - s) * 2;
1558 	    sv = sv_2mortal(newSV(len ? len : 1));
1559 	    SvPOK_on(sv);
1560 	    str = SvPVX(sv);
1561 	    if (datumtype == 'h') {
1562 		U8 bits = 0;
1563 		I32 ai32 = len;
1564 		for (len = 0; len < ai32; len++) {
1565 		    if (len & 1) bits >>= 4;
1566 		    else if (utf8) {
1567 			if (s >= strend) break;
1568 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1569 		    } else bits = * (U8 *) s++;
1570 		    *str++ = PL_hexdigit[bits & 15];
1571 		}
1572 	    } else {
1573 		U8 bits = 0;
1574 		const I32 ai32 = len;
1575 		for (len = 0; len < ai32; len++) {
1576 		    if (len & 1) bits <<= 4;
1577 		    else if (utf8) {
1578 			if (s >= strend) break;
1579 			bits = uni_to_byte(aTHX_ &s, strend, datumtype);
1580 		    } else bits = *(U8 *) s++;
1581 		    *str++ = PL_hexdigit[(bits >> 4) & 15];
1582 		}
1583 	    }
1584 	    *str = '\0';
1585 	    SvCUR_set(sv, str - SvPVX_const(sv));
1586 	    XPUSHs(sv);
1587 	    break;
1588 	}
1589 	case 'C':
1590             if (len == 0) {
1591                 if (explicit_length)
1592 		    /* Switch to "character" mode */
1593 		    utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
1594 		break;
1595 	    }
1596 	    /* FALL THROUGH */
1597 	case 'c':
1598 	    while (len-- > 0 && s < strend) {
1599 		int aint;
1600 		if (utf8)
1601 		  {
1602 		    STRLEN retlen;
1603 		    aint = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1604 				 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1605 		    if (retlen == (STRLEN) -1 || retlen == 0)
1606 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1607 		    s += retlen;
1608 		  }
1609 		else
1610 		  aint = *(U8 *)(s)++;
1611 		if (aint >= 128 && datumtype != 'C')	/* fake up signed chars */
1612 		    aint -= 256;
1613 		if (!checksum)
1614 		    mPUSHi(aint);
1615 		else if (checksum > bits_in_uv)
1616 		    cdouble += (NV)aint;
1617 		else
1618 		    cuv += aint;
1619 	    }
1620 	    break;
1621 	case 'W':
1622 	  W_checksum:
1623 	    if (utf8) {
1624 		while (len-- > 0 && s < strend) {
1625 		    STRLEN retlen;
1626 		    const UV val = utf8n_to_uvchr((U8 *) s, strend-s, &retlen,
1627 					 ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
1628 		    if (retlen == (STRLEN) -1 || retlen == 0)
1629 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1630 		    s += retlen;
1631 		    if (!checksum)
1632 			mPUSHu(val);
1633 		    else if (checksum > bits_in_uv)
1634 			cdouble += (NV) val;
1635 		    else
1636 			cuv += val;
1637 		}
1638 	    } else if (!checksum)
1639 		while (len-- > 0) {
1640 		    const U8 ch = *(U8 *) s++;
1641 		    mPUSHu(ch);
1642 	    }
1643 	    else if (checksum > bits_in_uv)
1644 		while (len-- > 0) cdouble += (NV) *(U8 *) s++;
1645 	    else
1646 		while (len-- > 0) cuv += *(U8 *) s++;
1647 	    break;
1648 	case 'U':
1649 	    if (len == 0) {
1650                 if (explicit_length) {
1651 		    /* Switch to "bytes in UTF-8" mode */
1652 		    if (symptr->flags & FLAG_DO_UTF8) utf8 = 0;
1653 		    else
1654 			/* Should be impossible due to the need_utf8() test */
1655 			Perl_croak(aTHX_ "U0 mode on a byte string");
1656 		}
1657 		break;
1658 	    }
1659 	    if (len > strend - s) len = strend - s;
1660 	    if (!checksum) {
1661 		if (len && unpack_only_one) len = 1;
1662 		EXTEND(SP, len);
1663 		EXTEND_MORTAL(len);
1664 	    }
1665 	    while (len-- > 0 && s < strend) {
1666 		STRLEN retlen;
1667 		UV auv;
1668 		if (utf8) {
1669 		    U8 result[UTF8_MAXLEN];
1670 		    const char *ptr = s;
1671 		    STRLEN len;
1672 		    /* Bug: warns about bad utf8 even if we are short on bytes
1673 		       and will break out of the loop */
1674 		    if (!uni_to_bytes(aTHX_ &ptr, strend, (char *) result, 1,
1675 				      'U'))
1676 			break;
1677 		    len = UTF8SKIP(result);
1678 		    if (!uni_to_bytes(aTHX_ &ptr, strend,
1679 				      (char *) &result[1], len-1, 'U')) break;
1680 		    auv = utf8n_to_uvuni(result, len, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1681 		    s = ptr;
1682 		} else {
1683 		    auv = utf8n_to_uvuni((U8*)s, strend - s, &retlen, ckWARN(WARN_UTF8) ? 0 : UTF8_ALLOW_ANYUV);
1684 		    if (retlen == (STRLEN) -1 || retlen == 0)
1685 			Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
1686 		    s += retlen;
1687 		}
1688 		if (!checksum)
1689 		    mPUSHu(auv);
1690 		else if (checksum > bits_in_uv)
1691 		    cdouble += (NV) auv;
1692 		else
1693 		    cuv += auv;
1694 	    }
1695 	    break;
1696 	case 's' | TYPE_IS_SHRIEKING:
1697 #if SHORTSIZE != SIZE16
1698 	    while (len-- > 0) {
1699 		short ashort;
1700 		SHIFT_VAR(utf8, s, strend, ashort, datumtype);
1701 		DO_BO_UNPACK(ashort, s);
1702 		if (!checksum)
1703 		    mPUSHi(ashort);
1704 		else if (checksum > bits_in_uv)
1705 		    cdouble += (NV)ashort;
1706 		else
1707 		    cuv += ashort;
1708 	    }
1709 	    break;
1710 #else
1711 	    /* Fallthrough! */
1712 #endif
1713 	case 's':
1714 	    while (len-- > 0) {
1715 		I16 ai16;
1716 
1717 #if U16SIZE > SIZE16
1718 		ai16 = 0;
1719 #endif
1720 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1721 		DO_BO_UNPACK(ai16, 16);
1722 #if U16SIZE > SIZE16
1723 		if (ai16 > 32767)
1724 		    ai16 -= 65536;
1725 #endif
1726 		if (!checksum)
1727 		    mPUSHi(ai16);
1728 		else if (checksum > bits_in_uv)
1729 		    cdouble += (NV)ai16;
1730 		else
1731 		    cuv += ai16;
1732 	    }
1733 	    break;
1734 	case 'S' | TYPE_IS_SHRIEKING:
1735 #if SHORTSIZE != SIZE16
1736 	    while (len-- > 0) {
1737 		unsigned short aushort;
1738 		SHIFT_VAR(utf8, s, strend, aushort, datumtype);
1739 		DO_BO_UNPACK(aushort, s);
1740 		if (!checksum)
1741 		    mPUSHu(aushort);
1742 		else if (checksum > bits_in_uv)
1743 		    cdouble += (NV)aushort;
1744 		else
1745 		    cuv += aushort;
1746 	    }
1747 	    break;
1748 #else
1749             /* Fallhrough! */
1750 #endif
1751 	case 'v':
1752 	case 'n':
1753 	case 'S':
1754 	    while (len-- > 0) {
1755 		U16 au16;
1756 #if U16SIZE > SIZE16
1757 		au16 = 0;
1758 #endif
1759 		SHIFT16(utf8, s, strend, &au16, datumtype);
1760 		DO_BO_UNPACK(au16, 16);
1761 #ifdef HAS_NTOHS
1762 		if (datumtype == 'n')
1763 		    au16 = PerlSock_ntohs(au16);
1764 #endif
1765 #ifdef HAS_VTOHS
1766 		if (datumtype == 'v')
1767 		    au16 = vtohs(au16);
1768 #endif
1769 		if (!checksum)
1770 		    mPUSHu(au16);
1771 		else if (checksum > bits_in_uv)
1772 		    cdouble += (NV) au16;
1773 		else
1774 		    cuv += au16;
1775 	    }
1776 	    break;
1777 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1778 	case 'v' | TYPE_IS_SHRIEKING:
1779 	case 'n' | TYPE_IS_SHRIEKING:
1780 	    while (len-- > 0) {
1781 		I16 ai16;
1782 # if U16SIZE > SIZE16
1783 		ai16 = 0;
1784 # endif
1785 		SHIFT16(utf8, s, strend, &ai16, datumtype);
1786 # ifdef HAS_NTOHS
1787 		if (datumtype == ('n' | TYPE_IS_SHRIEKING))
1788 		    ai16 = (I16) PerlSock_ntohs((U16) ai16);
1789 # endif /* HAS_NTOHS */
1790 # ifdef HAS_VTOHS
1791 		if (datumtype == ('v' | TYPE_IS_SHRIEKING))
1792 		    ai16 = (I16) vtohs((U16) ai16);
1793 # endif /* HAS_VTOHS */
1794 		if (!checksum)
1795 		    mPUSHi(ai16);
1796 		else if (checksum > bits_in_uv)
1797 		    cdouble += (NV) ai16;
1798 		else
1799 		    cuv += ai16;
1800 	    }
1801 	    break;
1802 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1803 	case 'i':
1804 	case 'i' | TYPE_IS_SHRIEKING:
1805 	    while (len-- > 0) {
1806 		int aint;
1807 		SHIFT_VAR(utf8, s, strend, aint, datumtype);
1808 		DO_BO_UNPACK(aint, i);
1809 		if (!checksum)
1810 		    mPUSHi(aint);
1811 		else if (checksum > bits_in_uv)
1812 		    cdouble += (NV)aint;
1813 		else
1814 		    cuv += aint;
1815 	    }
1816 	    break;
1817 	case 'I':
1818 	case 'I' | TYPE_IS_SHRIEKING:
1819 	    while (len-- > 0) {
1820 		unsigned int auint;
1821 		SHIFT_VAR(utf8, s, strend, auint, datumtype);
1822 		DO_BO_UNPACK(auint, i);
1823 		if (!checksum)
1824 		    mPUSHu(auint);
1825 		else if (checksum > bits_in_uv)
1826 		    cdouble += (NV)auint;
1827 		else
1828 		    cuv += auint;
1829 	    }
1830 	    break;
1831 	case 'j':
1832 	    while (len-- > 0) {
1833 		IV aiv;
1834 		SHIFT_VAR(utf8, s, strend, aiv, datumtype);
1835 #if IVSIZE == INTSIZE
1836 		DO_BO_UNPACK(aiv, i);
1837 #elif IVSIZE == LONGSIZE
1838 		DO_BO_UNPACK(aiv, l);
1839 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1840 		DO_BO_UNPACK(aiv, 64);
1841 #else
1842 		Perl_croak(aTHX_ "'j' not supported on this platform");
1843 #endif
1844 		if (!checksum)
1845 		    mPUSHi(aiv);
1846 		else if (checksum > bits_in_uv)
1847 		    cdouble += (NV)aiv;
1848 		else
1849 		    cuv += aiv;
1850 	    }
1851 	    break;
1852 	case 'J':
1853 	    while (len-- > 0) {
1854 		UV auv;
1855 		SHIFT_VAR(utf8, s, strend, auv, datumtype);
1856 #if IVSIZE == INTSIZE
1857 		DO_BO_UNPACK(auv, i);
1858 #elif IVSIZE == LONGSIZE
1859 		DO_BO_UNPACK(auv, l);
1860 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
1861 		DO_BO_UNPACK(auv, 64);
1862 #else
1863 		Perl_croak(aTHX_ "'J' not supported on this platform");
1864 #endif
1865 		if (!checksum)
1866 		    mPUSHu(auv);
1867 		else if (checksum > bits_in_uv)
1868 		    cdouble += (NV)auv;
1869 		else
1870 		    cuv += auv;
1871 	    }
1872 	    break;
1873 	case 'l' | TYPE_IS_SHRIEKING:
1874 #if LONGSIZE != SIZE32
1875 	    while (len-- > 0) {
1876 		long along;
1877 		SHIFT_VAR(utf8, s, strend, along, datumtype);
1878 		DO_BO_UNPACK(along, l);
1879 		if (!checksum)
1880 		    mPUSHi(along);
1881 		else if (checksum > bits_in_uv)
1882 		    cdouble += (NV)along;
1883 		else
1884 		    cuv += along;
1885 	    }
1886 	    break;
1887 #else
1888 	    /* Fallthrough! */
1889 #endif
1890 	case 'l':
1891 	    while (len-- > 0) {
1892 		I32 ai32;
1893 #if U32SIZE > SIZE32
1894 		ai32 = 0;
1895 #endif
1896 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1897 		DO_BO_UNPACK(ai32, 32);
1898 #if U32SIZE > SIZE32
1899 		if (ai32 > 2147483647) ai32 -= 4294967296;
1900 #endif
1901 		if (!checksum)
1902 		    mPUSHi(ai32);
1903 		else if (checksum > bits_in_uv)
1904 		    cdouble += (NV)ai32;
1905 		else
1906 		    cuv += ai32;
1907 	    }
1908 	    break;
1909 	case 'L' | TYPE_IS_SHRIEKING:
1910 #if LONGSIZE != SIZE32
1911 	    while (len-- > 0) {
1912 		unsigned long aulong;
1913 		SHIFT_VAR(utf8, s, strend, aulong, datumtype);
1914 		DO_BO_UNPACK(aulong, l);
1915 		if (!checksum)
1916 		    mPUSHu(aulong);
1917 		else if (checksum > bits_in_uv)
1918 		    cdouble += (NV)aulong;
1919 		else
1920 		    cuv += aulong;
1921 	    }
1922 	    break;
1923 #else
1924             /* Fall through! */
1925 #endif
1926 	case 'V':
1927 	case 'N':
1928 	case 'L':
1929 	    while (len-- > 0) {
1930 		U32 au32;
1931 #if U32SIZE > SIZE32
1932 		au32 = 0;
1933 #endif
1934 		SHIFT32(utf8, s, strend, &au32, datumtype);
1935 		DO_BO_UNPACK(au32, 32);
1936 #ifdef HAS_NTOHL
1937 		if (datumtype == 'N')
1938 		    au32 = PerlSock_ntohl(au32);
1939 #endif
1940 #ifdef HAS_VTOHL
1941 		if (datumtype == 'V')
1942 		    au32 = vtohl(au32);
1943 #endif
1944 		if (!checksum)
1945 		    mPUSHu(au32);
1946 		else if (checksum > bits_in_uv)
1947 		    cdouble += (NV)au32;
1948 		else
1949 		    cuv += au32;
1950 	    }
1951 	    break;
1952 #ifdef PERL_PACK_CAN_SHRIEKSIGN
1953 	case 'V' | TYPE_IS_SHRIEKING:
1954 	case 'N' | TYPE_IS_SHRIEKING:
1955 	    while (len-- > 0) {
1956 		I32 ai32;
1957 # if U32SIZE > SIZE32
1958 		ai32 = 0;
1959 # endif
1960 		SHIFT32(utf8, s, strend, &ai32, datumtype);
1961 # ifdef HAS_NTOHL
1962 		if (datumtype == ('N' | TYPE_IS_SHRIEKING))
1963 		    ai32 = (I32)PerlSock_ntohl((U32)ai32);
1964 # endif
1965 # ifdef HAS_VTOHL
1966 		if (datumtype == ('V' | TYPE_IS_SHRIEKING))
1967 		    ai32 = (I32)vtohl((U32)ai32);
1968 # endif
1969 		if (!checksum)
1970 		    mPUSHi(ai32);
1971 		else if (checksum > bits_in_uv)
1972 		    cdouble += (NV)ai32;
1973 		else
1974 		    cuv += ai32;
1975 	    }
1976 	    break;
1977 #endif /* PERL_PACK_CAN_SHRIEKSIGN */
1978 	case 'p':
1979 	    while (len-- > 0) {
1980 		const char *aptr;
1981 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
1982 		DO_BO_UNPACK_PC(aptr);
1983 		/* newSVpv generates undef if aptr is NULL */
1984 		mPUSHs(newSVpv(aptr, 0));
1985 	    }
1986 	    break;
1987 	case 'w':
1988 	    {
1989 		UV auv = 0;
1990 		U32 bytes = 0;
1991 
1992 		while (len > 0 && s < strend) {
1993 		    U8 ch;
1994 		    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
1995 		    auv = (auv << 7) | (ch & 0x7f);
1996 		    /* UTF8_IS_XXXXX not right here - using constant 0x80 */
1997 		    if (ch < 0x80) {
1998 			bytes = 0;
1999 			mPUSHu(auv);
2000 			len--;
2001 			auv = 0;
2002 			continue;
2003 		    }
2004 		    if (++bytes >= sizeof(UV)) {	/* promote to string */
2005 			const char *t;
2006 
2007 			sv = Perl_newSVpvf(aTHX_ "%.*"UVuf, (int)TYPE_DIGITS(UV), auv);
2008 			while (s < strend) {
2009 			    ch = SHIFT_BYTE(utf8, s, strend, datumtype);
2010 			    sv = mul128(sv, (U8)(ch & 0x7f));
2011 			    if (!(ch & 0x80)) {
2012 				bytes = 0;
2013 				break;
2014 			    }
2015 			}
2016 			t = SvPV_nolen_const(sv);
2017 			while (*t == '0')
2018 			    t++;
2019 			sv_chop(sv, t);
2020 			mPUSHs(sv);
2021 			len--;
2022 			auv = 0;
2023 		    }
2024 		}
2025 		if ((s >= strend) && bytes)
2026 		    Perl_croak(aTHX_ "Unterminated compressed integer in unpack");
2027 	    }
2028 	    break;
2029 	case 'P':
2030 	    if (symptr->howlen == e_star)
2031 	        Perl_croak(aTHX_ "'P' must have an explicit size in unpack");
2032 	    EXTEND(SP, 1);
2033 	    if (s + sizeof(char*) <= strend) {
2034 		char *aptr;
2035 		SHIFT_VAR(utf8, s, strend, aptr, datumtype);
2036 		DO_BO_UNPACK_PC(aptr);
2037 		/* newSVpvn generates undef if aptr is NULL */
2038 		PUSHs(newSVpvn_flags(aptr, len, SVs_TEMP));
2039 	    }
2040 	    break;
2041 #ifdef HAS_QUAD
2042 	case 'q':
2043 	    while (len-- > 0) {
2044 		Quad_t aquad;
2045 		SHIFT_VAR(utf8, s, strend, aquad, datumtype);
2046 		DO_BO_UNPACK(aquad, 64);
2047 		if (!checksum)
2048                     mPUSHs(aquad >= IV_MIN && aquad <= IV_MAX ?
2049 			   newSViv((IV)aquad) : newSVnv((NV)aquad));
2050 		else if (checksum > bits_in_uv)
2051 		    cdouble += (NV)aquad;
2052 		else
2053 		    cuv += aquad;
2054 	    }
2055 	    break;
2056 	case 'Q':
2057 	    while (len-- > 0) {
2058 		Uquad_t auquad;
2059 		SHIFT_VAR(utf8, s, strend, auquad, datumtype);
2060 		DO_BO_UNPACK(auquad, 64);
2061 		if (!checksum)
2062 		    mPUSHs(auquad <= UV_MAX ?
2063 			   newSVuv((UV)auquad) : newSVnv((NV)auquad));
2064 		else if (checksum > bits_in_uv)
2065 		    cdouble += (NV)auquad;
2066 		else
2067 		    cuv += auquad;
2068 	    }
2069 	    break;
2070 #endif /* HAS_QUAD */
2071 	/* float and double added gnb@melba.bby.oz.au 22/11/89 */
2072 	case 'f':
2073 	    while (len-- > 0) {
2074 		float afloat;
2075 		SHIFT_VAR(utf8, s, strend, afloat, datumtype);
2076 		DO_BO_UNPACK_N(afloat, float);
2077 		if (!checksum)
2078 		    mPUSHn(afloat);
2079 		else
2080 		    cdouble += afloat;
2081 	    }
2082 	    break;
2083 	case 'd':
2084 	    while (len-- > 0) {
2085 		double adouble;
2086 		SHIFT_VAR(utf8, s, strend, adouble, datumtype);
2087 		DO_BO_UNPACK_N(adouble, double);
2088 		if (!checksum)
2089 		    mPUSHn(adouble);
2090 		else
2091 		    cdouble += adouble;
2092 	    }
2093 	    break;
2094 	case 'F':
2095 	    while (len-- > 0) {
2096 		NV anv;
2097 		SHIFT_VAR(utf8, s, strend, anv, datumtype);
2098 		DO_BO_UNPACK_N(anv, NV);
2099 		if (!checksum)
2100 		    mPUSHn(anv);
2101 		else
2102 		    cdouble += anv;
2103 	    }
2104 	    break;
2105 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
2106 	case 'D':
2107 	    while (len-- > 0) {
2108 		long double aldouble;
2109 		SHIFT_VAR(utf8, s, strend, aldouble, datumtype);
2110 		DO_BO_UNPACK_N(aldouble, long double);
2111 		if (!checksum)
2112 		    mPUSHn(aldouble);
2113 		else
2114 		    cdouble += aldouble;
2115 	    }
2116 	    break;
2117 #endif
2118 	case 'u':
2119 	    {
2120                 const STRLEN l = (STRLEN) (strend - s) * 3 / 4;
2121 		sv = sv_2mortal(newSV(l));
2122 		if (l) SvPOK_on(sv);
2123 	    }
2124 	    if (utf8) {
2125 		while (next_uni_uu(aTHX_ &s, strend, &len)) {
2126 		    I32 a, b, c, d;
2127 		    char hunk[3];
2128 
2129 		    while (len > 0) {
2130 			next_uni_uu(aTHX_ &s, strend, &a);
2131 			next_uni_uu(aTHX_ &s, strend, &b);
2132 			next_uni_uu(aTHX_ &s, strend, &c);
2133 			next_uni_uu(aTHX_ &s, strend, &d);
2134 			hunk[0] = (char)((a << 2) | (b >> 4));
2135 			hunk[1] = (char)((b << 4) | (c >> 2));
2136 			hunk[2] = (char)((c << 6) | d);
2137 			sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2138 			len -= 3;
2139 		    }
2140 		    if (s < strend) {
2141 			if (*s == '\n') {
2142                             s++;
2143                         }
2144 			else {
2145 			    /* possible checksum byte */
2146 			    const char *skip = s+UTF8SKIP(s);
2147 			    if (skip < strend && *skip == '\n')
2148                                 s = skip+1;
2149 			}
2150 		    }
2151 		}
2152 	    } else {
2153 		while (s < strend && *s > ' ' && ISUUCHAR(*s)) {
2154 		    I32 a, b, c, d;
2155 		    char hunk[3];
2156 
2157 		    len = PL_uudmap[*(U8*)s++] & 077;
2158 		    while (len > 0) {
2159 			if (s < strend && ISUUCHAR(*s))
2160 			    a = PL_uudmap[*(U8*)s++] & 077;
2161 			else
2162 			    a = 0;
2163 			if (s < strend && ISUUCHAR(*s))
2164 			    b = PL_uudmap[*(U8*)s++] & 077;
2165 			else
2166 			    b = 0;
2167 			if (s < strend && ISUUCHAR(*s))
2168 			    c = PL_uudmap[*(U8*)s++] & 077;
2169 			else
2170 			    c = 0;
2171 			if (s < strend && ISUUCHAR(*s))
2172 			    d = PL_uudmap[*(U8*)s++] & 077;
2173 			else
2174 			    d = 0;
2175 			hunk[0] = (char)((a << 2) | (b >> 4));
2176 			hunk[1] = (char)((b << 4) | (c >> 2));
2177 			hunk[2] = (char)((c << 6) | d);
2178 			sv_catpvn(sv, hunk, (len > 3) ? 3 : len);
2179 			len -= 3;
2180 		    }
2181 		    if (*s == '\n')
2182 			s++;
2183 		    else	/* possible checksum byte */
2184 			if (s + 1 < strend && s[1] == '\n')
2185 			    s += 2;
2186 		}
2187 	    }
2188 	    XPUSHs(sv);
2189 	    break;
2190 	}
2191 
2192 	if (checksum) {
2193 	    if (strchr("fFdD", TYPE_NO_MODIFIERS(datumtype)) ||
2194 	      (checksum > bits_in_uv &&
2195 	       strchr("cCsSiIlLnNUWvVqQjJ", TYPE_NO_MODIFIERS(datumtype))) ) {
2196 		NV trouble, anv;
2197 
2198                 anv = (NV) (1 << (checksum & 15));
2199 		while (checksum >= 16) {
2200 		    checksum -= 16;
2201 		    anv *= 65536.0;
2202 		}
2203 		while (cdouble < 0.0)
2204 		    cdouble += anv;
2205 		cdouble = Perl_modf(cdouble / anv, &trouble) * anv;
2206 		sv = newSVnv(cdouble);
2207 	    }
2208 	    else {
2209 		if (checksum < bits_in_uv) {
2210 		    UV mask = ((UV)1 << checksum) - 1;
2211 		    cuv &= mask;
2212 		}
2213 		sv = newSVuv(cuv);
2214 	    }
2215 	    mXPUSHs(sv);
2216 	    checksum = 0;
2217 	}
2218 
2219         if (symptr->flags & FLAG_SLASH){
2220             if (SP - PL_stack_base - start_sp_offset <= 0)
2221                 Perl_croak(aTHX_ "'/' must follow a numeric type in unpack");
2222             if( next_symbol(symptr) ){
2223               if( symptr->howlen == e_number )
2224 		Perl_croak(aTHX_ "Count after length/code in unpack" );
2225               if( beyond ){
2226          	/* ...end of char buffer then no decent length available */
2227 		Perl_croak(aTHX_ "length/code after end of string in unpack" );
2228               } else {
2229          	/* take top of stack (hope it's numeric) */
2230                 len = POPi;
2231                 if( len < 0 )
2232                     Perl_croak(aTHX_ "Negative '/' count in unpack" );
2233               }
2234             } else {
2235 		Perl_croak(aTHX_ "Code missing after '/' in unpack" );
2236             }
2237             datumtype = symptr->code;
2238             explicit_length = FALSE;
2239 	    goto redo_switch;
2240         }
2241     }
2242 
2243     if (new_s)
2244 	*new_s = s;
2245     PUTBACK;
2246     return SP - PL_stack_base - start_sp_offset;
2247 }
2248 
2249 PP(pp_unpack)
2250 {
2251     dVAR;
2252     dSP;
2253     dPOPPOPssrl;
2254     I32 gimme = GIMME_V;
2255     STRLEN llen;
2256     STRLEN rlen;
2257     const char *pat = SvPV_const(left,  llen);
2258     const char *s   = SvPV_const(right, rlen);
2259     const char *strend = s + rlen;
2260     const char *patend = pat + llen;
2261     I32 cnt;
2262 
2263     PUTBACK;
2264     cnt = unpackstring(pat, patend, s, strend,
2265 		     ((gimme == G_SCALAR) ? FLAG_UNPACK_ONLY_ONE : 0)
2266 		     | (DO_UTF8(right) ? FLAG_DO_UTF8 : 0));
2267 
2268     SPAGAIN;
2269     if ( !cnt && gimme == G_SCALAR )
2270        PUSHs(&PL_sv_undef);
2271     RETURN;
2272 }
2273 
2274 STATIC U8 *
2275 doencodes(U8 *h, const char *s, I32 len)
2276 {
2277     *h++ = PL_uuemap[len];
2278     while (len > 2) {
2279 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2280 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((s[1] >> 4) & 017)))];
2281 	*h++ = PL_uuemap[(077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03)))];
2282 	*h++ = PL_uuemap[(077 & (s[2] & 077))];
2283 	s += 3;
2284 	len -= 3;
2285     }
2286     if (len > 0) {
2287         const char r = (len > 1 ? s[1] : '\0');
2288 	*h++ = PL_uuemap[(077 & (s[0] >> 2))];
2289 	*h++ = PL_uuemap[(077 & (((s[0] << 4) & 060) | ((r >> 4) & 017)))];
2290 	*h++ = PL_uuemap[(077 & ((r << 2) & 074))];
2291 	*h++ = PL_uuemap[0];
2292     }
2293     *h++ = '\n';
2294     return h;
2295 }
2296 
2297 STATIC SV *
2298 S_is_an_int(pTHX_ const char *s, STRLEN l)
2299 {
2300   SV *result = newSVpvn(s, l);
2301   char *const result_c = SvPV_nolen(result);	/* convenience */
2302   char *out = result_c;
2303   bool skip = 1;
2304   bool ignore = 0;
2305 
2306   PERL_ARGS_ASSERT_IS_AN_INT;
2307 
2308   while (*s) {
2309     switch (*s) {
2310     case ' ':
2311       break;
2312     case '+':
2313       if (!skip) {
2314 	SvREFCNT_dec(result);
2315 	return (NULL);
2316       }
2317       break;
2318     case '0':
2319     case '1':
2320     case '2':
2321     case '3':
2322     case '4':
2323     case '5':
2324     case '6':
2325     case '7':
2326     case '8':
2327     case '9':
2328       skip = 0;
2329       if (!ignore) {
2330 	*(out++) = *s;
2331       }
2332       break;
2333     case '.':
2334       ignore = 1;
2335       break;
2336     default:
2337       SvREFCNT_dec(result);
2338       return (NULL);
2339     }
2340     s++;
2341   }
2342   *(out++) = '\0';
2343   SvCUR_set(result, out - result_c);
2344   return (result);
2345 }
2346 
2347 /* pnum must be '\0' terminated */
2348 STATIC int
2349 S_div128(pTHX_ SV *pnum, bool *done)
2350 {
2351     STRLEN len;
2352     char * const s = SvPV(pnum, len);
2353     char *t = s;
2354     int m = 0;
2355 
2356     PERL_ARGS_ASSERT_DIV128;
2357 
2358     *done = 1;
2359     while (*t) {
2360 	const int i = m * 10 + (*t - '0');
2361 	const int r = (i >> 7); /* r < 10 */
2362 	m = i & 0x7F;
2363 	if (r) {
2364 	    *done = 0;
2365 	}
2366 	*(t++) = '0' + r;
2367     }
2368     *(t++) = '\0';
2369     SvCUR_set(pnum, (STRLEN) (t - s));
2370     return (m);
2371 }
2372 
2373 /*
2374 =for apidoc packlist
2375 
2376 The engine implementing pack() Perl function.
2377 
2378 =cut
2379 */
2380 
2381 void
2382 Perl_packlist(pTHX_ SV *cat, const char *pat, const char *patend, register SV **beglist, SV **endlist )
2383 {
2384     dVAR;
2385     tempsym_t sym;
2386 
2387     PERL_ARGS_ASSERT_PACKLIST;
2388 
2389     TEMPSYM_INIT(&sym, pat, patend, FLAG_PACK);
2390 
2391     /* We're going to do changes through SvPVX(cat). Make sure it's valid.
2392        Also make sure any UTF8 flag is loaded */
2393     SvPV_force_nolen(cat);
2394     if (DO_UTF8(cat))
2395 	sym.flags |= FLAG_PARSE_UTF8 | FLAG_DO_UTF8;
2396 
2397     (void)pack_rec( cat, &sym, beglist, endlist );
2398 }
2399 
2400 /* like sv_utf8_upgrade, but also repoint the group start markers */
2401 STATIC void
2402 marked_upgrade(pTHX_ SV *sv, tempsym_t *sym_ptr) {
2403     STRLEN len;
2404     tempsym_t *group;
2405     const char *from_ptr, *from_start, *from_end, **marks, **m;
2406     char *to_start, *to_ptr;
2407 
2408     if (SvUTF8(sv)) return;
2409 
2410     from_start = SvPVX_const(sv);
2411     from_end = from_start + SvCUR(sv);
2412     for (from_ptr = from_start; from_ptr < from_end; from_ptr++)
2413 	if (!NATIVE_IS_INVARIANT(*from_ptr)) break;
2414     if (from_ptr == from_end) {
2415 	/* Simple case: no character needs to be changed */
2416 	SvUTF8_on(sv);
2417 	return;
2418     }
2419 
2420     len = (from_end-from_ptr)*UTF8_EXPAND+(from_ptr-from_start)+1;
2421     Newx(to_start, len, char);
2422     Copy(from_start, to_start, from_ptr-from_start, char);
2423     to_ptr = to_start + (from_ptr-from_start);
2424 
2425     Newx(marks, sym_ptr->level+2, const char *);
2426     for (group=sym_ptr; group; group = group->previous)
2427 	marks[group->level] = from_start + group->strbeg;
2428     marks[sym_ptr->level+1] = from_end+1;
2429     for (m = marks; *m < from_ptr; m++)
2430 	*m = to_start + (*m-from_start);
2431 
2432     for (;from_ptr < from_end; from_ptr++) {
2433 	while (*m == from_ptr) *m++ = to_ptr;
2434 	to_ptr = (char *) uvchr_to_utf8((U8 *) to_ptr, *(U8 *) from_ptr);
2435     }
2436     *to_ptr = 0;
2437 
2438     while (*m == from_ptr) *m++ = to_ptr;
2439     if (m != marks + sym_ptr->level+1) {
2440 	Safefree(marks);
2441 	Safefree(to_start);
2442 	Perl_croak(aTHX_ "Assertion: marks beyond string end");
2443     }
2444     for (group=sym_ptr; group; group = group->previous)
2445 	group->strbeg = marks[group->level] - to_start;
2446     Safefree(marks);
2447 
2448     if (SvOOK(sv)) {
2449 	if (SvIVX(sv)) {
2450 	    SvLEN_set(sv, SvLEN(sv) + SvIVX(sv));
2451 	    from_start -= SvIVX(sv);
2452 	    SvIV_set(sv, 0);
2453 	}
2454 	SvFLAGS(sv) &= ~SVf_OOK;
2455     }
2456     if (SvLEN(sv) != 0)
2457 	Safefree(from_start);
2458     SvPV_set(sv, to_start);
2459     SvCUR_set(sv, to_ptr - to_start);
2460     SvLEN_set(sv, len);
2461     SvUTF8_on(sv);
2462 }
2463 
2464 /* Exponential string grower. Makes string extension effectively O(n)
2465    needed says how many extra bytes we need (not counting the final '\0')
2466    Only grows the string if there is an actual lack of space
2467 */
2468 STATIC char *
2469 S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) {
2470     const STRLEN cur = SvCUR(sv);
2471     const STRLEN len = SvLEN(sv);
2472     STRLEN extend;
2473 
2474     PERL_ARGS_ASSERT_SV_EXP_GROW;
2475 
2476     if (len - cur > needed) return SvPVX(sv);
2477     extend = needed > len ? needed : len;
2478     return SvGROW(sv, len+extend+1);
2479 }
2480 
2481 STATIC
2482 SV **
2483 S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
2484 {
2485     dVAR;
2486     tempsym_t lookahead;
2487     I32 items  = endlist - beglist;
2488     bool found = next_symbol(symptr);
2489     bool utf8 = (symptr->flags & FLAG_PARSE_UTF8) ? 1 : 0;
2490     bool warn_utf8 = ckWARN(WARN_UTF8);
2491 
2492     PERL_ARGS_ASSERT_PACK_REC;
2493 
2494     if (symptr->level == 0 && found && symptr->code == 'U') {
2495 	marked_upgrade(aTHX_ cat, symptr);
2496 	symptr->flags |= FLAG_DO_UTF8;
2497 	utf8 = 0;
2498     }
2499     symptr->strbeg = SvCUR(cat);
2500 
2501     while (found) {
2502 	SV *fromstr;
2503 	STRLEN fromlen;
2504 	I32 len;
2505 	SV *lengthcode = NULL;
2506         I32 datumtype = symptr->code;
2507         howlen_t howlen = symptr->howlen;
2508 	char *start = SvPVX(cat);
2509 	char *cur   = start + SvCUR(cat);
2510 
2511 #define NEXTFROM (lengthcode ? lengthcode : items-- > 0 ? *beglist++ : &PL_sv_no)
2512 
2513         switch (howlen) {
2514 	  case e_star:
2515 	    len = strchr("@Xxu", TYPE_NO_MODIFIERS(datumtype)) ?
2516 		0 : items;
2517 	    break;
2518 	  default:
2519 	    /* e_no_len and e_number */
2520 	    len = symptr->length;
2521 	    break;
2522         }
2523 
2524 	if (len) {
2525 	    packprops_t props = packprops[TYPE_NO_ENDIANNESS(datumtype)];
2526 
2527 	    if (props && !(props & PACK_SIZE_UNPREDICTABLE)) {
2528 		/* We can process this letter. */
2529 		STRLEN size = props & PACK_SIZE_MASK;
2530 		GROWING(utf8, cat, start, cur, (STRLEN) len * size);
2531 	    }
2532         }
2533 
2534         /* Look ahead for next symbol. Do we have code/code? */
2535         lookahead = *symptr;
2536         found = next_symbol(&lookahead);
2537 	if (symptr->flags & FLAG_SLASH) {
2538 	    IV count;
2539 	    if (!found) Perl_croak(aTHX_ "Code missing after '/' in pack");
2540 	    if (strchr("aAZ", lookahead.code)) {
2541 		if (lookahead.howlen == e_number) count = lookahead.length;
2542 		else {
2543 		    if (items > 0) {
2544 			if (SvGAMAGIC(*beglist)) {
2545 			    /* Avoid reading the active data more than once
2546 			       by copying it to a temporary.  */
2547 			    STRLEN len;
2548 			    const char *const pv = SvPV_const(*beglist, len);
2549 			    SV *const temp
2550 				= newSVpvn_flags(pv, len,
2551 						 SVs_TEMP | SvUTF8(*beglist));
2552 			    *beglist = temp;
2553 			}
2554 			count = DO_UTF8(*beglist) ?
2555 			    sv_len_utf8(*beglist) : sv_len(*beglist);
2556 		    }
2557 		    else count = 0;
2558 		    if (lookahead.code == 'Z') count++;
2559 		}
2560 	    } else {
2561 		if (lookahead.howlen == e_number && lookahead.length < items)
2562 		    count = lookahead.length;
2563 		else count = items;
2564 	    }
2565 	    lookahead.howlen = e_number;
2566 	    lookahead.length = count;
2567 	    lengthcode = sv_2mortal(newSViv(count));
2568 	}
2569 
2570 	/* Code inside the switch must take care to properly update
2571 	   cat (CUR length and '\0' termination) if it updated *cur and
2572 	   doesn't simply leave using break */
2573 	switch(TYPE_NO_ENDIANNESS(datumtype)) {
2574 	default:
2575 	    Perl_croak(aTHX_ "Invalid type '%c' in pack",
2576 		       (int) TYPE_NO_MODIFIERS(datumtype));
2577 	case '%':
2578 	    Perl_croak(aTHX_ "'%%' may not be used in pack");
2579 	{
2580 	    char *from;
2581 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2582 	case '.' | TYPE_IS_SHRIEKING:
2583 #endif
2584 	case '.':
2585 	    if (howlen == e_star) from = start;
2586 	    else if (len == 0) from = cur;
2587 	    else {
2588 		tempsym_t *group = symptr;
2589 
2590 		while (--len && group) group = group->previous;
2591 		from = group ? start + group->strbeg : start;
2592 	    }
2593 	    fromstr = NEXTFROM;
2594 	    len = SvIV(fromstr);
2595 	    goto resize;
2596 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2597 	case '@' | TYPE_IS_SHRIEKING:
2598 #endif
2599 	case '@':
2600 	    from = start + symptr->strbeg;
2601 	  resize:
2602 #ifdef PERL_PACK_CAN_SHRIEKSIGN
2603 	    if (utf8  && !(datumtype & TYPE_IS_SHRIEKING))
2604 #else /* PERL_PACK_CAN_SHRIEKSIGN */
2605 	    if (utf8)
2606 #endif
2607 		if (len >= 0) {
2608 		    while (len && from < cur) {
2609 			from += UTF8SKIP(from);
2610 			len--;
2611 		    }
2612 		    if (from > cur)
2613 			Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2614 		    if (len) {
2615 			/* Here we know from == cur */
2616 		      grow:
2617 			GROWING(0, cat, start, cur, len);
2618 			Zero(cur, len, char);
2619 			cur += len;
2620 		    } else if (from < cur) {
2621 			len = cur - from;
2622 			goto shrink;
2623 		    } else goto no_change;
2624 		} else {
2625 		    cur = from;
2626 		    len = -len;
2627 		    goto utf8_shrink;
2628 		}
2629 	    else {
2630 		len -= cur - from;
2631 		if (len > 0) goto grow;
2632 		if (len == 0) goto no_change;
2633 		len = -len;
2634 		goto shrink;
2635 	    }
2636 	    break;
2637 	}
2638 	case '(': {
2639             tempsym_t savsym = *symptr;
2640 	    U32 group_modifiers = TYPE_MODIFIERS(datumtype & ~symptr->flags);
2641 	    symptr->flags |= group_modifiers;
2642             symptr->patend = savsym.grpend;
2643             symptr->level++;
2644 	    symptr->previous = &lookahead;
2645 	    while (len--) {
2646 		U32 was_utf8;
2647 		if (utf8) symptr->flags |=  FLAG_PARSE_UTF8;
2648 		else      symptr->flags &= ~FLAG_PARSE_UTF8;
2649 		was_utf8 = SvUTF8(cat);
2650   	        symptr->patptr = savsym.grpbeg;
2651 		beglist = pack_rec(cat, symptr, beglist, endlist);
2652 		if (SvUTF8(cat) != was_utf8)
2653 		    /* This had better be an upgrade while in utf8==0 mode */
2654 		    utf8 = 1;
2655 
2656 		if (savsym.howlen == e_star && beglist == endlist)
2657 		    break;		/* No way to continue */
2658 	    }
2659 	    items = endlist - beglist;
2660 	    lookahead.flags  = symptr->flags & ~group_modifiers;
2661 	    goto no_change;
2662 	}
2663 	case 'X' | TYPE_IS_SHRIEKING:
2664 	    if (!len)			/* Avoid division by 0 */
2665 		len = 1;
2666 	    if (utf8) {
2667 		char *hop, *last;
2668 		I32 l = len;
2669 		hop = last = start;
2670 		while (hop < cur) {
2671 		    hop += UTF8SKIP(hop);
2672 		    if (--l == 0) {
2673 			last = hop;
2674 			l = len;
2675 		    }
2676 		}
2677 		if (last > cur)
2678 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2679 		cur = last;
2680 		break;
2681 	    }
2682 	    len = (cur-start) % len;
2683 	    /* FALL THROUGH */
2684 	case 'X':
2685 	    if (utf8) {
2686 		if (len < 1) goto no_change;
2687 	      utf8_shrink:
2688 		while (len > 0) {
2689 		    if (cur <= start)
2690 			Perl_croak(aTHX_ "'%c' outside of string in pack",
2691 				   (int) TYPE_NO_MODIFIERS(datumtype));
2692 		    while (--cur, UTF8_IS_CONTINUATION(*cur)) {
2693 			if (cur <= start)
2694 			    Perl_croak(aTHX_ "'%c' outside of string in pack",
2695 				       (int) TYPE_NO_MODIFIERS(datumtype));
2696 		    }
2697 		    len--;
2698 		}
2699 	    } else {
2700 	      shrink:
2701 		if (cur - start < len)
2702 		    Perl_croak(aTHX_ "'%c' outside of string in pack",
2703 			       (int) TYPE_NO_MODIFIERS(datumtype));
2704 		cur -= len;
2705 	    }
2706 	    if (cur < start+symptr->strbeg) {
2707 		/* Make sure group starts don't point into the void */
2708 		tempsym_t *group;
2709 		const STRLEN length = cur-start;
2710 		for (group = symptr;
2711 		     group && length < group->strbeg;
2712 		     group = group->previous) group->strbeg = length;
2713 		lookahead.strbeg = length;
2714 	    }
2715 	    break;
2716 	case 'x' | TYPE_IS_SHRIEKING: {
2717 	    I32 ai32;
2718 	    if (!len)			/* Avoid division by 0 */
2719 		len = 1;
2720 	    if (utf8) ai32 = utf8_length((U8 *) start, (U8 *) cur) % len;
2721 	    else      ai32 = (cur - start) % len;
2722 	    if (ai32 == 0) goto no_change;
2723 	    len -= ai32;
2724 	}
2725 	/* FALL THROUGH */
2726 	case 'x':
2727 	    goto grow;
2728 	case 'A':
2729 	case 'Z':
2730 	case 'a': {
2731 	    const char *aptr;
2732 
2733 	    fromstr = NEXTFROM;
2734 	    aptr = SvPV_const(fromstr, fromlen);
2735 	    if (DO_UTF8(fromstr)) {
2736                 const char *end, *s;
2737 
2738 		if (!utf8 && !SvUTF8(cat)) {
2739 		    marked_upgrade(aTHX_ cat, symptr);
2740 		    lookahead.flags |= FLAG_DO_UTF8;
2741 		    lookahead.strbeg = symptr->strbeg;
2742 		    utf8 = 1;
2743 		    start = SvPVX(cat);
2744 		    cur = start + SvCUR(cat);
2745 		}
2746 		if (howlen == e_star) {
2747 		    if (utf8) goto string_copy;
2748 		    len = fromlen+1;
2749 		}
2750 		s = aptr;
2751 		end = aptr + fromlen;
2752 		fromlen = datumtype == 'Z' ? len-1 : len;
2753 		while ((I32) fromlen > 0 && s < end) {
2754 		    s += UTF8SKIP(s);
2755 		    fromlen--;
2756 		}
2757 		if (s > end)
2758 		    Perl_croak(aTHX_ "Malformed UTF-8 string in pack");
2759 		if (utf8) {
2760 		    len = fromlen;
2761 		    if (datumtype == 'Z') len++;
2762 		    fromlen = s-aptr;
2763 		    len += fromlen;
2764 
2765 		    goto string_copy;
2766 		}
2767 		fromlen = len - fromlen;
2768 		if (datumtype == 'Z') fromlen--;
2769 		if (howlen == e_star) {
2770 		    len = fromlen;
2771 		    if (datumtype == 'Z') len++;
2772 		}
2773 		GROWING(0, cat, start, cur, len);
2774 		if (!uni_to_bytes(aTHX_ &aptr, end, cur, fromlen,
2775 				  datumtype | TYPE_IS_PACK))
2776 		    Perl_croak(aTHX_ "Perl bug: predicted utf8 length not available");
2777 		cur += fromlen;
2778 		len -= fromlen;
2779 	    } else if (utf8) {
2780 		if (howlen == e_star) {
2781 		    len = fromlen;
2782 		    if (datumtype == 'Z') len++;
2783 		}
2784 		if (len <= (I32) fromlen) {
2785 		    fromlen = len;
2786 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2787 		}
2788 		/* assumes a byte expands to at most UTF8_EXPAND bytes on
2789 		   upgrade, so:
2790 		   expected_length <= from_len*UTF8_EXPAND + (len-from_len) */
2791 		GROWING(0, cat, start, cur, fromlen*(UTF8_EXPAND-1)+len);
2792 		len -= fromlen;
2793 		while (fromlen > 0) {
2794 		    cur = (char *) uvchr_to_utf8((U8 *) cur, * (U8 *) aptr);
2795 		    aptr++;
2796 		    fromlen--;
2797 		}
2798 	    } else {
2799 	      string_copy:
2800 		if (howlen == e_star) {
2801 		    len = fromlen;
2802 		    if (datumtype == 'Z') len++;
2803 		}
2804 		if (len <= (I32) fromlen) {
2805 		    fromlen = len;
2806 		    if (datumtype == 'Z' && fromlen > 0) fromlen--;
2807 		}
2808 		GROWING(0, cat, start, cur, len);
2809 		Copy(aptr, cur, fromlen, char);
2810 		cur += fromlen;
2811 		len -= fromlen;
2812 	    }
2813 	    memset(cur, datumtype == 'A' ? ' ' : '\0', len);
2814 	    cur += len;
2815 	    SvTAINT(cat);
2816 	    break;
2817 	}
2818 	case 'B':
2819 	case 'b': {
2820 	    const char *str, *end;
2821 	    I32 l, field_len;
2822 	    U8 bits;
2823 	    bool utf8_source;
2824 	    U32 utf8_flags;
2825 
2826 	    fromstr = NEXTFROM;
2827 	    str = SvPV_const(fromstr, fromlen);
2828 	    end = str + fromlen;
2829 	    if (DO_UTF8(fromstr)) {
2830 		utf8_source = TRUE;
2831 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2832 	    } else {
2833 		utf8_source = FALSE;
2834 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2835 	    }
2836 	    if (howlen == e_star) len = fromlen;
2837 	    field_len = (len+7)/8;
2838 	    GROWING(utf8, cat, start, cur, field_len);
2839 	    if (len > (I32)fromlen) len = fromlen;
2840 	    bits = 0;
2841 	    l = 0;
2842 	    if (datumtype == 'B')
2843 		while (l++ < len) {
2844 		    if (utf8_source) {
2845 			UV val = 0;
2846 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2847 			bits |= val & 1;
2848 		    } else bits |= *str++ & 1;
2849 		    if (l & 7) bits <<= 1;
2850 		    else {
2851 			PUSH_BYTE(utf8, cur, bits);
2852 			bits = 0;
2853 		    }
2854 		}
2855 	    else
2856 		/* datumtype == 'b' */
2857 		while (l++ < len) {
2858 		    if (utf8_source) {
2859 			UV val = 0;
2860 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2861 			if (val & 1) bits |= 0x80;
2862 		    } else if (*str++ & 1)
2863 			bits |= 0x80;
2864 		    if (l & 7) bits >>= 1;
2865 		    else {
2866 			PUSH_BYTE(utf8, cur, bits);
2867 			bits = 0;
2868 		    }
2869 		}
2870 	    l--;
2871 	    if (l & 7) {
2872 		if (datumtype == 'B')
2873 		    bits <<= 7 - (l & 7);
2874 		else
2875 		    bits >>= 7 - (l & 7);
2876 		PUSH_BYTE(utf8, cur, bits);
2877 		l += 7;
2878 	    }
2879 	    /* Determine how many chars are left in the requested field */
2880 	    l /= 8;
2881 	    if (howlen == e_star) field_len = 0;
2882 	    else field_len -= l;
2883 	    Zero(cur, field_len, char);
2884 	    cur += field_len;
2885 	    break;
2886 	}
2887 	case 'H':
2888 	case 'h': {
2889 	    const char *str, *end;
2890 	    I32 l, field_len;
2891 	    U8 bits;
2892 	    bool utf8_source;
2893 	    U32 utf8_flags;
2894 
2895 	    fromstr = NEXTFROM;
2896 	    str = SvPV_const(fromstr, fromlen);
2897 	    end = str + fromlen;
2898 	    if (DO_UTF8(fromstr)) {
2899 		utf8_source = TRUE;
2900 		utf8_flags  = warn_utf8 ? 0 : UTF8_ALLOW_ANY;
2901 	    } else {
2902 		utf8_source = FALSE;
2903 		utf8_flags  = 0; /* Unused, but keep compilers happy */
2904 	    }
2905 	    if (howlen == e_star) len = fromlen;
2906 	    field_len = (len+1)/2;
2907 	    GROWING(utf8, cat, start, cur, field_len);
2908 	    if (!utf8 && len > (I32)fromlen) len = fromlen;
2909 	    bits = 0;
2910 	    l = 0;
2911 	    if (datumtype == 'H')
2912 		while (l++ < len) {
2913 		    if (utf8_source) {
2914 			UV val = 0;
2915 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2916 			if (val < 256 && isALPHA(val))
2917 			    bits |= (val + 9) & 0xf;
2918 			else
2919 			    bits |= val & 0xf;
2920 		    } else if (isALPHA(*str))
2921 			bits |= (*str++ + 9) & 0xf;
2922 		    else
2923 			bits |= *str++ & 0xf;
2924 		    if (l & 1) bits <<= 4;
2925 		    else {
2926 			PUSH_BYTE(utf8, cur, bits);
2927 			bits = 0;
2928 		    }
2929 		}
2930 	    else
2931 		while (l++ < len) {
2932 		    if (utf8_source) {
2933 			UV val = 0;
2934 			NEXT_UNI_VAL(val, cur, str, end, utf8_flags);
2935 			if (val < 256 && isALPHA(val))
2936 			    bits |= ((val + 9) & 0xf) << 4;
2937 			else
2938 			    bits |= (val & 0xf) << 4;
2939 		    } else if (isALPHA(*str))
2940 			bits |= ((*str++ + 9) & 0xf) << 4;
2941 		    else
2942 			bits |= (*str++ & 0xf) << 4;
2943 		    if (l & 1) bits >>= 4;
2944 		    else {
2945 			PUSH_BYTE(utf8, cur, bits);
2946 			bits = 0;
2947 		    }
2948 		}
2949 	    l--;
2950 	    if (l & 1) {
2951 		PUSH_BYTE(utf8, cur, bits);
2952 		l++;
2953 	    }
2954 	    /* Determine how many chars are left in the requested field */
2955 	    l /= 2;
2956 	    if (howlen == e_star) field_len = 0;
2957 	    else field_len -= l;
2958 	    Zero(cur, field_len, char);
2959 	    cur += field_len;
2960 	    break;
2961 	}
2962 	case 'c':
2963 	    while (len-- > 0) {
2964 		IV aiv;
2965 		fromstr = NEXTFROM;
2966 		aiv = SvIV(fromstr);
2967 		if ((-128 > aiv || aiv > 127) &&
2968 		    ckWARN(WARN_PACK))
2969 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
2970 				"Character in 'c' format wrapped in pack");
2971 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2972 	    }
2973 	    break;
2974 	case 'C':
2975 	    if (len == 0) {
2976 		utf8 = (symptr->flags & FLAG_DO_UTF8) ? 1 : 0;
2977 		break;
2978 	    }
2979 	    while (len-- > 0) {
2980 		IV aiv;
2981 		fromstr = NEXTFROM;
2982 		aiv = SvIV(fromstr);
2983 		if ((0 > aiv || aiv > 0xff) &&
2984 		    ckWARN(WARN_PACK))
2985 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
2986 				"Character in 'C' format wrapped in pack");
2987 		PUSH_BYTE(utf8, cur, (U8)(aiv & 0xff));
2988 	    }
2989 	    break;
2990 	case 'W': {
2991 	    char *end;
2992 	    U8 in_bytes = (U8)IN_BYTES;
2993 
2994 	    end = start+SvLEN(cat)-1;
2995 	    if (utf8) end -= UTF8_MAXLEN-1;
2996 	    while (len-- > 0) {
2997 		UV auv;
2998 		fromstr = NEXTFROM;
2999 		auv = SvUV(fromstr);
3000 		if (in_bytes) auv = auv % 0x100;
3001 		if (utf8) {
3002 		  W_utf8:
3003 		    if (cur > end) {
3004 			*cur = '\0';
3005 			SvCUR_set(cat, cur - start);
3006 
3007 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3008 			end = start+SvLEN(cat)-UTF8_MAXLEN;
3009 		    }
3010 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur,
3011 						       NATIVE_TO_UNI(auv),
3012 						       warn_utf8 ?
3013 						       0 : UNICODE_ALLOW_ANY);
3014 		} else {
3015 		    if (auv >= 0x100) {
3016 			if (!SvUTF8(cat)) {
3017 			    *cur = '\0';
3018 			    SvCUR_set(cat, cur - start);
3019 			    marked_upgrade(aTHX_ cat, symptr);
3020 			    lookahead.flags |= FLAG_DO_UTF8;
3021 			    lookahead.strbeg = symptr->strbeg;
3022 			    utf8 = 1;
3023 			    start = SvPVX(cat);
3024 			    cur = start + SvCUR(cat);
3025 			    end = start+SvLEN(cat)-UTF8_MAXLEN;
3026 			    goto W_utf8;
3027 			}
3028 			if (ckWARN(WARN_PACK))
3029 			    Perl_warner(aTHX_ packWARN(WARN_PACK),
3030 					"Character in 'W' format wrapped in pack");
3031 			auv &= 0xff;
3032 		    }
3033 		    if (cur >= end) {
3034 			*cur = '\0';
3035 			SvCUR_set(cat, cur - start);
3036 			GROWING(0, cat, start, cur, len+1);
3037 			end = start+SvLEN(cat)-1;
3038 		    }
3039 		    *(U8 *) cur++ = (U8)auv;
3040 		}
3041 	    }
3042 	    break;
3043 	}
3044 	case 'U': {
3045 	    char *end;
3046 
3047 	    if (len == 0) {
3048 		if (!(symptr->flags & FLAG_DO_UTF8)) {
3049 		    marked_upgrade(aTHX_ cat, symptr);
3050 		    lookahead.flags |= FLAG_DO_UTF8;
3051 		    lookahead.strbeg = symptr->strbeg;
3052 		}
3053 		utf8 = 0;
3054 		goto no_change;
3055 	    }
3056 
3057 	    end = start+SvLEN(cat);
3058 	    if (!utf8) end -= UTF8_MAXLEN;
3059 	    while (len-- > 0) {
3060 		UV auv;
3061 		fromstr = NEXTFROM;
3062 		auv = SvUV(fromstr);
3063 		if (utf8) {
3064 		    U8 buffer[UTF8_MAXLEN], *endb;
3065 		    endb = uvuni_to_utf8_flags(buffer, auv,
3066 					       warn_utf8 ?
3067 					       0 : UNICODE_ALLOW_ANY);
3068 		    if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
3069 			*cur = '\0';
3070 			SvCUR_set(cat, cur - start);
3071 			GROWING(0, cat, start, cur,
3072 				len+(endb-buffer)*UTF8_EXPAND);
3073 			end = start+SvLEN(cat);
3074 		    }
3075 		    cur = bytes_to_uni(buffer, endb-buffer, cur);
3076 		} else {
3077 		    if (cur >= end) {
3078 			*cur = '\0';
3079 			SvCUR_set(cat, cur - start);
3080 			GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
3081 			end = start+SvLEN(cat)-UTF8_MAXLEN;
3082 		    }
3083 		    cur = (char *) uvuni_to_utf8_flags((U8 *) cur, auv,
3084 						       warn_utf8 ?
3085 						       0 : UNICODE_ALLOW_ANY);
3086 		}
3087 	    }
3088 	    break;
3089 	}
3090 	/* Float and double added by gnb@melba.bby.oz.au  22/11/89 */
3091 	case 'f':
3092 	    while (len-- > 0) {
3093 		float afloat;
3094 		NV anv;
3095 		fromstr = NEXTFROM;
3096 		anv = SvNV(fromstr);
3097 #ifdef __VOS__
3098 		/* VOS does not automatically map a floating-point overflow
3099 		   during conversion from double to float into infinity, so we
3100 		   do it by hand.  This code should either be generalized for
3101 		   any OS that needs it, or removed if and when VOS implements
3102 		   posix-976 (suggestion to support mapping to infinity).
3103 		   Paul.Green@stratus.com 02-04-02.  */
3104 {
3105 extern const float _float_constants[];
3106 		if (anv > FLT_MAX)
3107 		    afloat = _float_constants[0];   /* single prec. inf. */
3108 		else if (anv < -FLT_MAX)
3109 		    afloat = _float_constants[0];   /* single prec. inf. */
3110 		else afloat = (float) anv;
3111 }
3112 #else /* __VOS__ */
3113 # if defined(VMS) && !defined(__IEEE_FP)
3114 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3115 		 * on Alpha; fake it if we don't have them.
3116 		 */
3117 		if (anv > FLT_MAX)
3118 		    afloat = FLT_MAX;
3119 		else if (anv < -FLT_MAX)
3120 		    afloat = -FLT_MAX;
3121 		else afloat = (float)anv;
3122 # else
3123 		afloat = (float)anv;
3124 # endif
3125 #endif /* __VOS__ */
3126 		DO_BO_PACK_N(afloat, float);
3127 		PUSH_VAR(utf8, cur, afloat);
3128 	    }
3129 	    break;
3130 	case 'd':
3131 	    while (len-- > 0) {
3132 		double adouble;
3133 		NV anv;
3134 		fromstr = NEXTFROM;
3135 		anv = SvNV(fromstr);
3136 #ifdef __VOS__
3137 		/* VOS does not automatically map a floating-point overflow
3138 		   during conversion from long double to double into infinity,
3139 		   so we do it by hand.  This code should either be generalized
3140 		   for any OS that needs it, or removed if and when VOS
3141 		   implements posix-976 (suggestion to support mapping to
3142 		   infinity).  Paul.Green@stratus.com 02-04-02.  */
3143 {
3144 extern const double _double_constants[];
3145 		if (anv > DBL_MAX)
3146 		    adouble = _double_constants[0];   /* double prec. inf. */
3147 		else if (anv < -DBL_MAX)
3148 		    adouble = _double_constants[0];   /* double prec. inf. */
3149 		else adouble = (double) anv;
3150 }
3151 #else /* __VOS__ */
3152 # if defined(VMS) && !defined(__IEEE_FP)
3153 		/* IEEE fp overflow shenanigans are unavailable on VAX and optional
3154 		 * on Alpha; fake it if we don't have them.
3155 		 */
3156 		if (anv > DBL_MAX)
3157 		    adouble = DBL_MAX;
3158 		else if (anv < -DBL_MAX)
3159 		    adouble = -DBL_MAX;
3160 		else adouble = (double)anv;
3161 # else
3162 		adouble = (double)anv;
3163 # endif
3164 #endif /* __VOS__ */
3165 		DO_BO_PACK_N(adouble, double);
3166 		PUSH_VAR(utf8, cur, adouble);
3167 	    }
3168 	    break;
3169 	case 'F': {
3170 	    NV anv;
3171 	    Zero(&anv, 1, NV); /* can be long double with unused bits */
3172 	    while (len-- > 0) {
3173 		fromstr = NEXTFROM;
3174 		anv = SvNV(fromstr);
3175 		DO_BO_PACK_N(anv, NV);
3176 		PUSH_VAR(utf8, cur, anv);
3177 	    }
3178 	    break;
3179 	}
3180 #if defined(HAS_LONG_DOUBLE) && defined(USE_LONG_DOUBLE)
3181 	case 'D': {
3182 	    long double aldouble;
3183 	    /* long doubles can have unused bits, which may be nonzero */
3184 	    Zero(&aldouble, 1, long double);
3185 	    while (len-- > 0) {
3186 		fromstr = NEXTFROM;
3187 		aldouble = (long double)SvNV(fromstr);
3188 		DO_BO_PACK_N(aldouble, long double);
3189 		PUSH_VAR(utf8, cur, aldouble);
3190 	    }
3191 	    break;
3192 	}
3193 #endif
3194 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3195 	case 'n' | TYPE_IS_SHRIEKING:
3196 #endif
3197 	case 'n':
3198 	    while (len-- > 0) {
3199 		I16 ai16;
3200 		fromstr = NEXTFROM;
3201 		ai16 = (I16)SvIV(fromstr);
3202 #ifdef HAS_HTONS
3203 		ai16 = PerlSock_htons(ai16);
3204 #endif
3205 		PUSH16(utf8, cur, &ai16);
3206 	    }
3207 	    break;
3208 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3209 	case 'v' | TYPE_IS_SHRIEKING:
3210 #endif
3211 	case 'v':
3212 	    while (len-- > 0) {
3213 		I16 ai16;
3214 		fromstr = NEXTFROM;
3215 		ai16 = (I16)SvIV(fromstr);
3216 #ifdef HAS_HTOVS
3217 		ai16 = htovs(ai16);
3218 #endif
3219 		PUSH16(utf8, cur, &ai16);
3220 	    }
3221 	    break;
3222         case 'S' | TYPE_IS_SHRIEKING:
3223 #if SHORTSIZE != SIZE16
3224 	    while (len-- > 0) {
3225 		unsigned short aushort;
3226 		fromstr = NEXTFROM;
3227 		aushort = SvUV(fromstr);
3228 		DO_BO_PACK(aushort, s);
3229 		PUSH_VAR(utf8, cur, aushort);
3230 	    }
3231             break;
3232 #else
3233             /* Fall through! */
3234 #endif
3235 	case 'S':
3236 	    while (len-- > 0) {
3237 		U16 au16;
3238 		fromstr = NEXTFROM;
3239 		au16 = (U16)SvUV(fromstr);
3240 		DO_BO_PACK(au16, 16);
3241 		PUSH16(utf8, cur, &au16);
3242 	    }
3243 	    break;
3244 	case 's' | TYPE_IS_SHRIEKING:
3245 #if SHORTSIZE != SIZE16
3246 	    while (len-- > 0) {
3247 		short ashort;
3248 		fromstr = NEXTFROM;
3249 		ashort = SvIV(fromstr);
3250 		DO_BO_PACK(ashort, s);
3251 		PUSH_VAR(utf8, cur, ashort);
3252 	    }
3253             break;
3254 #else
3255             /* Fall through! */
3256 #endif
3257 	case 's':
3258 	    while (len-- > 0) {
3259 		I16 ai16;
3260 		fromstr = NEXTFROM;
3261 		ai16 = (I16)SvIV(fromstr);
3262 		DO_BO_PACK(ai16, 16);
3263 		PUSH16(utf8, cur, &ai16);
3264 	    }
3265 	    break;
3266 	case 'I':
3267 	case 'I' | TYPE_IS_SHRIEKING:
3268 	    while (len-- > 0) {
3269 		unsigned int auint;
3270 		fromstr = NEXTFROM;
3271 		auint = SvUV(fromstr);
3272 		DO_BO_PACK(auint, i);
3273 		PUSH_VAR(utf8, cur, auint);
3274 	    }
3275 	    break;
3276 	case 'j':
3277 	    while (len-- > 0) {
3278 		IV aiv;
3279 		fromstr = NEXTFROM;
3280 		aiv = SvIV(fromstr);
3281 #if IVSIZE == INTSIZE
3282 		DO_BO_PACK(aiv, i);
3283 #elif IVSIZE == LONGSIZE
3284 		DO_BO_PACK(aiv, l);
3285 #elif defined(HAS_QUAD) && IVSIZE == U64SIZE
3286 		DO_BO_PACK(aiv, 64);
3287 #else
3288 		Perl_croak(aTHX_ "'j' not supported on this platform");
3289 #endif
3290 		PUSH_VAR(utf8, cur, aiv);
3291 	    }
3292 	    break;
3293 	case 'J':
3294 	    while (len-- > 0) {
3295 		UV auv;
3296 		fromstr = NEXTFROM;
3297 		auv = SvUV(fromstr);
3298 #if UVSIZE == INTSIZE
3299 		DO_BO_PACK(auv, i);
3300 #elif UVSIZE == LONGSIZE
3301 		DO_BO_PACK(auv, l);
3302 #elif defined(HAS_QUAD) && UVSIZE == U64SIZE
3303 		DO_BO_PACK(auv, 64);
3304 #else
3305 		Perl_croak(aTHX_ "'J' not supported on this platform");
3306 #endif
3307 		PUSH_VAR(utf8, cur, auv);
3308 	    }
3309 	    break;
3310 	case 'w':
3311             while (len-- > 0) {
3312 		NV anv;
3313 		fromstr = NEXTFROM;
3314 		anv = SvNV(fromstr);
3315 
3316 		if (anv < 0) {
3317 		    *cur = '\0';
3318 		    SvCUR_set(cat, cur - start);
3319 		    Perl_croak(aTHX_ "Cannot compress negative numbers in pack");
3320 		}
3321 
3322                 /* 0xFFFFFFFFFFFFFFFF may cast to 18446744073709551616.0,
3323                    which is == UV_MAX_P1. IOK is fine (instead of UV_only), as
3324                    any negative IVs will have already been got by the croak()
3325                    above. IOK is untrue for fractions, so we test them
3326                    against UV_MAX_P1.  */
3327 		if (SvIOK(fromstr) || anv < UV_MAX_P1) {
3328 		    char   buf[(sizeof(UV)*CHAR_BIT)/7+1];
3329 		    char  *in = buf + sizeof(buf);
3330 		    UV     auv = SvUV(fromstr);
3331 
3332 		    do {
3333 			*--in = (char)((auv & 0x7f) | 0x80);
3334 			auv >>= 7;
3335 		    } while (auv);
3336 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3337 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3338 				       in, (buf + sizeof(buf)) - in);
3339 		} else if (SvPOKp(fromstr))
3340 		    goto w_string;
3341 		else if (SvNOKp(fromstr)) {
3342 		    /* 10**NV_MAX_10_EXP is the largest power of 10
3343 		       so 10**(NV_MAX_10_EXP+1) is definately unrepresentable
3344 		       given 10**(NV_MAX_10_EXP+1) == 128 ** x solve for x:
3345 		       x = (NV_MAX_10_EXP+1) * log (10) / log (128)
3346 		       And with that many bytes only Inf can overflow.
3347 		       Some C compilers are strict about integral constant
3348 		       expressions so we conservatively divide by a slightly
3349 		       smaller integer instead of multiplying by the exact
3350 		       floating-point value.
3351 		    */
3352 #ifdef NV_MAX_10_EXP
3353 		    /* char   buf[1 + (int)((NV_MAX_10_EXP + 1) * 0.47456)]; -- invalid C */
3354 		    char   buf[1 + (int)((NV_MAX_10_EXP + 1) / 2)]; /* valid C */
3355 #else
3356 		    /* char   buf[1 + (int)((308 + 1) * 0.47456)]; -- invalid C */
3357 		    char   buf[1 + (int)((308 + 1) / 2)]; /* valid C */
3358 #endif
3359 		    char  *in = buf + sizeof(buf);
3360 
3361 		    anv = Perl_floor(anv);
3362 		    do {
3363 			const NV next = Perl_floor(anv / 128);
3364 			if (in <= buf)  /* this cannot happen ;-) */
3365 			    Perl_croak(aTHX_ "Cannot compress integer in pack");
3366 			*--in = (unsigned char)(anv - (next * 128)) | 0x80;
3367 			anv = next;
3368 		    } while (anv > 0);
3369 		    buf[sizeof(buf) - 1] &= 0x7f; /* clear continue bit */
3370 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3371 				       in, (buf + sizeof(buf)) - in);
3372 		} else {
3373 		    const char     *from;
3374 		    char           *result, *in;
3375 		    SV             *norm;
3376 		    STRLEN          len;
3377 		    bool            done;
3378 
3379 		  w_string:
3380 		    /* Copy string and check for compliance */
3381 		    from = SvPV_const(fromstr, len);
3382 		    if ((norm = is_an_int(from, len)) == NULL)
3383 			Perl_croak(aTHX_ "Can only compress unsigned integers in pack");
3384 
3385 		    Newx(result, len, char);
3386 		    in = result + len;
3387 		    done = FALSE;
3388 		    while (!done) *--in = div128(norm, &done) | 0x80;
3389 		    result[len - 1] &= 0x7F; /* clear continue bit */
3390 		    PUSH_GROWING_BYTES(utf8, cat, start, cur,
3391 				       in, (result + len) - in);
3392 		    Safefree(result);
3393 		    SvREFCNT_dec(norm);	/* free norm */
3394 		}
3395 	    }
3396             break;
3397 	case 'i':
3398 	case 'i' | TYPE_IS_SHRIEKING:
3399 	    while (len-- > 0) {
3400 		int aint;
3401 		fromstr = NEXTFROM;
3402 		aint = SvIV(fromstr);
3403 		DO_BO_PACK(aint, i);
3404 		PUSH_VAR(utf8, cur, aint);
3405 	    }
3406 	    break;
3407 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3408 	case 'N' | TYPE_IS_SHRIEKING:
3409 #endif
3410 	case 'N':
3411 	    while (len-- > 0) {
3412 		U32 au32;
3413 		fromstr = NEXTFROM;
3414 		au32 = SvUV(fromstr);
3415 #ifdef HAS_HTONL
3416 		au32 = PerlSock_htonl(au32);
3417 #endif
3418 		PUSH32(utf8, cur, &au32);
3419 	    }
3420 	    break;
3421 #ifdef PERL_PACK_CAN_SHRIEKSIGN
3422 	case 'V' | TYPE_IS_SHRIEKING:
3423 #endif
3424 	case 'V':
3425 	    while (len-- > 0) {
3426 		U32 au32;
3427 		fromstr = NEXTFROM;
3428 		au32 = SvUV(fromstr);
3429 #ifdef HAS_HTOVL
3430 		au32 = htovl(au32);
3431 #endif
3432 		PUSH32(utf8, cur, &au32);
3433 	    }
3434 	    break;
3435 	case 'L' | TYPE_IS_SHRIEKING:
3436 #if LONGSIZE != SIZE32
3437 	    while (len-- > 0) {
3438 		unsigned long aulong;
3439 		fromstr = NEXTFROM;
3440 		aulong = SvUV(fromstr);
3441 		DO_BO_PACK(aulong, l);
3442 		PUSH_VAR(utf8, cur, aulong);
3443 	    }
3444 	    break;
3445 #else
3446             /* Fall though! */
3447 #endif
3448 	case 'L':
3449 	    while (len-- > 0) {
3450 		U32 au32;
3451 		fromstr = NEXTFROM;
3452 		au32 = SvUV(fromstr);
3453 		DO_BO_PACK(au32, 32);
3454 		PUSH32(utf8, cur, &au32);
3455 	    }
3456 	    break;
3457 	case 'l' | TYPE_IS_SHRIEKING:
3458 #if LONGSIZE != SIZE32
3459 	    while (len-- > 0) {
3460 		long along;
3461 		fromstr = NEXTFROM;
3462 		along = SvIV(fromstr);
3463 		DO_BO_PACK(along, l);
3464 		PUSH_VAR(utf8, cur, along);
3465 	    }
3466 	    break;
3467 #else
3468             /* Fall though! */
3469 #endif
3470 	case 'l':
3471             while (len-- > 0) {
3472 		I32 ai32;
3473 		fromstr = NEXTFROM;
3474 		ai32 = SvIV(fromstr);
3475 		DO_BO_PACK(ai32, 32);
3476 		PUSH32(utf8, cur, &ai32);
3477 	    }
3478 	    break;
3479 #ifdef HAS_QUAD
3480 	case 'Q':
3481 	    while (len-- > 0) {
3482 		Uquad_t auquad;
3483 		fromstr = NEXTFROM;
3484 		auquad = (Uquad_t) SvUV(fromstr);
3485 		DO_BO_PACK(auquad, 64);
3486 		PUSH_VAR(utf8, cur, auquad);
3487 	    }
3488 	    break;
3489 	case 'q':
3490 	    while (len-- > 0) {
3491 		Quad_t aquad;
3492 		fromstr = NEXTFROM;
3493 		aquad = (Quad_t)SvIV(fromstr);
3494 		DO_BO_PACK(aquad, 64);
3495 		PUSH_VAR(utf8, cur, aquad);
3496 	    }
3497 	    break;
3498 #endif /* HAS_QUAD */
3499 	case 'P':
3500 	    len = 1;		/* assume SV is correct length */
3501 	    GROWING(utf8, cat, start, cur, sizeof(char *));
3502 	    /* Fall through! */
3503 	case 'p':
3504 	    while (len-- > 0) {
3505 		const char *aptr;
3506 
3507 		fromstr = NEXTFROM;
3508 		SvGETMAGIC(fromstr);
3509 		if (!SvOK(fromstr)) aptr = NULL;
3510 		else {
3511 		    /* XXX better yet, could spirit away the string to
3512 		     * a safe spot and hang on to it until the result
3513 		     * of pack() (and all copies of the result) are
3514 		     * gone.
3515 		     */
3516 		    if ((SvTEMP(fromstr) || (SvPADTMP(fromstr) &&
3517 			     !SvREADONLY(fromstr))) && ckWARN(WARN_PACK)) {
3518 			Perl_warner(aTHX_ packWARN(WARN_PACK),
3519 				    "Attempt to pack pointer to temporary value");
3520 		    }
3521 		    if (SvPOK(fromstr) || SvNIOK(fromstr))
3522 			aptr = SvPV_nomg_const_nolen(fromstr);
3523 		    else
3524 			aptr = SvPV_force_flags_nolen(fromstr, 0);
3525 		}
3526 		DO_BO_PACK_PC(aptr);
3527 		PUSH_VAR(utf8, cur, aptr);
3528 	    }
3529 	    break;
3530 	case 'u': {
3531 	    const char *aptr, *aend;
3532 	    bool from_utf8;
3533 
3534 	    fromstr = NEXTFROM;
3535 	    if (len <= 2) len = 45;
3536 	    else len = len / 3 * 3;
3537 	    if (len >= 64) {
3538 		if (ckWARN(WARN_PACK))
3539 		    Perl_warner(aTHX_ packWARN(WARN_PACK),
3540 			    "Field too wide in 'u' format in pack");
3541 		len = 63;
3542 	    }
3543 	    aptr = SvPV_const(fromstr, fromlen);
3544 	    from_utf8 = DO_UTF8(fromstr);
3545 	    if (from_utf8) {
3546 		aend = aptr + fromlen;
3547 		fromlen = sv_len_utf8(fromstr);
3548 	    } else aend = NULL; /* Unused, but keep compilers happy */
3549 	    GROWING(utf8, cat, start, cur, (fromlen+2) / 3 * 4 + (fromlen+len-1)/len * 2);
3550 	    while (fromlen > 0) {
3551 		U8 *end;
3552 		I32 todo;
3553 		U8 hunk[1+63/3*4+1];
3554 
3555 		if ((I32)fromlen > len)
3556 		    todo = len;
3557 		else
3558 		    todo = fromlen;
3559 		if (from_utf8) {
3560 		    char buffer[64];
3561 		    if (!uni_to_bytes(aTHX_ &aptr, aend, buffer, todo,
3562 				      'u' | TYPE_IS_PACK)) {
3563 			*cur = '\0';
3564 			SvCUR_set(cat, cur - start);
3565 			Perl_croak(aTHX_ "Assertion: string is shorter than advertised");
3566 		    }
3567 		    end = doencodes(hunk, buffer, todo);
3568 		} else {
3569 		    end = doencodes(hunk, aptr, todo);
3570 		    aptr += todo;
3571 		}
3572 		PUSH_BYTES(utf8, cur, hunk, end-hunk);
3573 		fromlen -= todo;
3574 	    }
3575 	    break;
3576 	}
3577 	}
3578 	*cur = '\0';
3579 	SvCUR_set(cat, cur - start);
3580       no_change:
3581 	*symptr = lookahead;
3582     }
3583     return beglist;
3584 }
3585 #undef NEXTFROM
3586 
3587 
3588 PP(pp_pack)
3589 {
3590     dVAR; dSP; dMARK; dORIGMARK; dTARGET;
3591     register SV *cat = TARG;
3592     STRLEN fromlen;
3593     SV *pat_sv = *++MARK;
3594     register const char *pat = SvPV_const(pat_sv, fromlen);
3595     register const char *patend = pat + fromlen;
3596 
3597     MARK++;
3598     sv_setpvs(cat, "");
3599     SvUTF8_off(cat);
3600 
3601     packlist(cat, pat, patend, MARK, SP + 1);
3602 
3603     SvSETMAGIC(cat);
3604     SP = ORIGMARK;
3605     PUSHs(cat);
3606     RETURN;
3607 }
3608 
3609 /*
3610  * Local variables:
3611  * c-indentation-style: bsd
3612  * c-basic-offset: 4
3613  * indent-tabs-mode: t
3614  * End:
3615  *
3616  * ex: set ts=8 sts=4 sw=4 noet:
3617  */
3618